Commit 0bde5356 authored by dasharatha.vamshi

added models for fy676a and fy664g

parent c0847703
...@@ -4,7 +4,6 @@ import numpy as np ...@@ -4,7 +4,6 @@ import numpy as np
import pandas as pd import pandas as pd
from loguru import logger from loguru import logger
from sklearn import metrics from sklearn import metrics
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from scripts.constants.constants import RawConstants from scripts.constants.constants import RawConstants
...@@ -19,7 +18,9 @@ from scripts.section_utils.sheet_supply_section import preprocess_sheet_section ...@@ -19,7 +18,9 @@ from scripts.section_utils.sheet_supply_section import preprocess_sheet_section
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
def model_trainer(df_grouped): def model_trainer(df_grouped, index_no):
cols_x, cols_y, saved_model = None, None, None
if index_no == 1250:
cols_x = ['temperature_ws_side_std', 'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean', cols_x = ['temperature_ws_side_std', 'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean',
'_calendar_current_mean', 'electric_energy_mean', 'seat_temperature_immediately_after_bof_mean', '_calendar_current_mean', 'electric_energy_mean', 'seat_temperature_immediately_after_bof_mean',
'Weighted_NITROGEN_type', 'ram_pressure_mean', 'surface_temperature_center_std', 'Weighted_NITROGEN_type', 'ram_pressure_mean', 'surface_temperature_center_std',
...@@ -29,20 +30,39 @@ def model_trainer(df_grouped): ...@@ -29,20 +30,39 @@ def model_trainer(df_grouped):
'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'Weighted_ASH_type', 'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'Weighted_ASH_type',
'Weighted_PO_type', 'drilled_side_right_exit_side_cooling_water_flow_rate_std'] 'Weighted_PO_type', 'drilled_side_right_exit_side_cooling_water_flow_rate_std']
cols_y = "viscosity" cols_y = "viscosity"
saved_model = ModelLoader({
"type": "mlflow.sklearn",
"path": "models/fy676a"
}).load_model()
elif index_no == 3294:
cols_x = ['Weighted_ASH_type', 'Weighted_NITROGEN_type', 'electric_energy_mean',
'drilled_side_left_inlet_side_cooling_water_temperature_std',
'seat_temperature_immediately_after_bof_mean',
'mixer_rotor_left_outlet_side_cooling_water_flow_rate_mean', 'humidity_mean',
'drilled_side_left_exit_side_cooling_water_flow_rate_mean',
'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'calendar_bank_load_max',
'drilled_side_right_inlet_side_cooling_water_flow_rate_mean', 'Weighted_PRI_type',
'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean', 'temperature_ws_side_std',
'dust_cv\nspeed_std', 'mixer_rotor_right_inlet_side_coolant_temperature_mean', 'ram_position_std',
'drilled_side_right_exit_side_cooling_water_temperature_std',
'calender_roll_upper_side__outlet__side_cooling_water_temperature_std',
'Weighted_Temperature during transportation_type[℃]']
cols_y = "viscosity"
saved_model = ModelLoader({
"type": "mlflow.sklearn",
"path": "models/fy664g"
}).load_model()
req_cols = cols_x + ['viscosity'] req_cols = cols_x + ['viscosity']
features = df_grouped[cols_x] features = df_grouped[cols_x]
labels = df_grouped[cols_y] labels = df_grouped[cols_y]
df_grouped[req_cols].to_csv('final.csv') # df_grouped[req_cols].to_csv('final.csv')
# Split the data into training and testing sets # Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(features, labels, random_state=42, test_size=0.25) x_train, x_test, y_train, y_test = train_test_split(features, labels, random_state=42, test_size=0.25)
print(f'x_train shape - {x_train.shape}') print(f'x_train shape - {x_train.shape}')
print(f'x_test shape - {x_test.shape}') print(f'x_test shape - {x_test.shape}')
print(f'y_train shape - {y_train.shape}') print(f'y_train shape - {y_train.shape}')
print(f'y_test shape - {y_test.shape}') print(f'y_test shape - {y_test.shape}')
saved_model = ModelLoader({
"type": "mlflow.sklearn",
"path": "models/fy676a"
}).load_model()
y_pred = saved_model.predict(x_test) y_pred = saved_model.predict(x_test)
predictions = [round(value, 2) for value in y_pred] predictions = [round(value, 2) for value in y_pred]
...@@ -72,7 +92,10 @@ def model_trainer(df_grouped): ...@@ -72,7 +92,10 @@ def model_trainer(df_grouped):
def read_raw_data(raw_path, raw_skip_rows): def read_raw_data(raw_path, raw_skip_rows):
try:
df = pd.read_excel(raw_path, skiprows=raw_skip_rows) df = pd.read_excel(raw_path, skiprows=raw_skip_rows)
except Exception as e:
df = pd.read_csv(raw_path)
if len(df.columns) == len(RawConstants.columns): if len(df.columns) == len(RawConstants.columns):
logger.info(f"Total cols are {len(RawConstants.columns)} and are same as the df cols length") logger.info(f"Total cols are {len(RawConstants.columns)} and are same as the df cols length")
df.columns = RawConstants.columns df.columns = RawConstants.columns
...@@ -141,12 +164,37 @@ def load_and_predict(df_grouped, index_no): ...@@ -141,12 +164,37 @@ def load_and_predict(df_grouped, index_no):
y_pred_full = saved_model.predict(features) y_pred_full = saved_model.predict(features)
df_grouped['predicted_viscosity'] = y_pred_full df_grouped['predicted_viscosity'] = y_pred_full
final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']] final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']]
final_df.to_csv('final_predicted_viscosity.csv') final_df.to_csv(f'{index_no}_final_predicted_viscosity.csv')
elif index_no == 3294: elif index_no == 3294:
logger.info(f"Loading model for {index_no}") logger.info(f"Loading model for {index_no}")
saved_model = ModelLoader({
"type": "mlflow.sklearn",
"path": "models/fy664g"
}).load_model()
cols_x = ['Weighted_ASH_type', 'Weighted_NITROGEN_type', 'electric_energy_mean',
'drilled_side_left_inlet_side_cooling_water_temperature_std',
'seat_temperature_immediately_after_bof_mean',
'mixer_rotor_left_outlet_side_cooling_water_flow_rate_mean', 'humidity_mean',
'drilled_side_left_exit_side_cooling_water_flow_rate_mean',
'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'calendar_bank_load_max',
'drilled_side_right_inlet_side_cooling_water_flow_rate_mean', 'Weighted_PRI_type',
'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean', 'temperature_ws_side_std',
'dust_cv\nspeed_std', 'mixer_rotor_right_inlet_side_coolant_temperature_mean',
'ram_position_std',
'drilled_side_right_exit_side_cooling_water_temperature_std',
'calender_roll_upper_side__outlet__side_cooling_water_temperature_std',
'Weighted_Temperature during transportation_type[℃]']
cols_y = "viscosity"
features = df_grouped[cols_x]
labels = df_grouped[cols_y]
y_pred_full = saved_model.predict(features)
df_grouped['predicted_viscosity'] = y_pred_full
final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']]
final_df.to_csv(f'{index_no}_final_predicted_viscosity.csv')
def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosity_skip_rows): def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosity_skip_rows):
logger.info(f"Starting prediction for {index_no}")
logger.info("Reading raw file data") logger.info("Reading raw file data")
df = read_raw_data(raw_path, raw_skip_rows) df = read_raw_data(raw_path, raw_skip_rows)
logger.info(f"Shape of raw df is {df.shape}") logger.info(f"Shape of raw df is {df.shape}")
...@@ -194,7 +242,7 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit ...@@ -194,7 +242,7 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
df_pickup_grouped, viscosity_df) df_pickup_grouped, viscosity_df)
load_and_predict(df_grouped, index_no) load_and_predict(df_grouped, index_no)
# model_trainer(df_grouped) # model_trainer(df_grouped, index_no)
if __name__ == "__main__": if __name__ == "__main__":
...@@ -206,5 +254,11 @@ if __name__ == "__main__": ...@@ -206,5 +254,11 @@ if __name__ == "__main__":
viscosity_file_path = 'viscosity_natural_rubber_data.xlsx' viscosity_file_path = 'viscosity_natural_rubber_data.xlsx'
viscosity_file_skip_rows = 3 viscosity_file_skip_rows = 3
start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows) start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
index_number = 3294
raw_file_path = 'fy664g_raw.csv'
raw_file_skip_rows = 0
viscosity_file_path = 'fy664g-viscosity.xlsx'
viscosity_file_skip_rows = 2
start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
except Exception as e: except Exception as e:
logger.exception(f"Module failed because of error {e}") logger.exception(f"Module failed because of error {e}")
flavors:
python_function:
env: conda.yaml
loader_module: mlflow.sklearn
model_path: model.pkl
python_version: 3.10.13
sklearn:
pickled_model: model.pkl
serialization_format: cloudpickle
sklearn_version: 1.2.2
utc_time_created: '2023-12-20 11:58:08.254129'
channels:
- conda-forge
dependencies:
- python=3.10.13
- pip
- pip:
- mlflow
- cloudpickle==3.0.0
- scikit-learn==1.2.2
name: mlflow-env
mlflow
cloudpickle==3.0.0
scikit-learn==1.2.2
\ No newline at end of file
...@@ -185,7 +185,7 @@ class ViscosityConstants: ...@@ -185,7 +185,7 @@ class ViscosityConstants:
'Humidity during transportation__type2[%]' 'Humidity during transportation__type2[%]'
] ]
req_cols = [ req_cols = [
'Rubber No.', 'Batch No.', 'Index No', 'Rubber No.', 'Batch No.', 'Index No', 'Chemical weight (g)',
'Input rubber weight(0.1kg)', 'date', 'batch-date', 'Input rubber weight(0.1kg)', 'date', 'batch-date',
'Weight_type1', 'Weight_type2', 'Weighted_PO_type', 'Weight_type1', 'Weight_type2', 'Weighted_PO_type',
'Weighted_DIRT_type', 'Weighted_ASH_type', 'Weighted_VM_type', 'Weighted_DIRT_type', 'Weighted_ASH_type', 'Weighted_VM_type',
......
...@@ -213,7 +213,7 @@ def preprocess_bof_section(df, index_number, vis_df): ...@@ -213,7 +213,7 @@ def preprocess_bof_section(df, index_number, vis_df):
date_dict = mixer_section_start_end_time(df, index_number) date_dict = mixer_section_start_end_time(df, index_number)
bof_merged_df_final = return_batch_no_df(df, vis_df, date_dict, index_number) bof_merged_df_final = return_batch_no_df(df, vis_df, date_dict, index_number)
bof_merged_df_final = bof_merged_df_final[bof_merged_df_final['bof_batch_number'] != 0] bof_merged_df_final = bof_merged_df_final[bof_merged_df_final['bof_batch_number'] != 0]
print(bof_merged_df_final.columns) # print(bof_merged_df_final.columns)
grouped_cols = ['batch-date'] grouped_cols = ['batch-date']
aggregate_dict = BofConstants.bof_aggregate_dict aggregate_dict = BofConstants.bof_aggregate_dict
df_bof_grouped = bof_merged_df_final.groupby(grouped_cols).agg(aggregate_dict).reset_index() df_bof_grouped = bof_merged_df_final.groupby(grouped_cols).agg(aggregate_dict).reset_index()
......
...@@ -82,7 +82,7 @@ def return_batch_no_df( ...@@ -82,7 +82,7 @@ def return_batch_no_df(
value = day_df["discharge length"].max() - day_df["discharge length"].min() value = day_df["discharge length"].max() - day_df["discharge length"].min()
day_length_dic[each_day] = value day_length_dic[each_day] = value
print(day_length_dic) # print(day_length_dic)
sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True) sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
sorted_viscosity_df["day"] = sorted_viscosity_df["Mixing date"].dt.date sorted_viscosity_df["day"] = sorted_viscosity_df["Mixing date"].dt.date
......
...@@ -455,10 +455,17 @@ def return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, index_no ...@@ -455,10 +455,17 @@ def return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, index_no
extruder_flag_list.append('false') extruder_flag_list.append('false')
extrud_flg_vms.append(0) extrud_flg_vms.append(0)
else: else:
start_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('start_time')) # start_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('start_time'))
end_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('end_time')) # end_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('end_time'))
start_time = bof_date_dict.get(value["pickup_batch_date"]).get("start_time")
end_time = bof_date_dict.get(value["pickup_batch_date"]).get("end_time")
if (value['Time Stamp'] > start_time) & (value['Time Stamp'] < end_time): if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') > datetime.strptime(
start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
(datetime.strptime(str(value["Time Stamp"]).split('+')[0],
'%Y-%m-%d %H:%M:%S') < datetime.strptime(
end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
extruder_flag_list.append('false') extruder_flag_list.append('false')
extrud_flg_vms.append(0) extrud_flg_vms.append(0)
else: else:
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment