Commit 0bde5356 authored by dasharatha.vamshi

added models for fy676a and fy664g

parent c0847703
@@ -4,7 +4,6 @@ import numpy as np
import pandas as pd
from loguru import logger
from sklearn import metrics
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import train_test_split
from scripts.constants.constants import RawConstants
@@ -19,30 +18,51 @@ from scripts.section_utils.sheet_supply_section import preprocess_sheet_section
warnings.filterwarnings("ignore")
def model_trainer(df_grouped):
cols_x = ['temperature_ws_side_std', 'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean',
'_calendar_current_mean', 'electric_energy_mean', 'seat_temperature_immediately_after_bof_mean',
'Weighted_NITROGEN_type', 'ram_pressure_mean', 'surface_temperature_center_std',
'drilled_side_left_exit_side_cooling_water_temperature_mean', 'Weighted_VM_type',
'screw_operation_side_outlet_side_cooling_water_flow_rate_std', 'Weighted_DIRT_type',
'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std', 'residence_time_max',
'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'Weighted_ASH_type',
'Weighted_PO_type', 'drilled_side_right_exit_side_cooling_water_flow_rate_std']
cols_y = "viscosity"
def model_trainer(df_grouped, index_no):
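# index_no selects the product line: 1250 uses the fy676a feature set and model, 3294 uses fy664g.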
cols_x, cols_y, saved_model = None, None, None
if index_no == 1250:
cols_x = ['temperature_ws_side_std', 'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean',
'_calendar_current_mean', 'electric_energy_mean', 'seat_temperature_immediately_after_bof_mean',
'Weighted_NITROGEN_type', 'ram_pressure_mean', 'surface_temperature_center_std',
'drilled_side_left_exit_side_cooling_water_temperature_mean', 'Weighted_VM_type',
'screw_operation_side_outlet_side_cooling_water_flow_rate_std', 'Weighted_DIRT_type',
'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std', 'residence_time_max',
'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'Weighted_ASH_type',
'Weighted_PO_type', 'drilled_side_right_exit_side_cooling_water_flow_rate_std']
cols_y = "viscosity"
saved_model = ModelLoader({
"type": "mlflow.sklearn",
"path": "models/fy676a"
}).load_model()
elif index_no == 3294:
cols_x = ['Weighted_ASH_type', 'Weighted_NITROGEN_type', 'electric_energy_mean',
'drilled_side_left_inlet_side_cooling_water_temperature_std',
'seat_temperature_immediately_after_bof_mean',
'mixer_rotor_left_outlet_side_cooling_water_flow_rate_mean', 'humidity_mean',
'drilled_side_left_exit_side_cooling_water_flow_rate_mean',
'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'calendar_bank_load_max',
'drilled_side_right_inlet_side_cooling_water_flow_rate_mean', 'Weighted_PRI_type',
'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean', 'temperature_ws_side_std',
'dust_cv\nspeed_std', 'mixer_rotor_right_inlet_side_coolant_temperature_mean', 'ram_position_std',
'drilled_side_right_exit_side_cooling_water_temperature_std',
'calender_roll_upper_side__outlet__side_cooling_water_temperature_std',
'Weighted_Temperature during transportation_type[℃]']
cols_y = "viscosity"
saved_model = ModelLoader({
"type": "mlflow.sklearn",
"path": "models/fy664g"
}).load_model()
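# NOTE: any other index_no leaves cols_x/cols_y/saved_model as None, so the feature
# selection below will fail; only 1250 and 3294 are handled here.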
req_cols = cols_x + ['viscosity']
features = df_grouped[cols_x]
labels = df_grouped[cols_y]
df_grouped[req_cols].to_csv('final.csv')
# df_grouped[req_cols].to_csv('final.csv')
# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(features, labels, random_state=42, test_size=0.25)
print(f'x_train shape - {x_train.shape}')
print(f'x_test shape - {x_test.shape}')
print(f'y_train shape - {y_train.shape}')
print(f'y_test shape - {y_test.shape}')
saved_model = ModelLoader({
"type": "mlflow.sklearn",
"path": "models/fy676a"
}).load_model()
y_pred = saved_model.predict(x_test)
predictions = [round(value, 2) for value in y_pred]
@@ -72,7 +92,10 @@ def model_trainer(df_grouped):
def read_raw_data(raw_path, raw_skip_rows):
df = pd.read_excel(raw_path, skiprows=raw_skip_rows)
try:
df = pd.read_excel(raw_path, skiprows=raw_skip_rows)
except Exception as e:
df = pd.read_csv(raw_path)
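# Fall back to plain CSV when the raw file cannot be read as an Excel workbook
# (e.g. the fy664g_raw.csv input used in __main__ below).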
if len(df.columns) == len(RawConstants.columns):
logger.info(f"Total cols are {len(RawConstants.columns)} and are same as the df cols length")
df.columns = RawConstants.columns
@@ -141,12 +164,37 @@ def load_and_predict(df_grouped, index_no):
y_pred_full = saved_model.predict(features)
df_grouped['predicted_viscosity'] = y_pred_full
final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']]
final_df.to_csv('final_predicted_viscosity.csv')
final_df.to_csv(f'{index_no}_final_predicted_viscosity.csv')
elif index_no == 3294:
logger.info(f"Loading model for {index_no}")
saved_model = ModelLoader({
"type": "mlflow.sklearn",
"path": "models/fy664g"
}).load_model()
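# Feature columns below mirror the fy664g training columns listed in model_trainer above.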
cols_x = ['Weighted_ASH_type', 'Weighted_NITROGEN_type', 'electric_energy_mean',
'drilled_side_left_inlet_side_cooling_water_temperature_std',
'seat_temperature_immediately_after_bof_mean',
'mixer_rotor_left_outlet_side_cooling_water_flow_rate_mean', 'humidity_mean',
'drilled_side_left_exit_side_cooling_water_flow_rate_mean',
'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'calendar_bank_load_max',
'drilled_side_right_inlet_side_cooling_water_flow_rate_mean', 'Weighted_PRI_type',
'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean', 'temperature_ws_side_std',
'dust_cv\nspeed_std', 'mixer_rotor_right_inlet_side_coolant_temperature_mean',
'ram_position_std',
'drilled_side_right_exit_side_cooling_water_temperature_std',
'calender_roll_upper_side__outlet__side_cooling_water_temperature_std',
'Weighted_Temperature during transportation_type[℃]']
cols_y = "viscosity"
features = df_grouped[cols_x]
labels = df_grouped[cols_y]
y_pred_full = saved_model.predict(features)
df_grouped['predicted_viscosity'] = y_pred_full
final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']]
final_df.to_csv(f'{index_no}_final_predicted_viscosity.csv')
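# Each branch writes its predictions to <index_no>_final_predicted_viscosity.csv.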
def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosity_skip_rows):
logger.info(f"Starting prediction for {index_no}")
logger.info("Reading raw file data")
df = read_raw_data(raw_path, raw_skip_rows)
logger.info(f"Shape of raw df is {df.shape}")
@@ -194,7 +242,7 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
df_pickup_grouped, viscosity_df)
load_and_predict(df_grouped, index_no)
# model_trainer(df_grouped)
# model_trainer(df_grouped, index_no)
if __name__ == "__main__":
@@ -206,5 +254,11 @@ if __name__ == "__main__":
viscosity_file_path = 'viscosity_natural_rubber_data.xlsx'
viscosity_file_skip_rows = 3
start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
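# Second run: the fy664g line (index 3294) reads a CSV raw export and its own viscosity workbook.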
index_number = 3294
raw_file_path = 'fy664g_raw.csv'
raw_file_skip_rows = 0
viscosity_file_path = 'fy664g-viscosity.xlsx'
viscosity_file_skip_rows = 2
start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
except Exception as e:
logger.exception(f"Module failed because of error {e}")
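# MLmodel metadata for the MLflow sklearn artifact added in this commit (presumably the models/fy664g directory).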
flavors:
python_function:
env: conda.yaml
loader_module: mlflow.sklearn
model_path: model.pkl
python_version: 3.10.13
sklearn:
pickled_model: model.pkl
serialization_format: cloudpickle
sklearn_version: 1.2.2
utc_time_created: '2023-12-20 11:58:08.254129'
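# conda.yaml: environment spec referenced by the MLmodel metadata above.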
channels:
- conda-forge
dependencies:
- python=3.10.13
- pip
- pip:
- mlflow
- cloudpickle==3.0.0
- scikit-learn==1.2.2
name: mlflow-env
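# requirements.txt: pip pins matching the conda environment above.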
mlflow
cloudpickle==3.0.0
scikit-learn==1.2.2
\ No newline at end of file
@@ -185,7 +185,7 @@ class ViscosityConstants:
'Humidity during transportation__type2[%]'
]
req_cols = [
'Rubber No.', 'Batch No.', 'Index No',
'Rubber No.', 'Batch No.', 'Index No', 'Chemical weight (g)',
'Input rubber weight(0.1kg)', 'date', 'batch-date',
'Weight_type1', 'Weight_type2', 'Weighted_PO_type',
'Weighted_DIRT_type', 'Weighted_ASH_type', 'Weighted_VM_type',
......
@@ -213,7 +213,7 @@ def preprocess_bof_section(df, index_number, vis_df):
date_dict = mixer_section_start_end_time(df, index_number)
bof_merged_df_final = return_batch_no_df(df, vis_df, date_dict, index_number)
bof_merged_df_final = bof_merged_df_final[bof_merged_df_final['bof_batch_number'] != 0]
print(bof_merged_df_final.columns)
# print(bof_merged_df_final.columns)
grouped_cols = ['batch-date']
aggregate_dict = BofConstants.bof_aggregate_dict
df_bof_grouped = bof_merged_df_final.groupby(grouped_cols).agg(aggregate_dict).reset_index()
......
@@ -82,7 +82,7 @@ def return_batch_no_df(
value = day_df["discharge length"].max() - day_df["discharge length"].min()
day_length_dic[each_day] = value
print(day_length_dic)
# print(day_length_dic)
sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
sorted_viscosity_df["day"] = sorted_viscosity_df["Mixing date"].dt.date
......
@@ -455,10 +455,17 @@ def return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, index_no
extruder_flag_list.append('false')
extrud_flg_vms.append(0)
else:
start_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('start_time'))
end_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('end_time'))
# start_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('start_time'))
# end_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('end_time'))
if (value['Time Stamp'] > start_time) & (value['Time Stamp'] < end_time):
start_time = bof_date_dict.get(value["pickup_batch_date"]).get("start_time")
end_time = bof_date_dict.get(value["pickup_batch_date"]).get("end_time")
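# Compare as naive datetimes: strip any timezone offset (text after '+') before parsing;
# assumes datetime has been imported from the datetime module at the top of this file.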
timestamp_dt = datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S')
start_dt = datetime.strptime(start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')
end_dt = datetime.strptime(end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')
if (timestamp_dt > start_dt) & (timestamp_dt < end_dt):
extruder_flag_list.append('false')
extrud_flg_vms.append(0)
else:
......