Commit 685bff63 authored by dasharatha.vamshi

tested the pipeline for Fy676A

parent a752455a
......@@ -19,4 +19,5 @@ scipy==1.10.1
six==1.16.0
statsmodels==0.14.1
xlrd==2.0.1
azure-storage-blob==12.14.1
......@@ -21,6 +21,12 @@ def model_trainer():
bof_df = pd.read_csv(r'D:\kalypso\bsj-model-inference\bof-agg.csv')
pickup_df = pd.read_csv(r'D:\kalypso\bsj-model-inference\pickup-agg.csv')
viscosity_df = pd.read_csv(r'D:\kalypso\bsj-model-inference\viscosity-agg.csv')
sheet_df.describe().to_csv("sheet_desc.csv")
mixer_df.describe().to_csv("mixer_desc.csv")
extruder_df.describe().to_csv("extru_desc.csv")
bof_df.describe().to_csv("bof_desc.csv")
pickup_df.describe().to_csv("pickup_desc.csv")
viscosity_df.describe().to_csv("visc_desc.csv")
# viscosity_df = viscosity_df[['batch-date', 'viscosity']]
merged_df = pd.merge(sheet_df, mixer_df, on='batch-date', how='left')
merged_df = pd.merge(merged_df, extruder_df, on='batch-date', how='left')
......@@ -30,6 +36,18 @@ def model_trainer():
print(df_grouped.columns)
selected_cols = df_grouped.columns
df_grouped = df_grouped[selected_cols]
viscosity_rubber_cols = ['Weight_type1', 'Weight_type2',
'Weighted_PO_type', 'Weighted_DIRT_type', 'Weighted_ASH_type',
'Weighted_VM_type', 'Weighted_PRI_type', 'Weighted_NITROGEN_type',
'Weighted_Temperature during transportation_type[℃]',
'Weighted_Humidity during transportation__type[%]', 'Weighted Sum',
'viscosity']
# Treat 0 as missing: replace it with NaN, then fill the NaNs with the column mean
for col in viscosity_rubber_cols:
df_grouped[col] = df_grouped[col].replace(0, np.nan)
df_grouped[col] = df_grouped[col].fillna(df_grouped[col].mean())
# Extract batch number and date
batch_number = df_grouped['batch-date'].str.extract(r'Batch_(\d+\.\d+)_')[0].astype(float)
date = pd.to_datetime(df_grouped['batch-date'].str.extract(r'_(\d{4}-\d{2}-\d{2})$')[0])
......@@ -40,10 +58,10 @@ def model_trainer():
# Sort by 'Date' first, then by 'Batch Number' within each date
df_grouped = df_grouped.sort_values(by=['Date', 'Batch Number'])
df_grouped = pd.read_csv(r"D:\kalypso\bsj-model-inference\test-agg-data.csv")
# df_grouped = pd.read_csv(r"D:\kalypso\bsj-model-inference\test-agg-data.csv")
df_grouped = round(df_grouped, 6)
df_grouped.to_csv('grouped.csv')
cols_x = ['temperature_ws_side_std', 'Weighted_VM_type', 'electric_energy_mean', 'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean', '_calendar_current_mean', 'Weighted_NITROGEN_type', 'ram_pressure_mean', 'seat_temperature_immediately_after_bof_mean', 'surface_temperature_center_std', 'screw_operation_side_outlet_side_cooling_water_flow_rate_std', 'Weighted_DIRT_type', 'drilled_side_left_exit_side_cooling_water_temperature_mean', 'sheet_temperature_immediately_after_calendering_mean', 'calender_roll_lower_side_inlet_side_cooling_water_temperature_mean', 'temperature_mean', 'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std', 'temperature_ds_side_mean', 'Weighted_PRI_type', 'residence_time_max']
cols_x = ['temperature_ws_side_std', 'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean', '_calendar_current_mean', 'electric_energy_mean', 'seat_temperature_immediately_after_bof_mean', 'Weighted_NITROGEN_type', 'ram_pressure_mean', 'surface_temperature_center_std', 'drilled_side_left_exit_side_cooling_water_temperature_mean', 'Weighted_VM_type', 'screw_operation_side_outlet_side_cooling_water_flow_rate_std', 'Weighted_DIRT_type', 'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std', 'residence_time_max', 'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'Weighted_ASH_type', 'Weighted_PO_type', 'drilled_side_right_exit_side_cooling_water_flow_rate_std']
cols_y = "viscosity"
req_cols = cols_x + ['viscosity']
# df_grouped = round(df_grouped, 2)
......@@ -52,7 +70,7 @@ def model_trainer():
# print(features.describe().to_csv('feature.csv'))
# print(df_grouped[req_cols].isnull().sum())
# df_grouped = round(df_grouped,2)
# df_grouped = pd.read_csv(r'D:\kalypso\bsj-model-inference\test-gr.csv')
# df_grouped = pd.read_csv(r'D:\kalypso\bsj-model-inference\final-fy676a.csv')
labels = df_grouped[cols_y]
# print(df_grouped[cols_y].describe())
df_grouped[req_cols].to_csv('final.csv')
......@@ -173,3 +191,9 @@ def model_trainer():
print(metric_dictionary)
model_trainer()
# {'Mean Absolute Error (MAE)': 1.4711585365853663, 'Mean Squared Error (MSE)': 3.193666768292685, 'Root Mean Squared Error (RMSE)': 1.7870833131929482,
# 'Mean Absolute Percentage Error (MAPE)': 0.015400607504235945, 'Explained Variance Score': 0.5937040328784624, 'Max Error': 4.709999999999994,
# 'Median Absolute Error': 1.4399999999999977, 'R2 Score': 0.5936331791226861, 'Mean Gamma Deviance': 0.0003503027256495745,
# 'Mean Poisson Deviance': 0.03343612041755939}
......@@ -8,4 +8,4 @@ flavors:
pickled_model: model.pkl
serialization_format: cloudpickle
sklearn_version: 1.2.2
utc_time_created: '2023-12-18 12:12:42.185881'
utc_time_created: '2023-12-20 06:24:57.321465'
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment