Commit 59870962 authored by sagar.shee

Merge branch 'master' into 'fy664g_pipeline'

tested the pipeline for Fy676A

See merge request !1
parents 685bff63 648a1daf
import warnings
from loguru import logger
import numpy as np
import pandas as pd
from loguru import logger
from scripts.constants.constants import RawConstants
from scripts.core.model_loader import ModelLoader
from scripts.section_utils.bof_section import preprocess_bof_section
from scripts.section_utils.extruder_section import preprocess_extruder_section
from scripts.section_utils.material_section import preprocess_viscosity_section
......@@ -28,6 +31,66 @@ def read_raw_data(raw_path, raw_skip_rows):
return df
def merged_all_sections(sheet_df, mixer_df, extruder_df, bof_df, pickup_df, viscosity_df):
    """Left-join all section frames onto the sheet frame and clean the result.

    Every frame is joined on the ``'batch-date'`` column, starting from
    ``sheet_df`` and keeping all of its rows (``how='left'``).

    After merging:

    * in each viscosity/rubber column, ``0`` is treated as missing and every
      missing value is replaced by that column's mean;
    * ``'Batch Number'`` (float) and ``'Date'`` (datetime) are parsed out of
      the ``'batch-date'`` key using the patterns ``Batch_<int>.<int>_`` and
      ``_<YYYY-MM-DD>`` at the end of the string;
    * rows are sorted by ``'Date'`` and then ``'Batch Number'``;
    * numeric values are rounded to 6 decimal places.

    Args:
        sheet_df: Base frame; its rows define the rows of the result.
        mixer_df, extruder_df, bof_df, pickup_df, viscosity_df: Section
            frames, each carrying a ``'batch-date'`` column.

    Returns:
        The merged, cleaned and sorted ``pandas.DataFrame``.

    Raises:
        KeyError: If any expected viscosity/rubber column is absent from the
            merged frame.
    """
    viscosity_rubber_cols = ['Weight_type1', 'Weight_type2',
                             'Weighted_PO_type', 'Weighted_DIRT_type', 'Weighted_ASH_type',
                             'Weighted_VM_type', 'Weighted_PRI_type', 'Weighted_NITROGEN_type',
                             'Weighted_Temperature during transportation_type[℃]',
                             'Weighted_Humidity during transportation__type[%]', 'Weighted Sum',
                             'viscosity']

    # Chain the left joins in a fixed order; each merge returns a new frame,
    # so none of the caller's inputs is modified.
    df_grouped = sheet_df
    for section_df in (mixer_df, extruder_df, bof_df, pickup_df, viscosity_df):
        df_grouped = pd.merge(df_grouped, section_df, on='batch-date', how='left')

    # Fail early with a readable message instead of a bare KeyError mid-loop.
    missing_cols = [col for col in viscosity_rubber_cols if col not in df_grouped.columns]
    if missing_cols:
        raise KeyError(f"Merged data is missing expected viscosity columns: {missing_cols}")

    # Zeros count as missing readings: blank them out first so they do not
    # drag the mean down, then impute with the mean of the remaining values.
    for col in viscosity_rubber_cols:
        cleaned = df_grouped[col].replace(0, np.nan)
        df_grouped[col] = cleaned.fillna(cleaned.mean())

    # Split the composite key into sortable batch-number and date columns.
    batch_key = df_grouped['batch-date']
    df_grouped['Batch Number'] = batch_key.str.extract(r'Batch_(\d+\.\d+)_')[0].astype(float)
    df_grouped['Date'] = pd.to_datetime(batch_key.str.extract(r'_(\d{4}-\d{2}-\d{2})$')[0])

    df_grouped = df_grouped.sort_values(by=['Date', 'Batch Number'])
    return df_grouped.round(6)
def load_and_predict(df_grouped, index_no):
    """Load the model for ``index_no`` and write viscosity predictions to CSV.

    For ``index_no == 1250`` the sklearn-flavoured MLflow model stored under
    ``models/fy676a`` is loaded through ``ModelLoader``, run on the feature
    columns listed in ``cols_x``, and the predictions are stored in a new
    ``'predicted_viscosity'`` column of ``df_grouped`` (the caller's frame is
    modified in place). The ``'Date'``, ``'Batch Number'`` and
    ``'predicted_viscosity'`` columns are then written to
    ``final_predicted_viscosity.csv`` in the current working directory.

    Args:
        df_grouped: Merged feature frame; must contain every column in
            ``cols_x`` plus ``'Date'`` and ``'Batch Number'``.
        index_no: Identifier selecting which model to use.

    Returns:
        None. The result is delivered through the CSV file and the added
        column on ``df_grouped``.
    """
    if index_no == 1250:
        logger.info(f"Loading model for {index_no}")
        saved_model = ModelLoader({
            "type": "mlflow.sklearn",
            "path": "models/fy676a"
        }).load_model()
        cols_x = ['temperature_ws_side_std', 'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean',
                  '_calendar_current_mean', 'electric_energy_mean', 'seat_temperature_immediately_after_bof_mean',
                  'Weighted_NITROGEN_type', 'ram_pressure_mean', 'surface_temperature_center_std',
                  'drilled_side_left_exit_side_cooling_water_temperature_mean', 'Weighted_VM_type',
                  'screw_operation_side_outlet_side_cooling_water_flow_rate_std', 'Weighted_DIRT_type',
                  'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std', 'residence_time_max',
                  'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'Weighted_ASH_type',
                  'Weighted_PO_type', 'drilled_side_right_exit_side_cooling_water_flow_rate_std']
        # Only the features are needed for inference; the measured
        # 'viscosity' label is deliberately not read here.
        features = df_grouped[cols_x]
        df_grouped['predicted_viscosity'] = saved_model.predict(features)
        final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']]
        final_df.to_csv('final_predicted_viscosity.csv')
    elif index_no == 3294:
        # NOTE(review): this branch only logs; no model is loaded and no
        # prediction is produced for 3294 yet - confirm whether that is intended.
        logger.info(f"Loading model for {index_no}")
    else:
        # Make an unsupported index visible instead of silently doing nothing.
        logger.warning(f"No prediction model configured for index {index_no}")
def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosity_skip_rows):
logger.info("Reading raw file data")
df = read_raw_data(raw_path, raw_skip_rows)
......@@ -35,7 +98,7 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
logger.info("Starting preprocessing material section")
viscosity_df, raw_viscosity_df = preprocess_viscosity_section(viscosity_path, index_no, viscosity_skip_rows)
viscosity_df.to_csv('viscosity-agg.csv')
# viscosity_df.to_csv('viscosity-agg.csv')
logger.info(f"The shape of the viscosity df is {viscosity_df.shape}")
logger.info("Completed material section preprocessing")
......@@ -43,35 +106,39 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
df_sheet_grouped = preprocess_sheet_section(df, index_no)
logger.info(f"The shape of the Sheet df is {df_sheet_grouped.shape}")
logger.info("Completed sheet section preprocessing")
df_sheet_grouped.to_csv('sheet-agg.csv')
# df_sheet_grouped.to_csv('sheet-agg.csv')
logger.info("Starting preprocessing mixer section")
df_mixer_grouped = preprocess_mixer_section(df, index_no)
logger.info(f"The shape of the Mixer df is {df_mixer_grouped.shape}")
logger.info("Completed mixer section preprocessing")
df_mixer_grouped.to_csv('mixer-agg.csv')
# df_mixer_grouped.to_csv('mixer-agg.csv')
logger.info("Starting preprocessing extruder section")
df_extruder_grouped = preprocess_extruder_section(df, index_no, raw_viscosity_df)
logger.info(f"The shape of the Extruder df is {df_extruder_grouped.shape}")
logger.info("Completed extruder section preprocessing")
df_extruder_grouped.to_csv('extruder-agg.csv')
# df_extruder_grouped.to_csv('extruder-agg.csv')
logger.info("Starting preprocessing bof section")
df_bof_grouped = preprocess_bof_section(df, index_no, raw_viscosity_df)
logger.info(f"The shape of the BOF df is {df_bof_grouped.shape}")
logger.info("Completed bof section preprocessing")
df_bof_grouped.to_csv('bof-agg.csv')
bof_desc = df_bof_grouped.describe()
bof_desc.to_csv('bof-describe.csv')
# df_bof_grouped.to_csv('bof-agg.csv')
# bof_desc = df_bof_grouped.describe()
# bof_desc.to_csv('bof-describe.csv')
logger.info("Starting preprocessing pickup section")
df_pickup_grouped = preprocess_pickup_section(df, index_no, raw_viscosity_df)
logger.info(f"The shape of the Extruder df is {df_pickup_grouped.shape}")
logger.info("Completed pickup section preprocessing")
df_pickup_grouped.to_csv('pickup-agg.csv')
df = pd.read_csv('pickup-agg.csv')
print(df.describe())
# df_pickup_grouped.to_csv('pickup-agg.csv')
# df = pd.read_csv('pickup-agg.csv')
# print(df.describe())
df_grouped = merged_all_sections(df_sheet_grouped, df_mixer_grouped, df_extruder_grouped, df_bof_grouped,
df_pickup_grouped, viscosity_df)
load_and_predict(df_grouped, index_no)
if __name__ == "__main__":
......
flavors:
python_function:
env: conda.yaml
loader_module: mlflow.sklearn
model_path: model.pkl
python_version: 3.10.13
sklearn:
pickled_model: model.pkl
serialization_format: cloudpickle
sklearn_version: 1.2.2
utc_time_created: '2023-12-20 06:24:57.321465'
channels:
- conda-forge
dependencies:
- python=3.10.13
- pip
- pip:
- mlflow
- cloudpickle==3.0.0
- scikit-learn==1.2.2
name: mlflow-env
mlflow
cloudpickle==3.0.0
scikit-learn==1.2.2
\ No newline at end of file
......@@ -33,7 +33,7 @@ def model_trainer():
merged_df = pd.merge(merged_df, bof_df, on='batch-date', how='left')
merged_df = pd.merge(merged_df, pickup_df, on='batch-date', how='left')
df_grouped = pd.merge(merged_df, viscosity_df, on='batch-date', how='left')
print(df_grouped.columns)
# print(df_grouped.columns)
selected_cols = df_grouped.columns
df_grouped = df_grouped[selected_cols]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment