Commit 648a1daf authored by dasharatha.vamshi's avatar dasharatha.vamshi

tested the pipeline for Fy676A

parent 685bff63
import warnings import warnings
from loguru import logger
import numpy as np
import pandas as pd import pandas as pd
from loguru import logger
from scripts.constants.constants import RawConstants from scripts.constants.constants import RawConstants
from scripts.core.model_loader import ModelLoader
from scripts.section_utils.bof_section import preprocess_bof_section from scripts.section_utils.bof_section import preprocess_bof_section
from scripts.section_utils.extruder_section import preprocess_extruder_section from scripts.section_utils.extruder_section import preprocess_extruder_section
from scripts.section_utils.material_section import preprocess_viscosity_section from scripts.section_utils.material_section import preprocess_viscosity_section
...@@ -28,6 +31,66 @@ def read_raw_data(raw_path, raw_skip_rows): ...@@ -28,6 +31,66 @@ def read_raw_data(raw_path, raw_skip_rows):
return df return df
def merged_all_sections(sheet_df, mixer_df, extruder_df, bof_df, pickup_df, viscosity_df):
    """Combine the per-section aggregates into a single batch-level frame.

    Every section frame is left-joined onto ``sheet_df`` using the shared
    ``'batch-date'`` key, so only the keys present in ``sheet_df`` survive.

    Post-processing applied to the joined frame:

    * In the rubber/viscosity columns listed below, zeros are replaced by
      NaN and every NaN is then filled with that column's mean (the mean is
      computed after the zeros have been removed).
    * ``'Batch Number'`` (float) is parsed from the ``Batch_<number>_`` part
      of ``'batch-date'`` and ``'Date'`` (datetime) from its trailing
      ``_YYYY-MM-DD`` part.
    * Rows are sorted by ``'Date'`` first, then ``'Batch Number'``.
    * Numeric values are rounded to 6 decimal places.

    Args:
        sheet_df: Left-most frame of the join; must contain ``'batch-date'``.
        mixer_df: Mixer aggregates keyed by ``'batch-date'``.
        extruder_df: Extruder aggregates keyed by ``'batch-date'``.
        bof_df: BOF aggregates keyed by ``'batch-date'``.
        pickup_df: Pickup aggregates keyed by ``'batch-date'``.
        viscosity_df: Material/viscosity aggregates keyed by ``'batch-date'``.

    Returns:
        The merged, cleaned, sorted and rounded DataFrame.

    Raises:
        KeyError: If ``'batch-date'`` or any of the rubber/viscosity columns
            is missing after the merge.
    """
    # Chain the left joins in the same order as before so that any pandas
    # suffixing of overlapping column names is unchanged.
    df_grouped = sheet_df
    for section_df in (mixer_df, extruder_df, bof_df, pickup_df, viscosity_df):
        df_grouped = pd.merge(df_grouped, section_df, on='batch-date', how='left')

    viscosity_rubber_cols = [
        'Weight_type1', 'Weight_type2',
        'Weighted_PO_type', 'Weighted_DIRT_type', 'Weighted_ASH_type',
        'Weighted_VM_type', 'Weighted_PRI_type', 'Weighted_NITROGEN_type',
        'Weighted_Temperature during transportation_type[℃]',
        'Weighted_Humidity during transportation__type[%]', 'Weighted Sum',
        'viscosity',
    ]

    # Treat 0 as "missing" in these columns, then impute with the mean of
    # the remaining (non-zero, non-NaN) values of the same column.
    for col in viscosity_rubber_cols:
        without_zeros = df_grouped[col].replace(0, np.nan)
        df_grouped[col] = without_zeros.fillna(without_zeros.mean())

    # Split the composite key into a numeric batch number and a real date.
    batch_key = df_grouped['batch-date']
    df_grouped['Batch Number'] = batch_key.str.extract(r'Batch_(\d+\.\d+)_')[0].astype(float)
    df_grouped['Date'] = pd.to_datetime(batch_key.str.extract(r'_(\d{4}-\d{2}-\d{2})$')[0])

    # Chronological order first, batch number as the tie-breaker.
    df_grouped = df_grouped.sort_values(by=['Date', 'Batch Number'])
    return df_grouped.round(6)
def load_and_predict(df_grouped, index_no):
    """Predict viscosity for the merged batch frame and write it to CSV.

    Only ``index_no == 1250`` has a model wired up: the MLflow sklearn model
    stored under ``models/fy676a`` is loaded via ``ModelLoader``, applied to
    the feature columns listed below, and the predictions are stored in a
    new ``'predicted_viscosity'`` column on ``df_grouped`` (the frame passed
    in is modified in place). The ``'Date'``, ``'Batch Number'`` and
    ``'predicted_viscosity'`` columns are then written to
    ``final_predicted_viscosity.csv`` in the current working directory.

    Args:
        df_grouped: Merged batch-level DataFrame; must contain every feature
            column plus ``'Date'`` and ``'Batch Number'``. The target column
            ``'viscosity'`` is not required for inference.
        index_no: Index number selecting which model to use.

    Returns:
        The three-column prediction DataFrame when ``index_no == 1250``,
        otherwise ``None``.

    Raises:
        KeyError: If a required column is missing from ``df_grouped``.
    """
    if index_no == 1250:
        logger.info(f"Loading model for {index_no}")
        saved_model = ModelLoader({
            "type": "mlflow.sklearn",
            "path": "models/fy676a"
        }).load_model()

        # Column order is kept exactly as listed; presumably it must match
        # the order the model was trained with - TODO confirm against trainer.
        cols_x = [
            'temperature_ws_side_std',
            'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean',
            '_calendar_current_mean',
            'electric_energy_mean',
            'seat_temperature_immediately_after_bof_mean',
            'Weighted_NITROGEN_type',
            'ram_pressure_mean',
            'surface_temperature_center_std',
            'drilled_side_left_exit_side_cooling_water_temperature_mean',
            'Weighted_VM_type',
            'screw_operation_side_outlet_side_cooling_water_flow_rate_std',
            'Weighted_DIRT_type',
            'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std',
            'residence_time_max',
            'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean',
            'Weighted_ASH_type',
            'Weighted_PO_type',
            'drilled_side_right_exit_side_cooling_water_flow_rate_std',
        ]

        features = df_grouped[cols_x]
        df_grouped['predicted_viscosity'] = saved_model.predict(features)

        final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']]
        final_df.to_csv('final_predicted_viscosity.csv')
        return final_df

    if index_no == 3294:
        logger.info(f"Loading model for {index_no}")
        # NOTE(review): this branch is a placeholder - no model is loaded and
        # no prediction is produced yet.
        logger.warning(f"No model is configured for {index_no}; skipping prediction")
        return None

    # Previously an unknown index fell through without any trace.
    logger.warning(f"Unsupported index number {index_no}; skipping prediction")
    return None
def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosity_skip_rows): def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosity_skip_rows):
logger.info("Reading raw file data") logger.info("Reading raw file data")
df = read_raw_data(raw_path, raw_skip_rows) df = read_raw_data(raw_path, raw_skip_rows)
...@@ -35,7 +98,7 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit ...@@ -35,7 +98,7 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
logger.info("Starting preprocessing material section") logger.info("Starting preprocessing material section")
viscosity_df, raw_viscosity_df = preprocess_viscosity_section(viscosity_path, index_no, viscosity_skip_rows) viscosity_df, raw_viscosity_df = preprocess_viscosity_section(viscosity_path, index_no, viscosity_skip_rows)
viscosity_df.to_csv('viscosity-agg.csv') # viscosity_df.to_csv('viscosity-agg.csv')
logger.info(f"The shape of the viscosity df is {viscosity_df.shape}") logger.info(f"The shape of the viscosity df is {viscosity_df.shape}")
logger.info("Completed material section preprocessing") logger.info("Completed material section preprocessing")
...@@ -43,35 +106,39 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit ...@@ -43,35 +106,39 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
df_sheet_grouped = preprocess_sheet_section(df, index_no) df_sheet_grouped = preprocess_sheet_section(df, index_no)
logger.info(f"The shape of the Sheet df is {df_sheet_grouped.shape}") logger.info(f"The shape of the Sheet df is {df_sheet_grouped.shape}")
logger.info("Completed sheet section preprocessing") logger.info("Completed sheet section preprocessing")
df_sheet_grouped.to_csv('sheet-agg.csv') # df_sheet_grouped.to_csv('sheet-agg.csv')
logger.info("Starting preprocessing mixer section") logger.info("Starting preprocessing mixer section")
df_mixer_grouped = preprocess_mixer_section(df, index_no) df_mixer_grouped = preprocess_mixer_section(df, index_no)
logger.info(f"The shape of the Mixer df is {df_mixer_grouped.shape}") logger.info(f"The shape of the Mixer df is {df_mixer_grouped.shape}")
logger.info("Completed mixer section preprocessing") logger.info("Completed mixer section preprocessing")
df_mixer_grouped.to_csv('mixer-agg.csv') # df_mixer_grouped.to_csv('mixer-agg.csv')
logger.info("Starting preprocessing extruder section") logger.info("Starting preprocessing extruder section")
df_extruder_grouped = preprocess_extruder_section(df, index_no, raw_viscosity_df) df_extruder_grouped = preprocess_extruder_section(df, index_no, raw_viscosity_df)
logger.info(f"The shape of the Extruder df is {df_extruder_grouped.shape}") logger.info(f"The shape of the Extruder df is {df_extruder_grouped.shape}")
logger.info("Completed extruder section preprocessing") logger.info("Completed extruder section preprocessing")
df_extruder_grouped.to_csv('extruder-agg.csv') # df_extruder_grouped.to_csv('extruder-agg.csv')
logger.info("Starting preprocessing bof section") logger.info("Starting preprocessing bof section")
df_bof_grouped = preprocess_bof_section(df, index_no, raw_viscosity_df) df_bof_grouped = preprocess_bof_section(df, index_no, raw_viscosity_df)
logger.info(f"The shape of the BOF df is {df_bof_grouped.shape}") logger.info(f"The shape of the BOF df is {df_bof_grouped.shape}")
logger.info("Completed bof section preprocessing") logger.info("Completed bof section preprocessing")
df_bof_grouped.to_csv('bof-agg.csv') # df_bof_grouped.to_csv('bof-agg.csv')
bof_desc = df_bof_grouped.describe() # bof_desc = df_bof_grouped.describe()
bof_desc.to_csv('bof-describe.csv') # bof_desc.to_csv('bof-describe.csv')
logger.info("Starting preprocessing pickup section") logger.info("Starting preprocessing pickup section")
df_pickup_grouped = preprocess_pickup_section(df, index_no, raw_viscosity_df) df_pickup_grouped = preprocess_pickup_section(df, index_no, raw_viscosity_df)
logger.info(f"The shape of the Extruder df is {df_pickup_grouped.shape}") logger.info(f"The shape of the Extruder df is {df_pickup_grouped.shape}")
logger.info("Completed pickup section preprocessing") logger.info("Completed pickup section preprocessing")
df_pickup_grouped.to_csv('pickup-agg.csv') # df_pickup_grouped.to_csv('pickup-agg.csv')
df = pd.read_csv('pickup-agg.csv') # df = pd.read_csv('pickup-agg.csv')
print(df.describe()) # print(df.describe())
df_grouped = merged_all_sections(df_sheet_grouped, df_mixer_grouped, df_extruder_grouped, df_bof_grouped,
df_pickup_grouped, viscosity_df)
load_and_predict(df_grouped, index_no)
if __name__ == "__main__": if __name__ == "__main__":
......
flavors:
python_function:
env: conda.yaml
loader_module: mlflow.sklearn
model_path: model.pkl
python_version: 3.10.13
sklearn:
pickled_model: model.pkl
serialization_format: cloudpickle
sklearn_version: 1.2.2
utc_time_created: '2023-12-20 06:24:57.321465'
channels:
- conda-forge
dependencies:
- python=3.10.13
- pip
- pip:
- mlflow
- cloudpickle==3.0.0
- scikit-learn==1.2.2
name: mlflow-env
mlflow
cloudpickle==3.0.0
scikit-learn==1.2.2
\ No newline at end of file
...@@ -33,7 +33,7 @@ def model_trainer(): ...@@ -33,7 +33,7 @@ def model_trainer():
merged_df = pd.merge(merged_df, bof_df, on='batch-date', how='left') merged_df = pd.merge(merged_df, bof_df, on='batch-date', how='left')
merged_df = pd.merge(merged_df, pickup_df, on='batch-date', how='left') merged_df = pd.merge(merged_df, pickup_df, on='batch-date', how='left')
df_grouped = pd.merge(merged_df, viscosity_df, on='batch-date', how='left') df_grouped = pd.merge(merged_df, viscosity_df, on='batch-date', how='left')
print(df_grouped.columns) # print(df_grouped.columns)
selected_cols = df_grouped.columns selected_cols = df_grouped.columns
df_grouped = df_grouped[selected_cols] df_grouped = df_grouped[selected_cols]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment