Commit 0bde5356 authored by dasharatha.vamshi

added models for fy676a and fy664g

parent c0847703
@@ -4,7 +4,6 @@ import numpy as np
import pandas as pd
from loguru import logger
from sklearn import metrics
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import train_test_split
from scripts.constants.constants import RawConstants
@@ -19,30 +18,51 @@ from scripts.section_utils.sheet_supply_section import preprocess_sheet_section
warnings.filterwarnings("ignore")
def model_trainer(df_grouped):
cols_x = ['temperature_ws_side_std', 'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean',
'_calendar_current_mean', 'electric_energy_mean', 'seat_temperature_immediately_after_bof_mean',
'Weighted_NITROGEN_type', 'ram_pressure_mean', 'surface_temperature_center_std',
'drilled_side_left_exit_side_cooling_water_temperature_mean', 'Weighted_VM_type',
'screw_operation_side_outlet_side_cooling_water_flow_rate_std', 'Weighted_DIRT_type',
'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std', 'residence_time_max',
'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'Weighted_ASH_type',
'Weighted_PO_type', 'drilled_side_right_exit_side_cooling_water_flow_rate_std']
cols_y = "viscosity"
def model_trainer(df_grouped, index_no):
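# index_no selects the product line: 1250 uses the fy676a feature set and model, 3294 uses fy664g.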
cols_x, cols_y, saved_model = None, None, None
if index_no == 1250:
cols_x = ['temperature_ws_side_std', 'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean',
'_calendar_current_mean', 'electric_energy_mean', 'seat_temperature_immediately_after_bof_mean',
'Weighted_NITROGEN_type', 'ram_pressure_mean', 'surface_temperature_center_std',
'drilled_side_left_exit_side_cooling_water_temperature_mean', 'Weighted_VM_type',
'screw_operation_side_outlet_side_cooling_water_flow_rate_std', 'Weighted_DIRT_type',
'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std', 'residence_time_max',
'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'Weighted_ASH_type',
'Weighted_PO_type', 'drilled_side_right_exit_side_cooling_water_flow_rate_std']
cols_y = "viscosity"
saved_model = ModelLoader({
"type": "mlflow.sklearn",
"path": "models/fy676a"
}).load_model()
elif index_no == 3294:
cols_x = ['Weighted_ASH_type', 'Weighted_NITROGEN_type', 'electric_energy_mean',
'drilled_side_left_inlet_side_cooling_water_temperature_std',
'seat_temperature_immediately_after_bof_mean',
'mixer_rotor_left_outlet_side_cooling_water_flow_rate_mean', 'humidity_mean',
'drilled_side_left_exit_side_cooling_water_flow_rate_mean',
'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'calendar_bank_load_max',
'drilled_side_right_inlet_side_cooling_water_flow_rate_mean', 'Weighted_PRI_type',
'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean', 'temperature_ws_side_std',
'dust_cv\nspeed_std', 'mixer_rotor_right_inlet_side_coolant_temperature_mean', 'ram_position_std',
'drilled_side_right_exit_side_cooling_water_temperature_std',
'calender_roll_upper_side__outlet__side_cooling_water_temperature_std',
'Weighted_Temperature during transportation_type[℃]']
cols_y = "viscosity"
saved_model = ModelLoader({
"type": "mlflow.sklearn",
"path": "models/fy664g"
}).load_model()
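# NOTE: any other index_no leaves cols_x/cols_y/saved_model as None, so the feature
# selection below will fail; only 1250 and 3294 are handled here.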
req_cols = cols_x + ['viscosity']
features = df_grouped[cols_x]
labels = df_grouped[cols_y]
df_grouped[req_cols].to_csv('final.csv')
# df_grouped[req_cols].to_csv('final.csv')
# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(features, labels, random_state=42, test_size=0.25)
print(f'x_train shape - {x_train.shape}')
print(f'x_test shape - {x_test.shape}')
print(f'y_train shape - {y_train.shape}')
print(f'y_test shape - {y_test.shape}')
saved_model = ModelLoader({
"type": "mlflow.sklearn",
"path": "models/fy676a"
}).load_model()
y_pred = saved_model.predict(x_test)
predictions = [round(value, 2) for value in y_pred]
@@ -72,7 +92,10 @@ def model_trainer(df_grouped):
def read_raw_data(raw_path, raw_skip_rows):
df = pd.read_excel(raw_path, skiprows=raw_skip_rows)
try:
df = pd.read_excel(raw_path, skiprows=raw_skip_rows)
except Exception as e:
df = pd.read_csv(raw_path)
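# Fall back to plain CSV when the raw file cannot be read as an Excel workbook
# (e.g. the fy664g_raw.csv input used in __main__ below).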
if len(df.columns) == len(RawConstants.columns):
logger.info(f"Total cols are {len(RawConstants.columns)} and are same as the df cols length")
df.columns = RawConstants.columns
@@ -141,12 +164,37 @@ def load_and_predict(df_grouped, index_no):
y_pred_full = saved_model.predict(features)
df_grouped['predicted_viscosity'] = y_pred_full
final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']]
final_df.to_csv('final_predicted_viscosity.csv')
final_df.to_csv(f'{index_no}_final_predicted_viscosity.csv')
elif index_no == 3294:
logger.info(f"Loading model for {index_no}")
saved_model = ModelLoader({
"type": "mlflow.sklearn",
"path": "models/fy664g"
}).load_model()
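# Feature columns below mirror the fy664g training columns listed in model_trainer above.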
cols_x = ['Weighted_ASH_type', 'Weighted_NITROGEN_type', 'electric_energy_mean',
'drilled_side_left_inlet_side_cooling_water_temperature_std',
'seat_temperature_immediately_after_bof_mean',
'mixer_rotor_left_outlet_side_cooling_water_flow_rate_mean', 'humidity_mean',
'drilled_side_left_exit_side_cooling_water_flow_rate_mean',
'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'calendar_bank_load_max',
'drilled_side_right_inlet_side_cooling_water_flow_rate_mean', 'Weighted_PRI_type',
'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean', 'temperature_ws_side_std',
'dust_cv\nspeed_std', 'mixer_rotor_right_inlet_side_coolant_temperature_mean',
'ram_position_std',
'drilled_side_right_exit_side_cooling_water_temperature_std',
'calender_roll_upper_side__outlet__side_cooling_water_temperature_std',
'Weighted_Temperature during transportation_type[℃]']
cols_y = "viscosity"
features = df_grouped[cols_x]
labels = df_grouped[cols_y]
y_pred_full = saved_model.predict(features)
df_grouped['predicted_viscosity'] = y_pred_full
final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']]
final_df.to_csv(f'{index_no}_final_predicted_viscosity.csv')
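# Each branch writes its predictions to <index_no>_final_predicted_viscosity.csv.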
def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosity_skip_rows):
logger.info(f"Starting prediction for {index_no}")
logger.info("Reading raw file data")
df = read_raw_data(raw_path, raw_skip_rows)
logger.info(f"Shape of raw df is {df.shape}")
@@ -194,7 +242,7 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
df_pickup_grouped, viscosity_df)
load_and_predict(df_grouped, index_no)
# model_trainer(df_grouped)
# model_trainer(df_grouped, index_no)
if __name__ == "__main__":
@@ -206,5 +254,11 @@ if __name__ == "__main__":
viscosity_file_path = 'viscosity_natural_rubber_data.xlsx'
viscosity_file_skip_rows = 3
start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
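# Second run: the fy664g line (index 3294) reads a CSV raw export and its own viscosity workbook.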
index_number = 3294
raw_file_path = 'fy664g_raw.csv'
raw_file_skip_rows = 0
viscosity_file_path = 'fy664g-viscosity.xlsx'
viscosity_file_skip_rows = 2
start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
except Exception as e:
logger.exception(f"Module failed because of error {e}")
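# MLmodel metadata for the MLflow sklearn artifact added in this commit (presumably the models/fy664g directory).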
flavors:
python_function:
env: conda.yaml
loader_module: mlflow.sklearn
model_path: model.pkl
python_version: 3.10.13
sklearn:
pickled_model: model.pkl
serialization_format: cloudpickle
sklearn_version: 1.2.2
utc_time_created: '2023-12-20 11:58:08.254129'
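# conda.yaml: environment spec referenced by the MLmodel metadata above.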
channels:
- conda-forge
dependencies:
- python=3.10.13
- pip
- pip:
- mlflow
- cloudpickle==3.0.0
- scikit-learn==1.2.2
name: mlflow-env
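# requirements.txt: pip pins matching the conda environment above.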
mlflow
cloudpickle==3.0.0
scikit-learn==1.2.2
\ No newline at end of file
@@ -185,7 +185,7 @@ class ViscosityConstants:
'Humidity during transportation__type2[%]'
]
req_cols = [
'Rubber No.', 'Batch No.', 'Index No',
'Rubber No.', 'Batch No.', 'Index No', 'Chemical weight (g)',
'Input rubber weight(0.1kg)', 'date', 'batch-date',
'Weight_type1', 'Weight_type2', 'Weighted_PO_type',
'Weighted_DIRT_type', 'Weighted_ASH_type', 'Weighted_VM_type',
......
@@ -213,7 +213,7 @@ def preprocess_bof_section(df, index_number, vis_df):
date_dict = mixer_section_start_end_time(df, index_number)
bof_merged_df_final = return_batch_no_df(df, vis_df, date_dict, index_number)
bof_merged_df_final = bof_merged_df_final[bof_merged_df_final['bof_batch_number'] != 0]
print(bof_merged_df_final.columns)
# print(bof_merged_df_final.columns)
grouped_cols = ['batch-date']
aggregate_dict = BofConstants.bof_aggregate_dict
df_bof_grouped = bof_merged_df_final.groupby(grouped_cols).agg(aggregate_dict).reset_index()
......
@@ -82,7 +82,7 @@ def return_batch_no_df(
value = day_df["discharge length"].max() - day_df["discharge length"].min()
day_length_dic[each_day] = value
print(day_length_dic)
# print(day_length_dic)
sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
sorted_viscosity_df["day"] = sorted_viscosity_df["Mixing date"].dt.date
......
@@ -455,10 +455,17 @@ def return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, index_no
extruder_flag_list.append('false')
extrud_flg_vms.append(0)
else:
start_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('start_time'))
end_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('end_time'))
# start_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('start_time'))
# end_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('end_time'))
if (value['Time Stamp'] > start_time) & (value['Time Stamp'] < end_time):
start_time = bof_date_dict.get(value["pickup_batch_date"]).get("start_time")
end_time = bof_date_dict.get(value["pickup_batch_date"]).get("end_time")
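# Compare as naive datetimes: strip any timezone offset (text after '+') before parsing;
# assumes datetime has been imported from the datetime module at the top of this file.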
timestamp_dt = datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S')
start_dt = datetime.strptime(start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')
end_dt = datetime.strptime(end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')
if (timestamp_dt > start_dt) & (timestamp_dt < end_dt):
extruder_flag_list.append('false')
extrud_flg_vms.append(0)
else:
......