Commit c0847703 authored Dec 20, 2023 by dasharatha.vamshi
added pickup for Fy664G
parent 648a1daf
Showing 2 changed files with 183 additions and 14 deletions (+183 −14)

app.py (+56 −0)
scripts/section_utils/pickup_section.py (+127 −14)
app.py (view file @ c0847703)
...
@@ -3,6 +3,9 @@ import warnings
import numpy as np
import pandas as pd
from loguru import logger
from sklearn import metrics
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import train_test_split
from scripts.constants.constants import RawConstants
from scripts.core.model_loader import ModelLoader
...
@@ -16,6 +19,58 @@ from scripts.section_utils.sheet_supply_section import preprocess_sheet_section
warnings.filterwarnings("ignore")


def model_trainer(df_grouped):
    cols_x = ['temperature_ws_side_std',
              'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean',
              '_calendar_current_mean',
              'electric_energy_mean',
              'seat_temperature_immediately_after_bof_mean',
              'Weighted_NITROGEN_type',
              'ram_pressure_mean',
              'surface_temperature_center_std',
              'drilled_side_left_exit_side_cooling_water_temperature_mean',
              'Weighted_VM_type',
              'screw_operation_side_outlet_side_cooling_water_flow_rate_std',
              'Weighted_DIRT_type',
              'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std',
              'residence_time_max',
              'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean',
              'Weighted_ASH_type',
              'Weighted_PO_type',
              'drilled_side_right_exit_side_cooling_water_flow_rate_std']
    cols_y = "viscosity"
    req_cols = cols_x + ['viscosity']
    features = df_grouped[cols_x]
    labels = df_grouped[cols_y]
    df_grouped[req_cols].to_csv('final.csv')
    # Split the data into training and testing sets
    x_train, x_test, y_train, y_test = train_test_split(features, labels, random_state=42, test_size=0.25)
    print(f'x_train shape - {x_train.shape}')
    print(f'x_test shape - {x_test.shape}')
    print(f'y_train shape - {y_train.shape}')
    print(f'y_test shape - {y_test.shape}')
    saved_model = ModelLoader({"type": "mlflow.sklearn", "path": "models/fy676a"}).load_model()
    y_pred = saved_model.predict(x_test)
    predictions = [round(value, 2) for value in y_pred]
    metric_dictionary = dict()
    mae = metrics.mean_absolute_error(y_test, predictions)
    mse = metrics.mean_squared_error(y_test, predictions)
    mape = metrics.mean_absolute_percentage_error(y_test, predictions)
    explained_variance_score = metrics.explained_variance_score(y_test, predictions)
    max_error = metrics.max_error(y_test, predictions)
    r2_score = metrics.r2_score(y_test, predictions)
    median_absolute_error = metrics.median_absolute_error(y_test, predictions)
    mean_poisson_deviance = metrics.mean_poisson_deviance(y_test, predictions)
    mean_gamma_deviance = metrics.mean_gamma_deviance(y_test, predictions)
    metric_dictionary["Mean Absolute Error (MAE)"] = mae
    metric_dictionary["Mean Squared Error (MSE)"] = mse
    metric_dictionary["Root Mean Squared Error (RMSE)"] = np.sqrt(mse)
    metric_dictionary["Mean Absolute Percentage Error (MAPE)"] = mape
    metric_dictionary["Explained Variance Score"] = explained_variance_score
    metric_dictionary["Max Error"] = max_error
    metric_dictionary["Median Absolute Error"] = median_absolute_error
    metric_dictionary["R2 Score"] = r2_score
    metric_dictionary["Mean Gamma Deviance"] = mean_gamma_deviance
    metric_dictionary["Mean Poisson Deviance"] = mean_poisson_deviance
    print(metric_dictionary)


def read_raw_data(raw_path, raw_skip_rows):
    df = pd.read_excel(raw_path, skiprows=raw_skip_rows)
    if len(df.columns) == len(RawConstants.columns):
...
@@ -139,6 +194,7 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
        df_pickup_grouped, viscosity_df)
    load_and_predict(df_grouped, index_no)
    # model_trainer(df_grouped)


if __name__ == "__main__":
...
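For context, the evaluation block added in model_trainer follows a standard scikit-learn pattern: split the data, predict, and collect metrics into a dictionary. Below is a minimal, self-contained sketch of that pattern; the toy data from make_regression and the locally fitted ExtraTreesRegressor are illustrative assumptions and not part of this commit, which instead loads a saved MLflow model via ModelLoader.

# Sketch only: toy data and a locally trained regressor stand in for the
# saved mlflow.sklearn model used in app.py.
import numpy as np
from sklearn import metrics
from sklearn.datasets import make_regression
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=200, n_features=5, noise=0.1, random_state=42)
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.25)

model = ExtraTreesRegressor(random_state=42).fit(x_train, y_train)
predictions = [round(v, 2) for v in model.predict(x_test)]

# Same dictionary-of-metrics pattern as model_trainer (a subset of the metrics).
metric_dictionary = {
    "Mean Absolute Error (MAE)": metrics.mean_absolute_error(y_test, predictions),
    "Mean Squared Error (MSE)": metrics.mean_squared_error(y_test, predictions),
    "Root Mean Squared Error (RMSE)": np.sqrt(metrics.mean_squared_error(y_test, predictions)),
    "R2 Score": metrics.r2_score(y_test, predictions),
}
print(metric_dictionary)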
scripts/section_utils/pickup_section.py (view file @ c0847703)
...
@@ -241,7 +241,7 @@ def get_bof_batch_date(bof_batch_df, index_number):
        raise Exception(str(err))


-def return_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_df, index_number):
+def return_fy676a_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_df, index_number):
    try:
        logger.info('Getting pickup batch date dataframe')
        raw_df['day'] = raw_df['Time Stamp'].dt.date
...
@@ -333,8 +333,10 @@ def return_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_d
                start_time = bof_date_dict.get(value["pickup_batch_date"]).get("start_time")
                end_time = bof_date_dict.get(value["pickup_batch_date"]).get("end_time")
                if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') > datetime.strptime(start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                        (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') < datetime.strptime(end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
                else:
...
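The window test in this hunk reduces to comparing a row's timestamp against the batch's BOF start and end times after stripping any '+HH:MM' timezone suffix. A minimal sketch of that check is below; the timestamp strings are made-up values, not data from this repository.

# Sketch of the strictly-between window check used above.
from datetime import datetime

fmt = '%Y-%m-%d %H:%M:%S'
row_ts = '2023-12-01 10:15:00+09:00'       # assumed example value
start_time = '2023-12-01 10:00:00+09:00'   # assumed example value
end_time = '2023-12-01 10:30:00+09:00'     # assumed example value

in_window = (
    datetime.strptime(row_ts.split('+')[0], fmt) > datetime.strptime(start_time.split('+')[0], fmt)
) & (
    datetime.strptime(row_ts.split('+')[0], fmt) < datetime.strptime(end_time.split('+')[0], fmt)
)
print(in_window)  # True for these example values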
@@ -361,8 +363,122 @@ def return_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_d
        return test_pick_df
    except Exception as err:
-        logger.error(f'Error in adding batch to pick section: {str(err)}')
+        logger.error(f'Error in adding batch to fy676a pick section: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))


def return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, index_no):
    try:
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        raw_df['Mixing batch number'] = raw_df['Mixing batch number'].astype('float')
        raw_df['batch-date'] = 'Batch_' + raw_df['Mixing batch number'].astype('str') + '_' + raw_df['day'].astype('str')
        pick_add_cols = PickupConstants.pick_cols + PickupConstants.pick_additional_cols
        pick_df = raw_df[pick_add_cols]
        sorted_pick_df = pick_df.sort_values(by="Time Stamp", ascending=True)
        sorted_pick_df = sorted_pick_df[sorted_pick_df['Size No (INDEX No).6'] == index_no]
        dt_list = list(sorted_pick_df['day'].unique())
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_pick_df[sorted_pick_df['day'] == each_day]
            minimum = day_df['length passed through.1'].min()
            if minimum <= 0:
                minimum = 0
            if day_df['length passed through.1'].max() - minimum <= 0:
                value = 0
            else:
                value = day_df['length passed through.1'].max() - minimum
            day_length_dic[each_day] = value
        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date
        sorted_viscosity_df['day'] = sorted_viscosity_df['day'].astype('str')
        extrud_visc_df = sorted_viscosity_df[['Batch No.', 'Input rubber weight(0.1kg)', 'day', 'Mixing date']]
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['day'].map(day_length_dic)
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['length_from_extruder'].fillna(0)
        daily_sum_weight = extrud_visc_df.groupby('day')['Input rubber weight(0.1kg)'].sum() / 10
        # Add a new column 'm/kg' by dividing 'length_from_extruder' by the sum for each day
        extrud_visc_df['m/kg'] = extrud_visc_df.apply(lambda row: row['length_from_extruder'] / daily_sum_weight[row['day']], axis=1)
        extrud_visc_df['batch_length'] = extrud_visc_df.apply(lambda row: row['m/kg'] * row['Input rubber weight(0.1kg)'] / 10, axis=1).astype('float64')
        extrud_visc_df['batch_length'] = extrud_visc_df['batch_length'].apply(math.ceil)
        extrud_visc_df['cumulative_length'] = extrud_visc_df.groupby('day')['batch_length'].cumsum()
        discharge_dict = extrud_visc_df.groupby('day').apply(lambda group: group.set_index('Batch No.').to_dict()['cumulative_length']).to_dict()
        test_sorted_extr_df = sorted_pick_df
        test_pick_df = test_sorted_extr_df
        # Initialize an empty list to store batch numbers
        batch_numbers = []
        # Iterate through each row in the DataFrame
        for index, row in test_pick_df.iterrows():
            day = row['day']
            discharge_length = row['length passed through.1']
            if discharge_length == 0:
                batch_numbers.append(0)
            else:
                # Check if the day is in the dictionary
                if day in discharge_dict:
                    # Check if discharge length is less than or equal to the corresponding batch length
                    batch_length_dict = discharge_dict[day]
                    for batch_no, batch_length in batch_length_dict.items():
                        if discharge_length <= batch_length:
                            batch_numbers.append(batch_no)
                            break
                    else:
                        # If no match is found in the dictionary, assign NaN to batch number
                        batch_numbers.append(batch_numbers[-1])
                else:
                    # If day is not in the dictionary, assign NaN to batch number
                    batch_numbers.append(np.nan)
        # Add the 'batch_no' column to the DataFrame
        test_pick_df['batch_no'] = batch_numbers
        test_pick_df['batch_no'] = test_pick_df['batch_no'].astype('float')
        test_pick_df['pickup_batch_date'] = 'Batch_' + test_pick_df['batch_no'].astype('str') + '_' + test_pick_df['day'].astype('str')
        extruder_flag_list = []
        extrud_flg_vms = []
        for i, value in test_pick_df.iterrows():
            if value['batch_no'] == 0.0:
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)
            else:
                start_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('start_time'))
                end_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('end_time'))
                if (value['Time Stamp'] > start_time) & (value['Time Stamp'] < end_time):
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
                else:
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
        test_pick_df['pickup_flag'] = extruder_flag_list
        test_pick_df['pickup_batch_diff'] = extrud_flg_vms
        test_pick_df['pickup_batch_number'] = test_pick_df['batch_no'] - test_pick_df['pickup_batch_diff'].astype('float')
        test_pick_df['batch-date'] = 'Batch_' + test_pick_df['pickup_batch_number'].astype('str') + '_' + \
            test_pick_df['day'].astype('str')
        return test_pick_df
    except Exception as err:
        logger.error(f"Error while forming pickup batch number for fy664g: {str(err)}")
        logger.error(traceback.format_exc())
        raise Exception(str(err))


def preprocess_pickup_section(raw_df, index_number, viscosity_df):
...
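The core of the new return_fy664g_pick_batch_no_df is a per-day cumulative-length lookup: batch lengths are accumulated over the day, and a pickup row's discharge length is assigned to the first batch whose cumulative length covers it. Below is a minimal sketch of that lookup; the helper name assign_batch and the toy numbers are illustrative assumptions, and the "no matching batch" fallback here returns NaN, whereas the committed code reuses the previous batch number.

# Sketch only: day -> {batch no -> cumulative length (m)} with made-up values.
import numpy as np

discharge_dict = {
    '2023-12-01': {101.0: 40, 102.0: 85, 103.0: 120},
}

def assign_batch(day, discharge_length):
    # Zero discharge length means no batch (mirrors the committed logic).
    if discharge_length == 0:
        return 0
    batch_length_dict = discharge_dict.get(day)
    if batch_length_dict is None:
        return np.nan
    # First batch whose cumulative length covers the discharge length.
    for batch_no, batch_length in batch_length_dict.items():
        if discharge_length <= batch_length:
            return batch_no
    return np.nan  # simplified fallback for lengths beyond the last batch

print(assign_batch('2023-12-01', 60))   # 102.0
print(assign_batch('2023-12-02', 60))   # nan (day not in the dictionary)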
@@ -392,19 +508,16 @@ def preprocess_pickup_section(raw_df, index_number, viscosity_df):
            'float').astype(str) + '_' + sorted_viscosity_df['day'].astype(str)
        sorted_viscosity_df = sorted_viscosity_df[sorted_viscosity_df['Index No'] == index_number]
        weight_date_dict = {}
        weight_batch_dict = {}
        for each_day in dt_list:
            day_df = sorted_viscosity_df[sorted_viscosity_df['day'] == each_day]
            summed = day_df['Input rubber weight(0.1kg)'].astype('float64').sum()
            weight_date_dict[each_day] = summed
            weight_batch_dict[each_day] = summed
        date_dict = get_mixer_batch_date(raw_df, index_number)
        bof_test_df = return_batch_no_bof_df(raw_df, sorted_viscosity_df, date_dict, index_number)
        bof_date_dict = get_bof_batch_date(bof_test_df, index_number)
-        pick_merged_batch_df = return_pick_batch_no_df(raw_df, sorted_viscosity_df, bof_date_dict,
+        pick_merged_batch_df = pd.DataFrame()
+        if index_number == 1250:
+            pick_merged_batch_df = return_fy676a_pick_batch_no_df(raw_df, sorted_viscosity_df, bof_date_dict,
+                                                                  bof_test_df, index_number)
+        elif index_number == 3294:
+            pick_merged_batch_df = return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, index_number)
        # Merging pick data with viscosity data on date-batch column
        pickup_merged_df_final = pd.merge(pick_merged_batch_df, sorted_viscosity_df[['batch-date', 'viscosity']],
...
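After dispatching on index_number, the pickup frame is merged with the viscosity data; the in-code comment says the join is on the date-batch column, though the merge keyword arguments are elided in this hunk. A minimal sketch of such a merge is below; the toy frames and the assumed on='batch-date' key are illustrative only.

# Sketch only: made-up frames joined on an assumed 'batch-date' key.
import pandas as pd

pick_merged_batch_df = pd.DataFrame({
    'batch-date': ['Batch_101.0_2023-12-01', 'Batch_102.0_2023-12-01'],
    'pickup_batch_number': [101.0, 102.0],
})
sorted_viscosity_df = pd.DataFrame({
    'batch-date': ['Batch_101.0_2023-12-01', 'Batch_102.0_2023-12-01'],
    'viscosity': [62.5, 63.1],
})

pickup_merged_df_final = pd.merge(
    pick_merged_batch_df,
    sorted_viscosity_df[['batch-date', 'viscosity']],
    on='batch-date',
)
print(pickup_merged_df_final)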