Commit c0847703 authored by dasharatha.vamshi

added pickup for Fy664G

parent 648a1daf
...@@ -3,6 +3,9 @@ import warnings ...@@ -3,6 +3,9 @@ import warnings
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from loguru import logger from loguru import logger
from sklearn import metrics
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import train_test_split
from scripts.constants.constants import RawConstants from scripts.constants.constants import RawConstants
from scripts.core.model_loader import ModelLoader from scripts.core.model_loader import ModelLoader
...@@ -16,6 +19,58 @@ from scripts.section_utils.sheet_supply_section import preprocess_sheet_section ...@@ -16,6 +19,58 @@ from scripts.section_utils.sheet_supply_section import preprocess_sheet_section
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
def model_trainer(df_grouped):
    """Score the stored ``models/fy676a`` model on a hold-out split.

    No estimator is fitted here. The function

    1. writes the selected feature columns plus ``viscosity`` to ``final.csv``,
    2. splits the data 75/25 with ``random_state=42``,
    3. loads the model through ``ModelLoader`` and predicts on the test part,
    4. prints the split shapes and a dictionary of regression metrics.

    :param df_grouped: DataFrame holding every feature column listed below
        and the ``viscosity`` label column.
    :return: None (all results are printed).
    """
    target_col = "viscosity"
    feature_cols = [
        'temperature_ws_side_std',
        'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean',
        '_calendar_current_mean',
        'electric_energy_mean',
        'seat_temperature_immediately_after_bof_mean',
        'Weighted_NITROGEN_type',
        'ram_pressure_mean',
        'surface_temperature_center_std',
        'drilled_side_left_exit_side_cooling_water_temperature_mean',
        'Weighted_VM_type',
        'screw_operation_side_outlet_side_cooling_water_flow_rate_std',
        'Weighted_DIRT_type',
        'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std',
        'residence_time_max',
        'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean',
        'Weighted_ASH_type',
        'Weighted_PO_type',
        'drilled_side_right_exit_side_cooling_water_flow_rate_std',
    ]
    export_cols = feature_cols + ['viscosity']

    features = df_grouped[feature_cols]
    labels = df_grouped[target_col]
    # Snapshot of exactly the columns used for evaluation.
    df_grouped[export_cols].to_csv('final.csv')

    # Hold out a quarter of the rows for scoring.
    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, random_state=42, test_size=0.25)
    for part_name, part in (("x_train", x_train), ("x_test", x_test),
                            ("y_train", y_train), ("y_test", y_test)):
        print(f'{part_name} shape - {part.shape}')

    loader = ModelLoader({
        "type": "mlflow.sklearn",
        "path": "models/fy676a"
    })
    saved_model = loader.load_model()
    predictions = [round(value, 2) for value in saved_model.predict(x_test)]

    # Evaluated in this fixed order so that a failing metric surfaces the
    # same error it always did.
    scorers = (
        ("mae", metrics.mean_absolute_error),
        ("mse", metrics.mean_squared_error),
        ("mape", metrics.mean_absolute_percentage_error),
        ("evs", metrics.explained_variance_score),
        ("max_err", metrics.max_error),
        ("r2", metrics.r2_score),
        ("median_ae", metrics.median_absolute_error),
        ("poisson", metrics.mean_poisson_deviance),
        ("gamma", metrics.mean_gamma_deviance),
    )
    scores = {}
    for key, scorer in scorers:
        scores[key] = scorer(y_test, predictions)

    # Display order differs from evaluation order on purpose.
    metric_dictionary = {
        "Mean Absolute Error (MAE)": scores["mae"],
        "Mean Squared Error (MSE)": scores["mse"],
        "Root Mean Squared Error (RMSE)": np.sqrt(scores["mse"]),
        "Mean Absolute Percentage Error (MAPE)": scores["mape"],
        "Explained Variance Score": scores["evs"],
        "Max Error": scores["max_err"],
        "Median Absolute Error": scores["median_ae"],
        "R2 Score": scores["r2"],
        "Mean Gamma Deviance": scores["gamma"],
        "Mean Poisson Deviance": scores["poisson"],
    }
    print(metric_dictionary)
def read_raw_data(raw_path, raw_skip_rows): def read_raw_data(raw_path, raw_skip_rows):
df = pd.read_excel(raw_path, skiprows=raw_skip_rows) df = pd.read_excel(raw_path, skiprows=raw_skip_rows)
if len(df.columns) == len(RawConstants.columns): if len(df.columns) == len(RawConstants.columns):
...@@ -139,6 +194,7 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit ...@@ -139,6 +194,7 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
df_pickup_grouped, viscosity_df) df_pickup_grouped, viscosity_df)
load_and_predict(df_grouped, index_no) load_and_predict(df_grouped, index_no)
# model_trainer(df_grouped)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -241,7 +241,7 @@ def get_bof_batch_date(bof_batch_df, index_number): ...@@ -241,7 +241,7 @@ def get_bof_batch_date(bof_batch_df, index_number):
raise Exception(str(err)) raise Exception(str(err))
def return_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_df, index_number): def return_fy676a_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_df, index_number):
try: try:
logger.info('Getting pickup batch date dataframe') logger.info('Getting pickup batch date dataframe')
raw_df['day'] = raw_df['Time Stamp'].dt.date raw_df['day'] = raw_df['Time Stamp'].dt.date
...@@ -333,8 +333,10 @@ def return_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_d ...@@ -333,8 +333,10 @@ def return_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_d
start_time = bof_date_dict.get(value["pickup_batch_date"]).get("start_time") start_time = bof_date_dict.get(value["pickup_batch_date"]).get("start_time")
end_time = bof_date_dict.get(value["pickup_batch_date"]).get("end_time") end_time = bof_date_dict.get(value["pickup_batch_date"]).get("end_time")
if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') > datetime.strptime(start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \ if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') > datetime.strptime(
(datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') < datetime.strptime(end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')): start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
(datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') < datetime.strptime(
end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
extruder_flag_list.append('false') extruder_flag_list.append('false')
extrud_flg_vms.append(0) extrud_flg_vms.append(0)
else: else:
...@@ -361,8 +363,122 @@ def return_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_d ...@@ -361,8 +363,122 @@ def return_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_d
return test_pick_df return test_pick_df
except Exception as err: except Exception as err:
logger.error(f'Error in adding batch to pick section: {str(err)}') logger.error(f'Error in adding batch to fy676a pick section: {str(err)}')
logger.error(traceback.format_exc()) logger.error(traceback.format_exc())
raise Exception(str(err))
def _fy664g_daily_extruded_length(pick_df):
    """Map each day to the span of 'length passed through.1' (max - min).

    A non-positive minimum is treated as 0 and a non-positive span as 0.
    """
    day_length = {}
    for each_day in pick_df['day'].unique():
        passed = pick_df.loc[pick_df['day'] == each_day, 'length passed through.1']
        minimum = passed.min()
        if minimum <= 0:
            minimum = 0
        span = passed.max() - minimum
        day_length[each_day] = 0 if span <= 0 else span
    return day_length


def _fy664g_assign_batch_numbers(pick_df, discharge_dict):
    """Return one batch number per row of ``pick_df``.

    ``discharge_dict`` maps day -> {batch number: cumulative length}. A row
    gets the first batch whose cumulative length is not exceeded by the row's
    'length passed through.1'.
    """
    batch_numbers = []
    for day, discharge_length in zip(pick_df['day'], pick_df['length passed through.1']):
        if discharge_length == 0:
            batch_numbers.append(0)
            continue
        day_batches = discharge_dict.get(day)
        if day_batches is None:
            # No viscosity batches recorded for this day.
            batch_numbers.append(np.nan)
            continue
        for batch_no, cumulative_length in day_batches.items():
            if discharge_length <= cumulative_length:
                batch_numbers.append(batch_no)
                break
        else:
            # Length exceeds every batch of the day: carry the previous row's
            # batch forward. When there is no previous row yet, fall back to
            # NaN instead of failing on an empty list.
            batch_numbers.append(batch_numbers[-1] if batch_numbers else np.nan)
    return batch_numbers


def return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, index_no):
    """Attach a pickup batch number to every pickup-section row (fy664g).

    The length extruded per day is spread over that day's mixing batches in
    proportion to their input rubber weight; each pickup row is then matched
    to the batch whose cumulative length covers the row's passed-through
    length.

    :param raw_df: raw process data with at least 'Time Stamp' (datetime),
        'Mixing batch number' and the columns named in ``PickupConstants``.
        NOTE: the columns 'day' and 'batch-date' are added to, and
        'Mixing batch number' is cast to float on, this frame in place.
    :param viscosity_df: viscosity data with 'Mixing date' (datetime),
        'Batch No.' and 'Input rubber weight(0.1kg)'.
    :param bof_date_dict: mapping ``'Batch_<no>_<day>'`` -> dict with
        'start_time' and 'end_time'.
    :param index_no: value of 'Size No (INDEX No).6' to keep.
    :return: pickup rows for ``index_no`` sorted by 'Time Stamp', with the
        added columns 'batch_no', 'pickup_batch_date', 'pickup_flag',
        'pickup_batch_diff', 'pickup_batch_number' and a rebuilt 'batch-date'.
    :raises Exception: any failure is logged and re-raised as ``Exception``.
    """
    try:
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        raw_df['Mixing batch number'] = raw_df['Mixing batch number'].astype('float')
        raw_df['batch-date'] = 'Batch_' + raw_df['Mixing batch number'].astype('str') + '_' + raw_df['day'].astype(
            'str')

        pick_add_cols = PickupConstants.pick_cols + PickupConstants.pick_additional_cols
        sorted_pick_df = raw_df[pick_add_cols].sort_values(by="Time Stamp", ascending=True)
        # Explicit copy: new columns are added below and must not be written
        # through a slice of raw_df.
        test_pick_df = sorted_pick_df[sorted_pick_df['Size No (INDEX No).6'] == index_no].copy()

        day_length_dic = _fy664g_daily_extruded_length(test_pick_df)

        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date.astype('str')
        extrud_visc_df = sorted_viscosity_df[
            ['Batch No.', 'Input rubber weight(0.1kg)', 'day', 'Mixing date']].copy()
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['day'].map(day_length_dic).fillna(0)

        # Weight column is in units of 0.1 kg, hence the division by 10.
        daily_sum_weight = extrud_visc_df.groupby('day')['Input rubber weight(0.1kg)'].sum() / 10
        # Metres of extrudate per kg of rubber for the batch's day.
        extrud_visc_df['m/kg'] = (
            extrud_visc_df['length_from_extruder'] / extrud_visc_df['day'].map(daily_sum_weight))
        batch_length = (
            extrud_visc_df['m/kg'] * extrud_visc_df['Input rubber weight(0.1kg)'] / 10).astype('float64')
        extrud_visc_df['batch_length'] = batch_length.apply(math.ceil)
        extrud_visc_df['cumulative_length'] = extrud_visc_df.groupby('day')['batch_length'].cumsum()

        # day -> {batch number: cumulative length}, batches in mixing order.
        discharge_dict = {
            day: dict(zip(group['Batch No.'], group['cumulative_length']))
            for day, group in extrud_visc_df.groupby('day')
        }

        test_pick_df['batch_no'] = _fy664g_assign_batch_numbers(test_pick_df, discharge_dict)
        test_pick_df['batch_no'] = test_pick_df['batch_no'].astype('float')
        test_pick_df['pickup_batch_date'] = 'Batch_' + test_pick_df['batch_no'].astype('str') + '_' + test_pick_df[
            'day'].astype('str')

        extruder_flag_list = []
        extrud_flg_vms = []
        for time_stamp, batch_no, batch_key in zip(
                test_pick_df['Time Stamp'], test_pick_df['batch_no'], test_pick_df['pickup_batch_date']):
            # Rows without a batch (0) and rows whose batch has no BOF window
            # (e.g. NaN batch numbers) cannot be checked against a window.
            window = None if batch_no == 0.0 else bof_date_dict.get(batch_key)
            if window is None:
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)
                continue
            start_time = np.datetime64(window.get('start_time'))
            end_time = np.datetime64(window.get('end_time'))
            # NOTE(review): both outcomes record 'false' / 0, exactly as the
            # code did before this cleanup. Presumably the out-of-window case
            # was meant to differ - confirm before changing.
            if (time_stamp > start_time) & (time_stamp < end_time):
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)
            else:
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)

        test_pick_df['pickup_flag'] = extruder_flag_list
        test_pick_df['pickup_batch_diff'] = extrud_flg_vms
        test_pick_df['pickup_batch_number'] = test_pick_df['batch_no'] - test_pick_df['pickup_batch_diff'].astype(
            'float')
        test_pick_df['batch-date'] = 'Batch_' + test_pick_df['pickup_batch_number'].astype('str') + '_' + \
                                     test_pick_df['day'].astype('str')
        return test_pick_df
    except Exception as err:
        logger.error(f"Error while forming pickup batch number for fy664g: {str(err)}")
        logger.error(traceback.format_exc())
        raise Exception(str(err)) from err
def preprocess_pickup_section(raw_df, index_number, viscosity_df): def preprocess_pickup_section(raw_df, index_number, viscosity_df):
...@@ -392,19 +508,16 @@ def preprocess_pickup_section(raw_df, index_number, viscosity_df): ...@@ -392,19 +508,16 @@ def preprocess_pickup_section(raw_df, index_number, viscosity_df):
'float').astype(str) + '_' + sorted_viscosity_df['day'].astype(str) 'float').astype(str) + '_' + sorted_viscosity_df['day'].astype(str)
sorted_viscosity_df = sorted_viscosity_df[sorted_viscosity_df['Index No'] == index_number] sorted_viscosity_df = sorted_viscosity_df[sorted_viscosity_df['Index No'] == index_number]
weight_date_dict = {}
weight_batch_dict = {}
for each_day in dt_list:
day_df = sorted_viscosity_df[sorted_viscosity_df['day'] == each_day]
summed = day_df['Input rubber weight(0.1kg)'].astype('float64').sum()
weight_date_dict[each_day] = summed
weight_batch_dict[each_day] = summed
date_dict = get_mixer_batch_date(raw_df, index_number) date_dict = get_mixer_batch_date(raw_df, index_number)
bof_test_df = return_batch_no_bof_df(raw_df, sorted_viscosity_df, date_dict, index_number) bof_test_df = return_batch_no_bof_df(raw_df, sorted_viscosity_df, date_dict, index_number)
bof_date_dict = get_bof_batch_date(bof_test_df, index_number) bof_date_dict = get_bof_batch_date(bof_test_df, index_number)
pick_merged_batch_df = return_pick_batch_no_df(raw_df, sorted_viscosity_df, bof_date_dict, pick_merged_batch_df = pd.DataFrame()
if index_number == 1250:
pick_merged_batch_df = return_fy676a_pick_batch_no_df(raw_df, sorted_viscosity_df, bof_date_dict,
bof_test_df, index_number) bof_test_df, index_number)
elif index_number == 3294:
pick_merged_batch_df = return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict,
index_number)
# Merging pick data with viscosity data on date-batch column # Merging pick data with viscosity data on date-batch column
pickup_merged_df_final = pd.merge(pick_merged_batch_df, sorted_viscosity_df[['batch-date', 'viscosity']], pickup_merged_df_final = pd.merge(pick_merged_batch_df, sorted_viscosity_df[['batch-date', 'viscosity']],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment