Commit c0847703, authored by dasharatha.vamshi

added pickup for Fy664G

parent 648a1daf
......@@ -3,6 +3,9 @@ import warnings
import numpy as np
import pandas as pd
from loguru import logger
from sklearn import metrics
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import train_test_split
from scripts.constants.constants import RawConstants
from scripts.core.model_loader import ModelLoader
......@@ -16,6 +19,58 @@ from scripts.section_utils.sheet_supply_section import preprocess_sheet_section
warnings.filterwarnings("ignore")
def model_trainer(df_grouped):
    """Score the stored ``models/fy676a`` model on a hold-out split of ``df_grouped``.

    Despite its name, this function does not fit a model: it loads an
    existing one through ``ModelLoader`` and only evaluates it.

    Steps:
      1. Select the feature columns and the ``viscosity`` target.
      2. Write features plus target to ``final.csv`` in the working directory.
      3. Hold out 25% of the rows (``random_state=42``) as the test split and
         print the shape of each split.
      4. Predict on the test split, round each prediction to 2 decimals and
         print a dictionary of regression metrics.

    Args:
        df_grouped: DataFrame containing every feature column listed below
            and a ``viscosity`` column.

    Returns:
        None. Results are printed to stdout.
    """
    feature_names = [
        'temperature_ws_side_std',
        'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean',
        '_calendar_current_mean',
        'electric_energy_mean',
        'seat_temperature_immediately_after_bof_mean',
        'Weighted_NITROGEN_type',
        'ram_pressure_mean',
        'surface_temperature_center_std',
        'drilled_side_left_exit_side_cooling_water_temperature_mean',
        'Weighted_VM_type',
        'screw_operation_side_outlet_side_cooling_water_flow_rate_std',
        'Weighted_DIRT_type',
        'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std',
        'residence_time_max',
        'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean',
        'Weighted_ASH_type',
        'Weighted_PO_type',
        'drilled_side_right_exit_side_cooling_water_flow_rate_std',
    ]
    target_name = "viscosity"

    features = df_grouped[feature_names]
    labels = df_grouped[target_name]
    # Snapshot of exactly the columns used for evaluation.
    df_grouped[feature_names + ['viscosity']].to_csv('final.csv')

    # Split the data into training and testing sets
    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, random_state=42, test_size=0.25)
    named_splits = (
        ('x_train', x_train),
        ('x_test', x_test),
        ('y_train', y_train),
        ('y_test', y_test),
    )
    for split_name, split_data in named_splits:
        print(f'{split_name} shape - {split_data.shape}')

    loader_config = {
        "type": "mlflow.sklearn",
        "path": "models/fy676a"
    }
    saved_model = ModelLoader(loader_config).load_model()
    predictions = [round(value, 2) for value in saved_model.predict(x_test)]

    def _score(metric_fn):
        # Every metric is evaluated against the same hold-out labels.
        return metric_fn(y_test, predictions)

    # The metrics are computed in a fixed sequence so that, if one of them
    # rejects the data, the first exception raised is always the same one.
    mae = _score(metrics.mean_absolute_error)
    mse = _score(metrics.mean_squared_error)
    mape = _score(metrics.mean_absolute_percentage_error)
    explained_variance = _score(metrics.explained_variance_score)
    largest_error = _score(metrics.max_error)
    r_squared = _score(metrics.r2_score)
    median_abs_error = _score(metrics.median_absolute_error)
    poisson_deviance = _score(metrics.mean_poisson_deviance)
    gamma_deviance = _score(metrics.mean_gamma_deviance)

    metric_dictionary = {
        "Mean Absolute Error (MAE)": mae,
        "Mean Squared Error (MSE)": mse,
        "Root Mean Squared Error (RMSE)": np.sqrt(mse),
        "Mean Absolute Percentage Error (MAPE)": mape,
        "Explained Variance Score": explained_variance,
        "Max Error": largest_error,
        "Median Absolute Error": median_abs_error,
        "R2 Score": r_squared,
        "Mean Gamma Deviance": gamma_deviance,
        "Mean Poisson Deviance": poisson_deviance,
    }
    print(metric_dictionary)
def read_raw_data(raw_path, raw_skip_rows):
df = pd.read_excel(raw_path, skiprows=raw_skip_rows)
if len(df.columns) == len(RawConstants.columns):
......@@ -139,6 +194,7 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
df_pickup_grouped, viscosity_df)
load_and_predict(df_grouped, index_no)
# model_trainer(df_grouped)
if __name__ == "__main__":
......
......@@ -241,7 +241,7 @@ def get_bof_batch_date(bof_batch_df, index_number):
raise Exception(str(err))
def return_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_df, index_number):
def return_fy676a_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_df, index_number):
try:
logger.info('Getting pickup batch date dataframe')
raw_df['day'] = raw_df['Time Stamp'].dt.date
......@@ -333,8 +333,10 @@ def return_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_d
start_time = bof_date_dict.get(value["pickup_batch_date"]).get("start_time")
end_time = bof_date_dict.get(value["pickup_batch_date"]).get("end_time")
if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') > datetime.strptime(start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
(datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') < datetime.strptime(end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') > datetime.strptime(
start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
(datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') < datetime.strptime(
end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
extruder_flag_list.append('false')
extrud_flg_vms.append(0)
else:
......@@ -361,8 +363,122 @@ def return_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_d
return test_pick_df
except Exception as err:
logger.error(f'Error in adding batch to pick section: {str(err)}')
logger.error(f'Error in adding batch to fy676a pick section: {str(err)}')
logger.error(traceback.format_exc())
raise Exception(str(err))
def _fy664g_daily_extruded_length(pick_df):
    """Return ``{day: length}`` with the length that passed through on each day.

    The length is ``max - min`` of ``'length passed through.1'`` for the day,
    where a non-positive minimum is treated as 0 and a non-positive span is
    reported as 0.
    """
    length_col = 'length passed through.1'
    day_length = {}
    for each_day, day_df in pick_df.groupby('day', sort=False):
        lowest = day_df[length_col].min()
        if lowest <= 0:
            lowest = 0
        span = day_df[length_col].max() - lowest
        day_length[each_day] = 0 if span <= 0 else span
    return day_length


def _fy664g_cumulative_batch_lengths(viscosity_df, day_length):
    """Return ``{day: {batch_no: cumulative_length}}`` in mixing-date order.

    Each day's length is shared between that day's batches in proportion to
    their input rubber weight, rounded up per batch and accumulated.
    """
    weight_col = 'Input rubber weight(0.1kg)'
    # sort_values returns a new frame, so the caller's viscosity_df is untouched.
    ordered = viscosity_df.sort_values(by="Mixing date", ascending=True)
    ordered['day'] = ordered['Mixing date'].dt.date.astype('str')
    batches = ordered[['Batch No.', weight_col, 'day', 'Mixing date']].copy()

    # Days without pickup rows contribute no length.
    batches['length_from_extruder'] = batches['day'].map(day_length).fillna(0)
    # The weight column is divided by 10 (its header gives the unit as 0.1 kg).
    daily_sum_weight = batches.groupby('day')[weight_col].sum() / 10
    # Column arithmetic instead of row-wise DataFrame.apply, so an empty
    # viscosity frame does not depend on how apply(axis=1) treats zero rows.
    batches['m/kg'] = batches['length_from_extruder'] / batches['day'].map(daily_sum_weight)
    batch_length = (batches['m/kg'] * batches[weight_col] / 10).astype('float64')
    batches['batch_length'] = batch_length.apply(math.ceil)
    batches['cumulative_length'] = batches.groupby('day')['batch_length'].cumsum()

    return {
        day: dict(zip(group['Batch No.'], group['cumulative_length']))
        for day, group in batches.groupby('day')
    }


def _fy664g_assign_batch_numbers(pick_df, discharge_dict):
    """Return one batch number per row of ``pick_df``, in row order.

    * length == 0 -> 0
    * day missing from ``discharge_dict`` -> NaN
    * otherwise the first batch whose cumulative length covers the row
    * if no batch covers it, the previous row's batch number (NaN when there
      is no previous row)
    """
    batch_numbers = []
    rows = zip(pick_df['day'], pick_df['length passed through.1'])
    for day, discharge_length in rows:
        if discharge_length == 0:
            batch_numbers.append(0)
            continue
        cumulative_by_batch = discharge_dict.get(day)
        if cumulative_by_batch is None:
            batch_numbers.append(np.nan)
            continue
        for batch_no, cumulative_length in cumulative_by_batch.items():
            if discharge_length <= cumulative_length:
                batch_numbers.append(batch_no)
                break
        else:
            # Beyond the last batch of the day: carry the previous row's batch
            # forward. Guard the very first row, where there is nothing to
            # carry and indexing [-1] would raise IndexError.
            batch_numbers.append(batch_numbers[-1] if batch_numbers else np.nan)
    return batch_numbers


def return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, index_no):
    """Attach a mixing batch number to every fy664g pickup row.

    Args:
        raw_df: Raw sensor DataFrame. Must contain ``'Time Stamp'`` (datetime),
            ``'Mixing batch number'`` and the columns listed in
            ``PickupConstants.pick_cols`` / ``pick_additional_cols``.
            It is modified in place: ``'day'`` and ``'batch-date'`` are added
            and ``'Mixing batch number'`` is cast to float.
        viscosity_df: Viscosity DataFrame with ``'Mixing date'`` (datetime),
            ``'Batch No.'`` and ``'Input rubber weight(0.1kg)'``. Not modified.
        bof_date_dict: Kept for interface compatibility; currently not read
            (see the NOTE in the body).
        index_no: Value of ``'Size No (INDEX No).6'`` selecting the rows to keep.

    Returns:
        A new DataFrame holding the selected pickup rows sorted by
        ``'Time Stamp'`` with the added columns ``batch_no``,
        ``pickup_batch_date``, ``pickup_flag``, ``pickup_batch_diff``,
        ``pickup_batch_number`` and ``batch-date``.

    Raises:
        Exception: any failure is logged and re-raised as a plain
            ``Exception`` carrying the original message, chained to the
            original error.
    """
    try:
        raw_df['day'] = raw_df['Time Stamp'].dt.date.astype('str')
        raw_df['Mixing batch number'] = raw_df['Mixing batch number'].astype('float')
        raw_df['batch-date'] = 'Batch_' + raw_df['Mixing batch number'].astype('str') + '_' + raw_df['day'].astype(
            'str')

        pick_add_cols = PickupConstants.pick_cols + PickupConstants.pick_additional_cols
        sorted_pick_df = raw_df[pick_add_cols].sort_values(by="Time Stamp", ascending=True)
        # Explicit copy: columns are added below and must not be written
        # through a filtered view of the sorted frame.
        test_pick_df = sorted_pick_df[sorted_pick_df['Size No (INDEX No).6'] == index_no].copy()

        day_length_dic = _fy664g_daily_extruded_length(test_pick_df)
        discharge_dict = _fy664g_cumulative_batch_lengths(viscosity_df, day_length_dic)

        # Add the 'batch_no' column to the DataFrame
        test_pick_df['batch_no'] = _fy664g_assign_batch_numbers(test_pick_df, discharge_dict)
        test_pick_df['batch_no'] = test_pick_df['batch_no'].astype('float')
        test_pick_df['pickup_batch_date'] = 'Batch_' + test_pick_df['batch_no'].astype('str') + '_' + test_pick_df[
            'day'].astype('str')

        # NOTE(review): the previous implementation looked up a start/end time
        # in bof_date_dict for each row, but every branch produced
        # 'false' / 0. The lookup therefore could not change the result; it
        # only raised when a 'pickup_batch_date' key was absent (always the
        # case for NaN batch numbers). The constant outcome is kept here.
        # If fy664g is meant to flag rows outside the window, that logic
        # still has to be written - confirm with the process owner.
        test_pick_df['pickup_flag'] = 'false'
        test_pick_df['pickup_batch_diff'] = 0

        test_pick_df['pickup_batch_number'] = test_pick_df['batch_no'] - test_pick_df['pickup_batch_diff'].astype(
            'float')
        test_pick_df['batch-date'] = 'Batch_' + test_pick_df['pickup_batch_number'].astype('str') + '_' + \
                                     test_pick_df['day'].astype('str')
        return test_pick_df
    except Exception as err:
        logger.error(f"Error while forming pickup batch number for fy664g: {str(err)}")
        logger.error(traceback.format_exc())
        raise Exception(str(err)) from err
def preprocess_pickup_section(raw_df, index_number, viscosity_df):
......@@ -392,19 +508,16 @@ def preprocess_pickup_section(raw_df, index_number, viscosity_df):
'float').astype(str) + '_' + sorted_viscosity_df['day'].astype(str)
sorted_viscosity_df = sorted_viscosity_df[sorted_viscosity_df['Index No'] == index_number]
weight_date_dict = {}
weight_batch_dict = {}
for each_day in dt_list:
day_df = sorted_viscosity_df[sorted_viscosity_df['day'] == each_day]
summed = day_df['Input rubber weight(0.1kg)'].astype('float64').sum()
weight_date_dict[each_day] = summed
weight_batch_dict[each_day] = summed
date_dict = get_mixer_batch_date(raw_df, index_number)
bof_test_df = return_batch_no_bof_df(raw_df, sorted_viscosity_df, date_dict, index_number)
bof_date_dict = get_bof_batch_date(bof_test_df, index_number)
pick_merged_batch_df = return_pick_batch_no_df(raw_df, sorted_viscosity_df, bof_date_dict,
bof_test_df, index_number)
pick_merged_batch_df = pd.DataFrame()
if index_number == 1250:
pick_merged_batch_df = return_fy676a_pick_batch_no_df(raw_df, sorted_viscosity_df, bof_date_dict,
bof_test_df, index_number)
elif index_number == 3294:
pick_merged_batch_df = return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict,
index_number)
# Merging pick data with viscosity data on date-batch column
pickup_merged_df_final = pd.merge(pick_merged_batch_df, sorted_viscosity_df[['batch-date', 'viscosity']],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment