# Commit 5d708a91 authored by dasharatha.vamshi's avatar dasharatha.vamshi
#
# added main
#
# parent 1057b00c
import math
import sys
import traceback
import warnings
from datetime import datetime

import numpy as np
import pandas as pd
from loguru import logger
# ......@@ -253,18 +253,24 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
if __name__ == "__main__":
    try:
        logger.info("Starting the model")

        # Example configurations kept for reference:
        #   FY676-A: index 1250, raw 'FY676-A-WO_Visc.xlsx' (skip 0),
        #            viscosity 'viscosity_natural_rubber_data.xlsx' (skip 3)
        #   FY664G:  index 3294, raw 'fy664g_raw.csv' (skip 0),
        #            viscosity 'fy664g-viscosity.xlsx' (skip 2)

        def _cli_value(flag, cast=str):
            # Return the token following *flag* on the command line, cast via
            # *cast*. sys.argv.index raises ValueError when the flag is absent
            # and the cast raises ValueError on a bad value; both are reported
            # with a clear message instead of an opaque traceback.
            try:
                return cast(sys.argv[sys.argv.index(flag) + 1])
            except (ValueError, IndexError) as exc:
                raise ValueError(f"missing or invalid value for required argument {flag}") from exc

        raw_file_path = _cli_value('-r')
        viscosity_file_path = _cli_value('-v')
        raw_file_skip_rows = _cli_value('-sr', int)
        viscosity_file_skip_rows = _cli_value('-sv', int)
        index_number = _cli_value('-index', int)
        start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
    except Exception as e:
        logger.exception(f"Module failed because of error {e}")
# NOTE(review): everything below looks like several module headers concatenated
# into one file — `import warnings` + filterwarnings("ignore") repeats once per
# original module, and the project-local imports are commented out. Redundant
# but harmless at runtime; left byte-identical.
import sys
import warnings
import mlflow
from sklearn import metrics
from sklearn.model_selection import train_test_split
# from scripts.constants.constants import RawConstants
# from scripts.core.model_loader import ModelLoader
# from scripts.section_utils.bof_section import preprocess_bof_section
# from scripts.section_utils.extruder_section import preprocess_extruder_section
# from scripts.section_utils.material_section import preprocess_viscosity_section
# from scripts.section_utils.mixer_section import preprocess_mixer_section
# from scripts.section_utils.pickup_section import preprocess_pickup_section
# from scripts.section_utils.sheet_supply_section import preprocess_sheet_section
warnings.filterwarnings("ignore")
import warnings
# from scripts.constants.constants import ExtruderConstants
warnings.filterwarnings("ignore")
import warnings
# from scripts.constants.constants import ViscosityConstants
warnings.filterwarnings("ignore")
import warnings
# from scripts.constants.constants import MixerConstants
warnings.filterwarnings("ignore")
import warnings
# from scripts.constants.constants import PickupConstants
warnings.filterwarnings("ignore")
import warnings
# from scripts.constants.constants import SheetConstants
warnings.filterwarnings("ignore")
def preprocess_sheet_section(df, index_number):
    """Aggregate the sheet-supply columns of *df* into one row per batch.

    Filters to the given product index, drops rows whose 'Weighing times'
    batch counter is 0, groups by (day, Weighing times) with
    SheetConstants.aggregation_dict, renames aggregated columns to
    snake_case, and returns the rounded grouped frame with a 'batch-date'
    key column for later merging.
    """
    sheet_supply_column = SheetConstants.sheet_supply_column
    # NOTE(review): column selection returns a copy here, so the assignments
    # below may emit SettingWithCopyWarning but do not touch the caller's df.
    sheet_supply_df = df[sheet_supply_column]
    sheet_supply_df['Time Stamp'] = pd.to_datetime(sheet_supply_df['Time Stamp'])
    sheet_supply_df = sheet_supply_df.sort_values(by='Time Stamp')
    numeric_cols = sheet_supply_df.select_dtypes(include=['int', 'float']).columns
    # Convert numeric columns to float
    sheet_supply_df[numeric_cols] = sheet_supply_df[numeric_cols].astype(float)
    sheet_supply_df['day'] = sheet_supply_df['Time Stamp'].dt.date
    sheet_supply_df['batch-date'] = 'Batch_' + sheet_supply_df['Weighing times'].astype(str) + '_' + sheet_supply_df[
        'day'].astype(str)
    sheet_supply_df = sheet_supply_df[sheet_supply_df["Size No (INDEX No)"] == index_number]
    sheet_supply_df = sheet_supply_df[sheet_supply_df["Weighing times"] != 0]
    aggregation_dict = SheetConstants.aggregation_dict
    group_by = ['day', 'Weighing times']
    df_sheet_grouped = sheet_supply_df.groupby(group_by).agg(aggregation_dict).reset_index()
    # Rename aggregated columns to '<column>_<agg>' in snake_case, except the
    # pass-through columns that keep their original names.
    col_renamer = {}
    for col, col_agg in aggregation_dict.items():
        if col not in ['viscosity', 'time_min', 'time_max']:
            renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
            col_renamer[col] = renamed_col
        else:
            col_renamer[col] = col
    df_sheet_grouped = df_sheet_grouped.rename(columns=col_renamer)
    # Rebuild 'batch-date' after grouping (the pre-group value was dropped).
    df_sheet_grouped['batch-date'] = 'Batch_' + df_sheet_grouped['Weighing times'].astype(str) + '_' + df_sheet_grouped[
        'day'].astype(str)
    df_sheet_grouped = round(df_sheet_grouped, 6)
    return df_sheet_grouped
def get_mixer_batch_date(raw_df, index_number):
    """Return {batch-date: {'start_time', 'end_time'}} for the mixer section.

    Filters the mixer columns to the given index, drops batch 0, and records
    the first/last timestamp of every (day, mixing batch) group as strings.

    Raises:
        Exception: wrapping any underlying error, after logging it.
    """
    try:
        logger.info('Getting mixer batch date dictionary')
        mixer_df = raw_df[PickupConstants.pick_imp_mixer_cols]
        mixer_df = mixer_df.sort_values(by='Time Stamp')
        numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
        # Convert numeric columns to float
        mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
        mixer_df['day'] = mixer_df['Time Stamp'].dt.date
        mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_number]
        mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]
        mixer_df['time_min'] = mixer_df['Time Stamp']
        mixer_df['time_max'] = mixer_df['Time Stamp']
        aggregation_dict = {
            'time_min': 'min',
            'time_max': 'max',
        }
        group_by = ['day', 'Mixing batch number']
        df_mixer_grouped = mixer_df.groupby(group_by).agg(aggregation_dict).reset_index()
        df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped['time_max'] - df_mixer_grouped['time_min']
        df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped[
            'mixer_section_time_diff_second'].dt.total_seconds()
        df_mixer_grouped['batch-date'] = 'Batch_' + df_mixer_grouped['Mixing batch number'].astype(str) + '_' + \
                                         df_mixer_grouped['day'].astype(str)
        # Single pass keeping the first occurrence per batch-date (the original
        # re-filtered the whole frame once per unique batch — O(batches*rows)).
        date_dict = {}
        for _, row in df_mixer_grouped.iterrows():
            key = row['batch-date']
            if key not in date_dict:
                date_dict[key] = {"start_time": str(row['time_min']),
                                  'end_time': str(row['time_max'])}
        return date_dict
    except Exception as err:
        logger.error(f'Error while getting mixer time and forming date dict: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def return_batch_no_bof_df(raw_df, viscosity_df, date_dict, index_number):
    """Assign a BOF batch number to every row of the BOF section.

    Apportions each day's total extruded length across that day's viscosity
    batches (proportionally to input rubber weight), maps each BOF row's
    cumulative 'length passed through' onto a batch number, and shifts rows
    whose timestamp falls inside the same batch's mixer window (from
    *date_dict*) back by one batch. Also mutates *raw_df* by adding
    'day'/'batch-date' helper columns, as the original did.

    Returns the BOF dataframe with 'bof_batch_number' and 'batch-date' columns.
    """
    try:
        logger.info('Getting bof batch number')
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        raw_df['Mixing batch number'] = raw_df['Mixing batch number'].astype('float')
        raw_df['batch-date'] = 'Batch_' + raw_df['Mixing batch number'].astype(
            'str') + '_' + raw_df['day'].astype('str')
        bof_add_cols = ['Size No (INDEX No).5', 'length passed through', 'Time Stamp', 'day', 'lower door open']
        bof_df = raw_df[bof_add_cols]
        sorted_bof_df = bof_df.sort_values(by="Time Stamp", ascending=True)
        sorted_bof_df = sorted_bof_df[sorted_bof_df['Size No (INDEX No).5'] == index_number]
        dt_list = list(sorted_bof_df['day'].unique())
        # Total length extruded per day, clamped at 0 if the counter decreased.
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_bof_df[sorted_bof_df['day'] == each_day]
            if (day_df['length passed through'].max() - day_df['length passed through'].min()) <= 0:
                value = 0
            else:
                value = day_df['length passed through'].max() - day_df['length passed through'].min()
            day_length_dic[each_day] = value
        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date
        sorted_viscosity_df['day'] = sorted_viscosity_df['day'].astype('str')
        extrud_visc_df = sorted_viscosity_df[['Batch No.', 'Input rubber weight(0.1kg)', 'day', 'Mixing date']]
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['day'].map(day_length_dic)
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['length_from_extruder'].fillna(0)
        daily_sum_weight = extrud_visc_df.groupby('day')['Input rubber weight(0.1kg)'].sum() / 10
        # Add a new column 'm/kg' by dividing 'length_from_extruder' by the sum for each day
        extrud_visc_df['m/kg'] = extrud_visc_df.apply(
            lambda row: row['length_from_extruder'] / daily_sum_weight[row['day']], axis=1)
        extrud_visc_df['batch_length'] = extrud_visc_df.apply(
            lambda row: row['m/kg'] * row['Input rubber weight(0.1kg)'] / 10, axis=1).astype('float64')
        extrud_visc_df['batch_length'] = extrud_visc_df['batch_length'].apply(math.ceil)
        extrud_visc_df['cumulative_length'] = extrud_visc_df.groupby('day')['batch_length'].cumsum()
        # {day: {batch_no: cumulative length}} — batch boundaries along the day.
        discharge_dict = extrud_visc_df.groupby('day').apply(
            lambda group: group.set_index('Batch No.').to_dict()['cumulative_length']).to_dict()
        test_sorted_extr_df = sorted_bof_df
        test_df = test_sorted_extr_df
        # Initialize an empty list to store batch numbers
        batch_numbers = []
        # Iterate through each row in the DataFrame
        for index, row in test_df.iterrows():
            day = row['day']
            discharge_length = row['length passed through']
            if discharge_length == 0:
                batch_numbers.append(0)
            else:
                # Check if the day is in the dictionary
                if day in discharge_dict:
                    # Check if discharge length is less than or equal to the corresponding batch length
                    batch_length_dict = discharge_dict[day]
                    for batch_no, batch_length in batch_length_dict.items():
                        if discharge_length <= batch_length:
                            batch_numbers.append(batch_no)
                            break
                    else:
                        # Past the last boundary: carry the previous batch forward.
                        batch_numbers.append(batch_numbers[-1])
                else:
                    # If day is not in the dictionary, assign NaN to batch number
                    batch_numbers.append(np.nan)
        # Add the 'batch_no' column to the DataFrame
        test_df['batch_no'] = batch_numbers
        # FIX: the original first built a per-day batch list here, then reset
        # batch_number/batch_list/started_with_one and recomputed them with a
        # day-agnostic loop — the first pass was entirely dead code and has
        # been removed. Only the surviving (second) loop is kept.
        batch_number = 0
        batch_list = []
        started_with_one = False
        # Count rising edges of 'lower door open': each new run of 1s opens a
        # new batch; 0-rows keep the current batch number.
        for value in test_df['lower door open']:
            if value == 1:
                if not started_with_one:
                    batch_number += 1
                    started_with_one = True
                batch_list.append(batch_number)
            else:
                batch_list.append(batch_number)
                started_with_one = False
        test_df['batch_no'] = test_df['batch_no'].astype('float')
        test_df['bof_batch_date'] = 'Batch_' + test_df['batch_no'].astype('str') + '_' + test_df['day'].astype('str')
        extruder_flag_list = []
        extrud_flg_vms = []
        for i, value in test_df.iterrows():
            if value['batch_no'] == 0.0:
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)
            else:
                # NOTE(review): assumes every bof_batch_date is present in
                # date_dict — a missing key raises AttributeError here.
                start_time = date_dict.get(value["bof_batch_date"]).get("start_time")
                end_time = date_dict.get(value["bof_batch_date"]).get("end_time")
                # Row strictly inside the mixer window -> material still mixing,
                # so this row belongs to the previous batch (diff of 1 below).
                if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') > datetime.strptime(
                        start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                        (datetime.strptime(str(value["Time Stamp"]).split('+')[0],
                                           '%Y-%m-%d %H:%M:%S') < datetime.strptime(
                            end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                    extruder_flag_list.append('true')
                    extrud_flg_vms.append(1)
                else:
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
        test_df['bof_flag'] = extruder_flag_list
        test_df['bof_batch_diff'] = extrud_flg_vms
        test_df['updated_bt_list'] = batch_list
        test_df['bof_batch_number'] = test_df['batch_no'] - test_df['bof_batch_diff'].astype('float')
        test_df = test_df.rename(columns={'bof_batch_date': 'batch-date'})
        return test_df
    except Exception as err:
        logger.error(f'Error while adding batch to bof section: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def get_bof_batch_date(bof_batch_df, index_number):
    """Return {batch-date: {'start_time', 'end_time'}} for the BOF section.

    Mirrors get_mixer_batch_date: keeps only the given index and non-zero
    BOF batches, then records the earliest and latest timestamp of each
    (day, bof_batch_number) group as strings.
    """
    try:
        logger.info('Getting bof batch date dictionary')
        section_df = bof_batch_df[PickupConstants.pick_imp_bof_cols].sort_values(by='Time Stamp')
        # Cast every numeric column to float for uniform aggregation.
        float_cols = section_df.select_dtypes(include=['int', 'float']).columns
        section_df[float_cols] = section_df[float_cols].astype(float)
        section_df['day'] = section_df['Time Stamp'].dt.date
        section_df = section_df[section_df["Size No (INDEX No).5"] == index_number]
        section_df = section_df[section_df["bof_batch_number"] != 0]
        section_df['time_min'] = section_df['Time Stamp']
        section_df['time_max'] = section_df['Time Stamp']
        grouped = section_df.groupby(['day', 'bof_batch_number']).agg(
            {'time_min': 'min', 'time_max': 'max'}).reset_index()
        window = grouped['time_max'] - grouped['time_min']
        grouped['mixer_section_time_diff_second'] = window.dt.total_seconds()
        grouped['batch-date'] = ('Batch_' + grouped['bof_batch_number'].astype(str)
                                 + '_' + grouped['day'].astype(str))
        # First occurrence per batch-date wins, matching the original lookup.
        bof_date_dict = {}
        for _, record in grouped.iterrows():
            key = record['batch-date']
            if key not in bof_date_dict:
                bof_date_dict[key] = {"start_time": str(record['time_min']),
                                      'end_time': str(record['time_max'])}
        return bof_date_dict
    except Exception as err:
        logger.error(f'Error while getting bof time and forming bof date dict: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def return_fy676a_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_df, index_number):
    """Assign a pickup batch number to each pickup-section row (FY676-A).

    Same length-apportioning scheme as the BOF function: each day's pickup
    length is split across that day's viscosity batches by rubber weight,
    and each row is mapped via its cumulative 'length passed through.1'.
    The result is then capped at the BOF batch number taken (row-aligned)
    from *bof_batch_num_df*.
    """
    try:
        logger.info('Getting pickup batch date dataframe')
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        raw_df['Mixing batch number'] = raw_df['Mixing batch number'].astype('float')
        raw_df['batch-date'] = 'Batch_' + raw_df['Mixing batch number'].astype('str') + '_' + raw_df['day'].astype(
            'str')
        pick_add_cols = PickupConstants.pick_cols + PickupConstants.pick_additional_cols
        pick_df = raw_df[pick_add_cols]
        sorted_pick_df = pick_df.sort_values(by="Time Stamp", ascending=True)
        sorted_pick_df = sorted_pick_df[sorted_pick_df['Size No (INDEX No).6'] == index_number]
        dt_list = list(sorted_pick_df['day'].unique())
        # Total pickup length per day, clamped at 0 when the counter decreased.
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_pick_df[sorted_pick_df['day'] == each_day]
            if day_df['length passed through.1'].max() - day_df['length passed through.1'].min() <= 0:
                value = 0
            else:
                value = day_df['length passed through.1'].max() - day_df['length passed through.1'].min()
            day_length_dic[each_day] = value
        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date
        sorted_viscosity_df['day'] = sorted_viscosity_df['day'].astype('str')
        extrud_visc_df = sorted_viscosity_df[['Batch No.', 'Input rubber weight(0.1kg)', 'day', 'Mixing date']]
        extrud_visc_df['length_from_pickup'] = extrud_visc_df['day'].map(day_length_dic)
        extrud_visc_df['length_from_pickup'] = extrud_visc_df['length_from_pickup'].fillna(0)
        daily_sum_weight = extrud_visc_df.groupby('day')['Input rubber weight(0.1kg)'].sum() / 10
        # Add a new column 'm/kg' by dividing 'length_from_pickup' by the sum for each day
        extrud_visc_df['m/kg'] = extrud_visc_df.apply(
            lambda row: row['length_from_pickup'] / daily_sum_weight[row['day']], axis=1)
        extrud_visc_df['batch_length'] = extrud_visc_df.apply(
            lambda row: row['m/kg'] * row['Input rubber weight(0.1kg)'] / 10, axis=1).astype('float64')
        extrud_visc_df['batch_length'] = extrud_visc_df['batch_length'].apply(math.ceil)
        extrud_visc_df['cumulative_length'] = extrud_visc_df.groupby('day')['batch_length'].cumsum()
        # {day: {batch_no: cumulative length}} — batch boundaries along the day.
        discharge_dict = extrud_visc_df.groupby('day').apply(
            lambda group: group.set_index('Batch No.').to_dict()['cumulative_length']).to_dict()
        test_sorted_extr_df = sorted_pick_df
        test_pick_df = test_sorted_extr_df
        # Initialize an empty list to store batch numbers
        batch_numbers = []
        # Iterate through each row in the DataFrame
        for index, row in test_pick_df.iterrows():
            day = row['day']
            discharge_length = row['length passed through.1']
            if discharge_length == 0:
                batch_numbers.append(0)
            else:
                # Check if the day is in the dictionary
                if day in discharge_dict:
                    # Check if discharge length is less than or equal to the corresponding batch length
                    batch_length_dict = discharge_dict[day]
                    for batch_no, batch_length in batch_length_dict.items():
                        if discharge_length <= batch_length:
                            batch_numbers.append(batch_no)
                            break
                    else:
                        # If no match is found in the dictionary, assign NaN to batch number
                        batch_numbers.append(batch_numbers[-1])
                else:
                    # If day is not in the dictionary, assign NaN to batch number
                    batch_numbers.append(np.nan)
        # Add the 'batch_no' column to the DataFrame
        test_pick_df['batch_no'] = batch_numbers
        test_pick_df['batch_no'] = test_pick_df['batch_no'].astype('float')
        test_pick_df['pickup_batch_date'] = 'Batch_' + test_pick_df['batch_no'].astype('str') + '_' + test_pick_df[
            'day'].astype('str')
        # NOTE(review): row alignment by index — assumes bof_batch_num_df shares
        # this frame's index; verify against return_batch_no_bof_df's output.
        test_pick_df['bof_batch_number'] = bof_batch_num_df['bof_batch_number']
        extruder_flag_list = []
        extrud_flg_vms = []
        for i, value in test_pick_df.iterrows():
            if value['batch_no'] == 0.0:
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)
            else:
                start_time = bof_date_dict.get(value["pickup_batch_date"]).get("start_time")
                end_time = bof_date_dict.get(value["pickup_batch_date"]).get("end_time")
                # NOTE(review): both branches of this comparison append
                # 'false'/0, so the timestamp check has no effect — confirm
                # whether the 'true'/1 case (as in the BOF variant) was meant.
                if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') > datetime.strptime(
                        start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                        (datetime.strptime(str(value["Time Stamp"]).split('+')[0],
                                           '%Y-%m-%d %H:%M:%S') < datetime.strptime(
                            end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
                else:
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
        test_pick_df['pickup_flag'] = extruder_flag_list
        test_pick_df['pickup_batch_diff'] = extrud_flg_vms
        test_pick_df['pickup_batch_verify_number'] = test_pick_df['batch_no'] - test_pick_df[
            'pickup_batch_diff'].astype('float')
        # The pickup batch can never be ahead of the BOF batch: cap it.
        actual_pickup_bt_num = []
        for i, value in test_pick_df.iterrows():
            pickup_batch_number = value['pickup_batch_verify_number']
            bof_batch_num = value['bof_batch_number']
            if pickup_batch_number <= bof_batch_num:
                actual_pickup_bt_num.append(pickup_batch_number)
            else:
                actual_pickup_bt_num.append(bof_batch_num)
        test_pick_df['pickup_batch_number'] = actual_pickup_bt_num
        test_pick_df['batch-date'] = 'Batch_' + test_pick_df['pickup_batch_number'].astype('str') + '_' + \
                                     test_pick_df['day'].astype('str')
        return test_pick_df
    except Exception as err:
        logger.error(f'Error in adding batch to fy676a pick section: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, index_no):
    """Assign a pickup batch number to each pickup-section row (FY664G).

    Same length-apportioning scheme as the FY676-A variant, but the daily
    minimum length is clamped at 0 and there is no BOF-number cap.
    """
    try:
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        raw_df['Mixing batch number'] = raw_df['Mixing batch number'].astype('float')
        raw_df['batch-date'] = 'Batch_' + raw_df['Mixing batch number'].astype('str') + '_' + raw_df['day'].astype(
            'str')
        pick_add_cols = PickupConstants.pick_cols + PickupConstants.pick_additional_cols
        pick_df = raw_df[pick_add_cols]
        sorted_pick_df = pick_df.sort_values(by="Time Stamp", ascending=True)
        sorted_pick_df = sorted_pick_df[sorted_pick_df['Size No (INDEX No).6'] == index_no]
        dt_list = list(sorted_pick_df['day'].unique())
        # Total pickup length per day; negative minima are treated as 0
        # (counter reset), and a non-positive span yields 0.
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_pick_df[sorted_pick_df['day'] == each_day]
            minimum = day_df['length passed through.1'].min()
            if minimum <= 0:
                minimum = 0
            if day_df['length passed through.1'].max() - minimum <= 0:
                value = 0
            else:
                value = day_df['length passed through.1'].max() - minimum
            day_length_dic[each_day] = value
        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date
        sorted_viscosity_df['day'] = sorted_viscosity_df['day'].astype('str')
        extrud_visc_df = sorted_viscosity_df[['Batch No.', 'Input rubber weight(0.1kg)', 'day', 'Mixing date']]
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['day'].map(day_length_dic)
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['length_from_extruder'].fillna(0)
        daily_sum_weight = extrud_visc_df.groupby('day')['Input rubber weight(0.1kg)'].sum() / 10
        # Add a new column 'm/kg' by dividing 'length_from_extruder' by the sum for each day
        extrud_visc_df['m/kg'] = extrud_visc_df.apply(
            lambda row: row['length_from_extruder'] / daily_sum_weight[row['day']], axis=1)
        extrud_visc_df['batch_length'] = extrud_visc_df.apply(
            lambda row: row['m/kg'] * row['Input rubber weight(0.1kg)'] / 10, axis=1).astype('float64')
        extrud_visc_df['batch_length'] = extrud_visc_df['batch_length'].apply(math.ceil)
        extrud_visc_df['cumulative_length'] = extrud_visc_df.groupby('day')['batch_length'].cumsum()
        # {day: {batch_no: cumulative length}} — batch boundaries along the day.
        discharge_dict = extrud_visc_df.groupby('day').apply(
            lambda group: group.set_index('Batch No.').to_dict()['cumulative_length']).to_dict()
        test_sorted_extr_df = sorted_pick_df
        test_pick_df = test_sorted_extr_df
        # Initialize an empty list to store batch numbers
        batch_numbers = []
        # Iterate through each row in the DataFrame
        for index, row in test_pick_df.iterrows():
            day = row['day']
            discharge_length = row['length passed through.1']
            if discharge_length == 0:
                batch_numbers.append(0)
            else:
                # Check if the day is in the dictionary
                if day in discharge_dict:
                    # Check if discharge length is less than or equal to the corresponding batch length
                    batch_length_dict = discharge_dict[day]
                    for batch_no, batch_length in batch_length_dict.items():
                        if discharge_length <= batch_length:
                            batch_numbers.append(batch_no)
                            break
                    else:
                        # If no match is found in the dictionary, assign NaN to batch number
                        batch_numbers.append(batch_numbers[-1])
                else:
                    # If day is not in the dictionary, assign NaN to batch number
                    batch_numbers.append(np.nan)
        # Add the 'batch_no' column to the DataFrame
        test_pick_df['batch_no'] = batch_numbers
        test_pick_df['batch_no'] = test_pick_df['batch_no'].astype('float')
        test_pick_df['pickup_batch_date'] = 'Batch_' + test_pick_df['batch_no'].astype('str') + '_' + test_pick_df[
            'day'].astype('str')
        extruder_flag_list = []
        extrud_flg_vms = []
        for i, value in test_pick_df.iterrows():
            if value['batch_no'] == 0.0:
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)
            else:
                start_time = bof_date_dict.get(value["pickup_batch_date"]).get("start_time")
                end_time = bof_date_dict.get(value["pickup_batch_date"]).get("end_time")
                # NOTE(review): both branches of this comparison append
                # 'false'/0, so the timestamp check has no effect — confirm
                # whether the 'true'/1 case (as in the BOF variant) was meant.
                if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') > datetime.strptime(
                        start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                        (datetime.strptime(str(value["Time Stamp"]).split('+')[0],
                                           '%Y-%m-%d %H:%M:%S') < datetime.strptime(
                            end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
                else:
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
        test_pick_df['pickup_flag'] = extruder_flag_list
        test_pick_df['pickup_batch_diff'] = extrud_flg_vms
        test_pick_df['pickup_batch_number'] = test_pick_df['batch_no'] - test_pick_df['pickup_batch_diff'].astype(
            'float')
        test_pick_df['batch-date'] = 'Batch_' + test_pick_df['pickup_batch_number'].astype('str') + '_' + \
                                     test_pick_df['day'].astype('str')
        return test_pick_df
    except Exception as err:
        logger.error(f"Error while forming pickup batch number for fy664g: {str(err)}")
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def preprocess_pickup_section(raw_df, index_number, viscosity_df):
    """Build the aggregated pickup-section dataframe for one product index.

    Orchestrates the mixer/BOF/pickup batch-assignment helpers, merges the
    result with the viscosity data on 'batch-date', drops batch 0, and
    aggregates per PickupConstants.pick_aggregate_dict. The 'viscosity'
    column is dropped from the returned frame.

    Raises:
        Exception: wrapping any underlying error (including the ValueError
            for an unsupported *index_number*), after logging it.
    """
    try:
        logger.info('Preprocessing and getting aggregated pickup dataframe')
        raw_df['Time Stamp'] = pd.to_datetime(raw_df['Time Stamp'])
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        extr_cols = PickupConstants.pick_cols + PickupConstants.pick_additional_cols
        pick_df = raw_df[extr_cols]
        sorted_pick_df = pick_df.sort_values(by="Time Stamp", ascending=True)
        sorted_pick_df = sorted_pick_df[sorted_pick_df['Size No (INDEX No).6'] == index_number]
        dt_list = list(sorted_pick_df['day'].unique())
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_pick_df[sorted_pick_df['day'] == each_day]
            day_length_dic[each_day] = day_df['length passed through.1'].max() - day_df['length passed through.1'].min()
        ''' Reading viscosity file with skipping 2 rows '''
        viscosity_df['Mixing date'] = pd.to_datetime(viscosity_df['Mixing date'])
        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date
        sorted_viscosity_df['day'] = sorted_viscosity_df['day'].astype('str')
        sorted_viscosity_df['batch-date'] = 'Batch_' + viscosity_df['Batch No.'].astype(
            'float').astype(str) + '_' + sorted_viscosity_df['day'].astype(str)
        sorted_viscosity_df = sorted_viscosity_df[sorted_viscosity_df['Index No'] == index_number]
        date_dict = get_mixer_batch_date(raw_df, index_number)
        bof_test_df = return_batch_no_bof_df(raw_df, sorted_viscosity_df, date_dict, index_number)
        bof_date_dict = get_bof_batch_date(bof_test_df, index_number)
        if index_number == 1250:
            pick_merged_batch_df = return_fy676a_pick_batch_no_df(raw_df, sorted_viscosity_df, bof_date_dict,
                                                                  bof_test_df, index_number)
        elif index_number == 3294:
            # NOTE(review): this branch passes the raw `viscosity_df` while the
            # 1250 branch passes the filtered `sorted_viscosity_df` — confirm
            # that is intentional.
            pick_merged_batch_df = return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict,
                                                                  index_number)
        else:
            # FIX: the original fell through with an empty DataFrame and
            # failed later with an opaque KeyError on the 'batch-date' merge.
            raise ValueError(f'Unsupported index number {index_number}; expected 1250 (FY676-A) or 3294 (FY664G)')
        # Merging pick data with viscosity data on date-batch column
        pickup_merged_df_final = pd.merge(pick_merged_batch_df, sorted_viscosity_df[['batch-date', 'viscosity']],
                                          on='batch-date', how='left')
        # Removing batch 0
        pickup_merged_df_final = pickup_merged_df_final[pickup_merged_df_final['pickup_batch_number'] != 0]
        # Grouping with aggregated data
        df_pickup_grouped = pickup_merged_df_final.groupby(PickupConstants.pick_grouped_cols).agg(
            PickupConstants.pick_aggregate_dict).reset_index()
        # Rename aggregated columns to '<column>_<agg>' in snake_case, except
        # the pass-through columns that keep their original names.
        col_renamer = {}
        for col, col_agg in PickupConstants.pick_aggregate_dict.items():
            if col not in ['viscosity', 'time_min', 'time_max', 'Mixing Weight (Integrated Value)_diff',
                           'max_rpm_count']:
                renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
                col_renamer[col] = renamed_col
            else:
                col_renamer[col] = col
        df_pickup_grouped = df_pickup_grouped.rename(columns=col_renamer)
        df_pickup_grouped_visc = df_pickup_grouped.drop('viscosity', axis=1)
        return df_pickup_grouped_visc
    except Exception as err:
        logger.error(f'Error while performing main function for pickup section {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def preprocess(df):
    """Return a cleaned copy of *df*.

    Drops all-NaN columns, rows whose 'Mixing batch number' is 0 or NaN,
    and constant (single-unique-value) columns. The caller's frame is not
    modified: `replace` returns a new frame before any in-place drop.
    """
    logger.info("Starting Preprocessing the Data")
    # Replace 'nan' strings with real NaN so they count as missing.
    df = df.replace('nan', np.nan)
    # Drop columns where every value is missing.
    missing_counts = df.isnull().sum()
    cols_to_remove = missing_counts[missing_counts == len(df)].index
    df = df.drop(cols_to_remove, axis=1)
    df = df.loc[df['Mixing batch number'] != 0]
    # Drop rows where 'Batch Number' is NaN
    df = df.dropna(subset=['Mixing batch number'])
    # Identify and drop constant columns — they carry no signal.
    constant_columns = df.columns[df.nunique() == 1]
    df.drop(columns=constant_columns, inplace=True)
    logger.info(f"Preprocessing completed and the final shape is {df.shape}")
    # FIX: removed the unused `columns_with_missing_values` computation the
    # original performed just before returning.
    return df
def preprocess_mixer_section(df, index_number):
    """Aggregate the mixer-section columns of *df* into one row per batch.

    Keeps only the given index and non-zero mixing batches, cleans the frame
    via preprocess(), derives on/off and rubber-addition flags, restricts to
    rows where the process is on, counts max-rpm (60.0) samples per batch,
    and aggregates per MixerConstants.aggregation_dict.
    """
    mixer_cols = MixerConstants.mixer_cols
    mixer_df = df[mixer_cols]
    mixer_df['Time Stamp'] = pd.to_datetime(mixer_df['Time Stamp'])
    mixer_df = mixer_df.sort_values(by='Time Stamp')
    numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
    # Convert numeric columns to float
    mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
    mixer_df['day'] = mixer_df['Time Stamp'].dt.date
    mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_number]
    mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]
    mixer_df['Mixing Weight (Integrated Value)_diff'] = mixer_df.groupby(['day', 'Mixing batch number'])[
        'Mixing Weight (Integrated Value)'].transform(lambda x: x.max() - x.min())
    mixer_cleaned_df = preprocess(mixer_df)
    mixer_cleaned_df["day"] = mixer_cleaned_df['Time Stamp'].dt.date
    mixer_cleaned_df['mixer_on_or_off'] = mixer_cleaned_df['Mixing timer value'].apply(lambda x: 0 if x == 0 else 1)
    mixer_cleaned_df['batch-date'] = 'Batch_' + mixer_cleaned_df['Mixing batch number'].astype(str) + '_' + \
                                     mixer_cleaned_df['day'].astype(str)
    mixer_cleaned_df = mixer_cleaned_df.sort_values(by='Time Stamp')
    # Group by 'batch-date' and add a new column 'rubber_addition'
    # FIX: the original initialised this column on the raw input `df`,
    # mutating the caller's frame and leaving unset rows NaN in the grouped
    # frame. Initialise it on mixer_cleaned_df instead; the downstream
    # `== 1` filter is unaffected (NaN and 0 are both excluded).
    mixer_cleaned_df['rubber_addition'] = 0

    def apply_conditions(group):
        # Mark every row except ties at the max timer value (plus the idxmax
        # row itself) as rubber addition.
        max_value_index = group['Mixing timer value'].idxmax()
        group.loc[group['Mixing timer value'] != group['Mixing timer value'].max(), 'rubber_addition'] = 1
        group.loc[max_value_index, 'rubber_addition'] = 1
        return group

    mixer_cleaned_df = mixer_cleaned_df.groupby('batch-date').apply(apply_conditions)
    # Add 'process_on_or_off' column based on conditions
    mixer_cleaned_df['process_on_or_off'] = 0
    mixer_cleaned_df.loc[(mixer_cleaned_df['mixer_on_or_off'] == 1) & (
            mixer_cleaned_df['rubber_addition'] == 1), 'process_on_or_off'] = 1
    numeric_cols = mixer_cleaned_df.select_dtypes(include=['number', 'float']).columns
    process_on_df = mixer_cleaned_df[mixer_cleaned_df['process_on_or_off'] == 1]
    df_full = process_on_df[process_on_df.columns]
    # Define a dictionary for data type conversions
    conversion_dict = {col: float for col in df_full.select_dtypes(include='number').columns}
    # Apply the data type conversions
    df_full = df_full.astype(conversion_dict)
    # Count samples at the 60 rpm ceiling per batch.
    rpm_count = df_full[df_full['Rotor actual rpm'] == 60.0].groupby('batch-date')['Rotor actual rpm'].count()
    df_full = df_full.merge(rpm_count, left_on='batch-date', right_index=True, suffixes=('', '_count'))
    df_full.rename(columns={'Rotor actual rpm_count': 'max_rpm_count'}, inplace=True)
    aggregation_dict = MixerConstants.aggregation_dict
    group_by = ['day', 'Mixing batch number']
    df_mixer_grouped = df_full.groupby(group_by).agg(aggregation_dict).reset_index()
    # Rename aggregated columns to '<column>_<agg>' in snake_case, except the
    # pass-through columns that keep their original names.
    col_renamer = {}
    for col, col_agg in aggregation_dict.items():
        if col not in ['viscosity', 'time_min', 'time_max', 'Mixing Weight (Integrated Value)_diff', 'max_rpm_count']:
            renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
            col_renamer[col] = renamed_col
        else:
            col_renamer[col] = col
    df_mixer_grouped = df_mixer_grouped.rename(columns=col_renamer)
    df_mixer_grouped['batch-date'] = 'Batch_' + df_mixer_grouped['Mixing batch number'].astype(str) + '_' + \
                                     df_mixer_grouped['day'].astype(str)
    df_mixer_grouped = round(df_mixer_grouped, 6)
    return df_mixer_grouped
def create_status_column(df, type_col_name, columns_list):
    """Return one boolean per row of *df*.

    A row is True only when its *type_col_name* value is non-zero AND none
    of the *columns_list* values at that row position is zero.
    """
    statuses = []
    for row_idx, type_value in enumerate(df[type_col_name]):
        if type_value == 0:
            statuses.append(False)
            continue
        row_ok = all(df[col].iloc[row_idx] != 0 for col in columns_list)
        statuses.append(row_ok)
    return statuses
def preprocess_viscosity_section(viscosity_df, index_number):
    """Clean and feature-engineer the viscosity (lab) sheet for one size index.

    Filters to ``Index No == index_number``, coerces the rubber-property
    columns to numeric, blends each property of the two bale types into a
    single weighted value, and tags each row with a ``batch-date`` key and
    a validity ``status``.

    Args:
        viscosity_df: raw viscosity DataFrame (must contain 'Mixing date',
            'Batch No.', 'Index No' and the ViscosityConstants columns).
        index_number: size number (INDEX No) to keep.

    Returns:
        tuple: (final_viscosity_df restricted to ViscosityConstants.req_cols,
        raw_viscosity_df — the unfiltered frame sorted by 'Mixing date').
    """
    # adding date col to the viscosity df
    viscosity_df = viscosity_df.sort_values(by='Mixing date')
    # Keep an unfiltered, sorted copy to return alongside the processed frame.
    raw_viscosity_df = viscosity_df.sort_values(by='Mixing date')
    viscosity_df['date'] = viscosity_df['Mixing date'].dt.date
    viscosity_df['batch-date'] = 'Batch_' + viscosity_df['Batch No.'].astype(str) + '_' + viscosity_df['date'].astype(
        str)
    viscosity_df = viscosity_df[viscosity_df['Index No'] == index_number]
    rubber_cols = ViscosityConstants.rubber_cols
    # Replace '-' with 0 for numerical and float columns
    viscosity_df[rubber_cols] = viscosity_df[rubber_cols].replace('-', 0)
    viscosity_df[rubber_cols] = viscosity_df[rubber_cols].apply(pd.to_numeric, errors='coerce')
    # Identify numerical and float columns
    # NOTE(review): numerical_cols is computed but never used below.
    numerical_cols = viscosity_df.columns[
        viscosity_df.dtypes.apply(lambda x: pd.api.types.is_numeric_dtype(x) or pd.api.types.is_float_dtype(x))]
    integer_cols = viscosity_df.columns[viscosity_df.dtypes == 'int64']
    # Convert integer columns to float
    viscosity_df[integer_cols] = viscosity_df[integer_cols].astype(float)
    # Calculate weights: each bale type's share of the total quantity used.
    viscosity_df['Weight_type1'] = round(viscosity_df['Quantity using type1 bale'] / (
            viscosity_df['Quantity using type1 bale'] + viscosity_df['Quantity using type2 bale']), 2)
    viscosity_df['Weight_type2'] = round(viscosity_df['Quantity using type2 bale'] / (
            viscosity_df['Quantity using type1 bale'] + viscosity_df['Quantity using type2 bale']), 2)
    # Blend each rubber property as a weighted average of the two bale types.
    viscosity_df['Weighted_PO_type'] = (
            viscosity_df['PO_type1'] * viscosity_df['Weight_type1'] + viscosity_df[f'PO_type2'] * viscosity_df[
        'Weight_type2'])
    # 'DIRT_type1.1' sits in the type-2 group of ViscosityConstants.rubber_cols,
    # i.e. it is the sheet's second (type-2) DIRT column as exported by pandas.
    viscosity_df['Weighted_DIRT_type'] = (
            viscosity_df['DIRT_type1'] * viscosity_df['Weight_type1'] + viscosity_df['DIRT_type1.1'] * viscosity_df[
        'Weight_type2'])
    viscosity_df['Weighted_ASH_type'] = (
            viscosity_df['ASH_type1'] * viscosity_df['Weight_type1'] + viscosity_df['ASH_type2'] * viscosity_df[
        'Weight_type2'])
    viscosity_df['Weighted_VM_type'] = (
            viscosity_df['VM_type1'] * viscosity_df['Weight_type1'] + viscosity_df['VM_type2'] * viscosity_df[
        'Weight_type2'])
    viscosity_df['Weighted_PRI_type'] = (
            viscosity_df['PRI_type1'] * viscosity_df['Weight_type1'] + viscosity_df[f'PRI_type2'] * viscosity_df[
        'Weight_type2'])
    viscosity_df['Weighted_NITROGEN_type'] = (
            viscosity_df['NITROGEN_type1'] * viscosity_df['Weight_type1'] + viscosity_df['NITROGEN_type2'] *
            viscosity_df['Weight_type2'])
    viscosity_df['Weighted_Temperature during transportation_type[℃]'] = (
            viscosity_df['Temperature during transportation_type1[℃]'] * viscosity_df['Weight_type1'] +
            viscosity_df['Temperature during transportation_type2[℃]'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_Humidity during transportation__type[%]'] = (
            viscosity_df['Humidity during transportation_type1[%]'] * viscosity_df['Weight_type1'] +
            viscosity_df['Humidity during transportation__type2[%]'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted Sum'] = viscosity_df['Weighted_PO_type'] + viscosity_df['Weighted_DIRT_type'] + viscosity_df[
        'Weighted_ASH_type'] + viscosity_df['Weighted_VM_type'] + viscosity_df['Weighted_PRI_type'] + viscosity_df[
        'Weighted_NITROGEN_type']
    # Move the target column ('viscosity') to the end of the frame.
    column_to_keep_at_end = 'viscosity'
    # Reorder columns
    new_order = [col for col in viscosity_df.columns if col != column_to_keep_at_end] + [column_to_keep_at_end]
    viscosity_df = viscosity_df[new_order]
    # NOTE(review): recomputes the same 'batch-date' value assigned earlier —
    # redundant but harmless.
    viscosity_df['batch-date'] = 'Batch_' + viscosity_df['Batch No.'].astype(str) + '_' + viscosity_df['date'].astype(
        str)
    # Added Status to check rubber
    # Rubber Type 1
    # NOTE(review): unlike rubber_2_cols, this list omits 'PO_type1' — confirm
    # whether the asymmetry is intentional.
    rubber_1_cols = [
        'DIRT_type1',
        'ASH_type1',
        'VM_type1',
        'PRI_type1',
        'NITROGEN_type1'
    ]
    # Rubber Type 2
    rubber_2_cols = [
        'PO_type2',
        # NOTE(review): 'DIRT_type1' is a type-1 column; presumably
        # 'DIRT_type1.1' (the type-2 DIRT column) was intended here — confirm.
        'DIRT_type1',
        'ASH_type2',
        'VM_type2',
        'PRI_type2',
        'NITROGEN_type2'
    ]
    # A batch is valid ('status') if at least one rubber type has a non-zero
    # weight and all of that type's listed measurements are non-zero.
    viscosity_df['rubber_status_1'] = create_status_column(viscosity_df, 'Weight_type1', rubber_1_cols)
    viscosity_df['rubber_status_2'] = create_status_column(viscosity_df, 'Weight_type2', rubber_2_cols)
    viscosity_df['status'] = viscosity_df['rubber_status_1'] | viscosity_df['rubber_status_2']
    req_cols = ViscosityConstants.req_cols
    final_viscosity_df = viscosity_df[req_cols]
    final_viscosity_df = round(final_viscosity_df, 6)
    return final_viscosity_df, raw_viscosity_df
def mixer_section_start_end_time(raw_df, index_no):
    """Compute per-batch start/end timestamps for the mixer section.

    Filters *raw_df* to the given size index (``Size No (INDEX No).3``) and
    non-zero batch numbers, then takes the earliest and latest 'Time Stamp'
    per (day, batch) group.

    Args:
        raw_df: raw process DataFrame containing the mixer columns.
        index_no: size number (INDEX No) identifying the product to keep.

    Returns:
        dict: {'Batch_<no>_<day>': {'start_time': str, 'end_time': str}}
    """
    mixer_cols = ['Time Stamp',
                  'Size No (INDEX No).3',
                  'Size name',
                  'Mixing batch number',
                  'idle time between batches',
                  ]
    # .copy() so the column assignments below never target a view of raw_df
    # (avoids SettingWithCopyWarning / silent no-op under pandas copy-on-write).
    mixer_df = raw_df[mixer_cols].copy()
    mixer_df['Time Stamp'] = pd.to_datetime(mixer_df['Time Stamp'])
    mixer_df = mixer_df.sort_values(by='Time Stamp')
    numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
    # Convert numeric columns to float so batch numbers format consistently.
    mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
    mixer_df['day'] = mixer_df['Time Stamp'].dt.date
    mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_no]
    mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]
    # Duplicate the timestamp so min and max can be aggregated in one pass.
    mixer_df['time_min'] = mixer_df['Time Stamp']
    mixer_df['time_max'] = mixer_df['Time Stamp']
    aggregation_dict = {
        'time_min': 'min',
        'time_max': 'max',
    }
    group_by = ['day', 'Mixing batch number']
    df_mixer_grouped = mixer_df.groupby(group_by).agg(aggregation_dict).reset_index()
    df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped['time_max'] - df_mixer_grouped['time_min']
    df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped[
        'mixer_section_time_diff_second'].dt.total_seconds()
    df_mixer_grouped['batch-date'] = 'Batch_' + df_mixer_grouped['Mixing batch number'].astype(str) + '_' + \
                                     df_mixer_grouped['day'].astype(str)
    date_dict = {}
    for each_bt in df_mixer_grouped['batch-date'].unique():
        df_nw = df_mixer_grouped[df_mixer_grouped['batch-date'] == each_bt]
        date_dict[each_bt] = {"start_time": str(df_nw['time_min'].iloc[0]),
                              'end_time': str(df_nw['time_max'].iloc[0])}
    return date_dict
def return_batch_no_df_1(
    raw_df, viscosity_df, date_dict, bof_cols, additional_cols, index_no
):
    """Attribute each extruder-section row of *raw_df* to a mixing batch.

    Uses the per-day 'discharge length' progression together with each
    batch's rubber weight from *viscosity_df* to map cumulative extruded
    length to a batch number, then flags rows whose timestamp falls inside
    that batch's mixer window (*date_dict*) and shifts those rows back one
    batch.

    Args:
        raw_df: raw process DataFrame. NOTE: mutated in place — 'day',
            'batch-date' etc. columns are added.
        viscosity_df: viscosity sheet with 'Batch No.', 'Mixing date',
            'Input rubber weight(0.1kg)'.
        date_dict: {'Batch_<no>_<day>': {'start_time', 'end_time'}} from
            mixer_section_start_end_time.
        bof_cols: extruder columns to keep.
        additional_cols: extra columns (day / Time Stamp / lower door open).
        index_no: size number (INDEX No) to keep.

    Returns:
        DataFrame with 'batch_no', 'extruder_flag', 'extruder_batch_number'
        and the final 'batch-date' key per row.
    """
    raw_df = raw_df.sort_values(by='Time Stamp')
    raw_df['Time Stamp'] = pd.to_datetime(raw_df['Time Stamp'])
    raw_df["day"] = raw_df["Time Stamp"].dt.date
    raw_df["day"] = raw_df["day"].astype("str")
    raw_df["Mixing batch number"] = raw_df["Mixing batch number"].astype("float")
    raw_df["batch-date"] = (
        "Batch_"
        + raw_df["Mixing batch number"].astype("str")
        + "_"
        + raw_df["day"].astype("str")
    )
    bof_add_cols = bof_cols + additional_cols
    bof_df = raw_df[bof_add_cols]
    sorted_bof_df = bof_df.sort_values(by="Time Stamp", ascending=True)
    sorted_bof_df = sorted_bof_df[sorted_bof_df["Size No (INDEX No).4"] == index_no]
    # Total extruded length per day (0 when the counter never advanced).
    dt_list = list(sorted_bof_df["day"].unique())
    day_length_dic = {}
    for each_day in dt_list:
        day_df = sorted_bof_df[sorted_bof_df["day"] == each_day]
        if day_df["discharge length"].max() - day_df["discharge length"].min() <= 0:
            value = 0
        else:
            value = day_df["discharge length"].max() - day_df["discharge length"].min()
        day_length_dic[each_day] = value
    # print(day_length_dic)
    sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
    sorted_viscosity_df["day"] = sorted_viscosity_df["Mixing date"].dt.date
    sorted_viscosity_df["day"] = sorted_viscosity_df["day"].astype("str")
    extrud_visc_df = sorted_viscosity_df[
        ["Batch No.", "Input rubber weight(0.1kg)", "day", "Mixing date"]
    ]
    extrud_visc_df["length_from_extruder"] = extrud_visc_df["day"].map(day_length_dic)
    extrud_visc_df["length_from_extruder"] = extrud_visc_df[
        "length_from_extruder"
    ].fillna(0)
    # Weight is recorded in 0.1 kg units, hence the division by 10.
    daily_sum_weight = (
        extrud_visc_df.groupby("day")["Input rubber weight(0.1kg)"].sum() / 10
    )
    # Add a new column 'm/kg' by dividing 'length_from_extruder' by the sum for each day
    extrud_visc_df["m/kg"] = extrud_visc_df.apply(
        lambda row: row["length_from_extruder"] / daily_sum_weight[row["day"]], axis=1
    )
    extrud_visc_df["batch_length"] = extrud_visc_df.apply(
        lambda row: row["m/kg"] * row["Input rubber weight(0.1kg)"] / 10, axis=1
    ).astype("float64")
    extrud_visc_df["batch_length"] = extrud_visc_df["batch_length"].apply(math.ceil)
    extrud_visc_df["cumulative_length"] = extrud_visc_df.groupby("day")[
        "batch_length"
    ].cumsum()
    # {day: {batch_no: cumulative length at the end of that batch}}
    discharge_dict = (
        extrud_visc_df.groupby("day")
        .apply(
            lambda group: group.set_index("Batch No.").to_dict()["cumulative_length"]
        )
        .to_dict()
    )
    test_sorted_extr_df = sorted_bof_df
    test_df = test_sorted_extr_df
    # Initialize an empty list to store batch numbers
    batch_numbers = []
    # Iterate through each row in the DataFrame
    for index, row in test_df.iterrows():
        day = row["day"]
        discharge_length = row["discharge length"]
        if discharge_length == 0:
            batch_numbers.append(0)
        else:
            # Check if the day is in the dictionary
            if day in discharge_dict:
                # Check if discharge length is less than or equal to the corresponding batch length
                batch_length_dict = discharge_dict[day]
                for batch_no, batch_length in batch_length_dict.items():
                    if discharge_length <= batch_length:
                        batch_numbers.append(batch_no)
                        break
                else:
                    # Past the last batch boundary: carry the previous row's
                    # batch number forward.
                    # NOTE(review): raises IndexError if this triggers on the
                    # very first row (batch_numbers still empty) — confirm.
                    batch_numbers.append(batch_numbers[-1])
            else:
                # If day is not in the dictionary, assign NaN to batch number
                batch_numbers.append(np.nan)
    # Add the 'batch_no' column to the DataFrame
    test_df["batch_no"] = batch_numbers
    # Count rising edges of 'lower door open' per day (counter resets at each
    # new day); stored later as 'updtaed_bt_list'.
    batch_number = 0
    batch_list = []
    started_with_one = False
    current_day = None
    for value, day in zip(list(test_df["lower door open"]), list(test_df["day"])):
        if current_day != day:
            current_day = day
            batch_number = 0
        if value == 1:
            if not started_with_one:
                batch_number += 1
                started_with_one = True
            batch_list.append(batch_number)
        else:
            batch_list.append(batch_number)
            started_with_one = False
    test_df["batch_no"] = test_df["batch_no"].astype("float")
    test_df["extruder_batch_date"] = (
        "Batch_"
        + test_df["batch_no"].astype("str")
        + "_"
        + test_df["day"].astype("str")
    )
    # Flag rows whose timestamp lies strictly inside the mixer window of the
    # batch they were mapped to; those rows get shifted back one batch below.
    extruder_flag_list = []
    extrud_flg_vms = []
    for i, value in test_df.iterrows():
        if value["batch_no"] == 0.0:
            extruder_flag_list.append("false")
            extrud_flg_vms.append(0)
        else:
            start_time = date_dict.get(value["extruder_batch_date"]).get("start_time")
            end_time = date_dict.get(value["extruder_batch_date"]).get("end_time")
            if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') > datetime.strptime(
                    start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                    (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') < datetime.strptime(
                        end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                extruder_flag_list.append("true")
                extrud_flg_vms.append(1)
            else:
                extruder_flag_list.append("false")
                extrud_flg_vms.append(0)
    test_df["extruder_flag"] = extruder_flag_list
    test_df["extruder_batch_diff"] = extrud_flg_vms
    # NOTE(review): column-name typo ('updtaed') kept as-is — it is a runtime
    # string that downstream consumers may already rely on.
    test_df["updtaed_bt_list"] = batch_list
    test_df["extruder_batch_number"] = test_df["batch_no"] - test_df[
        "extruder_batch_diff"
    ].astype("float")
    test_df["batch-date"] = (
        "Batch_"
        + test_df["extruder_batch_number"].astype("str")
        + "_"
        + test_df["day"].astype("str")
    )
    return test_df
def preprocess_extruder_section(df, index_number, vis_df):
    """Aggregate the extruder section of the raw data into one row per batch.

    Assigns a batch to every extruder row via return_batch_no_df_1, drops
    rows without a batch, and aggregates per 'batch-date' using
    ExtruderConstants.aggregate_dict, renaming the aggregated columns to
    snake_case '<col>_<agg>'.

    Args:
        df: raw process DataFrame.
        index_number: size number (INDEX No) to keep.
        vis_df: viscosity sheet with 'Mixing date', 'Batch No.', 'Index No'.

    Returns:
        DataFrame: one aggregated row per batch-date, NaNs filled with the
        column mean, rounded to 6 decimals.
    """
    extruder_cols = ExtruderConstants.extruder_cols
    additional_columns = ['Time Stamp']
    df_extruder = df[extruder_cols + additional_columns]
    df_extruder['Time Stamp'] = pd.to_datetime(df_extruder['Time Stamp'])
    df_extruder = df_extruder.sort_values(by='Time Stamp')
    df_extruder['day'] = df_extruder['Time Stamp'].dt.date
    df_extruder['day'] = df_extruder['day'].astype('str')
    # NOTE(review): sorted_extrud_df (and the drop below) is never used after
    # this point — the batch mapping works on `df` directly. Dead code?
    sorted_extrud_df = df_extruder.sort_values(by="Time Stamp", ascending=True)
    sorted_extrud_df = sorted_extrud_df[sorted_extrud_df['Size No (INDEX No).4'] == index_number]
    drop_col = ['spare.19',
                'spare.20',
                'spare.21',
                'spare.22',
                'spare.23',
                'spare.24',
                'spare.25', 'Hopper bank upper limit',
                'middle of hopper bank',
                'Hopper bank lower limit',
                'Hopper bank below lower limit']
    sorted_extrud_df.drop(columns=drop_col, inplace=True)
    date_dict = mixer_section_start_end_time(df, index_number)
    additional_cols = ['day', 'Time Stamp', 'lower door open']
    # adding date col to the viscosity df
    vis_df = vis_df.sort_values(by='Mixing date')
    vis_df['date'] = vis_df['Mixing date'].dt.date
    # Batch No. is cast to float first so the key matches the float-formatted
    # batch numbers produced from the raw data (e.g. 'Batch_1.0_...').
    vis_df['batch-date'] = 'Batch_' + vis_df['Batch No.'].astype('float').astype(str) + '_' + vis_df[
        'date'].astype(str)
    vis_df = vis_df[vis_df['Index No'] == index_number]
    extruder_merged_df_final = return_batch_no_df_1(df, vis_df, date_dict, extruder_cols, additional_cols,
                                                    index_number)
    # Batch 0 means "no batch assigned" — drop those rows.
    extruder_merged_df_final = extruder_merged_df_final[extruder_merged_df_final['extruder_batch_number'] != 0]
    grouped_cols = ['batch-date']
    aggregate_dict = ExtruderConstants.aggregate_dict
    df_extruder_grouped = extruder_merged_df_final.groupby(grouped_cols).agg(aggregate_dict).reset_index()
    # Rename aggregated columns to snake_case '<col>_<agg>', keeping a few
    # pass-through names unchanged.
    col_renamer = {}
    for col, col_agg in aggregate_dict.items():
        if col not in ['viscosity', 'time_min', 'time_max', 'Mixing Weight (Integrated Value)_diff', 'max_rpm_count']:
            renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
            col_renamer[col] = renamed_col
        else:
            col_renamer[col] = col
    df_extruder_grouped = df_extruder_grouped.rename(columns=col_renamer)
    df_extruder_grouped = df_extruder_grouped.fillna(df_extruder_grouped.mean())
    df_extruder_grouped = round(df_extruder_grouped, 6)
    return df_extruder_grouped
import math
import warnings
import traceback
from datetime import datetime
import numpy as np
import pandas as pd
from loguru import logger
from scripts.constants.constants import BofConstants
# Silence all library warnings (e.g. pandas chained-assignment) to keep logs clean.
warnings.filterwarnings("ignore")
def mixer_section_start_end_time(raw_df, index_no):
    """Return per-batch mixer start/end timestamps for the BOF pipeline.

    Filters *raw_df* to the given size index (``Size No (INDEX No).3``) and
    non-zero batch numbers, then takes the earliest and latest 'Time Stamp'
    per (day, batch) group.

    Args:
        raw_df: raw process DataFrame containing BofConstants.bof_mixer_cols.
        index_no: size number (INDEX No) identifying the product to keep.

    Returns:
        dict: {'Batch_<no>_<day>': {'start_time': str, 'end_time': str}}

    Raises:
        Exception: re-raised (chained to the original cause) after logging.
    """
    try:
        mixer_cols = BofConstants.bof_mixer_cols
        # .copy() so the assignments below never target a view of raw_df
        # (avoids SettingWithCopyWarning / silent no-op under copy-on-write).
        mixer_df = raw_df[mixer_cols].copy()
        mixer_df['Time Stamp'] = pd.to_datetime(mixer_df['Time Stamp'])
        mixer_df = mixer_df.sort_values(by='Time Stamp')
        numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
        # Convert numeric columns to float so batch numbers format consistently.
        mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
        mixer_df['day'] = mixer_df['Time Stamp'].dt.date
        mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_no]
        mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]
        # Duplicate the timestamp so min and max can be aggregated in one pass.
        mixer_df['time_min'] = mixer_df['Time Stamp']
        mixer_df['time_max'] = mixer_df['Time Stamp']
        aggregation_dict = {
            'time_min': 'min',
            'time_max': 'max',
        }
        group_by = ['day', 'Mixing batch number']
        df_mixer_grouped = mixer_df.groupby(group_by).agg(aggregation_dict).reset_index()
        df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped['time_max'] - df_mixer_grouped['time_min']
        df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped[
            'mixer_section_time_diff_second'].dt.total_seconds()
        df_mixer_grouped['batch-date'] = 'Batch_' + df_mixer_grouped['Mixing batch number'].astype(str) + '_' + \
                                         df_mixer_grouped['day'].astype(str)
        date_dict = {}
        for each_bt in df_mixer_grouped['batch-date'].unique():
            df_nw = df_mixer_grouped[df_mixer_grouped['batch-date'] == each_bt]
            date_dict[each_bt] = {"start_time": str(df_nw['time_min'].iloc[0]),
                                  'end_time': str(df_nw['time_max'].iloc[0])}
        return date_dict
    except Exception as err:
        logger.error(f'Error in fetching mixer batch date dictionary: {str(err)}')
        logger.error(traceback.format_exc())
        # Chain the original exception so the real traceback is preserved.
        raise Exception(str(err)) from err
def return_batch_no_df(raw_df, viscosity_df, date_dict, index_number):
    """Attribute each BOF-section row of *raw_df* to a mixing batch.

    Uses the per-day 'length passed through' progression together with each
    batch's rubber weight from *viscosity_df* to map cumulative sheet
    length to a batch number, then flags rows whose timestamp falls inside
    that batch's mixer window (*date_dict*) and shifts those rows back one
    batch.

    Args:
        raw_df: raw process DataFrame. NOTE: mutated in place — 'day' and
            'batch-date' columns are added.
        viscosity_df: viscosity sheet with 'Batch No.', 'Mixing date',
            'Input rubber weight(0.1kg)'.
        date_dict: {'Batch_<no>_<day>': {'start_time', 'end_time'}} from
            mixer_section_start_end_time.
        index_number: size number (INDEX No) to keep.

    Returns:
        DataFrame with 'batch_no', 'bof_flag', 'bof_batch_diff',
        'bof_batch_number' and the final 'batch-date' key per row.

    Raises:
        Exception: re-raised (chained to the original cause) after logging.
    """
    try:
        logger.info('Getting bof batch number')
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        raw_df['Mixing batch number'] = raw_df['Mixing batch number'].astype('float')
        raw_df['batch-date'] = 'Batch_' + raw_df['Mixing batch number'].astype(
            'str') + '_' + raw_df['day'].astype('str')
        bof_add_cols = BofConstants.bof_add_cols
        bof_df = raw_df[BofConstants.bof_cols + bof_add_cols]
        sorted_bof_df = bof_df.sort_values(by="Time Stamp", ascending=True)
        sorted_bof_df = sorted_bof_df[sorted_bof_df['Size No (INDEX No).5'] == index_number]
        # Total sheet length produced per day (0 when the counter never advanced).
        dt_list = list(sorted_bof_df['day'].unique())
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_bof_df[sorted_bof_df['day'] == each_day]
            if (day_df['length passed through'].max() - day_df['length passed through'].min()) <= 0:
                value = 0
            else:
                value = day_df['length passed through'].max() - day_df['length passed through'].min()
            day_length_dic[each_day] = value
        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date
        sorted_viscosity_df['day'] = sorted_viscosity_df['day'].astype('str')
        extrud_visc_df = sorted_viscosity_df[['Batch No.', 'Input rubber weight(0.1kg)', 'day', 'Mixing date']]
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['day'].map(day_length_dic)
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['length_from_extruder'].fillna(0)
        # Weight is recorded in 0.1 kg units, hence the division by 10.
        daily_sum_weight = extrud_visc_df.groupby('day')['Input rubber weight(0.1kg)'].sum() / 10
        # Add a new column 'm/kg' by dividing 'length_from_extruder' by the sum for each day
        extrud_visc_df['m/kg'] = extrud_visc_df.apply(
            lambda row: row['length_from_extruder'] / daily_sum_weight[row['day']], axis=1)
        extrud_visc_df['batch_length'] = extrud_visc_df.apply(
            lambda row: row['m/kg'] * row['Input rubber weight(0.1kg)'] / 10, axis=1).astype('float64')
        extrud_visc_df['batch_length'] = extrud_visc_df['batch_length'].apply(math.ceil)
        extrud_visc_df['cumulative_length'] = extrud_visc_df.groupby('day')['batch_length'].cumsum()
        # {day: {batch_no: cumulative length at the end of that batch}}
        discharge_dict = extrud_visc_df.groupby('day').apply(
            lambda group: group.set_index('Batch No.').to_dict()['cumulative_length']).to_dict()
        test_df = sorted_bof_df
        # Map each row's cumulative length to the first batch whose boundary
        # it does not exceed.
        batch_numbers = []
        for index, row in test_df.iterrows():
            day = row['day']
            discharge_length = row['length passed through']
            if discharge_length == 0:
                batch_numbers.append(0)
            else:
                if day in discharge_dict:
                    batch_length_dict = discharge_dict[day]
                    for batch_no, batch_length in batch_length_dict.items():
                        if discharge_length <= batch_length:
                            batch_numbers.append(batch_no)
                            break
                    else:
                        # Past the last batch boundary: carry the previous
                        # row's batch number forward.
                        batch_numbers.append(batch_numbers[-1])
                else:
                    # Day absent from the viscosity data: no batch known.
                    batch_numbers.append(np.nan)
        test_df['batch_no'] = batch_numbers
        # (Two loops that rebuilt an unused 'lower door open' batch counter
        # were dead code — their only consumer was commented out — and have
        # been removed.)
        test_df['batch_no'] = test_df['batch_no'].astype('float')
        test_df['bof_batch_date'] = 'Batch_' + test_df['batch_no'].astype('str') + '_' + test_df['day'].astype(
            'str')
        # Flag rows whose timestamp lies strictly inside the mixer window of
        # the batch they were mapped to; those rows are shifted back one
        # batch below (presumably material still in the mixer belongs to the
        # previous batch — confirm with process owners).
        extruder_flag_list = []
        extrud_flg_vms = []
        for i, value in test_df.iterrows():
            if value['batch_no'] == 0.0:
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)
            else:
                start_time = date_dict.get(value["bof_batch_date"]).get("start_time")
                end_time = date_dict.get(value["bof_batch_date"]).get("end_time")
                if (datetime.strptime(str(value["Time Stamp"]).split('+')[0],
                                      '%Y-%m-%d %H:%M:%S') > datetime.strptime(
                    start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                        (datetime.strptime(str(value["Time Stamp"]).split('+')[0],
                                           '%Y-%m-%d %H:%M:%S') < datetime.strptime(
                            end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                    extruder_flag_list.append('true')
                    extrud_flg_vms.append(1)
                else:
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
        test_df['bof_flag'] = extruder_flag_list
        test_df['bof_batch_diff'] = extrud_flg_vms
        test_df['bof_batch_number'] = test_df['batch_no'] - test_df['bof_batch_diff'].astype('float')
        test_df['batch-date'] = 'Batch_' + test_df['bof_batch_number'].astype(
            'str') + '_' + test_df['day'].astype('str')
        return test_df
    except Exception as er:
        logger.error(f'Error in adding batch data to bof section: {str(er)}')
        logger.error(traceback.format_exc())
        # Chain the original exception so the real traceback is preserved.
        raise Exception(str(er)) from er
def preprocess_bof_section(df, index_number, vis_df):
    """Aggregate the BOF section of the raw data into one row per batch.

    Maps every BOF row to a batch via return_batch_no_df, drops rows with
    no batch, aggregates per 'batch-date' using
    BofConstants.bof_aggregate_dict, and returns the rounded, mean-imputed
    result with the 'batch-date' key re-attached.
    """
    try:
        df['Time Stamp'] = pd.to_datetime(df['Time Stamp'])
        df = df.sort_values(by='Time Stamp')
        df['day'] = df['Time Stamp'].dt.date
        df['day'] = df['day'].astype('str')
        date_dict = mixer_section_start_end_time(df, index_number)
        bof_merged_df_final = return_batch_no_df(df, vis_df, date_dict, index_number)
        # Batch 0 means "no batch assigned" — drop those rows.
        bof_merged_df_final = bof_merged_df_final[bof_merged_df_final['bof_batch_number'] != 0]
        aggregate_dict = BofConstants.bof_aggregate_dict
        df_bof_grouped = bof_merged_df_final.groupby(['batch-date']).agg(aggregate_dict).reset_index()
        # Rename aggregated columns to snake_case '<col>_<agg>', keeping a
        # handful of pass-through names unchanged.
        keep_as_is = {'viscosity', 'time_min', 'time_max',
                      'Mixing Weight (Integrated Value)_diff', 'max_rpm_count'}
        col_renamer = {}
        for col, col_agg in aggregate_dict.items():
            if col in keep_as_is:
                col_renamer[col] = col
            else:
                col_renamer[col] = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
        df_bof_grouped = df_bof_grouped.rename(columns=col_renamer)
        # Impute and round on the numeric part only, then re-attach the key.
        numeric_part = df_bof_grouped.drop('batch-date', axis=1)
        numeric_part = numeric_part.fillna(numeric_part.mean())
        numeric_part = round(numeric_part, 6)
        numeric_part['batch-date'] = df_bof_grouped['batch-date']
        return numeric_part
    except Exception as err:
        logger.error(f'Error in fetching the bof preprocess data: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
class ModelLoader(object):
    """Load a persisted model described by a configuration dict.

    ``model_info`` is expected to carry a ``type`` key (only
    ``"mlflow.sklearn"`` is handled) and a ``path`` key pointing at the
    stored model artifact.
    """

    def __init__(self, model_info):
        self.model_info = model_info

    def load_model(self):
        """Dispatch on the configured model type.

        Returns the loaded model, or None when the type is unsupported or
        loading fails (failures are logged, not raised).
        """
        logger.info("Loading the Model")
        if self.model_info["type"] != "mlflow.sklearn":
            logger.info("Unsupported Model Type")
            return None
        return self._load_mlflow_sklearn_model()

    def _load_mlflow_sklearn_model(self):
        # Best-effort load: any failure is logged and None is returned.
        try:
            loaded = mlflow.sklearn.load_model(self.model_info["path"])
        except Exception as e:
            logger.error("Error while loading mlflow.sklearn model : {}".format(str(e)))
            return None
        logger.debug("Model loaded successfully!")
        return loaded
class RawConstants:
    """Column names of the raw process export, in sheet order.

    The numeric suffixes ('.1', '.3', ...) are the duplicate-column suffixes
    pandas appends when reading the export; the embedded '\\n' characters
    reproduce line breaks present in the original headers.
    """
    columns = ['Time Stamp',
               'Shipper size No.',
               'Shipper No.1 DH',
               'Shipper No.1 Pallet',
               'Shipper No.3 DH',
               'Shipper No.2 Pallet',
               'Shipper No.3 DH.1',
               'Shipper No.3 Pallet',
               'Size No (INDEX No)',
               'Weighing times',
               'Process mass',
               'Mass',
               'Material detection',
               'Surface temperature (mixer side)',
               'Surface temperature (center)',
               'Surface temperature (receiving side)',
               'temperature',
               'humidity',
               'Weighing command No.',
               'spare',
               'spare.1',
               'spare.2',
               'spare.3',
               'spare.4',
               'Size No (INDEX No).1',
               'Weighing times.1',
               'Process mass.1',
               'real mass',
               'spare.5',
               'spare.6',
               'spare.7',
               'Size No (INDEX No).2',
               'Weighing times.2',
               'Process mass.2',
               'CB weighing machine measurement',
               'Dust collection duct (Immediately after ****)',
               'Dust collection duct (before dust collector)\n',
               'CB slot open',
               'CB slot closed',
               'carbon cycle',
               'carbon2 cycle',
               'spare.8',
               'spare.9',
               'spare.10',
               'spare.11',
               'Size No (INDEX No).3',
               'Size name',
               'Mixing batch number',
               'Mixing Weight (Integrated Value)',
               'Rotor actual rpm',
               'Mixing timer value',
               'Temperature (DS side)',
               'Temperature (WS side)',
               'Electric power',
               'Electric energy',
               'Mixing electric power average',
               'Ram pressure',
               'Ram rising',
               'Ram down',
               'Ram position',
               'front door open',
               'Front door closed',
               'lower door open',
               'lower door closed',
               'Before mixer rotation detection',
               'After mixer rotation detection',
               'Drilled side left Inlet side Cooling water temperature',
               'Drilled side left Exit side Cooling water temperature',
               'Drilled side right Inlet side Cooling water temperature',
               'Drilled side right Exit side Cooling water temperature',
               'Mixer rotor left inlet side Coolant temperature',
               'Mixer rotor left output side Cooling water temperature',
               'Mixer rotor right inlet side Coolant temperature',
               'Mixer rotor right exit side Cooling water temperature',
               'Mixer body temperature',
               'Drilled side left Inlet side Cooling water flow rate',
               'Drilled side left Exit side Cooling water flow rate',
               'Drilled side right Inlet side Cooling water flow rate',
               'Drilled side right Exit side Cooling water flow rate',
               'Mixer rotor left inlet side Cooling water flow rate',
               'Mixer rotor left outlet side Cooling water flow rate',
               'Mixer rotor right inlet side Cooling water flow rate',
               'Mixer rotor right outlet side Cooling water flow rate',
               'temperature.1',
               'humidity.1',
               'idle time between batches',
               'spare.12',
               'spare.13',
               'spare.14',
               'spare.15',
               'spare.16',
               'spare.17',
               'spare.18',
               'Size No (INDEX No).4',
               'discharge length',
               'Hopper bank upper limit',
               'middle of hopper bank',
               'Hopper bank lower limit',
               'Hopper bank below lower limit',
               'Extruder rpm',
               'Extruder current',
               'Calendar rpm',
               ' Calendar current',
               'Calendar bank load',
               'Calendar GAP Operation side',
               'Calendar GAP Opposite operation side',
               'Residence time',
               'Screw operation side Inlet side Cooling water temperature',
               'Screw operation side Outlet side Cooling water temperature',
               'Screw Opposite operation side Inlet side Cooling water temperature',
               'Screw Opposite operation side Outlet side Cooling water temperature',
               'Calender roll Lower side Inlet side Cooling water temperature',
               'Calender roll Lower side Outlet side Cooling water temperature',
               'Calender roll upper side Inlet side Cooling water temperature',
               'Calender roll Upper side Outlet side Cooling water temperature',
               'Screw operation side Inlet side Cooling water flow rate',
               'Screw operation side Outlet side Cooling water flow rate',
               'Screw Opposite operation side Inlet side Cooling water flow rate',
               'Screw Opposite operation side Outlet side Cooling water flow rate',
               'Calender roll Lower side Inlet side Cooling water flow rate',
               'Calender roll Lower side Outlet side Cooling water flow rate',
               'Calender roll upper side Inlet side Cooling water flow rate',
               'Calender roll Upper side Outlet side Cooling water flow rate',
               'Extruder body temperature',
               'spare.19',
               'spare.20',
               'spare.21',
               'spare.22',
               'spare.23',
               'spare.24',
               'spare.25',
               'Size No (INDEX No).5',
               'length passed through',
               'Material detection.1',
               'Sheet temperature immediately after calendering',
               'Withdrawal CV speed',
               'DUST CV\nspeed',
               'spare.26',
               'spare.27',
               'spare.28',
               'Size No (INDEX No).6',
               'length passed through.1',
               'Material detection.2',
               'Seat temperature immediately after BOF',
               'temperature.2',
               'humidity.2',
               'spare.29',
               'spare.30',
               'spare.31',
               'spare.32',
               'Size No (INDEX No).7',
               'Setting length',
               'length passed through(Integrated Value)',
               'Mass\n(Integrated Value)',
               'Pallet No.',
               'Loading completion flag',
               'spare.33',
               'spare.34',
               'spare.35',
               'spare.36',
               'mixer cooling water',
               'Under cooling water']
class ViscosityConstants:
    """Column-name constants for the viscosity (lab) spreadsheet."""
    # Rubber-property columns for the two bale types; '-' placeholders in
    # these columns are replaced with 0 during preprocessing.
    # NOTE(review): 'DIRT_type1.1' appears in the type-2 group — presumably it
    # is the sheet's second DIRT column (type-2) as suffixed by pandas; confirm
    # against the source workbook.
    rubber_cols = [
        'Quantity using type1 bale',
        'PO_type1',
        'DIRT_type1',
        'ASH_type1',
        'VM_type1',
        'PRI_type1',
        'NITROGEN_type1',
        'Temperature during transportation_type1[℃]',
        'Humidity during transportation_type1[%]',
        'Quantity using type2 bale',
        'PO_type2',
        'DIRT_type1.1',
        'ASH_type2',
        'VM_type2',
        'PRI_type2',
        'NITROGEN_type2',
        'Temperature during transportation_type2[℃]',
        'Humidity during transportation__type2[%]'
    ]
    # Columns retained in the final preprocessed viscosity frame
    # ('viscosity' is the model target, 'status' the validity flag).
    req_cols = [
        'Rubber No.', 'Batch No.', 'Index No', 'Chemical weight (g)',
        'Input rubber weight(0.1kg)', 'date', 'batch-date',
        'Weight_type1', 'Weight_type2', 'Weighted_PO_type',
        'Weighted_DIRT_type', 'Weighted_ASH_type', 'Weighted_VM_type',
        'Weighted_PRI_type', 'Weighted_NITROGEN_type',
        'Weighted_Temperature during transportation_type[℃]',
        'Weighted_Humidity during transportation__type[%]', 'Weighted Sum', 'viscosity', 'status']
class SheetConstants:
    """Constants for the sheet-supply section of the raw data."""
    # Raw columns belonging to the sheet-supply section.
    sheet_supply_column = ['Time Stamp',
                          'Shipper size No.',
                          'Shipper No.1 DH',
                          'Shipper No.1 Pallet',
                          'Shipper No.3 DH',
                          'Shipper No.2 Pallet',
                          'Shipper No.3 DH.1',
                          'Shipper No.3 Pallet',
                          'Size No (INDEX No)',
                          'Weighing times',
                          'Process mass',
                          'Mass',
                          'Material detection',
                          'Surface temperature (mixer side)',
                          'Surface temperature (center)',
                          'Surface temperature (receiving side)',
                          'temperature',
                          'humidity',
                          'Weighing command No.',
                          'spare',
                          'spare.1',
                          'spare.2',
                          'spare.3',
                          'spare.4']
    # Per-batch aggregation applied to the sheet-supply columns
    # (column -> pandas aggregation name).
    aggregation_dict = {
        "Surface temperature (mixer side)": "mean",
        "Surface temperature (center)": "std",
        "Surface temperature (receiving side)": "mean",
        "temperature": "mean",
        "humidity": "mean",
        'Process mass': 'mean',
    }
class MixerConstants:
    """Constants for the mixer section of the raw data."""
    # Raw columns belonging to the mixer section.
    mixer_cols = ['Time Stamp',
                  'Size No (INDEX No).3',
                  'Size name',
                  'Mixing batch number',
                  'Mixing Weight (Integrated Value)',
                  'Rotor actual rpm',
                  'Mixing timer value',
                  'Temperature (DS side)',
                  'Temperature (WS side)',
                  'Electric power',
                  'Electric energy',
                  'Mixing electric power average',
                  'Ram pressure',
                  'Ram rising',
                  'Ram down',
                  'Ram position',
                  'front door open',
                  'Front door closed',
                  'lower door open',
                  'lower door closed',
                  'Before mixer rotation detection',
                  'After mixer rotation detection',
                  'Drilled side left Inlet side Cooling water temperature',
                  'Drilled side left Exit side Cooling water temperature',
                  'Drilled side right Inlet side Cooling water temperature',
                  'Drilled side right Exit side Cooling water temperature',
                  'Mixer rotor left inlet side Coolant temperature',
                  'Mixer rotor left output side Cooling water temperature',
                  'Mixer rotor right inlet side Coolant temperature',
                  'Mixer rotor right exit side Cooling water temperature',
                  'Mixer body temperature',
                  'Drilled side left Inlet side Cooling water flow rate',
                  'Drilled side left Exit side Cooling water flow rate',
                  'Drilled side right Inlet side Cooling water flow rate',
                  'Drilled side right Exit side Cooling water flow rate',
                  'Mixer rotor left inlet side Cooling water flow rate',
                  'Mixer rotor left outlet side Cooling water flow rate',
                  'Mixer rotor right inlet side Cooling water flow rate',
                  'Mixer rotor right outlet side Cooling water flow rate',
                  'temperature.1',
                  'humidity.1',
                  'idle time between batches',
                  ]
    # Per-batch aggregation for the mixer section (column -> pandas agg name).
    # Columns left commented out are intentionally not aggregated.
    aggregation_dict = {
        'Mixing timer value': 'max',
        'Temperature (DS side)': 'mean',
        'Temperature (WS side)': 'std',
        'Electric power': 'mean',
        'Electric energy': 'mean',
        'Mixing electric power average': 'mean',
        'Ram pressure': 'mean',
        # 'Ram rising': '',
        # 'Ram down': '',
        'Ram position': 'std',
        # 'front door open': '',
        # 'Front door closed': '',
        # 'lower door open': '',
        # 'lower door closed': '',
        # 'Before mixer rotation detection': '',
        # 'After mixer rotation detection': '',
        'Drilled side left Inlet side Cooling water temperature': 'std',
        'Drilled side left Exit side Cooling water temperature': 'mean',  #
        'Drilled side right Inlet side Cooling water temperature': 'mean',
        'Drilled side right Exit side Cooling water temperature': 'std',
        'Mixer rotor left inlet side Coolant temperature': 'std',
        'Mixer rotor left output side Cooling water temperature': 'mean',
        'Mixer rotor right inlet side Coolant temperature': 'mean',
        'Mixer rotor right exit side Cooling water temperature': 'std',
        'Mixer body temperature': 'mean',
        'Drilled side left Inlet side Cooling water flow rate': 'std',
        'Drilled side left Exit side Cooling water flow rate': 'mean',  #
        'Drilled side right Inlet side Cooling water flow rate': 'mean',
        'Drilled side right Exit side Cooling water flow rate': 'std',  #
        'Mixer rotor left inlet side Cooling water flow rate': 'std',
        'Mixer rotor left outlet side Cooling water flow rate': 'mean',
        'Mixer rotor right inlet side Cooling water flow rate': 'mean',
        'Mixer rotor right outlet side Cooling water flow rate': 'std',
        'temperature.1': 'mean',
        'humidity.1': 'mean',
        'idle time between batches': 'mean',
        'Mixing Weight (Integrated Value)_diff': 'max',  # any agg will work
        'max_rpm_count': 'max'  # any agg will work
    }
class ExtruderConstants:
    """Column names and per-batch aggregation rules for the extruder section.

    NOTE(review): ' Calendar current' really does begin with a space, and the
    'Calendar'/'Calender' spellings are mixed -- these appear to mirror the raw
    file headers exactly, so do not normalise them here without checking the
    source files.
    """

    # Raw-file columns belonging to the extruder section, in file order.
    extruder_cols = ['Size No (INDEX No).4',
                     'discharge length',
                     'Hopper bank upper limit',
                     'middle of hopper bank',
                     'Hopper bank lower limit',
                     'Hopper bank below lower limit',
                     'Extruder rpm',
                     'Extruder current',
                     'Calendar rpm',
                     ' Calendar current',
                     'Calendar bank load',
                     'Calendar GAP Operation side',
                     'Calendar GAP Opposite operation side',
                     'Residence time',
                     'Screw operation side Inlet side Cooling water temperature',
                     'Screw operation side Outlet side Cooling water temperature',
                     'Screw Opposite operation side Inlet side Cooling water temperature',
                     'Screw Opposite operation side Outlet side Cooling water temperature',
                     'Calender roll Lower side Inlet side Cooling water temperature',
                     'Calender roll Lower side Outlet side Cooling water temperature',
                     'Calender roll upper side Inlet side Cooling water temperature',
                     'Calender roll Upper side Outlet side Cooling water temperature',
                     'Screw operation side Inlet side Cooling water flow rate',
                     'Screw operation side Outlet side Cooling water flow rate',
                     'Screw Opposite operation side Inlet side Cooling water flow rate',
                     'Screw Opposite operation side Outlet side Cooling water flow rate',
                     'Calender roll Lower side Inlet side Cooling water flow rate',
                     'Calender roll Lower side Outlet side Cooling water flow rate',
                     'Calender roll upper side Inlet side Cooling water flow rate',
                     'Calender roll Upper side Outlet side Cooling water flow rate',
                     'Extruder body temperature',
                     'spare.19',
                     'spare.20',
                     'spare.21',
                     'spare.22',
                     'spare.23',
                     'spare.24',
                     'spare.25']

    # Aggregations applied when collapsing extruder rows into one row per batch.
    # Dict insertion order fixes the order of the aggregated output columns.
    aggregate_dict = {
        'discharge length': "max",
        'Extruder rpm': "mean",
        'Extruder current': "std",
        'Calendar rpm': "std",
        ' Calendar current': "mean",
        'Calendar bank load': "max",
        'Calendar GAP Operation side': "median",
        'Calendar GAP Opposite operation side': "std",
        'Residence time': "max",
        'Screw operation side Inlet side Cooling water temperature': "mean",
        'Screw operation side Outlet side Cooling water temperature': "std",
        'Screw Opposite operation side Inlet side Cooling water temperature': "mean",
        'Screw Opposite operation side Outlet side Cooling water temperature': "std",
        'Calender roll Lower side Inlet side Cooling water temperature': "mean",
        'Calender roll Lower side Outlet side Cooling water temperature': "std",
        'Calender roll upper side Inlet side Cooling water temperature': "mean",
        'Calender roll Upper side Outlet side Cooling water temperature': "std",
        'Screw operation side Inlet side Cooling water flow rate': "mean",
        'Screw operation side Outlet side Cooling water flow rate': "std",
        'Screw Opposite operation side Inlet side Cooling water flow rate': "mean",
        'Screw Opposite operation side Outlet side Cooling water flow rate': "std",
        'Calender roll Lower side Inlet side Cooling water flow rate': "mean",
        'Calender roll Lower side Outlet side Cooling water flow rate': "std",
        'Calender roll upper side Inlet side Cooling water flow rate': "mean",
        'Calender roll Upper side Outlet side Cooling water flow rate': "std",
        'Extruder body temperature': "mean"
    }
class PickupConstants:
    """Column names and aggregation rules for the pickup section of the raw data."""

    # Raw-file columns belonging to the pickup section, in file order.
    pick_cols = ['Size No (INDEX No).6',
                 'length passed through.1',
                 'Material detection.2',
                 'Seat temperature immediately after BOF',
                 'temperature.2',
                 'humidity.2',
                 'spare.29',
                 'spare.30',
                 'spare.31',
                 'spare.32']
    # Mixer-section columns carried along when building pickup batches
    # (presumably for the batch/timestamp join -- confirm against the caller).
    pick_imp_mixer_cols = ['Time Stamp',
                           'Size No (INDEX No).3',
                           'Size name',
                           'Mixing batch number',
                           'idle time between batches',
                           ]
    # BOF-section columns carried along for the pickup/BOF batch alignment.
    pick_imp_bof_cols = ['Time Stamp',
                         'Size No (INDEX No).5',
                         'bof_batch_number'
                         ]
    # Extra columns added during pickup preprocessing.
    pick_additional_cols = ['day',
                            'Time Stamp',
                            'length passed through',
                            'discharge length']
    # Per-batch aggregations for the pickup section.
    pick_aggregate_dict = {'Seat temperature immediately after BOF': 'mean', 'viscosity': 'mean'}
    # Grouping key used when aggregating pickup rows.
    pick_grouped_cols = ['batch-date']
class BofConstants:
    """Column names and aggregation rules for the BOF section of the raw data.

    NOTE(review): 'DUST CV\\nspeed' contains an embedded newline -- it appears
    to mirror a multi-line header cell in the raw file, so keep it verbatim.
    """

    # Raw-file columns belonging to the BOF section, in file order.
    bof_cols = ['Size No (INDEX No).5',
                'length passed through',
                'Material detection.1',
                'Sheet temperature immediately after calendering',
                'Withdrawal CV speed',
                'DUST CV\nspeed', 'spare.26',
                'spare.27',
                'spare.28', 'lower door open']
    # Extra columns added during BOF preprocessing.
    bof_add_cols = ['Time Stamp', 'day', 'lower door open']
    # Mixer-section columns carried along for the BOF/mixer batch alignment.
    bof_mixer_cols = ['Time Stamp',
                      'Size No (INDEX No).3',
                      'Size name',
                      'Mixing batch number',
                      'idle time between batches']
    # Per-batch aggregations for the BOF section.
    bof_aggregate_dict = {'Sheet temperature immediately after calendering': 'mean',
                          'Withdrawal CV speed': 'mean',
                          'DUST CV\nspeed': 'std'}
    # Legacy alias: the original code accidentally chained the assignment
    # (`bof_aggregate_dict = aggregate_dict = {...}`), creating this second
    # attribute.  Kept explicitly so any caller using the old name still works.
    aggregate_dict = bof_aggregate_dict
def model_trainer(df_grouped, index_no, model_path):
    """Evaluate a previously-saved viscosity model on a held-out split.

    Despite the name, no training happens here: the saved model is loaded via
    ``ModelLoader``, the merged per-batch data is split train/test, and a set
    of regression metrics on the test fold is printed and returned.

    Args:
        df_grouped: merged per-batch DataFrame holding the feature columns and
            a ``viscosity`` target column.
        index_no: product index number (1250 or 3294) selecting the feature set.
        model_path: path handed to ``ModelLoader`` (type ``mlflow.sklearn``).

    Returns:
        dict mapping metric names to their values on the test fold.

    Raises:
        ValueError: if ``index_no`` has no configured feature set (the
            original code crashed later with an opaque ``TypeError`` on
            ``None + list``).
    """
    # Feature columns were selected offline per product index; keep this table
    # in sync with load_and_predict, which uses the same feature sets.
    feature_cols_by_index = {
        1250: ['temperature_ws_side_std', '_calendar_current_mean', 'Weighted_NITROGEN_type', 'ram_pressure_mean',
               'electric_energy_mean', 'screw_operation_side_outlet_side_cooling_water_flow_rate_std',
               'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean', 'Weighted_VM_type',
               'seat_temperature_immediately_after_bof_mean', 'Weighted_DIRT_type', 'surface_temperature_center_std',
               'residence_time_max', 'drilled_side_left_exit_side_cooling_water_temperature_mean',
               'Weighted_PRI_type', 'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean',
               'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std', 'Weighted_ASH_type',
               'Weighted_PO_type', 'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean',
               'drilled_side_right_exit_side_cooling_water_flow_rate_std',
               'Weighted_Humidity during transportation__type[%]'],
        3294: ['Weighted_ASH_type', 'Weighted_NITROGEN_type', 'electric_energy_mean',
               'drilled_side_left_inlet_side_cooling_water_temperature_std',
               'seat_temperature_immediately_after_bof_mean',
               'mixer_rotor_left_outlet_side_cooling_water_flow_rate_mean', 'humidity_mean',
               'drilled_side_left_exit_side_cooling_water_flow_rate_mean',
               'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'calendar_bank_load_max',
               'drilled_side_right_inlet_side_cooling_water_flow_rate_mean', 'Weighted_PRI_type',
               'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean', 'temperature_ws_side_std',
               'dust_cv\nspeed_std', 'mixer_rotor_right_inlet_side_coolant_temperature_mean', 'ram_position_std',
               'drilled_side_right_exit_side_cooling_water_temperature_std',
               'calender_roll_upper_side__outlet__side_cooling_water_temperature_std',
               'Weighted_Temperature during transportation_type[℃]'],
    }
    try:
        cols_x = feature_cols_by_index[index_no]
    except KeyError:
        raise ValueError(
            f"No feature configuration for index number {index_no}; "
            f"expected one of {sorted(feature_cols_by_index)}") from None
    cols_y = "viscosity"
    saved_model = ModelLoader({
        "type": "mlflow.sklearn",
        "path": model_path
    }).load_model()
    features = df_grouped[cols_x]
    labels = df_grouped[cols_y]
    # Fixed random_state keeps the evaluation split reproducible across runs.
    x_train, x_test, y_train, y_test = train_test_split(features, labels, random_state=42, test_size=0.25)
    print(f'x_train shape - {x_train.shape}')
    print(f'x_test shape - {x_test.shape}')
    print(f'y_train shape - {y_train.shape}')
    print(f'y_test shape - {y_test.shape}')
    y_pred = saved_model.predict(x_test)
    # Round predictions to 2 decimals before scoring (matches reporting style).
    predictions = [round(value, 2) for value in y_pred]
    mse = metrics.mean_squared_error(y_test, predictions)
    metric_dictionary = {
        "Mean Absolute Error (MAE)": metrics.mean_absolute_error(y_test, predictions),
        "Mean Squared Error (MSE)": mse,
        "Root Mean Squared Error (RMSE)": np.sqrt(mse),
        "Mean Absolute Percentage Error (MAPE)": metrics.mean_absolute_percentage_error(y_test, predictions),
        "Explained Variance Score": metrics.explained_variance_score(y_test, predictions),
        "Max Error": metrics.max_error(y_test, predictions),
        "Median Absolute Error": metrics.median_absolute_error(y_test, predictions),
        "R2 Score": metrics.r2_score(y_test, predictions),
        "Mean Gamma Deviance": metrics.mean_gamma_deviance(y_test, predictions),
        "Mean Poisson Deviance": metrics.mean_poisson_deviance(y_test, predictions),
    }
    print(metric_dictionary)
    return metric_dictionary
def read_raw_data(raw_path, raw_skip_rows):
    """Load the raw machine-log file and force the canonical column names.

    Tries Excel first; any read failure falls back to CSV (the pipeline is fed
    both ``.xlsx`` and ``.csv`` exports of the same log layout).  Files with
    fewer columns than ``RawConstants.columns`` are padded with NaN columns so
    downstream section selectors always find their columns.

    Args:
        raw_path: path to the raw data file (Excel or CSV).
        raw_skip_rows: number of leading rows to skip when reading.

    Returns:
        DataFrame whose columns are exactly ``RawConstants.columns``.

    Raises:
        ValueError: if the file has MORE columns than expected (the original
            ``df.columns = ...`` assignment failed with an unhelpful error).
    """
    try:
        df = pd.read_excel(raw_path, skiprows=raw_skip_rows)
    except Exception as read_error:
        # Not a readable Excel file -- fall back to CSV.  The original fallback
        # silently dropped raw_skip_rows; it is now honoured for CSV too
        # (backward compatible: existing CSV callers pass 0).
        logger.info(f"Could not read {raw_path} as Excel ({read_error}); falling back to CSV")
        df = pd.read_csv(raw_path, skiprows=raw_skip_rows)
    expected_cols = RawConstants.columns
    if len(df.columns) == len(expected_cols):
        logger.info(f"Total cols are {len(expected_cols)} and are same as the df cols length")
    elif len(df.columns) < len(expected_cols):
        # Older exports miss trailing columns; pad them with NaN.
        missed_cols = expected_cols[len(df.columns):]
        logger.info(f"missed cols are {missed_cols}")
        for col in missed_cols:
            df[col] = float('nan')
    else:
        raise ValueError(
            f"Raw file {raw_path} has {len(df.columns)} columns but only "
            f"{len(expected_cols)} are expected")
    df.columns = expected_cols
    logger.info(f"Shape of df is {df.shape}")
    return df
def merged_all_sections(sheet_df, mixer_df, extruder_df, bof_df, pickup_df, viscosity_df):
    """Left-join all per-section aggregates on 'batch-date' and clean the result.

    Keeps only rows whose ``status`` flag is True, replaces zero (missing)
    viscosity/rubber readings with the column mean, extracts 'Batch Number'
    and 'Date' from the 'batch-date' key (format ``Batch_<n.n>_<YYYY-MM-DD>``),
    sorts chronologically, and rounds numeric columns to 6 decimals.

    Args:
        sheet_df, mixer_df, extruder_df, bof_df, pickup_df, viscosity_df:
            per-section aggregated DataFrames, each keyed by 'batch-date'.

    Returns:
        the merged, cleaned, sorted DataFrame.
    """
    merged = sheet_df
    for section_df in (mixer_df, extruder_df, bof_df, pickup_df, viscosity_df):
        merged = pd.merge(merged, section_df, on='batch-date', how='left')
    # Only batches flagged valid by the viscosity sheet are kept.  .copy()
    # avoids pandas chained-assignment warnings on the writes below.
    df_grouped = merged[merged['status'] == True].copy()
    viscosity_rubber_cols = ['Weight_type1', 'Weight_type2',
                             'Weighted_PO_type', 'Weighted_DIRT_type', 'Weighted_ASH_type',
                             'Weighted_VM_type', 'Weighted_PRI_type', 'Weighted_NITROGEN_type',
                             'Weighted_Temperature during transportation_type[℃]',
                             'Weighted_Humidity during transportation__type[%]', 'Weighted Sum',
                             'viscosity']
    # Zero readings mean "missing" here; impute with the column mean.
    for col in viscosity_rubber_cols:
        df_grouped[col] = df_grouped[col].replace(0, np.nan)
        df_grouped[col] = df_grouped[col].fillna(df_grouped[col].mean())
    # 'batch-date' looks like 'Batch_12.0_2023-05-01'.
    df_grouped['Batch Number'] = (
        df_grouped['batch-date'].str.extract(r'Batch_(\d+\.\d+)_')[0].astype(float))
    df_grouped['Date'] = pd.to_datetime(
        df_grouped['batch-date'].str.extract(r'_(\d{4}-\d{2}-\d{2})$')[0])
    df_grouped = df_grouped.sort_values(by=['Date', 'Batch Number'])
    # DataFrame.round rounds numeric columns only; non-numeric are untouched.
    return df_grouped.round(6)
def load_and_predict(df_grouped, index_no, model_path):
    """Load the saved model for ``index_no``, predict viscosity, and write a CSV.

    Writes ``<index_no>_final_predicted_viscosity.csv`` containing Date,
    Batch Number and the predicted viscosity for every row of ``df_grouped``.

    Args:
        df_grouped: merged per-batch DataFrame (output of merged_all_sections)
            containing the feature columns plus 'Date' and 'Batch Number'.
        index_no: product index number (1250 or 3294) selecting feature set.
        model_path: path handed to ``ModelLoader`` (type ``mlflow.sklearn``).

    Raises:
        ValueError: if ``index_no`` has no configured feature set (the
            original code silently did nothing and produced no CSV).
    """
    # Feature columns were selected offline per product index; keep this table
    # in sync with model_trainer, which uses the same feature sets.
    feature_cols_by_index = {
        1250: ['temperature_ws_side_std', '_calendar_current_mean', 'Weighted_NITROGEN_type', 'ram_pressure_mean',
               'electric_energy_mean', 'screw_operation_side_outlet_side_cooling_water_flow_rate_std',
               'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean', 'Weighted_VM_type',
               'seat_temperature_immediately_after_bof_mean', 'Weighted_DIRT_type', 'surface_temperature_center_std',
               'residence_time_max', 'drilled_side_left_exit_side_cooling_water_temperature_mean',
               'Weighted_PRI_type', 'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean',
               'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std', 'Weighted_ASH_type',
               'Weighted_PO_type', 'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean',
               'drilled_side_right_exit_side_cooling_water_flow_rate_std',
               'Weighted_Humidity during transportation__type[%]'],
        3294: ['Weighted_ASH_type', 'Weighted_NITROGEN_type', 'electric_energy_mean',
               'drilled_side_left_inlet_side_cooling_water_temperature_std',
               'seat_temperature_immediately_after_bof_mean',
               'mixer_rotor_left_outlet_side_cooling_water_flow_rate_mean', 'humidity_mean',
               'drilled_side_left_exit_side_cooling_water_flow_rate_mean',
               'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'calendar_bank_load_max',
               'drilled_side_right_inlet_side_cooling_water_flow_rate_mean', 'Weighted_PRI_type',
               'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean', 'temperature_ws_side_std',
               'dust_cv\nspeed_std', 'mixer_rotor_right_inlet_side_coolant_temperature_mean',
               'ram_position_std',
               'drilled_side_right_exit_side_cooling_water_temperature_std',
               'calender_roll_upper_side__outlet__side_cooling_water_temperature_std',
               'Weighted_Temperature during transportation_type[℃]'],
    }
    try:
        cols_x = feature_cols_by_index[index_no]
    except KeyError:
        raise ValueError(
            f"No feature configuration for index number {index_no}; "
            f"expected one of {sorted(feature_cols_by_index)}") from None
    logger.info(f"Loading model for {index_no}")
    saved_model = ModelLoader({
        "type": "mlflow.sklearn",
        "path": model_path
    }).load_model()
    features = df_grouped[cols_x]
    df_grouped['predicted_viscosity'] = saved_model.predict(features)
    final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']]
    final_df.to_csv(f'{index_no}_final_predicted_viscosity.csv')
def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosity_skip_rows, model_path=None):
    """Run the full per-section preprocessing pipeline and write predictions.

    Reads the raw and viscosity files, preprocesses each machine section
    (sheet, mixer, extruder, BOF, pickup), merges everything per batch and
    hands the result to ``load_and_predict``.

    Args:
        raw_path: path to the raw machine data file (Excel or CSV).
        viscosity_path: path to the viscosity Excel file.
        index_no: product index number (1250 or 3294).
        raw_skip_rows: rows to skip when reading the raw file.
        viscosity_skip_rows: rows to skip when reading the viscosity file.
        model_path: saved-model location.  When None, falls back to a
            module-level ``model_path`` global for backward compatibility:
            the original code referenced that global directly, which raised
            NameError when this function was imported and called from
            another module.
    """
    if model_path is None:
        # Backward-compat fallback to the global set by the __main__ block.
        model_path = globals().get('model_path')
    logger.info(f"Starting prediction for {index_no}")
    logger.info("Reading raw file data")
    df = read_raw_data(raw_path, raw_skip_rows)
    logger.info(f"Shape of raw df is {df.shape}")
    logger.info("Starting preprocessing material section")
    visc_df = pd.read_excel(viscosity_path, skiprows=viscosity_skip_rows)
    viscosity_df, raw_viscosity_df = preprocess_viscosity_section(visc_df, index_no)
    logger.info(f"The shape of the viscosity df is {viscosity_df.shape}")
    logger.info("Completed material section preprocessing")
    logger.info("Starting preprocessing sheet section")
    df_sheet_grouped = preprocess_sheet_section(df, index_no)
    logger.info(f"The shape of the Sheet df is {df_sheet_grouped.shape}")
    logger.info("Completed sheet section preprocessing")
    logger.info("Starting preprocessing mixer section")
    df_mixer_grouped = preprocess_mixer_section(df, index_no)
    logger.info(f"The shape of the Mixer df is {df_mixer_grouped.shape}")
    logger.info("Completed mixer section preprocessing")
    logger.info("Starting preprocessing extruder section")
    df_extruder_grouped = preprocess_extruder_section(df, index_no, raw_viscosity_df)
    logger.info(f"The shape of the Extruder df is {df_extruder_grouped.shape}")
    logger.info("Completed extruder section preprocessing")
    logger.info("Starting preprocessing bof section")
    df_bof_grouped = preprocess_bof_section(df, index_no, raw_viscosity_df)
    logger.info(f"The shape of the BOF df is {df_bof_grouped.shape}")
    logger.info("Completed bof section preprocessing")
    logger.info("Starting preprocessing pickup section")
    df_pickup_grouped = preprocess_pickup_section(df, index_no, raw_viscosity_df)
    # Original log wrongly said "Extruder" here.
    logger.info(f"The shape of the Pickup df is {df_pickup_grouped.shape}")
    logger.info("Completed pickup section preprocessing")
    df_grouped = merged_all_sections(df_sheet_grouped, df_mixer_grouped, df_extruder_grouped, df_bof_grouped,
                                     df_pickup_grouped, viscosity_df)
    load_and_predict(df_grouped, index_no, model_path)
if __name__ == "__main__":
try:
raw_file_path = sys.argv[sys.argv.index('-r') + 1]
viscosity_file_path = sys.argv[sys.argv.index('-v') + 1]
raw_file_skip_rows = int(sys.argv[sys.argv.index('-sr') + 1])
viscosity_file_skip_rows = int(sys.argv[sys.argv.index('-sv') + 1])
index_number = int(sys.argv[sys.argv.index('-index') + 1])
model_path = sys.argv[sys.argv.index('-m') + 1]
start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
except Exception as e:
logger.exception(f"Module failed because of error {e}")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment