Commit d73b79d8 authored by sagar.shee's avatar sagar.shee

# Updated exception clauses

parent 88724804
...@@ -199,56 +199,60 @@ def load_and_predict(df_grouped, index_no): ...@@ -199,56 +199,60 @@ def load_and_predict(df_grouped, index_no):
def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosity_skip_rows): def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosity_skip_rows):
logger.info(f"Starting prediction for {index_no}") try:
logger.info("Reading raw file data") logger.info(f"Starting prediction for {index_no}")
df = read_raw_data(raw_path, raw_skip_rows) logger.info("Reading raw file data")
logger.info(f"Shape of raw df is {df.shape}") df = read_raw_data(raw_path, raw_skip_rows)
logger.info(f"Shape of raw df is {df.shape}")
logger.info("Starting preprocessing material section") logger.info("Starting preprocessing material section")
visc_df = pd.read_excel(viscosity_path, skiprows=viscosity_skip_rows) visc_df = pd.read_excel(viscosity_path, skiprows=viscosity_skip_rows)
viscosity_df, raw_viscosity_df = preprocess_viscosity_section(visc_df, index_no) viscosity_df, raw_viscosity_df = preprocess_viscosity_section(visc_df, index_no)
# viscosity_df.to_csv('viscosity-agg.csv') # viscosity_df.to_csv('viscosity-agg.csv')
logger.info(f"The shape of the viscosity df is {viscosity_df.shape}") logger.info(f"The shape of the viscosity df is {viscosity_df.shape}")
logger.info("Completed material section preprocessing") logger.info("Completed material section preprocessing")
logger.info("Starting preprocessing sheet section") logger.info("Starting preprocessing sheet section")
df_sheet_grouped = preprocess_sheet_section(df, index_no) df_sheet_grouped = preprocess_sheet_section(df, index_no)
logger.info(f"The shape of the Sheet df is {df_sheet_grouped.shape}") logger.info(f"The shape of the Sheet df is {df_sheet_grouped.shape}")
logger.info("Completed sheet section preprocessing") logger.info("Completed sheet section preprocessing")
# df_sheet_grouped.to_csv('sheet-agg.csv') # df_sheet_grouped.to_csv('sheet-agg.csv')
logger.info("Starting preprocessing mixer section") logger.info("Starting preprocessing mixer section")
df_mixer_grouped = preprocess_mixer_section(df, index_no) df_mixer_grouped = preprocess_mixer_section(df, index_no)
logger.info(f"The shape of the Mixer df is {df_mixer_grouped.shape}") logger.info(f"The shape of the Mixer df is {df_mixer_grouped.shape}")
logger.info("Completed mixer section preprocessing") logger.info("Completed mixer section preprocessing")
# df_mixer_grouped.to_csv('mixer-agg.csv') # df_mixer_grouped.to_csv('mixer-agg.csv')
logger.info("Starting preprocessing extruder section") logger.info("Starting preprocessing extruder section")
df_extruder_grouped = preprocess_extruder_section(df, index_no, raw_viscosity_df) df_extruder_grouped = preprocess_extruder_section(df, index_no, raw_viscosity_df)
logger.info(f"The shape of the Extruder df is {df_extruder_grouped.shape}") logger.info(f"The shape of the Extruder df is {df_extruder_grouped.shape}")
logger.info("Completed extruder section preprocessing") logger.info("Completed extruder section preprocessing")
# df_extruder_grouped.to_csv('extruder-agg.csv') # df_extruder_grouped.to_csv('extruder-agg.csv')
logger.info("Starting preprocessing bof section") logger.info("Starting preprocessing bof section")
df_bof_grouped = preprocess_bof_section(df, index_no, raw_viscosity_df) df_bof_grouped = preprocess_bof_section(df, index_no, raw_viscosity_df)
logger.info(f"The shape of the BOF df is {df_bof_grouped.shape}") logger.info(f"The shape of the BOF df is {df_bof_grouped.shape}")
logger.info("Completed bof section preprocessing") logger.info("Completed bof section preprocessing")
# df_bof_grouped.to_csv('bof-agg.csv') # df_bof_grouped.to_csv('bof-agg.csv')
# bof_desc = df_bof_grouped.describe() # bof_desc = df_bof_grouped.describe()
# bof_desc.to_csv('bof-describe.csv') # bof_desc.to_csv('bof-describe.csv')
logger.info("Starting preprocessing pickup section") logger.info("Starting preprocessing pickup section")
df_pickup_grouped = preprocess_pickup_section(df, index_no, raw_viscosity_df) df_pickup_grouped = preprocess_pickup_section(df, index_no, raw_viscosity_df)
logger.info(f"The shape of the Extruder df is {df_pickup_grouped.shape}") logger.info(f"The shape of the Extruder df is {df_pickup_grouped.shape}")
logger.info("Completed pickup section preprocessing") logger.info("Completed pickup section preprocessing")
# df_pickup_grouped.to_csv('pickup-agg.csv') # df_pickup_grouped.to_csv('pickup-agg.csv')
# df = pd.read_csv('pickup-agg.csv') # df = pd.read_csv('pickup-agg.csv')
# print(df.describe()) # print(df.describe())
df_grouped = merged_all_sections(df_sheet_grouped, df_mixer_grouped, df_extruder_grouped, df_bof_grouped, df_grouped = merged_all_sections(df_sheet_grouped, df_mixer_grouped, df_extruder_grouped, df_bof_grouped,
df_pickup_grouped, viscosity_df) df_pickup_grouped, viscosity_df)
# load_and_predict(df_grouped, index_no) # load_and_predict(df_grouped, index_no)
model_trainer(df_grouped, index_no) model_trainer(df_grouped, index_no)
except Exception as err:
logger.error(f"Exception in main prediction {str(err)}")
raise Exception(str(err))
if __name__ == "__main__": if __name__ == "__main__":
...@@ -274,3 +278,4 @@ if __name__ == "__main__": ...@@ -274,3 +278,4 @@ if __name__ == "__main__":
start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows) start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
except Exception as e: except Exception as e:
logger.exception(f"Module failed because of error {e}") logger.exception(f"Module failed because of error {e}")
raise Exception(str(e))
...@@ -293,7 +293,7 @@ def return_fy676a_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batc ...@@ -293,7 +293,7 @@ def return_fy676a_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batc
for each_day in dt_list: for each_day in dt_list:
day_df = sorted_pick_df[sorted_pick_df['day'] == each_day] day_df = sorted_pick_df[sorted_pick_df['day'] == each_day]
if day_df['length passed through.1'].max() - day_df['length passed through.1'].min() <= 0: if day_df['length passed through.1'].max() - day_df['length passed through.1'].min() <= 0:
value = 0 raise Exception(f"Length passed through in pick up section for {each_day} is 0")
else: else:
value = day_df['length passed through.1'].max() - day_df['length passed through.1'].min() value = day_df['length passed through.1'].max() - day_df['length passed through.1'].min()
day_length_dic[each_day] = value day_length_dic[each_day] = value
...@@ -423,7 +423,7 @@ def return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, index_no ...@@ -423,7 +423,7 @@ def return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, index_no
if minimum <= 0: if minimum <= 0:
minimum = 0 minimum = 0
if day_df['length passed through.1'].max() - minimum <= 0: if day_df['length passed through.1'].max() - minimum <= 0:
value = 0 raise Exception(f"Length passed through in pick up section for {each_day} is 0")
else: else:
value = day_df['length passed through.1'].max() - minimum value = day_df['length passed through.1'].max() - minimum
day_length_dic[each_day] = value day_length_dic[each_day] = value
...@@ -538,6 +538,8 @@ def preprocess_pickup_section(raw_df, index_number, viscosity_df): ...@@ -538,6 +538,8 @@ def preprocess_pickup_section(raw_df, index_number, viscosity_df):
for each_day in dt_list: for each_day in dt_list:
day_df = sorted_pick_df[sorted_pick_df['day'] == each_day] day_df = sorted_pick_df[sorted_pick_df['day'] == each_day]
day_length_dic[each_day] = day_df['length passed through.1'].max() - day_df['length passed through.1'].min() day_length_dic[each_day] = day_df['length passed through.1'].max() - day_df['length passed through.1'].min()
if day_df['length passed through.1'].max() - day_df['length passed through.1'].min() <= 0:
raise Exception(f"Length passed through in pick up section for {each_day} is 0")
''' Reading viscosity file with skipping 2 rows ''' ''' Reading viscosity file with skipping 2 rows '''
viscosity_df['Mixing date'] = pd.to_datetime(viscosity_df['Mixing date']) viscosity_df['Mixing date'] = pd.to_datetime(viscosity_df['Mixing date'])
...@@ -774,256 +776,265 @@ def preprocess_viscosity_section(viscosity_df, index_number): ...@@ -774,256 +776,265 @@ def preprocess_viscosity_section(viscosity_df, index_number):
def mixer_section_start_end_time(raw_df, index_no): def mixer_section_start_end_time(raw_df, index_no):
mixer_cols = ['Time Stamp', try:
'Size No (INDEX No).3', mixer_cols = ['Time Stamp',
'Size name', 'Size No (INDEX No).3',
'Mixing batch number', 'Size name',
'idle time between batches', 'Mixing batch number',
] 'idle time between batches',
mixer_df = raw_df[mixer_cols] ]
mixer_df['Time Stamp'] = pd.to_datetime(mixer_df['Time Stamp']) mixer_df = raw_df[mixer_cols]
mixer_df = mixer_df.sort_values(by='Time Stamp') mixer_df['Time Stamp'] = pd.to_datetime(mixer_df['Time Stamp'])
numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns mixer_df = mixer_df.sort_values(by='Time Stamp')
numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
# Convert numeric columns to float
mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
mixer_df['day'] = mixer_df['Time Stamp'].dt.date
mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_no]
mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]
mixer_df['time_min'] = mixer_df['Time Stamp']
mixer_df['time_max'] = mixer_df['Time Stamp']
aggregation_dict = {
'time_min': 'min',
'time_max': 'max',
}
group_by = ['day', 'Mixing batch number']
df_mixer_grouped = mixer_df.groupby(group_by).agg(aggregation_dict).reset_index()
df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped['time_max'] - df_mixer_grouped['time_min']
df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped[
'mixer_section_time_diff_second'].dt.total_seconds()
df_mixer_grouped['batch-date'] = 'Batch_' + df_mixer_grouped['Mixing batch number'].astype(str) + '_' + \
df_mixer_grouped['day'].astype(str)
date_dict = {}
batch_lis = list(df_mixer_grouped['batch-date'].unique())
for each_bt in batch_lis:
df_nw = df_mixer_grouped[df_mixer_grouped['batch-date'] == each_bt]
date_dict[each_bt] = {"start_time": str(list(df_nw['time_min'])[0]),
'end_time': str(list(df_nw['time_max'])[0])}
return date_dict
def return_batch_no_df_1(
raw_df, viscosity_df, date_dict, bof_cols, additional_cols, index_no
):
raw_df = raw_df.sort_values(by='Time Stamp')
raw_df['Time Stamp'] = pd.to_datetime(raw_df['Time Stamp'])
raw_df["day"] = raw_df["Time Stamp"].dt.date
raw_df["day"] = raw_df["day"].astype("str")
raw_df["Mixing batch number"] = raw_df["Mixing batch number"].astype("float")
raw_df["batch-date"] = (
"Batch_"
+ raw_df["Mixing batch number"].astype("str")
+ "_"
+ raw_df["day"].astype("str")
)
bof_add_cols = bof_cols + additional_cols
bof_df = raw_df[bof_add_cols]
sorted_bof_df = bof_df.sort_values(by="Time Stamp", ascending=True)
sorted_bof_df = sorted_bof_df[sorted_bof_df["Size No (INDEX No).4"] == index_no]
dt_list = list(sorted_bof_df["day"].unique())
day_length_dic = {}
for each_day in dt_list:
day_df = sorted_bof_df[sorted_bof_df["day"] == each_day]
if day_df["discharge length"].max() - day_df["discharge length"].min() <= 0:
value = 0
else:
value = day_df["discharge length"].max() - day_df["discharge length"].min()
day_length_dic[each_day] = value
# print(day_length_dic) # Convert numeric columns to float
mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
mixer_df['day'] = mixer_df['Time Stamp'].dt.date
mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_no]
mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]
mixer_df['time_min'] = mixer_df['Time Stamp']
mixer_df['time_max'] = mixer_df['Time Stamp']
aggregation_dict = {
'time_min': 'min',
'time_max': 'max',
}
group_by = ['day', 'Mixing batch number']
df_mixer_grouped = mixer_df.groupby(group_by).agg(aggregation_dict).reset_index()
df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped['time_max'] - df_mixer_grouped['time_min']
df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped[
'mixer_section_time_diff_second'].dt.total_seconds()
df_mixer_grouped['batch-date'] = 'Batch_' + df_mixer_grouped['Mixing batch number'].astype(str) + '_' + \
df_mixer_grouped['day'].astype(str)
date_dict = {}
batch_lis = list(df_mixer_grouped['batch-date'].unique())
for each_bt in batch_lis:
df_nw = df_mixer_grouped[df_mixer_grouped['batch-date'] == each_bt]
date_dict[each_bt] = {"start_time": str(list(df_nw['time_min'])[0]),
'end_time': str(list(df_nw['time_max'])[0])}
return date_dict
except Exception as err:
logger.error(f'Exception in mixer time fetch: {str(err)}')
raise Exception(str(err))
sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
sorted_viscosity_df["day"] = sorted_viscosity_df["Mixing date"].dt.date
sorted_viscosity_df["day"] = sorted_viscosity_df["day"].astype("str")
extrud_visc_df = sorted_viscosity_df[ def return_batch_no_df_1(raw_df, viscosity_df, date_dict, bof_cols, additional_cols, index_no):
["Batch No.", "Input rubber weight(0.1kg)", "day", "Mixing date"] try:
] raw_df = raw_df.sort_values(by='Time Stamp')
extrud_visc_df["length_from_extruder"] = extrud_visc_df["day"].map(day_length_dic) raw_df['Time Stamp'] = pd.to_datetime(raw_df['Time Stamp'])
extrud_visc_df["length_from_extruder"] = extrud_visc_df[ raw_df["day"] = raw_df["Time Stamp"].dt.date
"length_from_extruder" raw_df["day"] = raw_df["day"].astype("str")
].fillna(0)
daily_sum_weight = ( raw_df["Mixing batch number"] = raw_df["Mixing batch number"].astype("float")
extrud_visc_df.groupby("day")["Input rubber weight(0.1kg)"].sum() / 10 raw_df["batch-date"] = (
) "Batch_"
# Add a new column 'm/kg' by dividing 'length_from_extruder' by the sum for each day + raw_df["Mixing batch number"].astype("str")
extrud_visc_df["m/kg"] = extrud_visc_df.apply( + "_"
lambda row: row["length_from_extruder"] / daily_sum_weight[row["day"]], axis=1 + raw_df["day"].astype("str")
)
extrud_visc_df["batch_length"] = extrud_visc_df.apply(
lambda row: row["m/kg"] * row["Input rubber weight(0.1kg)"] / 10, axis=1
).astype("float64")
extrud_visc_df["batch_length"] = extrud_visc_df["batch_length"].apply(math.ceil)
extrud_visc_df["cumulative_length"] = extrud_visc_df.groupby("day")[
"batch_length"
].cumsum()
discharge_dict = (
extrud_visc_df.groupby("day")
.apply(
lambda group: group.set_index("Batch No.").to_dict()["cumulative_length"]
) )
.to_dict()
)
test_sorted_extr_df = sorted_bof_df bof_add_cols = bof_cols + additional_cols
test_df = test_sorted_extr_df bof_df = raw_df[bof_add_cols]
# Initialize an empty list to store batch numbers sorted_bof_df = bof_df.sort_values(by="Time Stamp", ascending=True)
batch_numbers = [] sorted_bof_df = sorted_bof_df[sorted_bof_df["Size No (INDEX No).4"] == index_no]
dt_list = list(sorted_bof_df["day"].unique())
# Iterate through each row in the DataFrame day_length_dic = {}
for index, row in test_df.iterrows(): for each_day in dt_list:
day = row["day"] day_df = sorted_bof_df[sorted_bof_df["day"] == each_day]
discharge_length = row["discharge length"] if day_df["discharge length"].max() - day_df["discharge length"].min() <= 0:
if discharge_length == 0: raise Exception(f"Discharge length in extruder section for {each_day} is 0")
batch_numbers.append(0)
else:
# Check if the day is in the dictionary
if day in discharge_dict:
# Check if discharge length is less than or equal to the corresponding batch length
batch_length_dict = discharge_dict[day]
for batch_no, batch_length in batch_length_dict.items():
if discharge_length <= batch_length:
batch_numbers.append(batch_no)
break
else:
# If no match is found in the dictionary, assign NaN to batch number
batch_numbers.append(batch_numbers[-1])
else: else:
# If day is not in the dictionary, assign NaN to batch number value = day_df["discharge length"].max() - day_df["discharge length"].min()
batch_numbers.append(np.nan) day_length_dic[each_day] = value
# print(day_length_dic)
sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
sorted_viscosity_df["day"] = sorted_viscosity_df["Mixing date"].dt.date
sorted_viscosity_df["day"] = sorted_viscosity_df["day"].astype("str")
extrud_visc_df = sorted_viscosity_df[
["Batch No.", "Input rubber weight(0.1kg)", "day", "Mixing date"]
]
extrud_visc_df["length_from_extruder"] = extrud_visc_df["day"].map(day_length_dic)
extrud_visc_df["length_from_extruder"] = extrud_visc_df[
"length_from_extruder"
].fillna(0)
daily_sum_weight = (
extrud_visc_df.groupby("day")["Input rubber weight(0.1kg)"].sum() / 10
)
# Add a new column 'm/kg' by dividing 'length_from_extruder' by the sum for each day
extrud_visc_df["m/kg"] = extrud_visc_df.apply(
lambda row: row["length_from_extruder"] / daily_sum_weight[row["day"]], axis=1
)
extrud_visc_df["batch_length"] = extrud_visc_df.apply(
lambda row: row["m/kg"] * row["Input rubber weight(0.1kg)"] / 10, axis=1
).astype("float64")
extrud_visc_df["batch_length"] = extrud_visc_df["batch_length"].apply(math.ceil)
extrud_visc_df["cumulative_length"] = extrud_visc_df.groupby("day")[
"batch_length"
].cumsum()
discharge_dict = (
extrud_visc_df.groupby("day")
.apply(
lambda group: group.set_index("Batch No.").to_dict()["cumulative_length"]
)
.to_dict()
)
test_sorted_extr_df = sorted_bof_df
test_df = test_sorted_extr_df
# Initialize an empty list to store batch numbers
batch_numbers = []
# Add the 'batch_no' column to the DataFrame # Iterate through each row in the DataFrame
test_df["batch_no"] = batch_numbers for index, row in test_df.iterrows():
day = row["day"]
discharge_length = row["discharge length"]
if discharge_length == 0:
batch_numbers.append(0)
else:
# Check if the day is in the dictionary
if day in discharge_dict:
# Check if discharge length is less than or equal to the corresponding batch length
batch_length_dict = discharge_dict[day]
for batch_no, batch_length in batch_length_dict.items():
if discharge_length <= batch_length:
batch_numbers.append(batch_no)
break
else:
# If no match is found in the dictionary, assign NaN to batch number
batch_numbers.append(batch_numbers[-1])
else:
# If day is not in the dictionary, assign NaN to batch number
batch_numbers.append(np.nan)
batch_number = 0 # Add the 'batch_no' column to the DataFrame
batch_list = [] test_df["batch_no"] = batch_numbers
started_with_one = False batch_number = 0
current_day = None batch_list = []
for value, day in zip(list(test_df["lower door open"]), list(test_df["day"])): started_with_one = False
current_day = None
if current_day != day: for value, day in zip(list(test_df["lower door open"]), list(test_df["day"])):
current_day = day
batch_number = 0
if value == 1: if current_day != day:
if not started_with_one: current_day = day
batch_number += 1 batch_number = 0
started_with_one = True
batch_list.append(batch_number)
else:
batch_list.append(batch_number)
started_with_one = False
test_df["batch_no"] = test_df["batch_no"].astype("float")
test_df["extruder_batch_date"] = (
"Batch_"
+ test_df["batch_no"].astype("str")
+ "_"
+ test_df["day"].astype("str")
)
extruder_flag_list = []
extrud_flg_vms = []
for i, value in test_df.iterrows():
if value["batch_no"] == 0.0:
extruder_flag_list.append("false")
extrud_flg_vms.append(0)
else:
start_time = date_dict.get(value["extruder_batch_date"]).get("start_time")
end_time = date_dict.get(value["extruder_batch_date"]).get("end_time") if value == 1:
if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') > datetime.strptime( if not started_with_one:
start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \ batch_number += 1
(datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') < datetime.strptime( started_with_one = True
end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')): batch_list.append(batch_number)
extruder_flag_list.append("true")
extrud_flg_vms.append(1)
else: else:
batch_list.append(batch_number)
started_with_one = False
test_df["batch_no"] = test_df["batch_no"].astype("float")
test_df["extruder_batch_date"] = (
"Batch_"
+ test_df["batch_no"].astype("str")
+ "_"
+ test_df["day"].astype("str")
)
extruder_flag_list = []
extrud_flg_vms = []
for i, value in test_df.iterrows():
if value["batch_no"] == 0.0:
extruder_flag_list.append("false") extruder_flag_list.append("false")
extrud_flg_vms.append(0) extrud_flg_vms.append(0)
else:
start_time = date_dict.get(value["extruder_batch_date"]).get("start_time")
test_df["extruder_flag"] = extruder_flag_list end_time = date_dict.get(value["extruder_batch_date"]).get("end_time")
test_df["extruder_batch_diff"] = extrud_flg_vms if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') > datetime.strptime(
test_df["updtaed_bt_list"] = batch_list start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
(datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') < datetime.strptime(
end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
extruder_flag_list.append("true")
extrud_flg_vms.append(1)
else:
extruder_flag_list.append("false")
extrud_flg_vms.append(0)
test_df["extruder_batch_number"] = test_df["batch_no"] - test_df[ test_df["extruder_flag"] = extruder_flag_list
"extruder_batch_diff" test_df["extruder_batch_diff"] = extrud_flg_vms
].astype("float") test_df["updtaed_bt_list"] = batch_list
test_df["batch-date"] = (
"Batch_" test_df["extruder_batch_number"] = test_df["batch_no"] - test_df[
+ test_df["extruder_batch_number"].astype("str") "extruder_batch_diff"
+ "_" ].astype("float")
+ test_df["day"].astype("str") test_df["batch-date"] = (
) "Batch_"
return test_df + test_df["extruder_batch_number"].astype("str")
+ "_"
+ test_df["day"].astype("str")
)
return test_df
except Exception as err:
logger.error(f'Exception in batch formation in extruder: {str(err)}')
raise Exception(str(err))
def preprocess_extruder_section(df, index_number, vis_df): def preprocess_extruder_section(df, index_number, vis_df):
extruder_cols = ExtruderConstants.extruder_cols try:
additional_columns = ['Time Stamp'] extruder_cols = ExtruderConstants.extruder_cols
df_extruder = df[extruder_cols + additional_columns] additional_columns = ['Time Stamp']
df_extruder['Time Stamp'] = pd.to_datetime(df_extruder['Time Stamp']) df_extruder = df[extruder_cols + additional_columns]
df_extruder = df_extruder.sort_values(by='Time Stamp') df_extruder['Time Stamp'] = pd.to_datetime(df_extruder['Time Stamp'])
df_extruder['day'] = df_extruder['Time Stamp'].dt.date df_extruder = df_extruder.sort_values(by='Time Stamp')
df_extruder['day'] = df_extruder['day'].astype('str') df_extruder['day'] = df_extruder['Time Stamp'].dt.date
sorted_extrud_df = df_extruder.sort_values(by="Time Stamp", ascending=True) df_extruder['day'] = df_extruder['day'].astype('str')
sorted_extrud_df = sorted_extrud_df[sorted_extrud_df['Size No (INDEX No).4'] == index_number] sorted_extrud_df = df_extruder.sort_values(by="Time Stamp", ascending=True)
drop_col = ['spare.19', sorted_extrud_df = sorted_extrud_df[sorted_extrud_df['Size No (INDEX No).4'] == index_number]
'spare.20', drop_col = ['spare.19',
'spare.21', 'spare.20',
'spare.22', 'spare.21',
'spare.23', 'spare.22',
'spare.24', 'spare.23',
'spare.25', 'Hopper bank upper limit', 'spare.24',
'middle of hopper bank', 'spare.25', 'Hopper bank upper limit',
'Hopper bank lower limit', 'middle of hopper bank',
'Hopper bank below lower limit'] 'Hopper bank lower limit',
'Hopper bank below lower limit']
sorted_extrud_df.drop(columns=drop_col, inplace=True)
date_dict = mixer_section_start_end_time(df, index_number) sorted_extrud_df.drop(columns=drop_col, inplace=True)
additional_cols = ['day', 'Time Stamp', 'lower door open'] date_dict = mixer_section_start_end_time(df, index_number)
# adding date col to the viscosity df additional_cols = ['day', 'Time Stamp', 'lower door open']
vis_df = vis_df.sort_values(by='Mixing date') # adding date col to the viscosity df
vis_df['date'] = vis_df['Mixing date'].dt.date vis_df = vis_df.sort_values(by='Mixing date')
vis_df['batch-date'] = 'Batch_' + vis_df['Batch No.'].astype('float').astype(str) + '_' + vis_df[ vis_df['date'] = vis_df['Mixing date'].dt.date
'date'].astype(str) vis_df['batch-date'] = 'Batch_' + vis_df['Batch No.'].astype('float').astype(str) + '_' + vis_df[
vis_df = vis_df[vis_df['Index No'] == index_number] 'date'].astype(str)
extruder_merged_df_final = return_batch_no_df_1(df, vis_df, date_dict, extruder_cols, additional_cols, vis_df = vis_df[vis_df['Index No'] == index_number]
index_number) extruder_merged_df_final = return_batch_no_df_1(df, vis_df, date_dict, extruder_cols, additional_cols,
extruder_merged_df_final = extruder_merged_df_final[extruder_merged_df_final['extruder_batch_number'] != 0] index_number)
grouped_cols = ['batch-date'] extruder_merged_df_final = extruder_merged_df_final[extruder_merged_df_final['extruder_batch_number'] != 0]
aggregate_dict = ExtruderConstants.aggregate_dict grouped_cols = ['batch-date']
df_extruder_grouped = extruder_merged_df_final.groupby(grouped_cols).agg(aggregate_dict).reset_index() aggregate_dict = ExtruderConstants.aggregate_dict
col_renamer = {} df_extruder_grouped = extruder_merged_df_final.groupby(grouped_cols).agg(aggregate_dict).reset_index()
for col, col_agg in aggregate_dict.items(): col_renamer = {}
if col not in ['viscosity', 'time_min', 'time_max', 'Mixing Weight (Integrated Value)_diff', 'max_rpm_count']: for col, col_agg in aggregate_dict.items():
renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower() if col not in ['viscosity', 'time_min', 'time_max', 'Mixing Weight (Integrated Value)_diff', 'max_rpm_count']:
col_renamer[col] = renamed_col renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
else: col_renamer[col] = renamed_col
col_renamer[col] = col else:
df_extruder_grouped = df_extruder_grouped.rename(columns=col_renamer) col_renamer[col] = col
df_extruder_grouped = df_extruder_grouped.fillna(df_extruder_grouped.mean()) df_extruder_grouped = df_extruder_grouped.rename(columns=col_renamer)
df_extruder_grouped = round(df_extruder_grouped, 6) df_extruder_grouped = df_extruder_grouped.fillna(df_extruder_grouped.mean())
return df_extruder_grouped df_extruder_grouped = round(df_extruder_grouped, 6)
return df_extruder_grouped
except Exception as err:
logger.error(f'Exception in extruder preprocess: {str(err)}')
raise Exception(str(err))
def return_batch_no_df(raw_df, viscosity_df, date_dict, index_number): def return_batch_no_df(raw_df, viscosity_df, date_dict, index_number):
...@@ -1047,7 +1058,7 @@ def return_batch_no_df(raw_df, viscosity_df, date_dict, index_number): ...@@ -1047,7 +1058,7 @@ def return_batch_no_df(raw_df, viscosity_df, date_dict, index_number):
for each_day in dt_list: for each_day in dt_list:
day_df = sorted_bof_df[sorted_bof_df['day'] == each_day] day_df = sorted_bof_df[sorted_bof_df['day'] == each_day]
if (day_df['length passed through'].max() - day_df['length passed through'].min()) <= 0: if (day_df['length passed through'].max() - day_df['length passed through'].min()) <= 0:
value = 0 raise Exception(f"Length passed through in bof section for {each_day} is 0")
else: else:
value = day_df['length passed through'].max() - day_df['length passed through'].min() value = day_df['length passed through'].max() - day_df['length passed through'].min()
day_length_dic[each_day] = value day_length_dic[each_day] = value
...@@ -1856,56 +1867,60 @@ def load_and_predict(df_grouped, index_no, model_path): ...@@ -1856,56 +1867,60 @@ def load_and_predict(df_grouped, index_no, model_path):
def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosity_skip_rows): def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosity_skip_rows):
logger.info(f"Starting prediction for {index_no}") try:
logger.info("Reading raw file data") logger.info(f"Starting prediction for {index_no}")
df = read_raw_data(raw_path, raw_skip_rows) logger.info("Reading raw file data")
logger.info(f"Shape of raw df is {df.shape}") df = read_raw_data(raw_path, raw_skip_rows)
logger.info(f"Shape of raw df is {df.shape}")
logger.info("Starting preprocessing material section")
visc_df = pd.read_excel(viscosity_path, skiprows=viscosity_skip_rows) logger.info("Starting preprocessing material section")
viscosity_df, raw_viscosity_df = preprocess_viscosity_section(visc_df, index_no) visc_df = pd.read_excel(viscosity_path, skiprows=viscosity_skip_rows)
# viscosity_df.to_csv('viscosity-agg.csv') viscosity_df, raw_viscosity_df = preprocess_viscosity_section(visc_df, index_no)
logger.info(f"The shape of the viscosity df is {viscosity_df.shape}") # viscosity_df.to_csv('viscosity-agg.csv')
logger.info("Completed material section preprocessing") logger.info(f"The shape of the viscosity df is {viscosity_df.shape}")
logger.info("Completed material section preprocessing")
logger.info("Starting preprocessing sheet section")
df_sheet_grouped = preprocess_sheet_section(df, index_no) logger.info("Starting preprocessing sheet section")
logger.info(f"The shape of the Sheet df is {df_sheet_grouped.shape}") df_sheet_grouped = preprocess_sheet_section(df, index_no)
logger.info("Completed sheet section preprocessing") logger.info(f"The shape of the Sheet df is {df_sheet_grouped.shape}")
# df_sheet_grouped.to_csv('sheet-agg.csv') logger.info("Completed sheet section preprocessing")
# df_sheet_grouped.to_csv('sheet-agg.csv')
logger.info("Starting preprocessing mixer section")
df_mixer_grouped = preprocess_mixer_section(df, index_no) logger.info("Starting preprocessing mixer section")
logger.info(f"The shape of the Mixer df is {df_mixer_grouped.shape}") df_mixer_grouped = preprocess_mixer_section(df, index_no)
logger.info("Completed mixer section preprocessing") logger.info(f"The shape of the Mixer df is {df_mixer_grouped.shape}")
# df_mixer_grouped.to_csv('mixer-agg.csv') logger.info("Completed mixer section preprocessing")
# df_mixer_grouped.to_csv('mixer-agg.csv')
logger.info("Starting preprocessing extruder section")
df_extruder_grouped = preprocess_extruder_section(df, index_no, raw_viscosity_df) logger.info("Starting preprocessing extruder section")
logger.info(f"The shape of the Extruder df is {df_extruder_grouped.shape}") df_extruder_grouped = preprocess_extruder_section(df, index_no, raw_viscosity_df)
logger.info("Completed extruder section preprocessing") logger.info(f"The shape of the Extruder df is {df_extruder_grouped.shape}")
# df_extruder_grouped.to_csv('extruder-agg.csv') logger.info("Completed extruder section preprocessing")
# df_extruder_grouped.to_csv('extruder-agg.csv')
logger.info("Starting preprocessing bof section")
df_bof_grouped = preprocess_bof_section(df, index_no, raw_viscosity_df) logger.info("Starting preprocessing bof section")
logger.info(f"The shape of the BOF df is {df_bof_grouped.shape}") df_bof_grouped = preprocess_bof_section(df, index_no, raw_viscosity_df)
logger.info("Completed bof section preprocessing") logger.info(f"The shape of the BOF df is {df_bof_grouped.shape}")
# df_bof_grouped.to_csv('bof-agg.csv') logger.info("Completed bof section preprocessing")
# bof_desc = df_bof_grouped.describe() # df_bof_grouped.to_csv('bof-agg.csv')
# bof_desc.to_csv('bof-describe.csv') # bof_desc = df_bof_grouped.describe()
# bof_desc.to_csv('bof-describe.csv')
logger.info("Starting preprocessing pickup section")
df_pickup_grouped = preprocess_pickup_section(df, index_no, raw_viscosity_df) logger.info("Starting preprocessing pickup section")
logger.info(f"The shape of the Extruder df is {df_pickup_grouped.shape}") df_pickup_grouped = preprocess_pickup_section(df, index_no, raw_viscosity_df)
logger.info("Completed pickup section preprocessing") logger.info(f"The shape of the Extruder df is {df_pickup_grouped.shape}")
# df_pickup_grouped.to_csv('pickup-agg.csv') logger.info("Completed pickup section preprocessing")
# df = pd.read_csv('pickup-agg.csv') # df_pickup_grouped.to_csv('pickup-agg.csv')
# print(df.describe()) # df = pd.read_csv('pickup-agg.csv')
df_grouped = merged_all_sections(df_sheet_grouped, df_mixer_grouped, df_extruder_grouped, df_bof_grouped, # print(df.describe())
df_pickup_grouped, viscosity_df) df_grouped = merged_all_sections(df_sheet_grouped, df_mixer_grouped, df_extruder_grouped, df_bof_grouped,
df_pickup_grouped, viscosity_df)
# load_and_predict(df_grouped, index_no, model_path)
model_trainer(df_grouped, index_no, model_path) # load_and_predict(df_grouped, index_no, model_path)
model_trainer(df_grouped, index_no, model_path)
except Exception as er:
logger.error(f"Error in main prediction: {str(er)}")
raise Exception(str(er))
if __name__ == "__main__": if __name__ == "__main__":
...@@ -1919,3 +1934,4 @@ if __name__ == "__main__": ...@@ -1919,3 +1934,4 @@ if __name__ == "__main__":
start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows) start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
except Exception as e: except Exception as e:
logger.exception(f"Module failed because of error {e}") logger.exception(f"Module failed because of error {e}")
raise Exception(str(e))
...@@ -72,7 +72,7 @@ def return_batch_no_df(raw_df, viscosity_df, date_dict, index_number): ...@@ -72,7 +72,7 @@ def return_batch_no_df(raw_df, viscosity_df, date_dict, index_number):
for each_day in dt_list: for each_day in dt_list:
day_df = sorted_bof_df[sorted_bof_df['day'] == each_day] day_df = sorted_bof_df[sorted_bof_df['day'] == each_day]
if (day_df['length passed through'].max() - day_df['length passed through'].min()) <= 0: if (day_df['length passed through'].max() - day_df['length passed through'].min()) <= 0:
value = 0 raise Exception(f"Length passed through in bof section for the day {each_day} is 0")
else: else:
value = day_df['length passed through'].max() - day_df['length passed through'].min() value = day_df['length passed through'].max() - day_df['length passed through'].min()
day_length_dic[each_day] = value day_length_dic[each_day] = value
...@@ -233,4 +233,4 @@ def preprocess_bof_section(df, index_number, vis_df): ...@@ -233,4 +233,4 @@ def preprocess_bof_section(df, index_number, vis_df):
except Exception as err: except Exception as err:
logger.error(f'Error in fetching the bof preprocess data: {str(err)}') logger.error(f'Error in fetching the bof preprocess data: {str(err)}')
logger.error(traceback.format_exc()) logger.error(traceback.format_exc())
raise Exception(str(err)) raise Exception(str(err))
\ No newline at end of file
...@@ -12,252 +12,262 @@ warnings.filterwarnings("ignore") ...@@ -12,252 +12,262 @@ warnings.filterwarnings("ignore")
def mixer_section_start_end_time(raw_df, index_no):
    """Return the mixing start/end timestamps of every mixer batch.

    Filters the raw plant data down to the mixer-section columns for the
    given index number, groups the rows by (day, mixing batch number) and
    records the earliest and latest timestamp seen for each batch.

    Args:
        raw_df: raw plant DataFrame; must contain 'Time Stamp',
            'Size No (INDEX No).3', 'Size name', 'Mixing batch number'
            and 'idle time between batches' columns.
        index_no: size/index number used to filter the rows.

    Returns:
        dict mapping 'Batch_<batch>_<day>' -> {'start_time': str,
        'end_time': str}, timestamps rendered with str().

    Raises:
        Exception: any failure is logged and re-raised, chained to the
            original error so the real traceback is preserved.
    """
    try:
        mixer_cols = ['Time Stamp',
                      'Size No (INDEX No).3',
                      'Size name',
                      'Mixing batch number',
                      'idle time between batches',
                      ]
        # .copy() so the column assignments below never touch raw_df
        # (and never trigger SettingWithCopyWarning on a slice).
        mixer_df = raw_df[mixer_cols].copy()
        mixer_df['Time Stamp'] = pd.to_datetime(mixer_df['Time Stamp'])
        mixer_df = mixer_df.sort_values(by='Time Stamp')

        # Normalise every numeric column to float so the filters below
        # behave the same whatever dtype the file was read with.
        numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
        mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)

        mixer_df['day'] = mixer_df['Time Stamp'].dt.date
        mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_no]
        # batch number 0 is filtered out (treated as "no batch running")
        mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]

        # Duplicate the timestamp so min/max can be aggregated separately.
        mixer_df['time_min'] = mixer_df['Time Stamp']
        mixer_df['time_max'] = mixer_df['Time Stamp']
        df_mixer_grouped = (
            mixer_df.groupby(['day', 'Mixing batch number'])
            .agg({'time_min': 'min', 'time_max': 'max'})
            .reset_index()
        )
        df_mixer_grouped['batch-date'] = (
            'Batch_'
            + df_mixer_grouped['Mixing batch number'].astype(str)
            + '_'
            + df_mixer_grouped['day'].astype(str)
        )

        # After the groupby there is exactly one row per (day, batch), so a
        # single pass builds the dict (the old unique()+re-filter loop did
        # the same work in O(n^2)).  The unused time-diff column that was
        # computed here has been dropped — it never left this function.
        date_dict = {}
        for _, row in df_mixer_grouped.iterrows():
            date_dict[row['batch-date']] = {
                "start_time": str(row['time_min']),
                'end_time': str(row['time_max']),
            }
        return date_dict
    except Exception as err:
        logger.error(f'Exception in extruder mixer time fetch {str(err)}')
        # chain the cause so the original traceback is not lost
        raise Exception(str(err)) from err
):
raw_df = raw_df.sort_values(by='Time Stamp')
def return_batch_no_df(raw_df, viscosity_df, date_dict, bof_cols, additional_cols, index_no):
    """Attach mixer batch numbers to the extruder-section rows of the raw data.

    Pipeline:
      1. Compute, per day, the total discharge length produced by the extruder.
      2. Spread that daily length across the day's mixer batches in proportion
         to each batch's input rubber weight (from the viscosity sheet), giving
         a cumulative length boundary per batch.
      3. Map every raw row's cumulative 'discharge length' to the first batch
         whose boundary it does not exceed ('batch_no').
      4. Flag rows whose timestamp falls inside the mixer start/end window of
         their batch (from date_dict) and shift those rows to the previous
         batch number, producing the final 'batch-date' key.

    Raises Exception (logged) on any failure, including a day whose discharge
    length span is zero.
    """
    try:
        raw_df = raw_df.sort_values(by='Time Stamp')
        raw_df['Time Stamp'] = pd.to_datetime(raw_df['Time Stamp'])
        raw_df["day"] = raw_df["Time Stamp"].dt.date
        raw_df["day"] = raw_df["day"].astype("str")

        raw_df["Mixing batch number"] = raw_df["Mixing batch number"].astype("float")
        raw_df["batch-date"] = (
            "Batch_"
            + raw_df["Mixing batch number"].astype("str")
            + "_"
            + raw_df["day"].astype("str")
        )

        bof_add_cols = bof_cols + additional_cols
        bof_df = raw_df[bof_add_cols]

        sorted_bof_df = bof_df.sort_values(by="Time Stamp", ascending=True)
        sorted_bof_df = sorted_bof_df[sorted_bof_df["Size No (INDEX No).4"] == index_no]
        dt_list = list(sorted_bof_df["day"].unique())

        # Total discharge length produced on each day (counter is cumulative,
        # so the span max-min is the day's production).  A zero span is a
        # data problem and aborts the run.
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_bof_df[sorted_bof_df["day"] == each_day]
            if day_df["discharge length"].max() - day_df["discharge length"].min() <= 0:
                raise Exception(f"Discharge length in extruder section for the day {each_day} is 0")
            else:
                value = day_df["discharge length"].max() - day_df["discharge length"].min()
            day_length_dic[each_day] = value

        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df["day"] = sorted_viscosity_df["Mixing date"].dt.date
        sorted_viscosity_df["day"] = sorted_viscosity_df["day"].astype("str")
        extrud_visc_df = sorted_viscosity_df[
            ["Batch No.", "Input rubber weight(0.1kg)", "day", "Mixing date"]
        ]
        # Days present in the viscosity sheet but absent from the raw data
        # get length 0.
        extrud_visc_df["length_from_extruder"] = extrud_visc_df["day"].map(day_length_dic)
        extrud_visc_df["length_from_extruder"] = extrud_visc_df[
            "length_from_extruder"
        ].fillna(0)
        # Weight column is in 0.1 kg units, hence the /10 to get kg.
        daily_sum_weight = (
            extrud_visc_df.groupby("day")["Input rubber weight(0.1kg)"].sum() / 10
        )
        # metres of discharge per kg of rubber, per day
        extrud_visc_df["m/kg"] = extrud_visc_df.apply(
            lambda row: row["length_from_extruder"] / daily_sum_weight[row["day"]], axis=1
        )
        # Each batch's share of the day's length, proportional to its weight.
        extrud_visc_df["batch_length"] = extrud_visc_df.apply(
            lambda row: row["m/kg"] * row["Input rubber weight(0.1kg)"] / 10, axis=1
        ).astype("float64")
        extrud_visc_df["batch_length"] = extrud_visc_df["batch_length"].apply(math.ceil)
        # Running boundary: a row belongs to the first batch whose cumulative
        # length it does not exceed.
        extrud_visc_df["cumulative_length"] = extrud_visc_df.groupby("day")[
            "batch_length"
        ].cumsum()

        # {day: {batch_no: cumulative_length}}, insertion-ordered by batch.
        discharge_dict = (
            extrud_visc_df.groupby("day")
            .apply(
                lambda group: group.set_index("Batch No.").to_dict()["cumulative_length"]
            )
            .to_dict()
        )

        test_sorted_extr_df = sorted_bof_df
        test_df = test_sorted_extr_df
        batch_numbers = []

        # Map each row's discharge length to a batch number.
        for index, row in test_df.iterrows():
            day = row["day"]
            discharge_length = row["discharge length"]
            if discharge_length == 0:
                # counter at zero -> no batch assignable
                batch_numbers.append(0)
            else:
                if day in discharge_dict:
                    batch_length_dict = discharge_dict[day]
                    for batch_no, batch_length in batch_length_dict.items():
                        if discharge_length <= batch_length:
                            batch_numbers.append(batch_no)
                            break
                    else:
                        # for/else: no boundary matched (length exceeds the
                        # day's last batch) -> carry the previous row's batch
                        # number forward.
                        # NOTE(review): raises IndexError if the very first
                        # row already exceeds every boundary — confirm this
                        # cannot happen in real data.
                        batch_numbers.append(batch_numbers[-1])
                else:
                    # day missing from the viscosity-derived mapping
                    batch_numbers.append(np.nan)

        test_df["batch_no"] = batch_numbers

        batch_number = 0
        batch_list = []

        started_with_one = False
        current_day = None

        # Independent per-day counter of 'lower door open' rising edges
        # (kept alongside batch_no as 'updtaed_bt_list').
        for value, day in zip(list(test_df["lower door open"]), list(test_df["day"])):

            if current_day != day:
                # new day -> restart the edge counter
                # NOTE(review): started_with_one is NOT reset here, so an
                # open door spanning midnight is not recounted — confirm
                # this is intended.
                current_day = day
                batch_number = 0

            if value == 1:
                if not started_with_one:
                    # rising edge: door just opened
                    batch_number += 1
                    started_with_one = True
                batch_list.append(batch_number)
            else:
                batch_list.append(batch_number)
                started_with_one = False
        test_df["batch_no"] = test_df["batch_no"].astype("float")
        test_df["extruder_batch_date"] = (
            "Batch_"
            + test_df["batch_no"].astype("str")
            + "_"
            + test_df["day"].astype("str")
        )
        extruder_flag_list = []
        extrud_flg_vms = []
        # Flag rows whose timestamp lies strictly inside the mixer batch
        # window recorded in date_dict (timezone suffix stripped via
        # split('+') before parsing).
        for i, value in test_df.iterrows():
            if value["batch_no"] == 0.0:
                extruder_flag_list.append("false")
                extrud_flg_vms.append(0)
            else:
                start_time = date_dict.get(value["extruder_batch_date"]).get("start_time")
                end_time = date_dict.get(value["extruder_batch_date"]).get("end_time")
                if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') > datetime.strptime(
                        start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                        (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') < datetime.strptime(
                            end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                    extruder_flag_list.append("true")
                    extrud_flg_vms.append(1)
                else:
                    extruder_flag_list.append("false")
                    extrud_flg_vms.append(0)

        test_df["extruder_flag"] = extruder_flag_list
        test_df["extruder_batch_diff"] = extrud_flg_vms
        test_df["updtaed_bt_list"] = batch_list
        # Rows still inside the mixer window belong to the PREVIOUS batch.
        test_df["extruder_batch_number"] = test_df["batch_no"] - test_df[
            "extruder_batch_diff"
        ].astype("float")
        test_df["batch-date"] = (
            "Batch_"
            + test_df["extruder_batch_number"].astype("str")
            + "_"
            + test_df["day"].astype("str")
        )
        return test_df
    except Exception as err:
        logger.error(f'Exception in generating extruder batch {str(err)}')
        raise Exception(str(err))
def preprocess_extruder_section(df, index_number, vis_df):
    """Aggregate the extruder-section raw data per mixer batch.

    Selects the extruder columns, assigns mixer batch numbers via
    return_batch_no_df() (using the mixer start/end windows from
    mixer_section_start_end_time()), groups by 'batch-date' with the
    project's aggregate_dict, renames the aggregated columns and fills
    remaining NaNs with column means.

    Args:
        df: raw plant DataFrame.
        index_number: size/index number used to filter rows.
        vis_df: viscosity sheet DataFrame ('Mixing date', 'Batch No.',
            'Index No', ... columns).

    Returns:
        DataFrame with one row per 'batch-date', values rounded to 6 dp.

    Raises:
        Exception: any failure is logged and re-raised, chained to the
            original error.
    """
    try:
        extruder_cols = ExtruderConstants.extruder_cols
        additional_columns = ['Time Stamp']
        df_extruder = df[extruder_cols + additional_columns]
        df_extruder['Time Stamp'] = pd.to_datetime(df_extruder['Time Stamp'])
        df_extruder = df_extruder.sort_values(by='Time Stamp')
        df_extruder['day'] = df_extruder['Time Stamp'].dt.date
        df_extruder['day'] = df_extruder['day'].astype('str')
        # NOTE(review): sorted_extrud_df is never used after this point —
        # return_batch_no_df() receives the original df.  Kept (including
        # the column-existence check the drop implies) pending confirmation.
        sorted_extrud_df = df_extruder.sort_values(by="Time Stamp", ascending=True)
        sorted_extrud_df = sorted_extrud_df[sorted_extrud_df['Size No (INDEX No).4'] == index_number]
        drop_col = ['spare.19',
                    'spare.20',
                    'spare.21',
                    'spare.22',
                    'spare.23',
                    'spare.24',
                    'spare.25', 'Hopper bank upper limit',
                    'middle of hopper bank',
                    'Hopper bank lower limit',
                    'Hopper bank below lower limit']

        sorted_extrud_df.drop(columns=drop_col, inplace=True)
        # Mixer batch start/end windows, keyed by 'Batch_<no>_<day>'.
        date_dict = mixer_section_start_end_time(df, index_number)
        additional_cols = ['day', 'Time Stamp', 'lower door open']
        # adding date col to the viscosity df
        vis_df = vis_df.sort_values(by='Mixing date')
        vis_df['date'] = vis_df['Mixing date'].dt.date
        vis_df['batch-date'] = 'Batch_' + vis_df['Batch No.'].astype('float').astype(str) + '_' + vis_df[
            'date'].astype(str)
        vis_df = vis_df[vis_df['Index No'] == index_number]
        extruder_merged_df_final = return_batch_no_df(df, vis_df, date_dict, extruder_cols, additional_cols,
                                                      index_number)
        # batch 0 means "no batch assigned" -> excluded from aggregation
        extruder_merged_df_final = extruder_merged_df_final[extruder_merged_df_final['extruder_batch_number'] != 0]
        grouped_cols = ['batch-date']
        aggregate_dict = ExtruderConstants.aggregate_dict
        df_extruder_grouped = extruder_merged_df_final.groupby(grouped_cols).agg(aggregate_dict).reset_index()
        # Rename aggregated columns to '<snake_case_col>_<agg>' except the
        # handful kept under their original names.
        col_renamer = {}
        for col, col_agg in aggregate_dict.items():
            if col not in ['viscosity', 'time_min', 'time_max', 'Mixing Weight (Integrated Value)_diff', 'max_rpm_count']:
                renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
                col_renamer[col] = renamed_col
            else:
                col_renamer[col] = col
        df_extruder_grouped = df_extruder_grouped.rename(columns=col_renamer)
        # numeric_only=True: the frame still contains the string column
        # 'batch-date', and DataFrame.mean() without it raises TypeError
        # on pandas >= 2.0.
        df_extruder_grouped = df_extruder_grouped.fillna(df_extruder_grouped.mean(numeric_only=True))
        df_extruder_grouped = round(df_extruder_grouped, 6)
        return df_extruder_grouped
    except Exception as err:
        logger.error(f"Exception in extruder preprocess {str(err)}")
        # chain the cause so the original traceback is not lost
        raise Exception(str(err)) from err
...@@ -505,6 +505,8 @@ def preprocess_pickup_section(raw_df, index_number, viscosity_df): ...@@ -505,6 +505,8 @@ def preprocess_pickup_section(raw_df, index_number, viscosity_df):
for each_day in dt_list: for each_day in dt_list:
day_df = sorted_pick_df[sorted_pick_df['day'] == each_day] day_df = sorted_pick_df[sorted_pick_df['day'] == each_day]
day_length_dic[each_day] = day_df['length passed through.1'].max() - day_df['length passed through.1'].min() day_length_dic[each_day] = day_df['length passed through.1'].max() - day_df['length passed through.1'].min()
if day_df['length passed through.1'].max() - day_df['length passed through.1'].min() <= 0:
raise Exception(f"Length passed through in pick up section for {each_day} is 0")
''' Reading viscosity file with skipping 2 rows ''' ''' Reading viscosity file with skipping 2 rows '''
viscosity_df['Mixing date'] = pd.to_datetime(viscosity_df['Mixing date']) viscosity_df['Mixing date'] = pd.to_datetime(viscosity_df['Mixing date'])
...@@ -549,4 +551,4 @@ def preprocess_pickup_section(raw_df, index_number, viscosity_df): ...@@ -549,4 +551,4 @@ def preprocess_pickup_section(raw_df, index_number, viscosity_df):
except Exception as err: except Exception as err:
logger.error(f'Error while performing main function for pickup section {str(err)}') logger.error(f'Error while performing main function for pickup section {str(err)}')
logger.error(traceback.format_exc()) logger.error(traceback.format_exc())
raise Exception(str(err)) raise Exception(str(err))
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment