dasharatha.vamshi / bsj-models-for-rubbers — Commits
Commit 5d708a91 authored Dec 21, 2023 by dasharatha.vamshi

    added main

Parent: 1057b00c

Showing 2 changed files with 2024 additions and 13 deletions (+2024 / -13):

    app.py   +19    -13
    main.py  +2005  -0
app.py  (view file @ 5d708a91)
import warnings
import sys
import numpy as np
import pandas as pd
from loguru import logger
...
@@ -253,18 +253,24 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
if __name__ == "__main__":
    try:
        logger.info("Starting the model")
        index_number = 1250
        raw_file_path = 'FY676-A-WO_Visc.xlsx'
        raw_file_skip_rows = 0
        viscosity_file_path = 'viscosity_natural_rubber_data.xlsx'
        viscosity_file_skip_rows = 3
        start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
        index_number = 3294
        raw_file_path = 'fy664g_raw.csv'
        raw_file_skip_rows = 0
        viscosity_file_path = 'fy664g-viscosity.xlsx'
        viscosity_file_skip_rows = 2
        # logger.info("Starting the model")
        # index_number = 1250
        # raw_file_path = 'FY676-A-WO_Visc.xlsx'
        # raw_file_skip_rows = 0
        # viscosity_file_path = 'viscosity_natural_rubber_data.xlsx'
        # viscosity_file_skip_rows = 3
        # start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
        # index_number = 3294
        # raw_file_path = 'fy664g_raw.csv'
        # raw_file_skip_rows = 0
        # viscosity_file_path = 'fy664g-viscosity.xlsx'
        # viscosity_file_skip_rows = 2
        # start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
        raw_file_path = sys.argv[sys.argv.index('-r') + 1]
        viscosity_file_path = sys.argv[sys.argv.index('-v') + 1]
        raw_file_skip_rows = int(sys.argv[sys.argv.index('-sr') + 1])
        viscosity_file_skip_rows = int(sys.argv[sys.argv.index('-sv') + 1])
        index_number = int(sys.argv[sys.argv.index('-index') + 1])
        start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
    except Exception as e:
        logger.exception(f"Module failed because of error {e}")
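
Note: the block above now takes its inputs from '-flag value' pairs on the command line. A minimal sketch of how that parsing behaves, reusing the fy664g sample file names from the commented-out block (illustrative values only):

import sys

# Simulated command line: python app.py -r fy664g_raw.csv -v fy664g-viscosity.xlsx -sr 0 -sv 2 -index 3294
sys.argv = ['app.py', '-r', 'fy664g_raw.csv', '-v', 'fy664g-viscosity.xlsx',
            '-sr', '0', '-sv', '2', '-index', '3294']
raw_file_path = sys.argv[sys.argv.index('-r') + 1]                    # 'fy664g_raw.csv'
viscosity_file_path = sys.argv[sys.argv.index('-v') + 1]              # 'fy664g-viscosity.xlsx'
raw_file_skip_rows = int(sys.argv[sys.argv.index('-sr') + 1])         # 0
viscosity_file_skip_rows = int(sys.argv[sys.argv.index('-sv') + 1])   # 2
index_number = int(sys.argv[sys.argv.index('-index') + 1])            # 3294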
main.py  0 → 100644  (view file @ 5d708a91)
import sys
import warnings
import mlflow
from sklearn import metrics
from sklearn.model_selection import train_test_split
# from scripts.constants.constants import RawConstants
# from scripts.core.model_loader import ModelLoader
# from scripts.section_utils.bof_section import preprocess_bof_section
# from scripts.section_utils.extruder_section import preprocess_extruder_section
# from scripts.section_utils.material_section import preprocess_viscosity_section
# from scripts.section_utils.mixer_section import preprocess_mixer_section
# from scripts.section_utils.pickup_section import preprocess_pickup_section
# from scripts.section_utils.sheet_supply_section import preprocess_sheet_section
warnings.filterwarnings("ignore")
import warnings
# from scripts.constants.constants import ExtruderConstants
warnings.filterwarnings("ignore")
import warnings
# from scripts.constants.constants import ViscosityConstants
warnings.filterwarnings("ignore")
import warnings
# from scripts.constants.constants import MixerConstants
warnings.filterwarnings("ignore")
import warnings
# from scripts.constants.constants import PickupConstants
warnings.filterwarnings("ignore")
import warnings
# from scripts.constants.constants import SheetConstants
warnings.filterwarnings("ignore")
def preprocess_sheet_section(df, index_number):
    sheet_supply_column = SheetConstants.sheet_supply_column
    sheet_supply_df = df[sheet_supply_column]
    sheet_supply_df['Time Stamp'] = pd.to_datetime(sheet_supply_df['Time Stamp'])
    sheet_supply_df = sheet_supply_df.sort_values(by='Time Stamp')
    numeric_cols = sheet_supply_df.select_dtypes(include=['int', 'float']).columns
    # Convert numeric columns to float
    sheet_supply_df[numeric_cols] = sheet_supply_df[numeric_cols].astype(float)
    sheet_supply_df['day'] = sheet_supply_df['Time Stamp'].dt.date
    sheet_supply_df['batch-date'] = 'Batch_' + sheet_supply_df['Weighing times'].astype(str) + '_' + \
        sheet_supply_df['day'].astype(str)
    sheet_supply_df = sheet_supply_df[sheet_supply_df["Size No (INDEX No)"] == index_number]
    sheet_supply_df = sheet_supply_df[sheet_supply_df["Weighing times"] != 0]
    aggregation_dict = SheetConstants.aggregation_dict
    group_by = ['day', 'Weighing times']
    df_sheet_grouped = sheet_supply_df.groupby(group_by).agg(aggregation_dict).reset_index()
    col_renamer = {}
    for col, col_agg in aggregation_dict.items():
        if col not in ['viscosity', 'time_min', 'time_max']:
            renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
            col_renamer[col] = renamed_col
        else:
            col_renamer[col] = col
    df_sheet_grouped = df_sheet_grouped.rename(columns=col_renamer)
    df_sheet_grouped['batch-date'] = 'Batch_' + df_sheet_grouped['Weighing times'].astype(str) + '_' + \
        df_sheet_grouped['day'].astype(str)
    df_sheet_grouped = round(df_sheet_grouped, 6)
    return df_sheet_grouped
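
Note: a minimal sketch of the column-renaming rule used in the aggregation above; the aggregation mapping here is made up, since SheetConstants is not part of this diff:

aggregation_dict = {'Size No (INDEX No)': 'mean', 'viscosity': 'mean'}  # hypothetical mapping
col_renamer = {}
for col, col_agg in aggregation_dict.items():
    if col not in ['viscosity', 'time_min', 'time_max']:
        col_renamer[col] = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
    else:
        col_renamer[col] = col
print(col_renamer)  # {'Size No (INDEX No)': 'size_no_index_no_mean', 'viscosity': 'viscosity'}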
def get_mixer_batch_date(raw_df, index_number):
    try:
        logger.info('Getting mixer batch date dictionary')
        mixer_df = raw_df[PickupConstants.pick_imp_mixer_cols]
        mixer_df = mixer_df.sort_values(by='Time Stamp')
        numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
        # Convert numeric columns to float
        mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
        mixer_df['day'] = mixer_df['Time Stamp'].dt.date
        mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_number]
        mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]
        mixer_df['time_min'] = mixer_df['Time Stamp']
        mixer_df['time_max'] = mixer_df['Time Stamp']
        aggregation_dict = {
            'time_min': 'min',
            'time_max': 'max',
        }
        group_by = ['day', 'Mixing batch number']
        df_mixer_grouped = mixer_df.groupby(group_by).agg(aggregation_dict).reset_index()
        df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped['time_max'] - df_mixer_grouped['time_min']
        df_mixer_grouped['mixer_section_time_diff_second'] = \
            df_mixer_grouped['mixer_section_time_diff_second'].dt.total_seconds()
        df_mixer_grouped['batch-date'] = 'Batch_' + df_mixer_grouped['Mixing batch number'].astype(str) + '_' + \
            df_mixer_grouped['day'].astype(str)
        date_dict = {}
        batch_lis = list(df_mixer_grouped['batch-date'].unique())
        for each_bt in batch_lis:
            df_nw = df_mixer_grouped[df_mixer_grouped['batch-date'] == each_bt]
            date_dict[each_bt] = {"start_time": str(list(df_nw['time_min'])[0]),
                                  'end_time': str(list(df_nw['time_max'])[0])}
        return date_dict
    except Exception as err:
        logger.error(f'Error while getting mixer time and forming date dict: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
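
Note: for reference, the dictionary returned above keys each mixer batch by its 'Batch_<number>_<day>' label; the timestamps below are illustrative, not real data:

date_dict = {
    'Batch_1.0_2023-12-01': {'start_time': '2023-12-01 08:00:00',
                             'end_time': '2023-12-01 08:12:30'},
}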
def return_batch_no_bof_df(raw_df, viscosity_df, date_dict, index_number):
    try:
        logger.info('Getting bof batch number')
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        raw_df['Mixing batch number'] = raw_df['Mixing batch number'].astype('float')
        raw_df['batch-date'] = 'Batch_' + raw_df['Mixing batch number'].astype('str') + '_' + raw_df['day'].astype('str')
        bof_add_cols = ['Size No (INDEX No).5', 'length passed through', 'Time Stamp', 'day', 'lower door open']
        bof_df = raw_df[bof_add_cols]
        sorted_bof_df = bof_df.sort_values(by="Time Stamp", ascending=True)
        sorted_bof_df = sorted_bof_df[sorted_bof_df['Size No (INDEX No).5'] == index_number]
        dt_list = list(sorted_bof_df['day'].unique())
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_bof_df[sorted_bof_df['day'] == each_day]
            if (day_df['length passed through'].max() - day_df['length passed through'].min()) <= 0:
                value = 0
            else:
                value = day_df['length passed through'].max() - day_df['length passed through'].min()
            day_length_dic[each_day] = value
        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date
        sorted_viscosity_df['day'] = sorted_viscosity_df['day'].astype('str')
        extrud_visc_df = sorted_viscosity_df[['Batch No.', 'Input rubber weight(0.1kg)', 'day', 'Mixing date']]
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['day'].map(day_length_dic)
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['length_from_extruder'].fillna(0)
        daily_sum_weight = extrud_visc_df.groupby('day')['Input rubber weight(0.1kg)'].sum() / 10
        # Add a new column 'm/kg' by dividing 'length_from_extruder' by the sum for each day
        extrud_visc_df['m/kg'] = extrud_visc_df.apply(
            lambda row: row['length_from_extruder'] / daily_sum_weight[row['day']], axis=1)
        extrud_visc_df['batch_length'] = extrud_visc_df.apply(
            lambda row: row['m/kg'] * row['Input rubber weight(0.1kg)'] / 10, axis=1).astype('float64')
        extrud_visc_df['batch_length'] = extrud_visc_df['batch_length'].apply(math.ceil)
        extrud_visc_df['cumulative_length'] = extrud_visc_df.groupby('day')['batch_length'].cumsum()
        discharge_dict = extrud_visc_df.groupby('day').apply(
            lambda group: group.set_index('Batch No.').to_dict()['cumulative_length']).to_dict()
        test_sorted_extr_df = sorted_bof_df
        test_df = test_sorted_extr_df
        # Initialize an empty list to store batch numbers
        batch_numbers = []
        # Iterate through each row in the DataFrame
        for index, row in test_df.iterrows():
            day = row['day']
            discharge_length = row['length passed through']
            if discharge_length == 0:
                batch_numbers.append(0)
            else:
                # Check if the day is in the dictionary
                if day in discharge_dict:
                    # Check if discharge length is less than or equal to the corresponding batch length
                    batch_length_dict = discharge_dict[day]
                    for batch_no, batch_length in batch_length_dict.items():
                        if discharge_length <= batch_length:
                            batch_numbers.append(batch_no)
                            break
                    else:
                        # If no match is found in the dictionary, assign NaN to batch number
                        batch_numbers.append(batch_numbers[-1])
                else:
                    # If day is not in the dictionary, assign NaN to batch number
                    batch_numbers.append(np.nan)
        # Add the 'batch_no' column to the DataFrame
        test_df['batch_no'] = batch_numbers
        batch_number = 0
        batch_list = []
        started_with_one = False
        current_day = None
        for value, day in zip(list(test_df['lower door open']), list(test_df['day'])):
            if current_day != day:
                current_day = day
                batch_number = 0
            if value == 1:
                if not started_with_one:
                    batch_number += 1
                    started_with_one = True
                batch_list.append(batch_number)
            else:
                batch_list.append(batch_number)
                started_with_one = False
        batch_number = 0
        batch_list = []
        started_with_one = False
        for value in test_df['lower door open']:
            if value == 1:
                if not started_with_one:
                    batch_number += 1
                    started_with_one = True
                batch_list.append(batch_number)
            else:
                batch_list.append(batch_number)
                started_with_one = False
        test_df['batch_no'] = test_df['batch_no'].astype('float')
        test_df['bof_batch_date'] = 'Batch_' + test_df['batch_no'].astype('str') + '_' + test_df['day'].astype('str')
        extruder_flag_list = []
        extrud_flg_vms = []
        for i, value in test_df.iterrows():
            if value['batch_no'] == 0.0:
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)
            else:
                # start_time = np.datetime64(date_dict.get(value['bof_batch_date']).get('start_time'))
                # end_time = np.datetime64(date_dict.get(value['bof_batch_date']).get('end_time'))
                start_time = date_dict.get(value["bof_batch_date"]).get("start_time")
                end_time = date_dict.get(value["bof_batch_date"]).get("end_time")
                if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S')
                        > datetime.strptime(start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                        (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S')
                         < datetime.strptime(end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                    extruder_flag_list.append('true')
                    extrud_flg_vms.append(1)
                else:
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
        test_df['bof_flag'] = extruder_flag_list
        test_df['bof_batch_diff'] = extrud_flg_vms
        test_df['updated_bt_list'] = batch_list
        test_df['bof_batch_number'] = test_df['batch_no'] - test_df['bof_batch_diff'].astype('float')
        test_df = test_df.rename(columns={'bof_batch_date': 'batch-date'})
        return test_df
    except Exception as err:
        logger.error(f'Error while adding batch to bof section: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
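
Note: a minimal sketch of the cumulative-length lookup that assigns a batch number above, with made-up lengths:

# 'day' -> {'Batch No.' -> cumulative_length}, as built by discharge_dict above
discharge_dict = {'2023-12-01': {1.0: 120, 2.0: 250, 3.0: 380}}
day, discharge_length = '2023-12-01', 200
for batch_no, batch_length in discharge_dict[day].items():
    if discharge_length <= batch_length:
        print(batch_no)  # 2.0 -- the first batch whose cumulative length covers 200
        break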
def get_bof_batch_date(bof_batch_df, index_number):
    try:
        logger.info('Getting bof batch date dictionary')
        bof_cols = PickupConstants.pick_imp_bof_cols
        bof_df = bof_batch_df[bof_cols]
        bof_df = bof_df.sort_values(by='Time Stamp')
        numeric_cols = bof_df.select_dtypes(include=['int', 'float']).columns
        # Convert numeric columns to float
        bof_df[numeric_cols] = bof_df[numeric_cols].astype(float)
        bof_df['day'] = bof_df['Time Stamp'].dt.date
        bof_df = bof_df[bof_df["Size No (INDEX No).5"] == index_number]
        bof_df = bof_df[bof_df["bof_batch_number"] != 0]
        bof_df['time_min'] = bof_df['Time Stamp']
        bof_df['time_max'] = bof_df['Time Stamp']
        aggregation_dict = {
            'time_min': 'min',
            'time_max': 'max',
        }
        group_by = ['day', 'bof_batch_number']
        df_bof_grouped = bof_df.groupby(group_by).agg(aggregation_dict).reset_index()
        df_bof_grouped['mixer_section_time_diff_second'] = df_bof_grouped['time_max'] - df_bof_grouped['time_min']
        df_bof_grouped['mixer_section_time_diff_second'] = \
            df_bof_grouped['mixer_section_time_diff_second'].dt.total_seconds()
        df_bof_grouped['batch-date'] = 'Batch_' + df_bof_grouped['bof_batch_number'].astype(str) + '_' + \
            df_bof_grouped['day'].astype(str)
        bof_date_dict = {}
        batch_lis = list(df_bof_grouped['batch-date'].unique())
        for each_bt in batch_lis:
            df_nw = df_bof_grouped[df_bof_grouped['batch-date'] == each_bt]
            bof_date_dict[each_bt] = {"start_time": str(list(df_nw['time_min'])[0]),
                                      'end_time': str(list(df_nw['time_max'])[0])}
        return bof_date_dict
    except Exception as err:
        logger.error(f'Error while getting bof time and forming bof date dict: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def return_fy676a_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_df, index_number):
    try:
        logger.info('Getting pickup batch date dataframe')
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        raw_df['Mixing batch number'] = raw_df['Mixing batch number'].astype('float')
        raw_df['batch-date'] = 'Batch_' + raw_df['Mixing batch number'].astype('str') + '_' + raw_df['day'].astype('str')
        pick_add_cols = PickupConstants.pick_cols + PickupConstants.pick_additional_cols
        pick_df = raw_df[pick_add_cols]
        sorted_pick_df = pick_df.sort_values(by="Time Stamp", ascending=True)
        sorted_pick_df = sorted_pick_df[sorted_pick_df['Size No (INDEX No).6'] == index_number]
        dt_list = list(sorted_pick_df['day'].unique())
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_pick_df[sorted_pick_df['day'] == each_day]
            if day_df['length passed through.1'].max() - day_df['length passed through.1'].min() <= 0:
                value = 0
            else:
                value = day_df['length passed through.1'].max() - day_df['length passed through.1'].min()
            day_length_dic[each_day] = value
        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date
        sorted_viscosity_df['day'] = sorted_viscosity_df['day'].astype('str')
        extrud_visc_df = sorted_viscosity_df[['Batch No.', 'Input rubber weight(0.1kg)', 'day', 'Mixing date']]
        extrud_visc_df['length_from_pickup'] = extrud_visc_df['day'].map(day_length_dic)
        extrud_visc_df['length_from_pickup'] = extrud_visc_df['length_from_pickup'].fillna(0)
        daily_sum_weight = extrud_visc_df.groupby('day')['Input rubber weight(0.1kg)'].sum() / 10
        # Add a new column 'm/kg' by dividing 'length_from_pickup' by the sum for each day
        extrud_visc_df['m/kg'] = extrud_visc_df.apply(
            lambda row: row['length_from_pickup'] / daily_sum_weight[row['day']], axis=1)
        extrud_visc_df['batch_length'] = extrud_visc_df.apply(
            lambda row: row['m/kg'] * row['Input rubber weight(0.1kg)'] / 10, axis=1).astype('float64')
        extrud_visc_df['batch_length'] = extrud_visc_df['batch_length'].apply(math.ceil)
        extrud_visc_df['cumulative_length'] = extrud_visc_df.groupby('day')['batch_length'].cumsum()
        discharge_dict = extrud_visc_df.groupby('day').apply(
            lambda group: group.set_index('Batch No.').to_dict()['cumulative_length']).to_dict()
        test_sorted_extr_df = sorted_pick_df
        test_pick_df = test_sorted_extr_df
        # Initialize an empty list to store batch numbers
        batch_numbers = []
        # Iterate through each row in the DataFrame
        for index, row in test_pick_df.iterrows():
            day = row['day']
            discharge_length = row['length passed through.1']
            if discharge_length == 0:
                batch_numbers.append(0)
            else:
                # Check if the day is in the dictionary
                if day in discharge_dict:
                    # Check if discharge length is less than or equal to the corresponding batch length
                    batch_length_dict = discharge_dict[day]
                    for batch_no, batch_length in batch_length_dict.items():
                        if discharge_length <= batch_length:
                            batch_numbers.append(batch_no)
                            break
                    else:
                        # If no match is found in the dictionary, assign NaN to batch number
                        batch_numbers.append(batch_numbers[-1])
                else:
                    # If day is not in the dictionary, assign NaN to batch number
                    batch_numbers.append(np.nan)
        # Add the 'batch_no' column to the DataFrame
        test_pick_df['batch_no'] = batch_numbers
        test_pick_df['batch_no'] = test_pick_df['batch_no'].astype('float')
        test_pick_df['pickup_batch_date'] = 'Batch_' + test_pick_df['batch_no'].astype('str') + '_' + \
            test_pick_df['day'].astype('str')
        test_pick_df['bof_batch_number'] = bof_batch_num_df['bof_batch_number']
        extruder_flag_list = []
        extrud_flg_vms = []
        for i, value in test_pick_df.iterrows():
            if value['batch_no'] == 0.0:
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)
            else:
                # start_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('start_time'))
                # end_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('end_time'))
                start_time = bof_date_dict.get(value["pickup_batch_date"]).get("start_time")
                end_time = bof_date_dict.get(value["pickup_batch_date"]).get("end_time")
                if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S')
                        > datetime.strptime(start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                        (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S')
                         < datetime.strptime(end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
                else:
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
        test_pick_df['pickup_flag'] = extruder_flag_list
        test_pick_df['pickup_batch_diff'] = extrud_flg_vms
        test_pick_df['pickup_batch_verify_number'] = test_pick_df['batch_no'] - \
            test_pick_df['pickup_batch_diff'].astype('float')
        actual_pickup_bt_num = []
        for i, value in test_pick_df.iterrows():
            pickup_batch_number = value['pickup_batch_verify_number']
            bof_batch_num = value['bof_batch_number']
            if pickup_batch_number <= bof_batch_num:
                actual_pickup_bt_num.append(pickup_batch_number)
            else:
                actual_pickup_bt_num.append(bof_batch_num)
        test_pick_df['pickup_batch_number'] = actual_pickup_bt_num
        test_pick_df['batch-date'] = 'Batch_' + test_pick_df['pickup_batch_number'].astype('str') + '_' + \
            test_pick_df['day'].astype('str')
        return test_pick_df
    except Exception as err:
        logger.error(f'Error in adding batch to fy676a pick section: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, index_no):
    try:
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        raw_df['Mixing batch number'] = raw_df['Mixing batch number'].astype('float')
        raw_df['batch-date'] = 'Batch_' + raw_df['Mixing batch number'].astype('str') + '_' + raw_df['day'].astype('str')
        pick_add_cols = PickupConstants.pick_cols + PickupConstants.pick_additional_cols
        pick_df = raw_df[pick_add_cols]
        sorted_pick_df = pick_df.sort_values(by="Time Stamp", ascending=True)
        sorted_pick_df = sorted_pick_df[sorted_pick_df['Size No (INDEX No).6'] == index_no]
        dt_list = list(sorted_pick_df['day'].unique())
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_pick_df[sorted_pick_df['day'] == each_day]
            minimum = day_df['length passed through.1'].min()
            if minimum <= 0:
                minimum = 0
            if day_df['length passed through.1'].max() - minimum <= 0:
                value = 0
            else:
                value = day_df['length passed through.1'].max() - minimum
            day_length_dic[each_day] = value
        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date
        sorted_viscosity_df['day'] = sorted_viscosity_df['day'].astype('str')
        extrud_visc_df = sorted_viscosity_df[['Batch No.', 'Input rubber weight(0.1kg)', 'day', 'Mixing date']]
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['day'].map(day_length_dic)
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['length_from_extruder'].fillna(0)
        daily_sum_weight = extrud_visc_df.groupby('day')['Input rubber weight(0.1kg)'].sum() / 10
        # Add a new column 'm/kg' by dividing 'length_from_extruder' by the sum for each day
        extrud_visc_df['m/kg'] = extrud_visc_df.apply(
            lambda row: row['length_from_extruder'] / daily_sum_weight[row['day']], axis=1)
        extrud_visc_df['batch_length'] = extrud_visc_df.apply(
            lambda row: row['m/kg'] * row['Input rubber weight(0.1kg)'] / 10, axis=1).astype('float64')
        extrud_visc_df['batch_length'] = extrud_visc_df['batch_length'].apply(math.ceil)
        extrud_visc_df['cumulative_length'] = extrud_visc_df.groupby('day')['batch_length'].cumsum()
        discharge_dict = extrud_visc_df.groupby('day').apply(
            lambda group: group.set_index('Batch No.').to_dict()['cumulative_length']).to_dict()
        test_sorted_extr_df = sorted_pick_df
        test_pick_df = test_sorted_extr_df
        # Initialize an empty list to store batch numbers
        batch_numbers = []
        # Iterate through each row in the DataFrame
        for index, row in test_pick_df.iterrows():
            day = row['day']
            discharge_length = row['length passed through.1']
            if discharge_length == 0:
                batch_numbers.append(0)
            else:
                # Check if the day is in the dictionary
                if day in discharge_dict:
                    # Check if discharge length is less than or equal to the corresponding batch length
                    batch_length_dict = discharge_dict[day]
                    for batch_no, batch_length in batch_length_dict.items():
                        if discharge_length <= batch_length:
                            batch_numbers.append(batch_no)
                            break
                    else:
                        # If no match is found in the dictionary, assign NaN to batch number
                        batch_numbers.append(batch_numbers[-1])
                else:
                    # If day is not in the dictionary, assign NaN to batch number
                    batch_numbers.append(np.nan)
        # Add the 'batch_no' column to the DataFrame
        test_pick_df['batch_no'] = batch_numbers
        test_pick_df['batch_no'] = test_pick_df['batch_no'].astype('float')
        test_pick_df['pickup_batch_date'] = 'Batch_' + test_pick_df['batch_no'].astype('str') + '_' + \
            test_pick_df['day'].astype('str')
        extruder_flag_list = []
        extrud_flg_vms = []
        for i, value in test_pick_df.iterrows():
            if value['batch_no'] == 0.0:
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)
            else:
                # start_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('start_time'))
                # end_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('end_time'))
                start_time = bof_date_dict.get(value["pickup_batch_date"]).get("start_time")
                end_time = bof_date_dict.get(value["pickup_batch_date"]).get("end_time")
                if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S')
                        > datetime.strptime(start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                        (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S')
                         < datetime.strptime(end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
                else:
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
        test_pick_df['pickup_flag'] = extruder_flag_list
        test_pick_df['pickup_batch_diff'] = extrud_flg_vms
        test_pick_df['pickup_batch_number'] = test_pick_df['batch_no'] - \
            test_pick_df['pickup_batch_diff'].astype('float')
        test_pick_df['batch-date'] = 'Batch_' + test_pick_df['pickup_batch_number'].astype('str') + '_' + \
            test_pick_df['day'].astype('str')
        return test_pick_df
    except Exception as err:
        logger.error(f"Error while forming pickup batch number for fy664g: {str(err)}")
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def preprocess_pickup_section(raw_df, index_number, viscosity_df):
    try:
        logger.info('Preprocessing and getting aggregated pickup dataframe')
        raw_df['Time Stamp'] = pd.to_datetime(raw_df['Time Stamp'])
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        extr_cols = PickupConstants.pick_cols + PickupConstants.pick_additional_cols
        pick_df = raw_df[extr_cols]
        sorted_pick_df = pick_df.sort_values(by="Time Stamp", ascending=True)
        sorted_pick_df = sorted_pick_df[sorted_pick_df['Size No (INDEX No).6'] == index_number]
        dt_list = list(sorted_pick_df['day'].unique())
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_pick_df[sorted_pick_df['day'] == each_day]
            day_length_dic[each_day] = day_df['length passed through.1'].max() - \
                day_df['length passed through.1'].min()
        ''' Reading viscosity file with skipping 2 rows '''
        viscosity_df['Mixing date'] = pd.to_datetime(viscosity_df['Mixing date'])
        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date
        sorted_viscosity_df['day'] = sorted_viscosity_df['day'].astype('str')
        sorted_viscosity_df['batch-date'] = 'Batch_' + viscosity_df['Batch No.'].astype('float').astype(str) + '_' + \
            sorted_viscosity_df['day'].astype(str)
        sorted_viscosity_df = sorted_viscosity_df[sorted_viscosity_df['Index No'] == index_number]
        date_dict = get_mixer_batch_date(raw_df, index_number)
        bof_test_df = return_batch_no_bof_df(raw_df, sorted_viscosity_df, date_dict, index_number)
        bof_date_dict = get_bof_batch_date(bof_test_df, index_number)
        pick_merged_batch_df = pd.DataFrame()
        if index_number == 1250:
            pick_merged_batch_df = return_fy676a_pick_batch_no_df(
                raw_df, sorted_viscosity_df, bof_date_dict, bof_test_df, index_number)
        elif index_number == 3294:
            pick_merged_batch_df = return_fy664g_pick_batch_no_df(
                raw_df, viscosity_df, bof_date_dict, index_number)
        # Merging pick data with viscosity data on date-batch column
        pickup_merged_df_final = pd.merge(pick_merged_batch_df, sorted_viscosity_df[['batch-date', 'viscosity']],
                                          on='batch-date', how='left')
        # Removing batch 0
        pickup_merged_df_final = pickup_merged_df_final[pickup_merged_df_final['pickup_batch_number'] != 0]
        # Grouping with aggregated data
        df_pickup_grouped = pickup_merged_df_final.groupby(PickupConstants.pick_grouped_cols).agg(
            PickupConstants.pick_aggregate_dict).reset_index()
        col_renamer = {}
        for col, col_agg in PickupConstants.pick_aggregate_dict.items():
            if col not in ['viscosity', 'time_min', 'time_max',
                           'Mixing Weight (Integrated Value)_diff', 'max_rpm_count']:
                renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
                col_renamer[col] = renamed_col
            else:
                col_renamer[col] = col
        df_pickup_grouped = df_pickup_grouped.rename(columns=col_renamer)
        df_pickup_grouped_visc = df_pickup_grouped.drop('viscosity', axis=1)
        return df_pickup_grouped_visc
    except Exception as err:
        logger.error(f'Error while performing main function for pickup section {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def preprocess(df):
    logger.info("Starting Preprocessing the Data")
    # Replace 'nan' with NaN
    df = df.replace('nan', np.nan)
    # Calculate the number of missing values in each column
    missing_counts = df.isnull().sum()
    # Get the column names where the number of missing values is equal to the number of rows
    cols_to_remove = missing_counts[missing_counts == len(df)].index
    df = df.drop(cols_to_remove, axis=1)
    df = df.loc[df['Mixing batch number'] != 0]
    # Drop rows where 'Batch Number' is NaN
    df = df.dropna(subset=['Mixing batch number'])
    # Identify constant columns
    constant_columns = df.columns[df.nunique() == 1]
    # Drop constant columns
    df.drop(columns=constant_columns, inplace=True)
    logger.info(f"Preprocessing completed and the final shape is {df.shape}")
    columns_with_missing_values = df.columns[df.isnull().sum() > 0].tolist()
    return df
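
Note: a minimal sketch of the two column-dropping steps in preprocess() on a toy frame (column names other than 'Mixing batch number' are hypothetical):

import numpy as np
import pandas as pd

toy = pd.DataFrame({'all_nan': [np.nan, np.nan], 'constant': [1, 1],
                    'Mixing batch number': [1.0, 2.0]})
toy = toy.drop(toy.columns[toy.isnull().sum() == len(toy)], axis=1)  # drops 'all_nan'
toy = toy.drop(columns=toy.columns[toy.nunique() == 1])              # drops 'constant'
print(toy.columns.tolist())  # ['Mixing batch number']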
def preprocess_mixer_section(df, index_number):
    mixer_cols = MixerConstants.mixer_cols
    mixer_df = df[mixer_cols]
    mixer_df['Time Stamp'] = pd.to_datetime(mixer_df['Time Stamp'])
    mixer_df = mixer_df.sort_values(by='Time Stamp')
    numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
    # Convert numeric columns to float
    mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
    mixer_df['day'] = mixer_df['Time Stamp'].dt.date
    mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_number]
    mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]
    mixer_df['Mixing Weight (Integrated Value)_diff'] = mixer_df.groupby(
        ['day', 'Mixing batch number'])['Mixing Weight (Integrated Value)'].transform(lambda x: x.max() - x.min())
    mixer_cleaned_df = preprocess(mixer_df)
    mixer_cleaned_df["day"] = mixer_cleaned_df['Time Stamp'].dt.date
    mixer_cleaned_df['mixer_on_or_off'] = mixer_cleaned_df['Mixing timer value'].apply(lambda x: 0 if x == 0 else 1)
    mixer_cleaned_df['batch-date'] = 'Batch_' + mixer_cleaned_df['Mixing batch number'].astype(str) + '_' + \
        mixer_cleaned_df['day'].astype(str)
    mixer_cleaned_df = mixer_cleaned_df.sort_values(by='Time Stamp')
    # Group by 'batch-date' and add a new column 'rubber_addition'
    df['rubber_addition'] = 0

    def apply_conditions(group):
        max_value_index = group['Mixing timer value'].idxmax()
        group.loc[group['Mixing timer value'] != group['Mixing timer value'].max(), 'rubber_addition'] = 1
        group.loc[max_value_index, 'rubber_addition'] = 1
        return group

    mixer_cleaned_df = mixer_cleaned_df.groupby('batch-date').apply(apply_conditions)
    # Add 'process_on_or_off' column based on conditions
    mixer_cleaned_df['process_on_or_off'] = 0
    mixer_cleaned_df.loc[(mixer_cleaned_df['mixer_on_or_off'] == 1) &
                         (mixer_cleaned_df['rubber_addition'] == 1), 'process_on_or_off'] = 1
    numeric_cols = mixer_cleaned_df.select_dtypes(include=['number', 'float']).columns
    process_on_df = mixer_cleaned_df[mixer_cleaned_df['process_on_or_off'] == 1]
    df_full = process_on_df[process_on_df.columns]
    # Define a dictionary for data type conversions
    conversion_dict = {col: float for col in df_full.select_dtypes(include='number').columns}
    # Apply the data type conversions
    df_full = df_full.astype(conversion_dict)
    rpm_count = df_full[df_full['Rotor actual rpm'] == 60.0].groupby('batch-date')['Rotor actual rpm'].count()
    df_full = df_full.merge(rpm_count, left_on='batch-date', right_index=True, suffixes=('', '_count'))
    df_full.rename(columns={'Rotor actual rpm_count': 'max_rpm_count'}, inplace=True)
    aggregation_dict = MixerConstants.aggregation_dict
    group_by = ['day', 'Mixing batch number']
    df_mixer_grouped = df_full.groupby(group_by).agg(aggregation_dict).reset_index()
    col_renamer = {}
    for col, col_agg in aggregation_dict.items():
        if col not in ['viscosity', 'time_min', 'time_max',
                       'Mixing Weight (Integrated Value)_diff', 'max_rpm_count']:
            renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
            col_renamer[col] = renamed_col
        else:
            col_renamer[col] = col
    df_mixer_grouped = df_mixer_grouped.rename(columns=col_renamer)
    df_mixer_grouped['batch-date'] = 'Batch_' + df_mixer_grouped['Mixing batch number'].astype(str) + '_' + \
        df_mixer_grouped['day'].astype(str)
    df_mixer_grouped = round(df_mixer_grouped, 6)
    return df_mixer_grouped
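
Note: a minimal sketch of the groupby-transform that computes the per-batch mixing-weight difference above, with made-up readings:

import pandas as pd

toy = pd.DataFrame({'day': ['d1', 'd1', 'd1'],
                    'Mixing batch number': [1.0, 1.0, 1.0],
                    'Mixing Weight (Integrated Value)': [10.0, 25.0, 40.0]})
toy['Mixing Weight (Integrated Value)_diff'] = (
    toy.groupby(['day', 'Mixing batch number'])['Mixing Weight (Integrated Value)']
       .transform(lambda x: x.max() - x.min()))
print(toy['Mixing Weight (Integrated Value)_diff'].tolist())  # [30.0, 30.0, 30.0]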
def create_status_column(df, type_col_name, columns_list):
    status_col = []
    for i, val in enumerate(df[type_col_name]):
        if val == 0:
            status_col.append(False)
        else:
            if any(df[column].iloc[i] == 0 for column in columns_list):
                status_col.append(False)
            else:
                status_col.append(True)
    return status_col
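
Note: a minimal sketch of how create_status_column behaves on a toy frame (assuming the function above is in scope):

import pandas as pd

toy = pd.DataFrame({'Weight_type1': [0.0, 0.5, 0.5],
                    'DIRT_type1': [3.0, 0.0, 2.0],
                    'ASH_type1': [1.0, 1.0, 1.0]})
print(create_status_column(toy, 'Weight_type1', ['DIRT_type1', 'ASH_type1']))
# [False, False, True] -- False when the weight is 0 or any listed property is 0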
def preprocess_viscosity_section(viscosity_df, index_number):
    # adding date col to the viscosity df
    viscosity_df = viscosity_df.sort_values(by='Mixing date')
    raw_viscosity_df = viscosity_df.sort_values(by='Mixing date')
    viscosity_df['date'] = viscosity_df['Mixing date'].dt.date
    viscosity_df['batch-date'] = 'Batch_' + viscosity_df['Batch No.'].astype(str) + '_' + \
        viscosity_df['date'].astype(str)
    viscosity_df = viscosity_df[viscosity_df['Index No'] == index_number]
    rubber_cols = ViscosityConstants.rubber_cols
    # Replace '-' with 0 for numerical and float columns
    viscosity_df[rubber_cols] = viscosity_df[rubber_cols].replace('-', 0)
    viscosity_df[rubber_cols] = viscosity_df[rubber_cols].apply(pd.to_numeric, errors='coerce')
    # Identify numerical and float columns
    numerical_cols = viscosity_df.columns[viscosity_df.dtypes.apply(
        lambda x: pd.api.types.is_numeric_dtype(x) or pd.api.types.is_float_dtype(x))]
    integer_cols = viscosity_df.columns[viscosity_df.dtypes == 'int64']
    # Convert integer columns to float
    viscosity_df[integer_cols] = viscosity_df[integer_cols].astype(float)
    # Calculate weights
    viscosity_df['Weight_type1'] = round(viscosity_df['Quantity using type1 bale'] /
                                         (viscosity_df['Quantity using type1 bale'] +
                                          viscosity_df['Quantity using type2 bale']), 2)
    viscosity_df['Weight_type2'] = round(viscosity_df['Quantity using type2 bale'] /
                                         (viscosity_df['Quantity using type1 bale'] +
                                          viscosity_df['Quantity using type2 bale']), 2)
    viscosity_df['Weighted_PO_type'] = (viscosity_df['PO_type1'] * viscosity_df['Weight_type1'] +
                                        viscosity_df[f'PO_type2'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_DIRT_type'] = (viscosity_df['DIRT_type1'] * viscosity_df['Weight_type1'] +
                                          viscosity_df['DIRT_type1.1'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_ASH_type'] = (viscosity_df['ASH_type1'] * viscosity_df['Weight_type1'] +
                                         viscosity_df['ASH_type2'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_VM_type'] = (viscosity_df['VM_type1'] * viscosity_df['Weight_type1'] +
                                        viscosity_df['VM_type2'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_PRI_type'] = (viscosity_df['PRI_type1'] * viscosity_df['Weight_type1'] +
                                         viscosity_df[f'PRI_type2'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_NITROGEN_type'] = (viscosity_df['NITROGEN_type1'] * viscosity_df['Weight_type1'] +
                                              viscosity_df['NITROGEN_type2'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_Temperature during transportation_type[℃]'] = (
        viscosity_df['Temperature during transportation_type1[℃]'] * viscosity_df['Weight_type1'] +
        viscosity_df['Temperature during transportation_type2[℃]'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_Humidity during transportation__type[%]'] = (
        viscosity_df['Humidity during transportation_type1[%]'] * viscosity_df['Weight_type1'] +
        viscosity_df['Humidity during transportation__type2[%]'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted Sum'] = viscosity_df['Weighted_PO_type'] + viscosity_df['Weighted_DIRT_type'] + \
        viscosity_df['Weighted_ASH_type'] + viscosity_df['Weighted_VM_type'] + \
        viscosity_df['Weighted_PRI_type'] + viscosity_df['Weighted_NITROGEN_type']
    column_to_keep_at_end = 'viscosity'
    # Reorder columns
    new_order = [col for col in viscosity_df.columns if col != column_to_keep_at_end] + [column_to_keep_at_end]
    viscosity_df = viscosity_df[new_order]
    viscosity_df['batch-date'] = 'Batch_' + viscosity_df['Batch No.'].astype(str) + '_' + \
        viscosity_df['date'].astype(str)
    # Added Status to check rubber
    # Rubber Type 1
    rubber_1_cols = ['DIRT_type1', 'ASH_type1', 'VM_type1', 'PRI_type1', 'NITROGEN_type1']
    # Rubber Type 2
    rubber_2_cols = ['PO_type2', 'DIRT_type1', 'ASH_type2', 'VM_type2', 'PRI_type2', 'NITROGEN_type2']
    viscosity_df['rubber_status_1'] = create_status_column(viscosity_df, 'Weight_type1', rubber_1_cols)
    viscosity_df['rubber_status_2'] = create_status_column(viscosity_df, 'Weight_type2', rubber_2_cols)
    viscosity_df['status'] = viscosity_df['rubber_status_1'] | viscosity_df['rubber_status_2']
    req_cols = ViscosityConstants.req_cols
    final_viscosity_df = viscosity_df[req_cols]
    final_viscosity_df = round(final_viscosity_df, 6)
    return final_viscosity_df, raw_viscosity_df
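
Note: a worked instance of the bale-weighting formula used above, with made-up property values:

# Weighted_PO_type = PO_type1 * Weight_type1 + PO_type2 * Weight_type2
po_type1, po_type2 = 40.0, 50.0          # hypothetical PO readings for the two bale types
weight_type1, weight_type2 = 0.75, 0.25  # bale-quantity shares, as computed above
print(po_type1 * weight_type1 + po_type2 * weight_type2)  # 42.5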
def mixer_section_start_end_time(raw_df, index_no):
    mixer_cols = [
        'Time Stamp',
        'Size No (INDEX No).3',
        'Size name',
        'Mixing batch number',
        'idle time between batches',
    ]
    mixer_df = raw_df[mixer_cols]
    mixer_df['Time Stamp'] = pd.to_datetime(mixer_df['Time Stamp'])
    mixer_df = mixer_df.sort_values(by='Time Stamp')
    numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
    # Convert numeric columns to float
    mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
    mixer_df['day'] = mixer_df['Time Stamp'].dt.date
    mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_no]
    mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]
    mixer_df['time_min'] = mixer_df['Time Stamp']
    mixer_df['time_max'] = mixer_df['Time Stamp']
    aggregation_dict = {
        'time_min': 'min',
        'time_max': 'max',
    }
    group_by = ['day', 'Mixing batch number']
    df_mixer_grouped = mixer_df.groupby(group_by).agg(aggregation_dict).reset_index()
    df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped['time_max'] - df_mixer_grouped['time_min']
    df_mixer_grouped['mixer_section_time_diff_second'] = \
        df_mixer_grouped['mixer_section_time_diff_second'].dt.total_seconds()
    df_mixer_grouped['batch-date'] = 'Batch_' + df_mixer_grouped['Mixing batch number'].astype(str) + '_' + \
        df_mixer_grouped['day'].astype(str)
    date_dict = {}
    batch_lis = list(df_mixer_grouped['batch-date'].unique())
    for each_bt in batch_lis:
        df_nw = df_mixer_grouped[df_mixer_grouped['batch-date'] == each_bt]
        date_dict[each_bt] = {"start_time": str(list(df_nw['time_min'])[0]),
                              'end_time': str(list(df_nw['time_max'])[0])}
    return date_dict
def return_batch_no_df_1(raw_df, viscosity_df, date_dict, bof_cols, additional_cols, index_no):
    raw_df = raw_df.sort_values(by='Time Stamp')
    raw_df['Time Stamp'] = pd.to_datetime(raw_df['Time Stamp'])
    raw_df["day"] = raw_df["Time Stamp"].dt.date
    raw_df["day"] = raw_df["day"].astype("str")
    raw_df["Mixing batch number"] = raw_df["Mixing batch number"].astype("float")
    raw_df["batch-date"] = (
        "Batch_" + raw_df["Mixing batch number"].astype("str") + "_" + raw_df["day"].astype("str")
    )
    bof_add_cols = bof_cols + additional_cols
    bof_df = raw_df[bof_add_cols]
    sorted_bof_df = bof_df.sort_values(by="Time Stamp", ascending=True)
    sorted_bof_df = sorted_bof_df[sorted_bof_df["Size No (INDEX No).4"] == index_no]
    dt_list = list(sorted_bof_df["day"].unique())
    day_length_dic = {}
    for each_day in dt_list:
        day_df = sorted_bof_df[sorted_bof_df["day"] == each_day]
        if day_df["discharge length"].max() - day_df["discharge length"].min() <= 0:
            value = 0
        else:
            value = day_df["discharge length"].max() - day_df["discharge length"].min()
        day_length_dic[each_day] = value
    # print(day_length_dic)
    sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
    sorted_viscosity_df["day"] = sorted_viscosity_df["Mixing date"].dt.date
    sorted_viscosity_df["day"] = sorted_viscosity_df["day"].astype("str")
    extrud_visc_df = sorted_viscosity_df[
        ["Batch No.", "Input rubber weight(0.1kg)", "day", "Mixing date"]
    ]
    extrud_visc_df["length_from_extruder"] = extrud_visc_df["day"].map(day_length_dic)
    extrud_visc_df["length_from_extruder"] = extrud_visc_df["length_from_extruder"].fillna(0)
    daily_sum_weight = (extrud_visc_df.groupby("day")["Input rubber weight(0.1kg)"].sum() / 10)
    # Add a new column 'm/kg' by dividing 'length_from_extruder' by the sum for each day
    extrud_visc_df["m/kg"] = extrud_visc_df.apply(
        lambda row: row["length_from_extruder"] / daily_sum_weight[row["day"]], axis=1)
    extrud_visc_df["batch_length"] = extrud_visc_df.apply(
        lambda row: row["m/kg"] * row["Input rubber weight(0.1kg)"] / 10, axis=1).astype("float64")
    extrud_visc_df["batch_length"] = extrud_visc_df["batch_length"].apply(math.ceil)
    extrud_visc_df["cumulative_length"] = extrud_visc_df.groupby("day")["batch_length"].cumsum()
    discharge_dict = (
        extrud_visc_df.groupby("day")
        .apply(lambda group: group.set_index("Batch No.").to_dict()["cumulative_length"])
        .to_dict()
    )
    test_sorted_extr_df = sorted_bof_df
    test_df = test_sorted_extr_df
    # Initialize an empty list to store batch numbers
    batch_numbers = []
    # Iterate through each row in the DataFrame
    for index, row in test_df.iterrows():
        day = row["day"]
        discharge_length = row["discharge length"]
        if discharge_length == 0:
            batch_numbers.append(0)
        else:
            # Check if the day is in the dictionary
            if day in discharge_dict:
                # Check if discharge length is less than or equal to the corresponding batch length
                batch_length_dict = discharge_dict[day]
                for batch_no, batch_length in batch_length_dict.items():
                    if discharge_length <= batch_length:
                        batch_numbers.append(batch_no)
                        break
                else:
                    # If no match is found in the dictionary, assign NaN to batch number
                    batch_numbers.append(batch_numbers[-1])
            else:
                # If day is not in the dictionary, assign NaN to batch number
                batch_numbers.append(np.nan)
    # Add the 'batch_no' column to the DataFrame
    test_df["batch_no"] = batch_numbers
    batch_number = 0
    batch_list = []
    started_with_one = False
    current_day = None
    for value, day in zip(list(test_df["lower door open"]), list(test_df["day"])):
        if current_day != day:
            current_day = day
            batch_number = 0
        if value == 1:
            if not started_with_one:
                batch_number += 1
                started_with_one = True
            batch_list.append(batch_number)
        else:
            batch_list.append(batch_number)
            started_with_one = False
    test_df["batch_no"] = test_df["batch_no"].astype("float")
    test_df["extruder_batch_date"] = (
        "Batch_" + test_df["batch_no"].astype("str") + "_" + test_df["day"].astype("str")
    )
    extruder_flag_list = []
    extrud_flg_vms = []
    for i, value in test_df.iterrows():
        if value["batch_no"] == 0.0:
            extruder_flag_list.append("false")
            extrud_flg_vms.append(0)
        else:
            start_time = date_dict.get(value["extruder_batch_date"]).get("start_time")
            end_time = date_dict.get(value["extruder_batch_date"]).get("end_time")
            if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S')
                    > datetime.strptime(start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                    (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S')
                     < datetime.strptime(end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                extruder_flag_list.append("true")
                extrud_flg_vms.append(1)
            else:
                extruder_flag_list.append("false")
                extrud_flg_vms.append(0)
    test_df["extruder_flag"] = extruder_flag_list
    test_df["extruder_batch_diff"] = extrud_flg_vms
    test_df["updtaed_bt_list"] = batch_list
    test_df["extruder_batch_number"] = test_df["batch_no"] - test_df["extruder_batch_diff"].astype("float")
    test_df["batch-date"] = (
        "Batch_" + test_df["extruder_batch_number"].astype("str") + "_" + test_df["day"].astype("str")
    )
    return test_df
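
Note: a minimal sketch of the rising-edge counter applied to the 'lower door open' signal above, on a made-up signal:

signal = [0, 1, 1, 0, 1, 0]  # hypothetical 'lower door open' readings
batch_number, started_with_one, batch_list = 0, False, []
for value in signal:
    if value == 1:
        if not started_with_one:
            batch_number += 1  # count each new rising edge as a new batch
            started_with_one = True
        batch_list.append(batch_number)
    else:
        batch_list.append(batch_number)
        started_with_one = False
print(batch_list)  # [0, 1, 1, 1, 2, 2]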
def preprocess_extruder_section(df, index_number, vis_df):
    extruder_cols = ExtruderConstants.extruder_cols
    additional_columns = ['Time Stamp']
    df_extruder = df[extruder_cols + additional_columns]
    df_extruder['Time Stamp'] = pd.to_datetime(df_extruder['Time Stamp'])
    df_extruder = df_extruder.sort_values(by='Time Stamp')
    df_extruder['day'] = df_extruder['Time Stamp'].dt.date
    df_extruder['day'] = df_extruder['day'].astype('str')
    sorted_extrud_df = df_extruder.sort_values(by="Time Stamp", ascending=True)
    sorted_extrud_df = sorted_extrud_df[sorted_extrud_df['Size No (INDEX No).4'] == index_number]
    drop_col = ['spare.19', 'spare.20', 'spare.21', 'spare.22', 'spare.23', 'spare.24', 'spare.25',
                'Hopper bank upper limit', 'middle of hopper bank', 'Hopper bank lower limit',
                'Hopper bank below lower limit']
    sorted_extrud_df.drop(columns=drop_col, inplace=True)
    date_dict = mixer_section_start_end_time(df, index_number)
    additional_cols = ['day', 'Time Stamp', 'lower door open']
    # adding date col to the viscosity df
    vis_df = vis_df.sort_values(by='Mixing date')
    vis_df['date'] = vis_df['Mixing date'].dt.date
    vis_df['batch-date'] = 'Batch_' + vis_df['Batch No.'].astype('float').astype(str) + '_' + \
        vis_df['date'].astype(str)
    vis_df = vis_df[vis_df['Index No'] == index_number]
    extruder_merged_df_final = return_batch_no_df_1(df, vis_df, date_dict, extruder_cols,
                                                    additional_cols, index_number)
    extruder_merged_df_final = extruder_merged_df_final[extruder_merged_df_final['extruder_batch_number'] != 0]
    grouped_cols = ['batch-date']
    aggregate_dict = ExtruderConstants.aggregate_dict
    df_extruder_grouped = extruder_merged_df_final.groupby(grouped_cols).agg(aggregate_dict).reset_index()
    col_renamer = {}
    for col, col_agg in aggregate_dict.items():
        if col not in ['viscosity', 'time_min', 'time_max',
                       'Mixing Weight (Integrated Value)_diff', 'max_rpm_count']:
            renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
            col_renamer[col] = renamed_col
        else:
            col_renamer[col] = col
    df_extruder_grouped = df_extruder_grouped.rename(columns=col_renamer)
    df_extruder_grouped = df_extruder_grouped.fillna(df_extruder_grouped.mean())
    df_extruder_grouped = round(df_extruder_grouped, 6)
    return df_extruder_grouped
import math
import warnings
import traceback
from datetime import datetime
import numpy as np
import pandas as pd
from loguru import logger
from scripts.constants.constants import BofConstants
warnings.filterwarnings("ignore")
def mixer_section_start_end_time(raw_df, index_no):
    try:
        mixer_cols = BofConstants.bof_mixer_cols
        mixer_df = raw_df[mixer_cols]
        mixer_df['Time Stamp'] = pd.to_datetime(mixer_df['Time Stamp'])
        mixer_df = mixer_df.sort_values(by='Time Stamp')
        numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
        # Convert numeric columns to float
        mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
        mixer_df['day'] = mixer_df['Time Stamp'].dt.date
        mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_no]
        mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]
        mixer_df['time_min'] = mixer_df['Time Stamp']
        mixer_df['time_max'] = mixer_df['Time Stamp']
        aggregation_dict = {
            'time_min': 'min',
            'time_max': 'max',
        }
        group_by = ['day', 'Mixing batch number']
        df_mixer_grouped = mixer_df.groupby(group_by).agg(aggregation_dict).reset_index()
        df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped['time_max'] - df_mixer_grouped['time_min']
        df_mixer_grouped['mixer_section_time_diff_second'] = \
            df_mixer_grouped['mixer_section_time_diff_second'].dt.total_seconds()
        df_mixer_grouped['batch-date'] = 'Batch_' + df_mixer_grouped['Mixing batch number'].astype(str) + '_' + \
            df_mixer_grouped['day'].astype(str)
        date_dict = {}
        batch_lis = list(df_mixer_grouped['batch-date'].unique())
        for each_bt in batch_lis:
            df_nw = df_mixer_grouped[df_mixer_grouped['batch-date'] == each_bt]
            date_dict[each_bt] = {"start_time": str(list(df_nw['time_min'])[0]),
                                  'end_time': str(list(df_nw['time_max'])[0])}
        return date_dict
    except Exception as err:
        logger.error(f'Error in fetching mixer batch date dictionary: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def
return_batch_no_df
(
raw_df
,
viscosity_df
,
date_dict
,
index_number
):
try
:
logger
.
info
(
'Getting bof batch number'
)
raw_df
[
'day'
]
=
raw_df
[
'Time Stamp'
]
.
dt
.
date
raw_df
[
'day'
]
=
raw_df
[
'day'
]
.
astype
(
'str'
)
raw_df
[
'Mixing batch number'
]
=
raw_df
[
'Mixing batch number'
]
.
astype
(
'float'
)
raw_df
[
'batch-date'
]
=
'Batch_'
+
raw_df
[
'Mixing batch number'
]
.
astype
(
'str'
)
+
'_'
+
raw_df
[
'day'
]
.
astype
(
'str'
)
bof_add_cols
=
BofConstants
.
bof_add_cols
bof_df
=
raw_df
[
BofConstants
.
bof_cols
+
bof_add_cols
]
sorted_bof_df
=
bof_df
.
sort_values
(
by
=
"Time Stamp"
,
ascending
=
True
)
sorted_bof_df
=
sorted_bof_df
[
sorted_bof_df
[
'Size No (INDEX No).5'
]
==
index_number
]
dt_list
=
list
(
sorted_bof_df
[
'day'
]
.
unique
())
day_length_dic
=
{}
for
each_day
in
dt_list
:
day_df
=
sorted_bof_df
[
sorted_bof_df
[
'day'
]
==
each_day
]
if
(
day_df
[
'length passed through'
]
.
max
()
-
day_df
[
'length passed through'
]
.
min
())
<=
0
:
value
=
0
else
:
value
=
day_df
[
'length passed through'
]
.
max
()
-
day_df
[
'length passed through'
]
.
min
()
day_length_dic
[
each_day
]
=
value
sorted_viscosity_df
=
viscosity_df
.
sort_values
(
by
=
"Mixing date"
,
ascending
=
True
)
sorted_viscosity_df
[
'day'
]
=
sorted_viscosity_df
[
'Mixing date'
]
.
dt
.
date
sorted_viscosity_df
[
'day'
]
=
sorted_viscosity_df
[
'day'
]
.
astype
(
'str'
)
extrud_visc_df
=
sorted_viscosity_df
[[
'Batch No.'
,
'Input rubber weight(0.1kg)'
,
'day'
,
'Mixing date'
]]
extrud_visc_df
[
'length_from_extruder'
]
=
extrud_visc_df
[
'day'
]
.
map
(
day_length_dic
)
extrud_visc_df
[
'length_from_extruder'
]
=
extrud_visc_df
[
'length_from_extruder'
]
.
fillna
(
0
)
daily_sum_weight
=
extrud_visc_df
.
groupby
(
'day'
)[
'Input rubber weight(0.1kg)'
]
.
sum
()
/
10
# Add a new column 'm/kg' by dividing 'length_from_extruder' by the sum for each day
extrud_visc_df
[
'm/kg'
]
=
extrud_visc_df
.
apply
(
lambda
row
:
row
[
'length_from_extruder'
]
/
daily_sum_weight
[
row
[
'day'
]],
axis
=
1
)
extrud_visc_df
[
'batch_length'
]
=
extrud_visc_df
.
apply
(
lambda
row
:
row
[
'm/kg'
]
*
row
[
'Input rubber weight(0.1kg)'
]
/
10
,
axis
=
1
)
.
astype
(
'float64'
)
extrud_visc_df
[
'batch_length'
]
=
extrud_visc_df
[
'batch_length'
]
.
apply
(
math
.
ceil
)
extrud_visc_df
[
'cumulative_length'
]
=
extrud_visc_df
.
groupby
(
'day'
)[
'batch_length'
]
.
cumsum
()
discharge_dict
=
extrud_visc_df
.
groupby
(
'day'
)
.
apply
(
lambda
group
:
group
.
set_index
(
'Batch No.'
)
.
to_dict
()[
'cumulative_length'
])
.
to_dict
()
test_sorted_extr_df
=
sorted_bof_df
test_df
=
test_sorted_extr_df
# Initialize an empty list to store batch numbers
batch_numbers
=
[]
# Iterate through each row in the DataFrame
for
index
,
row
in
test_df
.
iterrows
():
day
=
row
[
'day'
]
discharge_length
=
row
[
'length passed through'
]
if
discharge_length
==
0
:
batch_numbers
.
append
(
0
)
else
:
# Check if the day is in the dictionary
if
day
in
discharge_dict
:
# Check if discharge length is less than or equal to the corresponding batch length
batch_length_dict
=
discharge_dict
[
day
]
for
batch_no
,
batch_length
in
batch_length_dict
.
items
():
if
discharge_length
<=
batch_length
:
batch_numbers
.
append
(
batch_no
)
break
else
:
# If no match is found in the dictionary, assign NaN to batch number
batch_numbers
.
append
(
batch_numbers
[
-
1
])
else
:
# If day is not in the dictionary, assign NaN to batch number
batch_numbers
.
append
(
np
.
nan
)
# Add the 'batch_no' column to the DataFrame
test_df
[
'batch_no'
]
=
batch_numbers
batch_number
=
0
batch_list
=
[]
started_with_one
=
False
current_day
=
None
for
value
,
day
in
zip
(
list
(
test_df
[
'lower door open'
]),
list
(
test_df
[
'day'
])):
if
current_day
!=
day
:
current_day
=
day
batch_number
=
0
if
value
==
1
:
if
not
started_with_one
:
batch_number
+=
1
started_with_one
=
True
batch_list
.
append
(
batch_number
)
else
:
batch_list
.
append
(
batch_number
)
started_with_one
=
False
batch_number
=
0
batch_list
=
[]
started_with_one
=
False
for
value
in
test_df
[
'lower door open'
]:
if
value
==
1
:
if
not
started_with_one
:
batch_number
+=
1
started_with_one
=
True
batch_list
.
append
(
batch_number
)
else
:
batch_list
.
append
(
batch_number
)
started_with_one
=
False
        test_df['batch_no'] = test_df['batch_no'].astype('float')
        test_df['bof_batch_date'] = 'Batch_' + test_df['batch_no'].astype('str') + '_' + test_df['day'].astype('str')
        extruder_flag_list = []
        extrud_flg_vms = []
        for i, value in test_df.iterrows():
            if value['batch_no'] == 0.0:
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)
            else:
                start_time = date_dict.get(value["bof_batch_date"]).get("start_time")
                end_time = date_dict.get(value["bof_batch_date"]).get("end_time")
                row_ts = datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S')
                if (row_ts > datetime.strptime(start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) and \
                        (row_ts < datetime.strptime(end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                    extruder_flag_list.append('true')
                    extrud_flg_vms.append(1)
                else:
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
        test_df['bof_flag'] = extruder_flag_list
        test_df['bof_batch_diff'] = extrud_flg_vms
        # test_df['updated_bt_list'] = batch_list
        test_df['bof_batch_number'] = test_df['batch_no'] - test_df['bof_batch_diff'].astype('float')
        test_df['batch-date'] = 'Batch_' + test_df['bof_batch_number'].astype('str') + '_' + test_df['day'].astype('str')
        return test_df
    except Exception as er:
        logger.error(f'Error in adding batch data to bof section: {str(er)}')
        logger.error(traceback.format_exc())
        raise Exception(str(er))
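# A vectorized sketch of the per-row strptime window test used above (assumes
# date_dict has an entry for every bof_batch_date present; not wired in):
# ts = pd.to_datetime(test_df['Time Stamp'].astype(str).str.split('+').str[0])
# starts = test_df['bof_batch_date'].map(lambda k: pd.to_datetime(date_dict[k]['start_time'].split('+')[0]))
# ends = test_df['bof_batch_date'].map(lambda k: pd.to_datetime(date_dict[k]['end_time'].split('+')[0]))
# test_df['bof_batch_diff'] = ((ts > starts) & (ts < ends)).astype(int)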
def preprocess_bof_section(df, index_number, vis_df):
    try:
        df['Time Stamp'] = pd.to_datetime(df['Time Stamp'])
        df = df.sort_values(by='Time Stamp')
        df['day'] = df['Time Stamp'].dt.date
        df['day'] = df['day'].astype('str')
        date_dict = mixer_section_start_end_time(df, index_number)
        bof_merged_df_final = return_batch_no_df(df, vis_df, date_dict, index_number)
        bof_merged_df_final = bof_merged_df_final[bof_merged_df_final['bof_batch_number'] != 0]
        # print(bof_merged_df_final.columns)
        grouped_cols = ['batch-date']
        aggregate_dict = BofConstants.bof_aggregate_dict
        df_bof_grouped = bof_merged_df_final.groupby(grouped_cols).agg(aggregate_dict).reset_index()
        col_renamer = {}
        for col, col_agg in aggregate_dict.items():
            if col not in ['viscosity', 'time_min', 'time_max', 'Mixing Weight (Integrated Value)_diff',
                           'max_rpm_count']:
                renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
                col_renamer[col] = renamed_col
            else:
                col_renamer[col] = col
        df_bof_grouped = df_bof_grouped.rename(columns=col_renamer)
        df_bof_grouped_rest = df_bof_grouped.drop('batch-date', axis=1)
        df_bof_grouped_rest = df_bof_grouped_rest.fillna(df_bof_grouped_rest.mean())
        df_bof_grouped_rest = round(df_bof_grouped_rest, 6)
        df_bof_grouped_rest['batch-date'] = df_bof_grouped['batch-date']
        return df_bof_grouped_rest
    except Exception as err:
        logger.error(f'Error in fetching the bof preprocess data: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
class ModelLoader(object):
    def __init__(self, model_info):
        self.model_info = model_info

    def load_model(self):
        logger.info("Loading the Model")
        if self.model_info["type"] == "mlflow.sklearn":
            return self._load_mlflow_sklearn_model()
        else:
            logger.info("Unsupported Model Type")

    def _load_mlflow_sklearn_model(self):
        try:
            _model = mlflow.sklearn.load_model(self.model_info["path"])
            logger.debug("Model loaded successfully!")
            return _model
        except Exception as e:
            logger.error("Error while loading mlflow.sklearn model : {}".format(str(e)))
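# Example usage of ModelLoader (hypothetical artifact path; the MLflow model
# directory must already exist on disk):
# model = ModelLoader({"type": "mlflow.sklearn", "path": "models/viscosity_1250"}).load_model()
# y_hat = model.predict(feature_frame)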
class RawConstants:
    columns = [
        'Time Stamp', 'Shipper size No.', 'Shipper No.1 DH', 'Shipper No.1 Pallet', 'Shipper No.3 DH',
        'Shipper No.2 Pallet', 'Shipper No.3 DH.1', 'Shipper No.3 Pallet', 'Size No (INDEX No)',
        'Weighing times', 'Process mass', 'Mass', 'Material detection', 'Surface temperature (mixer side)',
        'Surface temperature (center)', 'Surface temperature (receiving side)', 'temperature', 'humidity',
        'Weighing command No.', 'spare', 'spare.1', 'spare.2', 'spare.3', 'spare.4',
        'Size No (INDEX No).1', 'Weighing times.1', 'Process mass.1', 'real mass', 'spare.5', 'spare.6',
        'spare.7', 'Size No (INDEX No).2', 'Weighing times.2', 'Process mass.2',
        'CB weighing machine measurement', 'Dust collection duct (Immediately after ****)',
        'Dust collection duct (before dust collector)\n', 'CB slot open', 'CB slot closed', 'carbon cycle',
        'carbon2 cycle', 'spare.8', 'spare.9', 'spare.10', 'spare.11', 'Size No (INDEX No).3', 'Size name',
        'Mixing batch number', 'Mixing Weight (Integrated Value)', 'Rotor actual rpm', 'Mixing timer value',
        'Temperature (DS side)', 'Temperature (WS side)', 'Electric power', 'Electric energy',
        'Mixing electric power average', 'Ram pressure', 'Ram rising', 'Ram down', 'Ram position',
        'front door open', 'Front door closed', 'lower door open', 'lower door closed',
        'Before mixer rotation detection', 'After mixer rotation detection',
        'Drilled side left Inlet side Cooling water temperature',
        'Drilled side left Exit side Cooling water temperature',
        'Drilled side right Inlet side Cooling water temperature',
        'Drilled side right Exit side Cooling water temperature',
        'Mixer rotor left inlet side Coolant temperature',
        'Mixer rotor left output side Cooling water temperature',
        'Mixer rotor right inlet side Coolant temperature',
        'Mixer rotor right exit side Cooling water temperature', 'Mixer body temperature',
        'Drilled side left Inlet side Cooling water flow rate',
        'Drilled side left Exit side Cooling water flow rate',
        'Drilled side right Inlet side Cooling water flow rate',
        'Drilled side right Exit side Cooling water flow rate',
        'Mixer rotor left inlet side Cooling water flow rate',
        'Mixer rotor left outlet side Cooling water flow rate',
        'Mixer rotor right inlet side Cooling water flow rate',
        'Mixer rotor right outlet side Cooling water flow rate', 'temperature.1', 'humidity.1',
        'idle time between batches', 'spare.12', 'spare.13', 'spare.14', 'spare.15', 'spare.16',
        'spare.17', 'spare.18', 'Size No (INDEX No).4', 'discharge length', 'Hopper bank upper limit',
        'middle of hopper bank', 'Hopper bank lower limit', 'Hopper bank below lower limit',
        'Extruder rpm', 'Extruder current', 'Calendar rpm', ' Calendar current', 'Calendar bank load',
        'Calendar GAP Operation side', 'Calendar GAP Opposite operation side', 'Residence time',
        'Screw operation side Inlet side Cooling water temperature',
        'Screw operation side Outlet side Cooling water temperature',
        'Screw Opposite operation side Inlet side Cooling water temperature',
        'Screw Opposite operation side Outlet side Cooling water temperature',
        'Calender roll Lower side Inlet side Cooling water temperature',
        'Calender roll Lower side Outlet side Cooling water temperature',
        'Calender roll upper side Inlet side Cooling water temperature',
        'Calender roll Upper side Outlet side Cooling water temperature',
        'Screw operation side Inlet side Cooling water flow rate',
        'Screw operation side Outlet side Cooling water flow rate',
        'Screw Opposite operation side Inlet side Cooling water flow rate',
        'Screw Opposite operation side Outlet side Cooling water flow rate',
        'Calender roll Lower side Inlet side Cooling water flow rate',
        'Calender roll Lower side Outlet side Cooling water flow rate',
        'Calender roll upper side Inlet side Cooling water flow rate',
        'Calender roll Upper side Outlet side Cooling water flow rate', 'Extruder body temperature',
        'spare.19', 'spare.20', 'spare.21', 'spare.22', 'spare.23', 'spare.24', 'spare.25',
        'Size No (INDEX No).5', 'length passed through', 'Material detection.1',
        'Sheet temperature immediately after calendering', 'Withdrawal CV speed', 'DUST CV\nspeed',
        'spare.26', 'spare.27', 'spare.28', 'Size No (INDEX No).6', 'length passed through.1',
        'Material detection.2', 'Seat temperature immediately after BOF', 'temperature.2', 'humidity.2',
        'spare.29', 'spare.30', 'spare.31', 'spare.32', 'Size No (INDEX No).7', 'Setting length',
        'length passed through(Integrated Value)', 'Mass\n(Integrated Value)', 'Pallet No.',
        'Loading completion flag', 'spare.33', 'spare.34', 'spare.35', 'spare.36', 'mixer cooling water',
        'Under cooling water'
    ]
class ViscosityConstants:
    rubber_cols = [
        'Quantity using type1 bale', 'PO_type1', 'DIRT_type1', 'ASH_type1', 'VM_type1', 'PRI_type1',
        'NITROGEN_type1', 'Temperature during transportation_type1[℃]',
        'Humidity during transportation_type1[%]', 'Quantity using type2 bale', 'PO_type2',
        'DIRT_type1.1', 'ASH_type2', 'VM_type2', 'PRI_type2', 'NITROGEN_type2',
        'Temperature during transportation_type2[℃]', 'Humidity during transportation__type2[%]'
    ]
    req_cols = [
        'Rubber No.', 'Batch No.', 'Index No', 'Chemical weight (g)', 'Input rubber weight(0.1kg)',
        'date', 'batch-date', 'Weight_type1', 'Weight_type2', 'Weighted_PO_type', 'Weighted_DIRT_type',
        'Weighted_ASH_type', 'Weighted_VM_type', 'Weighted_PRI_type', 'Weighted_NITROGEN_type',
        'Weighted_Temperature during transportation_type[℃]',
        'Weighted_Humidity during transportation__type[%]', 'Weighted Sum', 'viscosity', 'status'
    ]
class SheetConstants:
    sheet_supply_column = [
        'Time Stamp', 'Shipper size No.', 'Shipper No.1 DH', 'Shipper No.1 Pallet', 'Shipper No.3 DH',
        'Shipper No.2 Pallet', 'Shipper No.3 DH.1', 'Shipper No.3 Pallet', 'Size No (INDEX No)',
        'Weighing times', 'Process mass', 'Mass', 'Material detection',
        'Surface temperature (mixer side)', 'Surface temperature (center)',
        'Surface temperature (receiving side)', 'temperature', 'humidity', 'Weighing command No.',
        'spare', 'spare.1', 'spare.2', 'spare.3', 'spare.4'
    ]
    aggregation_dict = {
        "Surface temperature (mixer side)": "mean",
        "Surface temperature (center)": "std",
        "Surface temperature (receiving side)": "mean",
        "temperature": "mean",
        "humidity": "mean",
        'Process mass': 'mean',
    }
class MixerConstants:
    mixer_cols = [
        'Time Stamp', 'Size No (INDEX No).3', 'Size name', 'Mixing batch number',
        'Mixing Weight (Integrated Value)', 'Rotor actual rpm', 'Mixing timer value',
        'Temperature (DS side)', 'Temperature (WS side)', 'Electric power', 'Electric energy',
        'Mixing electric power average', 'Ram pressure', 'Ram rising', 'Ram down', 'Ram position',
        'front door open', 'Front door closed', 'lower door open', 'lower door closed',
        'Before mixer rotation detection', 'After mixer rotation detection',
        'Drilled side left Inlet side Cooling water temperature',
        'Drilled side left Exit side Cooling water temperature',
        'Drilled side right Inlet side Cooling water temperature',
        'Drilled side right Exit side Cooling water temperature',
        'Mixer rotor left inlet side Coolant temperature',
        'Mixer rotor left output side Cooling water temperature',
        'Mixer rotor right inlet side Coolant temperature',
        'Mixer rotor right exit side Cooling water temperature', 'Mixer body temperature',
        'Drilled side left Inlet side Cooling water flow rate',
        'Drilled side left Exit side Cooling water flow rate',
        'Drilled side right Inlet side Cooling water flow rate',
        'Drilled side right Exit side Cooling water flow rate',
        'Mixer rotor left inlet side Cooling water flow rate',
        'Mixer rotor left outlet side Cooling water flow rate',
        'Mixer rotor right inlet side Cooling water flow rate',
        'Mixer rotor right outlet side Cooling water flow rate', 'temperature.1', 'humidity.1',
        'idle time between batches',
    ]
    aggregation_dict = {
        'Mixing timer value': 'max',
        'Temperature (DS side)': 'mean',
        'Temperature (WS side)': 'std',
        'Electric power': 'mean',
        'Electric energy': 'mean',
        'Mixing electric power average': 'mean',
        'Ram pressure': 'mean',
        # 'Ram rising': '',
        # 'Ram down': '',
        'Ram position': 'std',
        # 'front door open': '',
        # 'Front door closed': '',
        # 'lower door open': '',
        # 'lower door closed': '',
        # 'Before mixer rotation detection': '',
        # 'After mixer rotation detection': '',
        'Drilled side left Inlet side Cooling water temperature': 'std',
        'Drilled side left Exit side Cooling water temperature': 'mean',
        'Drilled side right Inlet side Cooling water temperature': 'mean',
        'Drilled side right Exit side Cooling water temperature': 'std',
        'Mixer rotor left inlet side Coolant temperature': 'std',
        'Mixer rotor left output side Cooling water temperature': 'mean',
        'Mixer rotor right inlet side Coolant temperature': 'mean',
        'Mixer rotor right exit side Cooling water temperature': 'std',
        'Mixer body temperature': 'mean',
        'Drilled side left Inlet side Cooling water flow rate': 'std',
        'Drilled side left Exit side Cooling water flow rate': 'mean',
        'Drilled side right Inlet side Cooling water flow rate': 'mean',
        'Drilled side right Exit side Cooling water flow rate': 'std',
        'Mixer rotor left inlet side Cooling water flow rate': 'std',
        'Mixer rotor left outlet side Cooling water flow rate': 'mean',
        'Mixer rotor right inlet side Cooling water flow rate': 'mean',
        'Mixer rotor right outlet side Cooling water flow rate': 'std',
        'temperature.1': 'mean',
        'humidity.1': 'mean',
        'idle time between batches': 'mean',
        'Mixing Weight (Integrated Value)_diff': 'max',  # any agg will work
        'max_rpm_count': 'max'  # any agg will work
    }
class ExtruderConstants:
    extruder_cols = [
        'Size No (INDEX No).4', 'discharge length', 'Hopper bank upper limit', 'middle of hopper bank',
        'Hopper bank lower limit', 'Hopper bank below lower limit', 'Extruder rpm', 'Extruder current',
        'Calendar rpm', ' Calendar current', 'Calendar bank load', 'Calendar GAP Operation side',
        'Calendar GAP Opposite operation side', 'Residence time',
        'Screw operation side Inlet side Cooling water temperature',
        'Screw operation side Outlet side Cooling water temperature',
        'Screw Opposite operation side Inlet side Cooling water temperature',
        'Screw Opposite operation side Outlet side Cooling water temperature',
        'Calender roll Lower side Inlet side Cooling water temperature',
        'Calender roll Lower side Outlet side Cooling water temperature',
        'Calender roll upper side Inlet side Cooling water temperature',
        'Calender roll Upper side Outlet side Cooling water temperature',
        'Screw operation side Inlet side Cooling water flow rate',
        'Screw operation side Outlet side Cooling water flow rate',
        'Screw Opposite operation side Inlet side Cooling water flow rate',
        'Screw Opposite operation side Outlet side Cooling water flow rate',
        'Calender roll Lower side Inlet side Cooling water flow rate',
        'Calender roll Lower side Outlet side Cooling water flow rate',
        'Calender roll upper side Inlet side Cooling water flow rate',
        'Calender roll Upper side Outlet side Cooling water flow rate', 'Extruder body temperature',
        'spare.19', 'spare.20', 'spare.21', 'spare.22', 'spare.23', 'spare.24', 'spare.25'
    ]
    aggregate_dict = {
        'discharge length': "max",
        'Extruder rpm': "mean",
        'Extruder current': "std",
        'Calendar rpm': "std",
        ' Calendar current': "mean",
        'Calendar bank load': "max",
        'Calendar GAP Operation side': "median",
        'Calendar GAP Opposite operation side': "std",
        'Residence time': "max",
        'Screw operation side Inlet side Cooling water temperature': "mean",
        'Screw operation side Outlet side Cooling water temperature': "std",
        'Screw Opposite operation side Inlet side Cooling water temperature': "mean",
        'Screw Opposite operation side Outlet side Cooling water temperature': "std",
        'Calender roll Lower side Inlet side Cooling water temperature': "mean",
        'Calender roll Lower side Outlet side Cooling water temperature': "std",
        'Calender roll upper side Inlet side Cooling water temperature': "mean",
        'Calender roll Upper side Outlet side Cooling water temperature': "std",
        'Screw operation side Inlet side Cooling water flow rate': "mean",
        'Screw operation side Outlet side Cooling water flow rate': "std",
        'Screw Opposite operation side Inlet side Cooling water flow rate': "mean",
        'Screw Opposite operation side Outlet side Cooling water flow rate': "std",
        'Calender roll Lower side Inlet side Cooling water flow rate': "mean",
        'Calender roll Lower side Outlet side Cooling water flow rate': "std",
        'Calender roll upper side Inlet side Cooling water flow rate': "mean",
        'Calender roll Upper side Outlet side Cooling water flow rate': "std",
        'Extruder body temperature': "mean"
    }
class PickupConstants:
    pick_cols = [
        'Size No (INDEX No).6', 'length passed through.1', 'Material detection.2',
        'Seat temperature immediately after BOF', 'temperature.2', 'humidity.2', 'spare.29', 'spare.30',
        'spare.31', 'spare.32'
    ]
    pick_imp_mixer_cols = [
        'Time Stamp', 'Size No (INDEX No).3', 'Size name', 'Mixing batch number',
        'idle time between batches',
    ]
    pick_imp_bof_cols = ['Time Stamp', 'Size No (INDEX No).5', 'bof_batch_number']
    pick_additional_cols = ['day', 'Time Stamp', 'length passed through', 'discharge length']
    pick_aggregate_dict = {
        'Seat temperature immediately after BOF': 'mean',
        'viscosity': 'mean'
    }
    pick_grouped_cols = ['batch-date']
class BofConstants:
    bof_cols = [
        'Size No (INDEX No).5', 'length passed through', 'Material detection.1',
        'Sheet temperature immediately after calendering', 'Withdrawal CV speed', 'DUST CV\nspeed',
        'spare.26', 'spare.27', 'spare.28', 'lower door open'
    ]
    bof_add_cols = ['Time Stamp', 'day', 'lower door open']
    bof_mixer_cols = ['Time Stamp', 'Size No (INDEX No).3', 'Size name', 'Mixing batch number',
                      'idle time between batches']
    bof_aggregate_dict = aggregate_dict = {
        'Sheet temperature immediately after calendering': 'mean',
        'Withdrawal CV speed': 'mean',
        'DUST CV\nspeed': 'std'
    }
def model_trainer(df_grouped, index_no, model_path):
    # Despite the name, this loads an already-trained model and evaluates it on a holdout split.
    cols_x, cols_y, saved_model = None, None, None
    if index_no == 1250:
        cols_x = [
            'temperature_ws_side_std', '_calendar_current_mean', 'Weighted_NITROGEN_type',
            'ram_pressure_mean', 'electric_energy_mean',
            'screw_operation_side_outlet_side_cooling_water_flow_rate_std',
            'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean', 'Weighted_VM_type',
            'seat_temperature_immediately_after_bof_mean', 'Weighted_DIRT_type',
            'surface_temperature_center_std', 'residence_time_max',
            'drilled_side_left_exit_side_cooling_water_temperature_mean', 'Weighted_PRI_type',
            'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean',
            'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std',
            'Weighted_ASH_type', 'Weighted_PO_type',
            'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean',
            'drilled_side_right_exit_side_cooling_water_flow_rate_std',
            'Weighted_Humidity during transportation__type[%]'
        ]
        cols_y = "viscosity"
        saved_model = ModelLoader({"type": "mlflow.sklearn", "path": model_path}).load_model()
    elif index_no == 3294:
        cols_x = [
            'Weighted_ASH_type', 'Weighted_NITROGEN_type', 'electric_energy_mean',
            'drilled_side_left_inlet_side_cooling_water_temperature_std',
            'seat_temperature_immediately_after_bof_mean',
            'mixer_rotor_left_outlet_side_cooling_water_flow_rate_mean', 'humidity_mean',
            'drilled_side_left_exit_side_cooling_water_flow_rate_mean',
            'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'calendar_bank_load_max',
            'drilled_side_right_inlet_side_cooling_water_flow_rate_mean', 'Weighted_PRI_type',
            'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean', 'temperature_ws_side_std',
            'dust_cv\nspeed_std', 'mixer_rotor_right_inlet_side_coolant_temperature_mean',
            'ram_position_std', 'drilled_side_right_exit_side_cooling_water_temperature_std',
            'calender_roll_upper_side__outlet__side_cooling_water_temperature_std',
            'Weighted_Temperature during transportation_type[℃]'
        ]
        cols_y = "viscosity"
        saved_model = ModelLoader({"type": "mlflow.sklearn", "path": model_path}).load_model()
    if cols_x is None:
        raise ValueError(f"No feature configuration defined for index number {index_no}")
    req_cols = cols_x + ['viscosity']
    features = df_grouped[cols_x]
    labels = df_grouped[cols_y]
    # df_grouped[req_cols].to_csv('final.csv')
    # Split the data into training and testing sets
    x_train, x_test, y_train, y_test = train_test_split(features, labels, random_state=42, test_size=0.25)
    print(f'x_train shape - {x_train.shape}')
    print(f'x_test shape - {x_test.shape}')
    print(f'y_train shape - {y_train.shape}')
    print(f'y_test shape - {y_test.shape}')
    y_pred = saved_model.predict(x_test)
    predictions = [round(value, 2) for value in y_pred]
    metric_dictionary = dict()
    mae = metrics.mean_absolute_error(y_test, predictions)
    mse = metrics.mean_squared_error(y_test, predictions)
    mape = metrics.mean_absolute_percentage_error(y_test, predictions)
    explained_variance_score = metrics.explained_variance_score(y_test, predictions)
    max_error = metrics.max_error(y_test, predictions)
    r2_score = metrics.r2_score(y_test, predictions)
    median_absolute_error = metrics.median_absolute_error(y_test, predictions)
    mean_poisson_deviance = metrics.mean_poisson_deviance(y_test, predictions)
    mean_gamma_deviance = metrics.mean_gamma_deviance(y_test, predictions)
    metric_dictionary["Mean Absolute Error (MAE)"] = mae
    metric_dictionary["Mean Squared Error (MSE)"] = mse
    metric_dictionary["Root Mean Squared Error (RMSE)"] = np.sqrt(mse)
    metric_dictionary["Mean Absolute Percentage Error (MAPE)"] = mape
    metric_dictionary["Explained Variance Score"] = explained_variance_score
    metric_dictionary["Max Error"] = max_error
    metric_dictionary["Median Absolute Error"] = median_absolute_error
    metric_dictionary["R2 Score"] = r2_score
    metric_dictionary["Mean Gamma Deviance"] = mean_gamma_deviance
    metric_dictionary["Mean Poisson Deviance"] = mean_poisson_deviance
    print(metric_dictionary)
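    # The metrics are only printed above; a sketch for logging them to the MLflow
    # tracking server instead (assumes a tracking URI is configured, and strips
    # parentheses, which MLflow rejects in metric keys):
    # with mlflow.start_run(run_name=f"evaluation_{index_no}"):
    #     clean = {k.replace("(", "").replace(")", ""): float(v) for k, v in metric_dictionary.items()}
    #     mlflow.log_metrics(clean)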
def read_raw_data(raw_path, raw_skip_rows):
    try:
        df = pd.read_excel(raw_path, skiprows=raw_skip_rows)
    except Exception as e:
        logger.warning(f"Excel read failed ({e}); falling back to CSV")
        df = pd.read_csv(raw_path, skiprows=raw_skip_rows)
    if len(df.columns) == len(RawConstants.columns):
        logger.info(f"Total cols are {len(RawConstants.columns)} and are same as the df cols length")
        df.columns = RawConstants.columns
    else:
        # Pad any trailing columns missing from the file with NaN before renaming
        missed_cols = RawConstants.columns[len(df.columns):]
        logger.info(f"missed cols are {missed_cols}")
        for col in missed_cols:
            df[col] = float('nan')
        df.columns = RawConstants.columns
    logger.info(f"Shape of df is {df.shape}")
    return df
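# Example (hypothetical file name): align a raw sensor dump with the fixed schema:
# raw_df = read_raw_data('raw_sensor_dump.xlsx', raw_skip_rows=0)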
def merged_all_sections(sheet_df, mixer_df, extruder_df, bof_df, pickup_df, viscosity_df):
    merged_df = pd.merge(sheet_df, mixer_df, on='batch-date', how='left')
    merged_df = pd.merge(merged_df, extruder_df, on='batch-date', how='left')
    merged_df = pd.merge(merged_df, bof_df, on='batch-date', how='left')
    merged_df = pd.merge(merged_df, pickup_df, on='batch-date', how='left')
    df_grouped = pd.merge(merged_df, viscosity_df, on='batch-date', how='left')
    selected_cols = df_grouped.columns
    df_grouped = df_grouped[df_grouped['status'] == True]
    df_grouped = df_grouped[selected_cols]
    viscosity_rubber_cols = [
        'Weight_type1', 'Weight_type2', 'Weighted_PO_type', 'Weighted_DIRT_type', 'Weighted_ASH_type',
        'Weighted_VM_type', 'Weighted_PRI_type', 'Weighted_NITROGEN_type',
        'Weighted_Temperature during transportation_type[℃]',
        'Weighted_Humidity during transportation__type[%]', 'Weighted Sum', 'viscosity'
    ]
    # Replace 0 values with NaN
    for col in viscosity_rubber_cols:
        df_grouped[col] = df_grouped[col].replace(0, np.nan)
        df_grouped[col] = df_grouped[col].fillna(df_grouped[col].mean())
    # Extract batch number and date
    batch_number = df_grouped['batch-date'].str.extract(r'Batch_(\d+\.\d+)_')[0].astype(float)
    date = pd.to_datetime(df_grouped['batch-date'].str.extract(r'_(\d{4}-\d{2}-\d{2})$')[0])
    # Add extracted data as separate columns
    df_grouped['Batch Number'] = batch_number
    df_grouped['Date'] = date
    # Sort by 'Batch Number' and 'Date'
    df_grouped = df_grouped.sort_values(by=['Date', 'Batch Number'])
    df_grouped = round(df_grouped, 6)
    return df_grouped
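# The chained left-joins above can be folded equivalently with functools.reduce
# (a behaviour-preserving sketch, not wired in):
# from functools import reduce
# frames = [sheet_df, mixer_df, extruder_df, bof_df, pickup_df, viscosity_df]
# merged = reduce(lambda left, right: pd.merge(left, right, on='batch-date', how='left'), frames)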
def load_and_predict(df_grouped, index_no, model_path):
    if index_no == 1250:
        logger.info(f"Loading model for {index_no}")
        saved_model = ModelLoader({"type": "mlflow.sklearn", "path": model_path}).load_model()
        cols_x = [
            'temperature_ws_side_std', '_calendar_current_mean', 'Weighted_NITROGEN_type',
            'ram_pressure_mean', 'electric_energy_mean',
            'screw_operation_side_outlet_side_cooling_water_flow_rate_std',
            'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean', 'Weighted_VM_type',
            'seat_temperature_immediately_after_bof_mean', 'Weighted_DIRT_type',
            'surface_temperature_center_std', 'residence_time_max',
            'drilled_side_left_exit_side_cooling_water_temperature_mean', 'Weighted_PRI_type',
            'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean',
            'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std',
            'Weighted_ASH_type', 'Weighted_PO_type',
            'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean',
            'drilled_side_right_exit_side_cooling_water_flow_rate_std',
            'Weighted_Humidity during transportation__type[%]'
        ]
        cols_y = "viscosity"
        features = df_grouped[cols_x]
        labels = df_grouped[cols_y]
        y_pred_full = saved_model.predict(features)
        df_grouped['predicted_viscosity'] = y_pred_full
        final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']]
        final_df.to_csv(f'{index_no}_final_predicted_viscosity.csv')
    elif index_no == 3294:
        logger.info(f"Loading model for {index_no}")
        saved_model = ModelLoader({"type": "mlflow.sklearn", "path": model_path}).load_model()
        cols_x = [
            'Weighted_ASH_type', 'Weighted_NITROGEN_type', 'electric_energy_mean',
            'drilled_side_left_inlet_side_cooling_water_temperature_std',
            'seat_temperature_immediately_after_bof_mean',
            'mixer_rotor_left_outlet_side_cooling_water_flow_rate_mean', 'humidity_mean',
            'drilled_side_left_exit_side_cooling_water_flow_rate_mean',
            'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean', 'calendar_bank_load_max',
            'drilled_side_right_inlet_side_cooling_water_flow_rate_mean', 'Weighted_PRI_type',
            'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean', 'temperature_ws_side_std',
            'dust_cv\nspeed_std', 'mixer_rotor_right_inlet_side_coolant_temperature_mean',
            'ram_position_std', 'drilled_side_right_exit_side_cooling_water_temperature_std',
            'calender_roll_upper_side__outlet__side_cooling_water_temperature_std',
            'Weighted_Temperature during transportation_type[℃]'
        ]
        cols_y = "viscosity"
        features = df_grouped[cols_x]
        labels = df_grouped[cols_y]
        y_pred_full = saved_model.predict(features)
        df_grouped['predicted_viscosity'] = y_pred_full
        final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']]
        final_df.to_csv(f'{index_no}_final_predicted_viscosity.csv')
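# The two branches above differ only in the feature list; a table-driven sketch
# (lists elided here, same contents as above) would collapse them:
# FEATURES_BY_INDEX = {1250: [...], 3294: [...]}
# cols_x = FEATURES_BY_INDEX[index_no]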
def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosity_skip_rows, model_path):
    logger.info(f"Starting prediction for {index_no}")
    logger.info("Reading raw file data")
    df = read_raw_data(raw_path, raw_skip_rows)
    logger.info(f"Shape of raw df is {df.shape}")
    logger.info("Starting preprocessing material section")
    visc_df = pd.read_excel(viscosity_path, skiprows=viscosity_skip_rows)
    viscosity_df, raw_viscosity_df = preprocess_viscosity_section(visc_df, index_no)
    # viscosity_df.to_csv('viscosity-agg.csv')
    logger.info(f"The shape of the viscosity df is {viscosity_df.shape}")
    logger.info("Completed material section preprocessing")
    logger.info("Starting preprocessing sheet section")
    df_sheet_grouped = preprocess_sheet_section(df, index_no)
    logger.info(f"The shape of the Sheet df is {df_sheet_grouped.shape}")
    logger.info("Completed sheet section preprocessing")
    # df_sheet_grouped.to_csv('sheet-agg.csv')
    logger.info("Starting preprocessing mixer section")
    df_mixer_grouped = preprocess_mixer_section(df, index_no)
    logger.info(f"The shape of the Mixer df is {df_mixer_grouped.shape}")
    logger.info("Completed mixer section preprocessing")
    # df_mixer_grouped.to_csv('mixer-agg.csv')
    logger.info("Starting preprocessing extruder section")
    df_extruder_grouped = preprocess_extruder_section(df, index_no, raw_viscosity_df)
    logger.info(f"The shape of the Extruder df is {df_extruder_grouped.shape}")
    logger.info("Completed extruder section preprocessing")
    # df_extruder_grouped.to_csv('extruder-agg.csv')
    logger.info("Starting preprocessing bof section")
    df_bof_grouped = preprocess_bof_section(df, index_no, raw_viscosity_df)
    logger.info(f"The shape of the BOF df is {df_bof_grouped.shape}")
    logger.info("Completed bof section preprocessing")
    # df_bof_grouped.to_csv('bof-agg.csv')
    # bof_desc = df_bof_grouped.describe()
    # bof_desc.to_csv('bof-describe.csv')
    logger.info("Starting preprocessing pickup section")
    df_pickup_grouped = preprocess_pickup_section(df, index_no, raw_viscosity_df)
    logger.info(f"The shape of the Pickup df is {df_pickup_grouped.shape}")
    logger.info("Completed pickup section preprocessing")
    # df_pickup_grouped.to_csv('pickup-agg.csv')
    # df = pd.read_csv('pickup-agg.csv')
    # print(df.describe())
    df_grouped = merged_all_sections(df_sheet_grouped, df_mixer_grouped, df_extruder_grouped,
                                     df_bof_grouped, df_pickup_grouped, viscosity_df)
    load_and_predict(df_grouped, index_no, model_path)
    # model_trainer(df_grouped, index_no, model_path)
if __name__ == "__main__":
    try:
        raw_file_path = sys.argv[sys.argv.index('-r') + 1]
        viscosity_file_path = sys.argv[sys.argv.index('-v') + 1]
        raw_file_skip_rows = int(sys.argv[sys.argv.index('-sr') + 1])
        viscosity_file_skip_rows = int(sys.argv[sys.argv.index('-sv') + 1])
        index_number = int(sys.argv[sys.argv.index('-index') + 1])
        model_path = sys.argv[sys.argv.index('-m') + 1]
        start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows,
                         viscosity_file_skip_rows, model_path)
    except Exception as e:
        logger.exception(f"Module failed because of error {e}")
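# Example invocation (hypothetical file names):
#   python main.py -r raw_sensor_dump.xlsx -v viscosity_batches.xlsx -sr 0 -sv 3 -index 1250 -m models/viscosity_1250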