dasharatha.vamshi / bsj-models-for-rubbers · Commits

Commit 5d708a91, authored Dec 21, 2023 by dasharatha.vamshi

    added main

Parent: 1057b00c

Showing 2 changed files with 2024 additions and 13 deletions:
- app.py (+19 / -13)
- main.py (+2005 / -0)
app.py (view file @ 5d708a91)

The commit adds `import sys` and, in the `__main__` block (hunk `@@ -253,18 +253,24 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit`), replaces the two hardcoded parameter sets with positional `sys.argv` lookups; the old values are kept as comments. The block after the change:

import warnings
import sys
import numpy as np
import pandas as pd
from loguru import logger

...

if __name__ == "__main__":
    try:
        # logger.info("Starting the model")
        # index_number = 1250
        # raw_file_path = 'FY676-A-WO_Visc.xlsx'
        # raw_file_skip_rows = 0
        # viscosity_file_path = 'viscosity_natural_rubber_data.xlsx'
        # viscosity_file_skip_rows = 3
        # start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
        # index_number = 3294
        # raw_file_path = 'fy664g_raw.csv'
        # raw_file_skip_rows = 0
        # viscosity_file_path = 'fy664g-viscosity.xlsx'
        # viscosity_file_skip_rows = 2
        # start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
        raw_file_path = sys.argv[sys.argv.index('-r') + 1]
        viscosity_file_path = sys.argv[sys.argv.index('-v') + 1]
        raw_file_skip_rows = int(sys.argv[sys.argv.index('-sr') + 1])
        viscosity_file_skip_rows = int(sys.argv[sys.argv.index('-sv') + 1])
        index_number = int(sys.argv[sys.argv.index('-index') + 1])
        start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows, viscosity_file_skip_rows)
    except Exception as e:
        logger.exception(f"Module failed because of error {e}")
main.py (new file, 0 → 100644; view file @ 5d708a91)
import sys
import warnings

import mlflow
from sklearn import metrics
from sklearn.model_selection import train_test_split

# from scripts.constants.constants import RawConstants
# from scripts.core.model_loader import ModelLoader
# from scripts.section_utils.bof_section import preprocess_bof_section
# from scripts.section_utils.extruder_section import preprocess_extruder_section
# from scripts.section_utils.material_section import preprocess_viscosity_section
# from scripts.section_utils.mixer_section import preprocess_mixer_section
# from scripts.section_utils.pickup_section import preprocess_pickup_section
# from scripts.section_utils.sheet_supply_section import preprocess_sheet_section

warnings.filterwarnings("ignore")
import warnings
# from scripts.constants.constants import ExtruderConstants
warnings.filterwarnings("ignore")
import warnings
# from scripts.constants.constants import ViscosityConstants
warnings.filterwarnings("ignore")
import warnings
# from scripts.constants.constants import MixerConstants
warnings.filterwarnings("ignore")
import warnings
# from scripts.constants.constants import PickupConstants
warnings.filterwarnings("ignore")
import warnings
# from scripts.constants.constants import SheetConstants
warnings.filterwarnings("ignore")
def preprocess_sheet_section(df, index_number):
    sheet_supply_column = SheetConstants.sheet_supply_column
    sheet_supply_df = df[sheet_supply_column]
    sheet_supply_df['Time Stamp'] = pd.to_datetime(sheet_supply_df['Time Stamp'])
    sheet_supply_df = sheet_supply_df.sort_values(by='Time Stamp')
    numeric_cols = sheet_supply_df.select_dtypes(include=['int', 'float']).columns
    # Convert numeric columns to float
    sheet_supply_df[numeric_cols] = sheet_supply_df[numeric_cols].astype(float)
    sheet_supply_df['day'] = sheet_supply_df['Time Stamp'].dt.date
    sheet_supply_df['batch-date'] = 'Batch_' + sheet_supply_df['Weighing times'].astype(str) + '_' + sheet_supply_df['day'].astype(str)
    sheet_supply_df = sheet_supply_df[sheet_supply_df["Size No (INDEX No)"] == index_number]
    sheet_supply_df = sheet_supply_df[sheet_supply_df["Weighing times"] != 0]
    aggregation_dict = SheetConstants.aggregation_dict
    group_by = ['day', 'Weighing times']
    df_sheet_grouped = sheet_supply_df.groupby(group_by).agg(aggregation_dict).reset_index()
    col_renamer = {}
    for col, col_agg in aggregation_dict.items():
        if col not in ['viscosity', 'time_min', 'time_max']:
            renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
            col_renamer[col] = renamed_col
        else:
            col_renamer[col] = col
    df_sheet_grouped = df_sheet_grouped.rename(columns=col_renamer)
    df_sheet_grouped['batch-date'] = 'Batch_' + df_sheet_grouped['Weighing times'].astype(str) + '_' + df_sheet_grouped['day'].astype(str)
    df_sheet_grouped = round(df_sheet_grouped, 6)
    return df_sheet_grouped
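The column-renaming loop above reappears in almost every section preprocessor in this file. A toy run with a hypothetical aggregation dict (the real one comes from `SheetConstants.aggregation_dict`), to show what the f-string produces:

```python
# Hypothetical aggregation dict; 'viscosity' is passed through unchanged.
aggregation_dict = {'Roll rotation speed (Max)': 'mean', 'viscosity': 'mean'}

col_renamer = {}
for col, col_agg in aggregation_dict.items():
    if col not in ['viscosity', 'time_min', 'time_max']:
        # Strip parentheses, join words with '_', append the aggregation name.
        col_renamer[col] = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
    else:
        col_renamer[col] = col

print(col_renamer)
# {'Roll rotation speed (Max)': 'roll_rotation_speed_max_mean', 'viscosity': 'viscosity'}
```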
def get_mixer_batch_date(raw_df, index_number):
    try:
        logger.info('Getting mixer batch date dictionary')
        mixer_df = raw_df[PickupConstants.pick_imp_mixer_cols]
        mixer_df = mixer_df.sort_values(by='Time Stamp')
        numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
        # Convert numeric columns to float
        mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
        mixer_df['day'] = mixer_df['Time Stamp'].dt.date
        mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_number]
        mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]
        mixer_df['time_min'] = mixer_df['Time Stamp']
        mixer_df['time_max'] = mixer_df['Time Stamp']
        aggregation_dict = {
            'time_min': 'min',
            'time_max': 'max',
        }
        group_by = ['day', 'Mixing batch number']
        df_mixer_grouped = mixer_df.groupby(group_by).agg(aggregation_dict).reset_index()
        df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped['time_max'] - df_mixer_grouped['time_min']
        df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped['mixer_section_time_diff_second'].dt.total_seconds()
        df_mixer_grouped['batch-date'] = 'Batch_' + df_mixer_grouped['Mixing batch number'].astype(str) + '_' + \
            df_mixer_grouped['day'].astype(str)
        date_dict = {}
        batch_lis = list(df_mixer_grouped['batch-date'].unique())
        for each_bt in batch_lis:
            df_nw = df_mixer_grouped[df_mixer_grouped['batch-date'] == each_bt]
            date_dict[each_bt] = {"start_time": str(list(df_nw['time_min'])[0]),
                                  'end_time': str(list(df_nw['time_max'])[0])}
        return date_dict
    except Exception as err:
        logger.error(f'Error while getting mixer time and forming date dict: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
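`get_mixer_batch_date` returns one entry per mixer batch, keyed by the `batch-date` label, with the batch's first and last timestamps stored as strings (the flag logic below re-parses them with `strptime`). Shape only; the keys and times here are invented:

```python
# Illustrative shape of the returned date_dict (values invented).
date_dict = {
    "Batch_1.0_2023-11-01": {"start_time": "2023-11-01 08:02:15",
                             "end_time": "2023-11-01 08:14:47"},
    "Batch_2.0_2023-11-01": {"start_time": "2023-11-01 08:20:03",
                             "end_time": "2023-11-01 08:33:10"},
}
```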
def return_batch_no_bof_df(raw_df, viscosity_df, date_dict, index_number):
    try:
        logger.info('Getting bof batch number')
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        raw_df['Mixing batch number'] = raw_df['Mixing batch number'].astype('float')
        raw_df['batch-date'] = 'Batch_' + raw_df['Mixing batch number'].astype('str') + '_' + raw_df['day'].astype('str')
        bof_add_cols = ['Size No (INDEX No).5', 'length passed through', 'Time Stamp', 'day', 'lower door open']
        bof_df = raw_df[bof_add_cols]
        sorted_bof_df = bof_df.sort_values(by="Time Stamp", ascending=True)
        sorted_bof_df = sorted_bof_df[sorted_bof_df['Size No (INDEX No).5'] == index_number]
        dt_list = list(sorted_bof_df['day'].unique())
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_bof_df[sorted_bof_df['day'] == each_day]
            if (day_df['length passed through'].max() - day_df['length passed through'].min()) <= 0:
                value = 0
            else:
                value = day_df['length passed through'].max() - day_df['length passed through'].min()
            day_length_dic[each_day] = value
        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date
        sorted_viscosity_df['day'] = sorted_viscosity_df['day'].astype('str')
        extrud_visc_df = sorted_viscosity_df[['Batch No.', 'Input rubber weight(0.1kg)', 'day', 'Mixing date']]
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['day'].map(day_length_dic)
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['length_from_extruder'].fillna(0)
        daily_sum_weight = extrud_visc_df.groupby('day')['Input rubber weight(0.1kg)'].sum() / 10
        # Add a new column 'm/kg' by dividing 'length_from_extruder' by the sum for each day
        extrud_visc_df['m/kg'] = extrud_visc_df.apply(lambda row: row['length_from_extruder'] / daily_sum_weight[row['day']], axis=1)
        extrud_visc_df['batch_length'] = extrud_visc_df.apply(lambda row: row['m/kg'] * row['Input rubber weight(0.1kg)'] / 10, axis=1).astype('float64')
        extrud_visc_df['batch_length'] = extrud_visc_df['batch_length'].apply(math.ceil)
        extrud_visc_df['cumulative_length'] = extrud_visc_df.groupby('day')['batch_length'].cumsum()
        discharge_dict = extrud_visc_df.groupby('day').apply(lambda group: group.set_index('Batch No.').to_dict()['cumulative_length']).to_dict()
        test_sorted_extr_df = sorted_bof_df
        test_df = test_sorted_extr_df
        # Initialize an empty list to store batch numbers
        batch_numbers = []
        # Iterate through each row in the DataFrame
        for index, row in test_df.iterrows():
            day = row['day']
            discharge_length = row['length passed through']
            if discharge_length == 0:
                batch_numbers.append(0)
            else:
                # Check if the day is in the dictionary
                if day in discharge_dict:
                    # Check if discharge length is less than or equal to the corresponding batch length
                    batch_length_dict = discharge_dict[day]
                    for batch_no, batch_length in batch_length_dict.items():
                        if discharge_length <= batch_length:
                            batch_numbers.append(batch_no)
                            break
                    else:
                        # If no match is found in the dictionary, assign NaN to batch number
                        batch_numbers.append(batch_numbers[-1])
                else:
                    # If day is not in the dictionary, assign NaN to batch number
                    batch_numbers.append(np.nan)
        # Add the 'batch_no' column to the DataFrame
        test_df['batch_no'] = batch_numbers
        batch_number = 0
        batch_list = []
        started_with_one = False
        current_day = None
        for value, day in zip(list(test_df['lower door open']), list(test_df['day'])):
            if current_day != day:
                current_day = day
                batch_number = 0
            if value == 1:
                if not started_with_one:
                    batch_number += 1
                    started_with_one = True
                batch_list.append(batch_number)
            else:
                batch_list.append(batch_number)
                started_with_one = False
        batch_number = 0
        batch_list = []
        started_with_one = False
        for value in test_df['lower door open']:
            if value == 1:
                if not started_with_one:
                    batch_number += 1
                    started_with_one = True
                batch_list.append(batch_number)
            else:
                batch_list.append(batch_number)
                started_with_one = False
        test_df['batch_no'] = test_df['batch_no'].astype('float')
        test_df['bof_batch_date'] = 'Batch_' + test_df['batch_no'].astype('str') + '_' + test_df['day'].astype('str')
        extruder_flag_list = []
        extrud_flg_vms = []
        for i, value in test_df.iterrows():
            if value['batch_no'] == 0.0:
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)
            else:
                # start_time = np.datetime64(date_dict.get(value['bof_batch_date']).get('start_time'))
                # end_time = np.datetime64(date_dict.get(value['bof_batch_date']).get('end_time'))
                start_time = date_dict.get(value["bof_batch_date"]).get("start_time")
                end_time = date_dict.get(value["bof_batch_date"]).get("end_time")
                if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') >
                        datetime.strptime(start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                        (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') <
                         datetime.strptime(end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                    extruder_flag_list.append('true')
                    extrud_flg_vms.append(1)
                else:
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
        test_df['bof_flag'] = extruder_flag_list
        test_df['bof_batch_diff'] = extrud_flg_vms
        test_df['updated_bt_list'] = batch_list
        test_df['bof_batch_number'] = test_df['batch_no'] - test_df['bof_batch_diff'].astype('float')
        test_df = test_df.rename(columns={'bof_batch_date': 'batch-date'})
        return test_df
    except Exception as err:
        logger.error(f'Error while adding batch to bof section: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
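The core of `return_batch_no_bof_df` is a length-based attribution: each day's span of 'length passed through' is pro-rated over that day's batches by rubber weight, and every row is then assigned to the first batch whose cumulative length covers its observed length. A self-contained sketch of that lookup with invented numbers (the real code reads weights in 0.1 kg units, hence its division by 10):

```python
import math

# Invented numbers: one day, three batches, weights already in kg.
weights = {1: 120.0, 2: 100.0, 3: 80.0}   # batch number -> rubber weight
day_total_length = 900.0                  # metres passed through that day

m_per_kg = day_total_length / sum(weights.values())
cumulative, running = {}, 0
for batch_no, w in weights.items():
    running += math.ceil(m_per_kg * w)    # per-batch length, rounded up
    cumulative[batch_no] = running        # cumulative length per batch

def batch_for(length):
    # First batch whose cumulative length covers the observed length;
    # mirrors the for/else scan in return_batch_no_bof_df.
    for batch_no, batch_length in cumulative.items():
        if length <= batch_length:
            return batch_no
    return None

print(cumulative)        # {1: 360, 2: 660, 3: 900}
print(batch_for(100.0))  # 1
print(batch_for(500.0))  # 2
```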
def get_bof_batch_date(bof_batch_df, index_number):
    try:
        logger.info('Getting bof batch date dictionary')
        bof_cols = PickupConstants.pick_imp_bof_cols
        bof_df = bof_batch_df[bof_cols]
        bof_df = bof_df.sort_values(by='Time Stamp')
        numeric_cols = bof_df.select_dtypes(include=['int', 'float']).columns
        # Convert numeric columns to float
        bof_df[numeric_cols] = bof_df[numeric_cols].astype(float)
        bof_df['day'] = bof_df['Time Stamp'].dt.date
        bof_df = bof_df[bof_df["Size No (INDEX No).5"] == index_number]
        bof_df = bof_df[bof_df["bof_batch_number"] != 0]
        bof_df['time_min'] = bof_df['Time Stamp']
        bof_df['time_max'] = bof_df['Time Stamp']
        aggregation_dict = {
            'time_min': 'min',
            'time_max': 'max',
        }
        group_by = ['day', 'bof_batch_number']
        df_bof_grouped = bof_df.groupby(group_by).agg(aggregation_dict).reset_index()
        df_bof_grouped['mixer_section_time_diff_second'] = df_bof_grouped['time_max'] - df_bof_grouped['time_min']
        df_bof_grouped['mixer_section_time_diff_second'] = df_bof_grouped['mixer_section_time_diff_second'].dt.total_seconds()
        df_bof_grouped['batch-date'] = 'Batch_' + df_bof_grouped['bof_batch_number'].astype(str) + '_' + df_bof_grouped['day'].astype(str)
        bof_date_dict = {}
        batch_lis = list(df_bof_grouped['batch-date'].unique())
        for each_bt in batch_lis:
            df_nw = df_bof_grouped[df_bof_grouped['batch-date'] == each_bt]
            bof_date_dict[each_bt] = {"start_time": str(list(df_nw['time_min'])[0]),
                                      'end_time': str(list(df_nw['time_max'])[0])}
        return bof_date_dict
    except Exception as err:
        logger.error(f'Error while getting bof time and forming bof date dict: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def return_fy676a_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, bof_batch_num_df, index_number):
    try:
        logger.info('Getting pickup batch date dataframe')
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        raw_df['Mixing batch number'] = raw_df['Mixing batch number'].astype('float')
        raw_df['batch-date'] = 'Batch_' + raw_df['Mixing batch number'].astype('str') + '_' + raw_df['day'].astype('str')
        pick_add_cols = PickupConstants.pick_cols + PickupConstants.pick_additional_cols
        pick_df = raw_df[pick_add_cols]
        sorted_pick_df = pick_df.sort_values(by="Time Stamp", ascending=True)
        sorted_pick_df = sorted_pick_df[sorted_pick_df['Size No (INDEX No).6'] == index_number]
        dt_list = list(sorted_pick_df['day'].unique())
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_pick_df[sorted_pick_df['day'] == each_day]
            if day_df['length passed through.1'].max() - day_df['length passed through.1'].min() <= 0:
                value = 0
            else:
                value = day_df['length passed through.1'].max() - day_df['length passed through.1'].min()
            day_length_dic[each_day] = value
        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date
        sorted_viscosity_df['day'] = sorted_viscosity_df['day'].astype('str')
        extrud_visc_df = sorted_viscosity_df[['Batch No.', 'Input rubber weight(0.1kg)', 'day', 'Mixing date']]
        extrud_visc_df['length_from_pickup'] = extrud_visc_df['day'].map(day_length_dic)
        extrud_visc_df['length_from_pickup'] = extrud_visc_df['length_from_pickup'].fillna(0)
        daily_sum_weight = extrud_visc_df.groupby('day')['Input rubber weight(0.1kg)'].sum() / 10
        # Add a new column 'm/kg' by dividing 'length_from_pickup' by the sum for each day
        extrud_visc_df['m/kg'] = extrud_visc_df.apply(lambda row: row['length_from_pickup'] / daily_sum_weight[row['day']], axis=1)
        extrud_visc_df['batch_length'] = extrud_visc_df.apply(lambda row: row['m/kg'] * row['Input rubber weight(0.1kg)'] / 10, axis=1).astype('float64')
        extrud_visc_df['batch_length'] = extrud_visc_df['batch_length'].apply(math.ceil)
        extrud_visc_df['cumulative_length'] = extrud_visc_df.groupby('day')['batch_length'].cumsum()
        discharge_dict = extrud_visc_df.groupby('day').apply(lambda group: group.set_index('Batch No.').to_dict()['cumulative_length']).to_dict()
        test_sorted_extr_df = sorted_pick_df
        test_pick_df = test_sorted_extr_df
        # Initialize an empty list to store batch numbers
        batch_numbers = []
        # Iterate through each row in the DataFrame
        for index, row in test_pick_df.iterrows():
            day = row['day']
            discharge_length = row['length passed through.1']
            if discharge_length == 0:
                batch_numbers.append(0)
            else:
                # Check if the day is in the dictionary
                if day in discharge_dict:
                    # Check if discharge length is less than or equal to the corresponding batch length
                    batch_length_dict = discharge_dict[day]
                    for batch_no, batch_length in batch_length_dict.items():
                        if discharge_length <= batch_length:
                            batch_numbers.append(batch_no)
                            break
                    else:
                        # If no match is found in the dictionary, assign NaN to batch number
                        batch_numbers.append(batch_numbers[-1])
                else:
                    # If day is not in the dictionary, assign NaN to batch number
                    batch_numbers.append(np.nan)
        # Add the 'batch_no' column to the DataFrame
        test_pick_df['batch_no'] = batch_numbers
        test_pick_df['batch_no'] = test_pick_df['batch_no'].astype('float')
        test_pick_df['pickup_batch_date'] = 'Batch_' + test_pick_df['batch_no'].astype('str') + '_' + test_pick_df['day'].astype('str')
        test_pick_df['bof_batch_number'] = bof_batch_num_df['bof_batch_number']
        extruder_flag_list = []
        extrud_flg_vms = []
        for i, value in test_pick_df.iterrows():
            if value['batch_no'] == 0.0:
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)
            else:
                # start_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('start_time'))
                # end_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('end_time'))
                start_time = bof_date_dict.get(value["pickup_batch_date"]).get("start_time")
                end_time = bof_date_dict.get(value["pickup_batch_date"]).get("end_time")
                if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') >
                        datetime.strptime(start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                        (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') <
                         datetime.strptime(end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
                else:
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
        test_pick_df['pickup_flag'] = extruder_flag_list
        test_pick_df['pickup_batch_diff'] = extrud_flg_vms
        test_pick_df['pickup_batch_verify_number'] = test_pick_df['batch_no'] - test_pick_df['pickup_batch_diff'].astype('float')
        actual_pickup_bt_num = []
        for i, value in test_pick_df.iterrows():
            pickup_batch_number = value['pickup_batch_verify_number']
            bof_batch_num = value['bof_batch_number']
            if pickup_batch_number <= bof_batch_num:
                actual_pickup_bt_num.append(pickup_batch_number)
            else:
                actual_pickup_bt_num.append(bof_batch_num)
        test_pick_df['pickup_batch_number'] = actual_pickup_bt_num
        test_pick_df['batch-date'] = 'Batch_' + test_pick_df['pickup_batch_number'].astype('str') + '_' + \
            test_pick_df['day'].astype('str')
        return test_pick_df
    except Exception as err:
        logger.error(f'Error in adding batch to fy676a pick section: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, index_no):
    try:
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        raw_df['Mixing batch number'] = raw_df['Mixing batch number'].astype('float')
        raw_df['batch-date'] = 'Batch_' + raw_df['Mixing batch number'].astype('str') + '_' + raw_df['day'].astype('str')
        pick_add_cols = PickupConstants.pick_cols + PickupConstants.pick_additional_cols
        pick_df = raw_df[pick_add_cols]
        sorted_pick_df = pick_df.sort_values(by="Time Stamp", ascending=True)
        sorted_pick_df = sorted_pick_df[sorted_pick_df['Size No (INDEX No).6'] == index_no]
        dt_list = list(sorted_pick_df['day'].unique())
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_pick_df[sorted_pick_df['day'] == each_day]
            minimum = day_df['length passed through.1'].min()
            if minimum <= 0:
                minimum = 0
            if day_df['length passed through.1'].max() - minimum <= 0:
                value = 0
            else:
                value = day_df['length passed through.1'].max() - minimum
            day_length_dic[each_day] = value
        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date
        sorted_viscosity_df['day'] = sorted_viscosity_df['day'].astype('str')
        extrud_visc_df = sorted_viscosity_df[['Batch No.', 'Input rubber weight(0.1kg)', 'day', 'Mixing date']]
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['day'].map(day_length_dic)
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['length_from_extruder'].fillna(0)
        daily_sum_weight = extrud_visc_df.groupby('day')['Input rubber weight(0.1kg)'].sum() / 10
        # Add a new column 'm/kg' by dividing 'length_from_extruder' by the sum for each day
        extrud_visc_df['m/kg'] = extrud_visc_df.apply(lambda row: row['length_from_extruder'] / daily_sum_weight[row['day']], axis=1)
        extrud_visc_df['batch_length'] = extrud_visc_df.apply(lambda row: row['m/kg'] * row['Input rubber weight(0.1kg)'] / 10, axis=1).astype('float64')
        extrud_visc_df['batch_length'] = extrud_visc_df['batch_length'].apply(math.ceil)
        extrud_visc_df['cumulative_length'] = extrud_visc_df.groupby('day')['batch_length'].cumsum()
        discharge_dict = extrud_visc_df.groupby('day').apply(lambda group: group.set_index('Batch No.').to_dict()['cumulative_length']).to_dict()
        test_sorted_extr_df = sorted_pick_df
        test_pick_df = test_sorted_extr_df
        # Initialize an empty list to store batch numbers
        batch_numbers = []
        # Iterate through each row in the DataFrame
        for index, row in test_pick_df.iterrows():
            day = row['day']
            discharge_length = row['length passed through.1']
            if discharge_length == 0:
                batch_numbers.append(0)
            else:
                # Check if the day is in the dictionary
                if day in discharge_dict:
                    # Check if discharge length is less than or equal to the corresponding batch length
                    batch_length_dict = discharge_dict[day]
                    for batch_no, batch_length in batch_length_dict.items():
                        if discharge_length <= batch_length:
                            batch_numbers.append(batch_no)
                            break
                    else:
                        # If no match is found in the dictionary, assign NaN to batch number
                        batch_numbers.append(batch_numbers[-1])
                else:
                    # If day is not in the dictionary, assign NaN to batch number
                    batch_numbers.append(np.nan)
        # Add the 'batch_no' column to the DataFrame
        test_pick_df['batch_no'] = batch_numbers
        test_pick_df['batch_no'] = test_pick_df['batch_no'].astype('float')
        test_pick_df['pickup_batch_date'] = 'Batch_' + test_pick_df['batch_no'].astype('str') + '_' + test_pick_df['day'].astype('str')
        extruder_flag_list = []
        extrud_flg_vms = []
        for i, value in test_pick_df.iterrows():
            if value['batch_no'] == 0.0:
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)
            else:
                # start_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('start_time'))
                # end_time = np.datetime64(bof_date_dict.get(value['pickup_batch_date']).get('end_time'))
                start_time = bof_date_dict.get(value["pickup_batch_date"]).get("start_time")
                end_time = bof_date_dict.get(value["pickup_batch_date"]).get("end_time")
                if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') >
                        datetime.strptime(start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                        (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') <
                         datetime.strptime(end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
                else:
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
        test_pick_df['pickup_flag'] = extruder_flag_list
        test_pick_df['pickup_batch_diff'] = extrud_flg_vms
        test_pick_df['pickup_batch_number'] = test_pick_df['batch_no'] - test_pick_df['pickup_batch_diff'].astype('float')
        test_pick_df['batch-date'] = 'Batch_' + test_pick_df['pickup_batch_number'].astype('str') + '_' + \
            test_pick_df['day'].astype('str')
        return test_pick_df
    except Exception as err:
        logger.error(f"Error while forming pickup batch number for fy664g: {str(err)}")
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def preprocess_pickup_section(raw_df, index_number, viscosity_df):
    try:
        logger.info('Preprocessing and getting aggregated pickup dataframe')
        raw_df['Time Stamp'] = pd.to_datetime(raw_df['Time Stamp'])
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        extr_cols = PickupConstants.pick_cols + PickupConstants.pick_additional_cols
        pick_df = raw_df[extr_cols]
        sorted_pick_df = pick_df.sort_values(by="Time Stamp", ascending=True)
        sorted_pick_df = sorted_pick_df[sorted_pick_df['Size No (INDEX No).6'] == index_number]
        dt_list = list(sorted_pick_df['day'].unique())
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_pick_df[sorted_pick_df['day'] == each_day]
            day_length_dic[each_day] = day_df['length passed through.1'].max() - day_df['length passed through.1'].min()
        ''' Reading viscosity file with skipping 2 rows '''
        viscosity_df['Mixing date'] = pd.to_datetime(viscosity_df['Mixing date'])
        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date
        sorted_viscosity_df['day'] = sorted_viscosity_df['day'].astype('str')
        sorted_viscosity_df['batch-date'] = 'Batch_' + viscosity_df['Batch No.'].astype('float').astype(str) + '_' + sorted_viscosity_df['day'].astype(str)
        sorted_viscosity_df = sorted_viscosity_df[sorted_viscosity_df['Index No'] == index_number]
        date_dict = get_mixer_batch_date(raw_df, index_number)
        bof_test_df = return_batch_no_bof_df(raw_df, sorted_viscosity_df, date_dict, index_number)
        bof_date_dict = get_bof_batch_date(bof_test_df, index_number)
        pick_merged_batch_df = pd.DataFrame()
        if index_number == 1250:
            pick_merged_batch_df = return_fy676a_pick_batch_no_df(raw_df, sorted_viscosity_df, bof_date_dict, bof_test_df, index_number)
        elif index_number == 3294:
            pick_merged_batch_df = return_fy664g_pick_batch_no_df(raw_df, viscosity_df, bof_date_dict, index_number)
        # Merging pick data with viscosity data on date-batch column
        pickup_merged_df_final = pd.merge(pick_merged_batch_df, sorted_viscosity_df[['batch-date', 'viscosity']], on='batch-date', how='left')
        # Removing batch 0
        pickup_merged_df_final = pickup_merged_df_final[pickup_merged_df_final['pickup_batch_number'] != 0]
        # Grouping with aggregated data
        df_pickup_grouped = pickup_merged_df_final.groupby(PickupConstants.pick_grouped_cols).agg(PickupConstants.pick_aggregate_dict).reset_index()
        col_renamer = {}
        for col, col_agg in PickupConstants.pick_aggregate_dict.items():
            if col not in ['viscosity', 'time_min', 'time_max', 'Mixing Weight (Integrated Value)_diff', 'max_rpm_count']:
                renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
                col_renamer[col] = renamed_col
            else:
                col_renamer[col] = col
        df_pickup_grouped = df_pickup_grouped.rename(columns=col_renamer)
        df_pickup_grouped_visc = df_pickup_grouped.drop('viscosity', axis=1)
        return df_pickup_grouped_visc
    except Exception as err:
        logger.error(f'Error while performing main function for pickup section {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def preprocess(df):
    logger.info("Starting Preprocessing the Data")
    # Replace 'nan' with NaN
    df = df.replace('nan', np.nan)
    # Calculate the number of missing values in each column
    missing_counts = df.isnull().sum()
    # Get the column names where the number of missing values is equal to the number of rows
    cols_to_remove = missing_counts[missing_counts == len(df)].index
    df = df.drop(cols_to_remove, axis=1)
    df = df.loc[df['Mixing batch number'] != 0]
    # Drop rows where 'Batch Number' is NaN
    df = df.dropna(subset=['Mixing batch number'])
    # Identify constant columns
    constant_columns = df.columns[df.nunique() == 1]
    # Drop constant columns
    df.drop(columns=constant_columns, inplace=True)
    logger.info(f"Preprocessing completed and the final shape is {df.shape}")
    columns_with_missing_values = df.columns[df.isnull().sum() > 0].tolist()
    return df
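`preprocess` drops columns that are entirely missing and columns with a single unique value, after filtering out batch 0. A quick check of that behaviour on a throwaway frame (column names invented):

```python
import numpy as np
import pandas as pd

# 'all_nan' and 'constant' should be dropped; 'signal' should survive.
df = pd.DataFrame({'Mixing batch number': [1.0, 2.0, 3.0],
                   'all_nan': [np.nan, np.nan, np.nan],
                   'constant': [7, 7, 7],
                   'signal': [0.1, 0.2, 0.3]})

missing_counts = df.isnull().sum()
df = df.drop(missing_counts[missing_counts == len(df)].index, axis=1)
df = df.drop(columns=df.columns[df.nunique() == 1])

print(list(df.columns))  # ['Mixing batch number', 'signal']
```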
def preprocess_mixer_section(df, index_number):
    mixer_cols = MixerConstants.mixer_cols
    mixer_df = df[mixer_cols]
    mixer_df['Time Stamp'] = pd.to_datetime(mixer_df['Time Stamp'])
    mixer_df = mixer_df.sort_values(by='Time Stamp')
    numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
    # Convert numeric columns to float
    mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
    mixer_df['day'] = mixer_df['Time Stamp'].dt.date
    mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_number]
    mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]
    mixer_df['Mixing Weight (Integrated Value)_diff'] = mixer_df.groupby(['day', 'Mixing batch number'])['Mixing Weight (Integrated Value)'].transform(lambda x: x.max() - x.min())
    mixer_cleaned_df = preprocess(mixer_df)
    mixer_cleaned_df["day"] = mixer_cleaned_df['Time Stamp'].dt.date
    mixer_cleaned_df['mixer_on_or_off'] = mixer_cleaned_df['Mixing timer value'].apply(lambda x: 0 if x == 0 else 1)
    mixer_cleaned_df['batch-date'] = 'Batch_' + mixer_cleaned_df['Mixing batch number'].astype(str) + '_' + \
        mixer_cleaned_df['day'].astype(str)
    mixer_cleaned_df = mixer_cleaned_df.sort_values(by='Time Stamp')
    # Group by 'batch-date' and add a new column 'rubber_addition'
    df['rubber_addition'] = 0

    def apply_conditions(group):
        max_value_index = group['Mixing timer value'].idxmax()
        group.loc[group['Mixing timer value'] != group['Mixing timer value'].max(), 'rubber_addition'] = 1
        group.loc[max_value_index, 'rubber_addition'] = 1
        return group

    mixer_cleaned_df = mixer_cleaned_df.groupby('batch-date').apply(apply_conditions)
    # Add 'process_on_or_off' column based on conditions
    mixer_cleaned_df['process_on_or_off'] = 0
    mixer_cleaned_df.loc[(mixer_cleaned_df['mixer_on_or_off'] == 1) & (mixer_cleaned_df['rubber_addition'] == 1), 'process_on_or_off'] = 1
    numeric_cols = mixer_cleaned_df.select_dtypes(include=['number', 'float']).columns
    process_on_df = mixer_cleaned_df[mixer_cleaned_df['process_on_or_off'] == 1]
    df_full = process_on_df[process_on_df.columns]
    # Define a dictionary for data type conversions
    conversion_dict = {col: float for col in df_full.select_dtypes(include='number').columns}
    # Apply the data type conversions
    df_full = df_full.astype(conversion_dict)
    rpm_count = df_full[df_full['Rotor actual rpm'] == 60.0].groupby('batch-date')['Rotor actual rpm'].count()
    df_full = df_full.merge(rpm_count, left_on='batch-date', right_index=True, suffixes=('', '_count'))
    df_full.rename(columns={'Rotor actual rpm_count': 'max_rpm_count'}, inplace=True)
    aggregation_dict = MixerConstants.aggregation_dict
    group_by = ['day', 'Mixing batch number']
    df_mixer_grouped = df_full.groupby(group_by).agg(aggregation_dict).reset_index()
    col_renamer = {}
    for col, col_agg in aggregation_dict.items():
        if col not in ['viscosity', 'time_min', 'time_max', 'Mixing Weight (Integrated Value)_diff', 'max_rpm_count']:
            renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
            col_renamer[col] = renamed_col
        else:
            col_renamer[col] = col
    df_mixer_grouped = df_mixer_grouped.rename(columns=col_renamer)
    df_mixer_grouped['batch-date'] = 'Batch_' + df_mixer_grouped['Mixing batch number'].astype(str) + '_' + \
        df_mixer_grouped['day'].astype(str)
    df_mixer_grouped = round(df_mixer_grouped, 6)
    return df_mixer_grouped
def create_status_column(df, type_col_name, columns_list):
    status_col = []
    for i, val in enumerate(df[type_col_name]):
        if val == 0:
            status_col.append(False)
        else:
            if any(df[column].iloc[i] == 0 for column in columns_list):
                status_col.append(False)
            else:
                status_col.append(True)
    return status_col
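`create_status_column` marks a row True only when its weight column is nonzero and none of the listed quality columns are zero. A quick usage sketch against the function above, with an invented frame:

```python
import pandas as pd

# Row 0: weight is 0 -> False. Row 1: DIRT reading is 0 -> False.
# Row 2: nonzero weight and no zero readings -> True.
df = pd.DataFrame({'Weight_type1': [0.0, 0.6, 0.4],
                   'DIRT_type1':   [0.02, 0.00, 0.03],
                   'ASH_type1':    [0.30, 0.25, 0.28]})

print(create_status_column(df, 'Weight_type1', ['DIRT_type1', 'ASH_type1']))
# [False, False, True]
```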
def preprocess_viscosity_section(viscosity_df, index_number):
    # adding date col to the viscosity df
    viscosity_df = viscosity_df.sort_values(by='Mixing date')
    raw_viscosity_df = viscosity_df.sort_values(by='Mixing date')
    viscosity_df['date'] = viscosity_df['Mixing date'].dt.date
    viscosity_df['batch-date'] = 'Batch_' + viscosity_df['Batch No.'].astype(str) + '_' + viscosity_df['date'].astype(str)
    viscosity_df = viscosity_df[viscosity_df['Index No'] == index_number]
    rubber_cols = ViscosityConstants.rubber_cols
    # Replace '-' with 0 for numerical and float columns
    viscosity_df[rubber_cols] = viscosity_df[rubber_cols].replace('-', 0)
    viscosity_df[rubber_cols] = viscosity_df[rubber_cols].apply(pd.to_numeric, errors='coerce')
    # Identify numerical and float columns
    numerical_cols = viscosity_df.columns[viscosity_df.dtypes.apply(lambda x: pd.api.types.is_numeric_dtype(x) or pd.api.types.is_float_dtype(x))]
    integer_cols = viscosity_df.columns[viscosity_df.dtypes == 'int64']
    # Convert integer columns to float
    viscosity_df[integer_cols] = viscosity_df[integer_cols].astype(float)
    # Calculate weights
    viscosity_df['Weight_type1'] = round(viscosity_df['Quantity using type1 bale'] / (viscosity_df['Quantity using type1 bale'] + viscosity_df['Quantity using type2 bale']), 2)
    viscosity_df['Weight_type2'] = round(viscosity_df['Quantity using type2 bale'] / (viscosity_df['Quantity using type1 bale'] + viscosity_df['Quantity using type2 bale']), 2)
    viscosity_df['Weighted_PO_type'] = (viscosity_df['PO_type1'] * viscosity_df['Weight_type1'] + viscosity_df['PO_type2'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_DIRT_type'] = (viscosity_df['DIRT_type1'] * viscosity_df['Weight_type1'] + viscosity_df['DIRT_type1.1'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_ASH_type'] = (viscosity_df['ASH_type1'] * viscosity_df['Weight_type1'] + viscosity_df['ASH_type2'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_VM_type'] = (viscosity_df['VM_type1'] * viscosity_df['Weight_type1'] + viscosity_df['VM_type2'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_PRI_type'] = (viscosity_df['PRI_type1'] * viscosity_df['Weight_type1'] + viscosity_df['PRI_type2'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_NITROGEN_type'] = (viscosity_df['NITROGEN_type1'] * viscosity_df['Weight_type1'] + viscosity_df['NITROGEN_type2'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_Temperature during transportation_type[℃]'] = (viscosity_df['Temperature during transportation_type1[℃]'] * viscosity_df['Weight_type1'] + viscosity_df['Temperature during transportation_type2[℃]'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_Humidity during transportation__type[%]'] = (viscosity_df['Humidity during transportation_type1[%]'] * viscosity_df['Weight_type1'] + viscosity_df['Humidity during transportation__type2[%]'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted Sum'] = viscosity_df['Weighted_PO_type'] + viscosity_df['Weighted_DIRT_type'] + viscosity_df['Weighted_ASH_type'] + viscosity_df['Weighted_VM_type'] + viscosity_df['Weighted_PRI_type'] + viscosity_df['Weighted_NITROGEN_type']
    column_to_keep_at_end = 'viscosity'
    # Reorder columns
    new_order = [col for col in viscosity_df.columns if col != column_to_keep_at_end] + [column_to_keep_at_end]
    viscosity_df = viscosity_df[new_order]
    viscosity_df['batch-date'] = 'Batch_' + viscosity_df['Batch No.'].astype(str) + '_' + viscosity_df['date'].astype(str)
    # Added Status to check rubber
    # Rubber Type 1
    rubber_1_cols = ['DIRT_type1', 'ASH_type1', 'VM_type1', 'PRI_type1', 'NITROGEN_type1']
    # Rubber Type 2
    rubber_2_cols = ['PO_type2', 'DIRT_type1', 'ASH_type2', 'VM_type2', 'PRI_type2', 'NITROGEN_type2']
    viscosity_df['rubber_status_1'] = create_status_column(viscosity_df, 'Weight_type1', rubber_1_cols)
    viscosity_df['rubber_status_2'] = create_status_column(viscosity_df, 'Weight_type2', rubber_2_cols)
    viscosity_df['status'] = viscosity_df['rubber_status_1'] | viscosity_df['rubber_status_2']
    req_cols = ViscosityConstants.req_cols
    final_viscosity_df = viscosity_df[req_cols]
    final_viscosity_df = round(final_viscosity_df, 6)
    return final_viscosity_df, raw_viscosity_df
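The weighted material properties above are plain convex blends: each property is `type1 * Weight_type1 + type2 * Weight_type2`, where the weights are each bale type's share of the total quantity (rounded to two decimals). In scalar form, with invented numbers:

```python
# Scalar version of the per-row weighting (numbers invented).
qty_type1, qty_type2 = 30.0, 10.0
w1 = round(qty_type1 / (qty_type1 + qty_type2), 2)  # 0.75
w2 = round(qty_type2 / (qty_type1 + qty_type2), 2)  # 0.25

po_type1, po_type2 = 40.0, 48.0
weighted_po = po_type1 * w1 + po_type2 * w2
print(weighted_po)  # 42.0
```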
def mixer_section_start_end_time(raw_df, index_no):
    mixer_cols = [
        'Time Stamp',
        'Size No (INDEX No).3',
        'Size name',
        'Mixing batch number',
        'idle time between batches',
    ]
    mixer_df = raw_df[mixer_cols]
    mixer_df['Time Stamp'] = pd.to_datetime(mixer_df['Time Stamp'])
    mixer_df = mixer_df.sort_values(by='Time Stamp')
    numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
    # Convert numeric columns to float
    mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
    mixer_df['day'] = mixer_df['Time Stamp'].dt.date
    mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_no]
    mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]
    mixer_df['time_min'] = mixer_df['Time Stamp']
    mixer_df['time_max'] = mixer_df['Time Stamp']
    aggregation_dict = {
        'time_min': 'min',
        'time_max': 'max',
    }
    group_by = ['day', 'Mixing batch number']
    df_mixer_grouped = mixer_df.groupby(group_by).agg(aggregation_dict).reset_index()
    df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped['time_max'] - df_mixer_grouped['time_min']
    df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped['mixer_section_time_diff_second'].dt.total_seconds()
    df_mixer_grouped['batch-date'] = 'Batch_' + df_mixer_grouped['Mixing batch number'].astype(str) + '_' + \
        df_mixer_grouped['day'].astype(str)
    date_dict = {}
    batch_lis = list(df_mixer_grouped['batch-date'].unique())
    for each_bt in batch_lis:
        df_nw = df_mixer_grouped[df_mixer_grouped['batch-date'] == each_bt]
        date_dict[each_bt] = {"start_time": str(list(df_nw['time_min'])[0]),
                              'end_time': str(list(df_nw['time_max'])[0])}
    return date_dict
def return_batch_no_df_1(raw_df, viscosity_df, date_dict, bof_cols, additional_cols, index_no):
    raw_df = raw_df.sort_values(by='Time Stamp')
    raw_df['Time Stamp'] = pd.to_datetime(raw_df['Time Stamp'])
    raw_df["day"] = raw_df["Time Stamp"].dt.date
    raw_df["day"] = raw_df["day"].astype("str")
    raw_df["Mixing batch number"] = raw_df["Mixing batch number"].astype("float")
    raw_df["batch-date"] = (
        "Batch_" + raw_df["Mixing batch number"].astype("str") + "_" + raw_df["day"].astype("str")
    )
    bof_add_cols = bof_cols + additional_cols
    bof_df = raw_df[bof_add_cols]
    sorted_bof_df = bof_df.sort_values(by="Time Stamp", ascending=True)
    sorted_bof_df = sorted_bof_df[sorted_bof_df["Size No (INDEX No).4"] == index_no]
    dt_list = list(sorted_bof_df["day"].unique())
    day_length_dic = {}
    for each_day in dt_list:
        day_df = sorted_bof_df[sorted_bof_df["day"] == each_day]
        if day_df["discharge length"].max() - day_df["discharge length"].min() <= 0:
            value = 0
        else:
            value = day_df["discharge length"].max() - day_df["discharge length"].min()
        day_length_dic[each_day] = value
    # print(day_length_dic)
    sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
    sorted_viscosity_df["day"] = sorted_viscosity_df["Mixing date"].dt.date
    sorted_viscosity_df["day"] = sorted_viscosity_df["day"].astype("str")
    extrud_visc_df = sorted_viscosity_df[
        ["Batch No.", "Input rubber weight(0.1kg)", "day", "Mixing date"]
    ]
    extrud_visc_df["length_from_extruder"] = extrud_visc_df["day"].map(day_length_dic)
    extrud_visc_df["length_from_extruder"] = extrud_visc_df["length_from_extruder"].fillna(0)
    daily_sum_weight = (
        extrud_visc_df.groupby("day")["Input rubber weight(0.1kg)"].sum() / 10
    )
    # Add a new column 'm/kg' by dividing 'length_from_extruder' by the sum for each day
    extrud_visc_df["m/kg"] = extrud_visc_df.apply(
        lambda row: row["length_from_extruder"] / daily_sum_weight[row["day"]], axis=1
    )
    extrud_visc_df["batch_length"] = extrud_visc_df.apply(
        lambda row: row["m/kg"] * row["Input rubber weight(0.1kg)"] / 10, axis=1
    ).astype("float64")
    extrud_visc_df["batch_length"] = extrud_visc_df["batch_length"].apply(math.ceil)
    extrud_visc_df["cumulative_length"] = extrud_visc_df.groupby("day")["batch_length"].cumsum()
    discharge_dict = (
        extrud_visc_df.groupby("day")
        .apply(lambda group: group.set_index("Batch No.").to_dict()["cumulative_length"])
        .to_dict()
    )
    test_sorted_extr_df = sorted_bof_df
    test_df = test_sorted_extr_df
    # Initialize an empty list to store batch numbers
    batch_numbers = []
    # Iterate through each row in the DataFrame
    for index, row in test_df.iterrows():
        day = row["day"]
        discharge_length = row["discharge length"]
        if discharge_length == 0:
            batch_numbers.append(0)
        else:
            # Check if the day is in the dictionary
            if day in discharge_dict:
                # Check if discharge length is less than or equal to the corresponding batch length
                batch_length_dict = discharge_dict[day]
                for batch_no, batch_length in batch_length_dict.items():
                    if discharge_length <= batch_length:
                        batch_numbers.append(batch_no)
                        break
                else:
                    # If no match is found in the dictionary, assign NaN to batch number
                    batch_numbers.append(batch_numbers[-1])
            else:
                # If day is not in the dictionary, assign NaN to batch number
                batch_numbers.append(np.nan)
    # Add the 'batch_no' column to the DataFrame
    test_df["batch_no"] = batch_numbers
    batch_number = 0
    batch_list = []
    started_with_one = False
    current_day = None
    for value, day in zip(list(test_df["lower door open"]), list(test_df["day"])):
        if current_day != day:
            current_day = day
            batch_number = 0
        if value == 1:
            if not started_with_one:
                batch_number += 1
                started_with_one = True
            batch_list.append(batch_number)
        else:
            batch_list.append(batch_number)
            started_with_one = False
    test_df["batch_no"] = test_df["batch_no"].astype("float")
    test_df["extruder_batch_date"] = (
        "Batch_" + test_df["batch_no"].astype("str") + "_" + test_df["day"].astype("str")
    )
    extruder_flag_list = []
    extrud_flg_vms = []
    for i, value in test_df.iterrows():
        if value["batch_no"] == 0.0:
            extruder_flag_list.append("false")
            extrud_flg_vms.append(0)
        else:
            start_time = date_dict.get(value["extruder_batch_date"]).get("start_time")
            end_time = date_dict.get(value["extruder_batch_date"]).get("end_time")
            if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') >
                    datetime.strptime(start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                    (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') <
                     datetime.strptime(end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                extruder_flag_list.append("true")
                extrud_flg_vms.append(1)
            else:
                extruder_flag_list.append("false")
                extrud_flg_vms.append(0)
    test_df["extruder_flag"] = extruder_flag_list
    test_df["extruder_batch_diff"] = extrud_flg_vms
    test_df["updtaed_bt_list"] = batch_list
    test_df["extruder_batch_number"] = test_df["batch_no"] - test_df["extruder_batch_diff"].astype("float")
    test_df["batch-date"] = (
        "Batch_" + test_df["extruder_batch_number"].astype("str") + "_" + test_df["day"].astype("str")
    )
    return test_df
def preprocess_extruder_section(df, index_number, vis_df):
    extruder_cols = ExtruderConstants.extruder_cols
    additional_columns = ['Time Stamp']
    df_extruder = df[extruder_cols + additional_columns]
    df_extruder['Time Stamp'] = pd.to_datetime(df_extruder['Time Stamp'])
    df_extruder = df_extruder.sort_values(by='Time Stamp')
    df_extruder['day'] = df_extruder['Time Stamp'].dt.date
    df_extruder['day'] = df_extruder['day'].astype('str')
    sorted_extrud_df = df_extruder.sort_values(by="Time Stamp", ascending=True)
    sorted_extrud_df = sorted_extrud_df[sorted_extrud_df['Size No (INDEX No).4'] == index_number]
    drop_col = ['spare.19', 'spare.20', 'spare.21', 'spare.22', 'spare.23', 'spare.24', 'spare.25',
                'Hopper bank upper limit', 'middle of hopper bank', 'Hopper bank lower limit',
                'Hopper bank below lower limit']
    sorted_extrud_df.drop(columns=drop_col, inplace=True)
    date_dict = mixer_section_start_end_time(df, index_number)
    additional_cols = ['day', 'Time Stamp', 'lower door open']
    # adding date col to the viscosity df
    vis_df = vis_df.sort_values(by='Mixing date')
    vis_df['date'] = vis_df['Mixing date'].dt.date
    vis_df['batch-date'] = 'Batch_' + vis_df['Batch No.'].astype('float').astype(str) + '_' + vis_df['date'].astype(str)
    vis_df = vis_df[vis_df['Index No'] == index_number]
    extruder_merged_df_final = return_batch_no_df_1(df, vis_df, date_dict, extruder_cols, additional_cols, index_number)
    extruder_merged_df_final = extruder_merged_df_final[extruder_merged_df_final['extruder_batch_number'] != 0]
    grouped_cols = ['batch-date']
    aggregate_dict = ExtruderConstants.aggregate_dict
    df_extruder_grouped = extruder_merged_df_final.groupby(grouped_cols).agg(aggregate_dict).reset_index()
    col_renamer = {}
    for col, col_agg in aggregate_dict.items():
        if col not in ['viscosity', 'time_min', 'time_max', 'Mixing Weight (Integrated Value)_diff', 'max_rpm_count']:
            renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
            col_renamer[col] = renamed_col
        else:
            col_renamer[col] = col
    df_extruder_grouped = df_extruder_grouped.rename(columns=col_renamer)
    df_extruder_grouped = df_extruder_grouped.fillna(df_extruder_grouped.mean())
    df_extruder_grouped = round(df_extruder_grouped, 6)
    return df_extruder_grouped
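`preprocess_extruder_section` (and `preprocess_bof_section` further down) fills missing aggregates with the column mean via `fillna(df.mean())`. A minimal illustration with hypothetical column names:

```python
import numpy as np
import pandas as pd

# Mean imputation on a grouped frame (values and names invented).
g = pd.DataFrame({'screw_rpm_mean': [50.0, np.nan, 70.0],
                  'head_temp_mean': [80.0, 90.0, np.nan]})
print(g.fillna(g.mean()))
#    screw_rpm_mean  head_temp_mean
# 0            50.0            80.0
# 1            60.0            90.0
# 2            70.0            85.0
```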
import math
import warnings
import traceback
from datetime import datetime

import numpy as np
import pandas as pd
from loguru import logger

from scripts.constants.constants import BofConstants

warnings.filterwarnings("ignore")
def mixer_section_start_end_time(raw_df, index_no):
    try:
        mixer_cols = BofConstants.bof_mixer_cols
        mixer_df = raw_df[mixer_cols]
        mixer_df['Time Stamp'] = pd.to_datetime(mixer_df['Time Stamp'])
        mixer_df = mixer_df.sort_values(by='Time Stamp')
        numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
        # Convert numeric columns to float
        mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
        mixer_df['day'] = mixer_df['Time Stamp'].dt.date
        mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_no]
        mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]
        mixer_df['time_min'] = mixer_df['Time Stamp']
        mixer_df['time_max'] = mixer_df['Time Stamp']
        aggregation_dict = {
            'time_min': 'min',
            'time_max': 'max',
        }
        group_by = ['day', 'Mixing batch number']
        df_mixer_grouped = mixer_df.groupby(group_by).agg(aggregation_dict).reset_index()
        df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped['time_max'] - df_mixer_grouped['time_min']
        df_mixer_grouped['mixer_section_time_diff_second'] = df_mixer_grouped['mixer_section_time_diff_second'].dt.total_seconds()
        df_mixer_grouped['batch-date'] = 'Batch_' + df_mixer_grouped['Mixing batch number'].astype(str) + '_' + \
            df_mixer_grouped['day'].astype(str)
        date_dict = {}
        batch_lis = list(df_mixer_grouped['batch-date'].unique())
        for each_bt in batch_lis:
            df_nw = df_mixer_grouped[df_mixer_grouped['batch-date'] == each_bt]
            date_dict[each_bt] = {"start_time": str(list(df_nw['time_min'])[0]),
                                  'end_time': str(list(df_nw['time_max'])[0])}
        return date_dict
    except Exception as err:
        logger.error(f'Error in fetching mixer batch date dictionary: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
def return_batch_no_df(raw_df, viscosity_df, date_dict, index_number):
    try:
        logger.info('Getting bof batch number')
        raw_df['day'] = raw_df['Time Stamp'].dt.date
        raw_df['day'] = raw_df['day'].astype('str')
        raw_df['Mixing batch number'] = raw_df['Mixing batch number'].astype('float')
        raw_df['batch-date'] = 'Batch_' + raw_df['Mixing batch number'].astype('str') + '_' + raw_df['day'].astype('str')
        bof_add_cols = BofConstants.bof_add_cols
        bof_df = raw_df[BofConstants.bof_cols + bof_add_cols]
        sorted_bof_df = bof_df.sort_values(by="Time Stamp", ascending=True)
        sorted_bof_df = sorted_bof_df[sorted_bof_df['Size No (INDEX No).5'] == index_number]
        dt_list = list(sorted_bof_df['day'].unique())
        day_length_dic = {}
        for each_day in dt_list:
            day_df = sorted_bof_df[sorted_bof_df['day'] == each_day]
            if (day_df['length passed through'].max() - day_df['length passed through'].min()) <= 0:
                value = 0
            else:
                value = day_df['length passed through'].max() - day_df['length passed through'].min()
            day_length_dic[each_day] = value
        sorted_viscosity_df = viscosity_df.sort_values(by="Mixing date", ascending=True)
        sorted_viscosity_df['day'] = sorted_viscosity_df['Mixing date'].dt.date
        sorted_viscosity_df['day'] = sorted_viscosity_df['day'].astype('str')
        extrud_visc_df = sorted_viscosity_df[['Batch No.', 'Input rubber weight(0.1kg)', 'day', 'Mixing date']]
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['day'].map(day_length_dic)
        extrud_visc_df['length_from_extruder'] = extrud_visc_df['length_from_extruder'].fillna(0)
        daily_sum_weight = extrud_visc_df.groupby('day')['Input rubber weight(0.1kg)'].sum() / 10
        # Add a new column 'm/kg' by dividing 'length_from_extruder' by the sum for each day
        extrud_visc_df['m/kg'] = extrud_visc_df.apply(lambda row: row['length_from_extruder'] / daily_sum_weight[row['day']], axis=1)
        extrud_visc_df['batch_length'] = extrud_visc_df.apply(lambda row: row['m/kg'] * row['Input rubber weight(0.1kg)'] / 10, axis=1).astype('float64')
        extrud_visc_df['batch_length'] = extrud_visc_df['batch_length'].apply(math.ceil)
        extrud_visc_df['cumulative_length'] = extrud_visc_df.groupby('day')['batch_length'].cumsum()
        discharge_dict = extrud_visc_df.groupby('day').apply(lambda group: group.set_index('Batch No.').to_dict()['cumulative_length']).to_dict()
        test_sorted_extr_df = sorted_bof_df
        test_df = test_sorted_extr_df
        # Initialize an empty list to store batch numbers
        batch_numbers = []
        # Iterate through each row in the DataFrame
        for index, row in test_df.iterrows():
            day = row['day']
            discharge_length = row['length passed through']
            if discharge_length == 0:
                batch_numbers.append(0)
            else:
                # Check if the day is in the dictionary
                if day in discharge_dict:
                    # Check if discharge length is less than or equal to the corresponding batch length
                    batch_length_dict = discharge_dict[day]
                    for batch_no, batch_length in batch_length_dict.items():
                        if discharge_length <= batch_length:
                            batch_numbers.append(batch_no)
                            break
                    else:
                        # If no match is found in the dictionary, assign NaN to batch number
                        batch_numbers.append(batch_numbers[-1])
                else:
                    # If day is not in the dictionary, assign NaN to batch number
                    batch_numbers.append(np.nan)
        # Add the 'batch_no' column to the DataFrame
        test_df['batch_no'] = batch_numbers
        batch_number = 0
        batch_list = []
        started_with_one = False
        current_day = None
        for value, day in zip(list(test_df['lower door open']), list(test_df['day'])):
            if current_day != day:
                current_day = day
                batch_number = 0
            if value == 1:
                if not started_with_one:
                    batch_number += 1
                    started_with_one = True
                batch_list.append(batch_number)
            else:
                batch_list.append(batch_number)
                started_with_one = False
        batch_number = 0
        batch_list = []
        started_with_one = False
        for value in test_df['lower door open']:
            if value == 1:
                if not started_with_one:
                    batch_number += 1
                    started_with_one = True
                batch_list.append(batch_number)
            else:
                batch_list.append(batch_number)
                started_with_one = False
        test_df['batch_no'] = test_df['batch_no'].astype('float')
        test_df['bof_batch_date'] = 'Batch_' + test_df['batch_no'].astype('str') + '_' + test_df['day'].astype('str')
        extruder_flag_list = []
        extrud_flg_vms = []
        for i, value in test_df.iterrows():
            if value['batch_no'] == 0.0:
                extruder_flag_list.append('false')
                extrud_flg_vms.append(0)
            else:
                start_time = date_dict.get(value["bof_batch_date"]).get("start_time")
                end_time = date_dict.get(value["bof_batch_date"]).get("end_time")
                if (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') >
                        datetime.strptime(start_time.split('+')[0], '%Y-%m-%d %H:%M:%S')) & \
                        (datetime.strptime(str(value["Time Stamp"]).split('+')[0], '%Y-%m-%d %H:%M:%S') <
                         datetime.strptime(end_time.split('+')[0], '%Y-%m-%d %H:%M:%S')):
                    extruder_flag_list.append('true')
                    extrud_flg_vms.append(1)
                else:
                    extruder_flag_list.append('false')
                    extrud_flg_vms.append(0)
        test_df['bof_flag'] = extruder_flag_list
        test_df['bof_batch_diff'] = extrud_flg_vms
        # test_df['updated_bt_list'] = batch_list
        test_df['bof_batch_number'] = test_df['batch_no'] - test_df['bof_batch_diff'].astype('float')
        test_df['batch-date'] = 'Batch_' + test_df['bof_batch_number'].astype('str') + '_' + test_df['day'].astype('str')
        return test_df
    except Exception as er:
        logger.error(f'Error in adding batch data to bof section: {str(er)}')
        logger.error(traceback.format_exc())
        raise Exception(str(er))
def preprocess_bof_section(df, index_number, vis_df):
    try:
        df['Time Stamp'] = pd.to_datetime(df['Time Stamp'])
        df = df.sort_values(by='Time Stamp')
        df['day'] = df['Time Stamp'].dt.date
        df['day'] = df['day'].astype('str')
        date_dict = mixer_section_start_end_time(df, index_number)
        bof_merged_df_final = return_batch_no_df(df, vis_df, date_dict, index_number)
        bof_merged_df_final = bof_merged_df_final[bof_merged_df_final['bof_batch_number'] != 0]
        # print(bof_merged_df_final.columns)
        grouped_cols = ['batch-date']
        aggregate_dict = BofConstants.bof_aggregate_dict
        df_bof_grouped = bof_merged_df_final.groupby(grouped_cols).agg(aggregate_dict).reset_index()
        col_renamer = {}
        for col, col_agg in aggregate_dict.items():
            if col not in ['viscosity', 'time_min', 'time_max',
                           'Mixing Weight (Integrated Value)_diff', 'max_rpm_count']:
                renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
                col_renamer[col] = renamed_col
            else:
                col_renamer[col] = col
        df_bof_grouped = df_bof_grouped.rename(columns=col_renamer)
        df_bof_grouped_rest = df_bof_grouped.drop('batch-date', axis=1)
        df_bof_grouped_rest = df_bof_grouped_rest.fillna(df_bof_grouped_rest.mean())
        df_bof_grouped_rest = round(df_bof_grouped_rest, 6)
        df_bof_grouped_rest['batch-date'] = df_bof_grouped['batch-date']
        return df_bof_grouped_rest
    except Exception as err:
        logger.error(f'Error in fetching the bof preprocess data: {str(err)}')
        logger.error(traceback.format_exc())
        raise Exception(str(err))
class ModelLoader(object):
    def __init__(self, model_info):
        self.model_info = model_info

    def load_model(self):
        logger.info("Loading the Model")
        if self.model_info["type"] == "mlflow.sklearn":
            return self._load_mlflow_sklearn_model()
        else:
            logger.info("Unsupported Model Type")

    def _load_mlflow_sklearn_model(self):
        try:
            _model = mlflow.sklearn.load_model(self.model_info["path"])
            logger.debug("Model loaded successfully!")
            return _model
        except Exception as e:
            logger.error("Error while loading mlflow.sklearn model : {}".format(str(e)))
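# Illustrative usage of ModelLoader (the artifact path below is a placeholder, not a real one):
#   model = ModelLoader({"type": "mlflow.sklearn", "path": "models/viscosity_1250"}).load_model()
# Any type other than "mlflow.sklearn" is only logged as unsupported, so load_model()
# returns None in that case and when the underlying mlflow load fails.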
class RawConstants:
    columns = [
        'Time Stamp', 'Shipper size No.', 'Shipper No.1 DH', 'Shipper No.1 Pallet',
        'Shipper No.3 DH', 'Shipper No.2 Pallet', 'Shipper No.3 DH.1', 'Shipper No.3 Pallet',
        'Size No (INDEX No)', 'Weighing times', 'Process mass', 'Mass', 'Material detection',
        'Surface temperature (mixer side)', 'Surface temperature (center)',
        'Surface temperature (receiving side)', 'temperature', 'humidity',
        'Weighing command No.', 'spare', 'spare.1', 'spare.2', 'spare.3', 'spare.4',
        'Size No (INDEX No).1', 'Weighing times.1', 'Process mass.1', 'real mass',
        'spare.5', 'spare.6', 'spare.7',
        'Size No (INDEX No).2', 'Weighing times.2', 'Process mass.2',
        'CB weighing machine measurement', 'Dust collection duct (Immediately after ****)',
        'Dust collection duct (before dust collector)\n', 'CB slot open', 'CB slot closed',
        'carbon cycle', 'carbon2 cycle', 'spare.8', 'spare.9', 'spare.10', 'spare.11',
        'Size No (INDEX No).3', 'Size name', 'Mixing batch number',
        'Mixing Weight (Integrated Value)', 'Rotor actual rpm', 'Mixing timer value',
        'Temperature (DS side)', 'Temperature (WS side)', 'Electric power', 'Electric energy',
        'Mixing electric power average', 'Ram pressure', 'Ram rising', 'Ram down',
        'Ram position', 'front door open', 'Front door closed', 'lower door open',
        'lower door closed', 'Before mixer rotation detection', 'After mixer rotation detection',
        'Drilled side left Inlet side Cooling water temperature',
        'Drilled side left Exit side Cooling water temperature',
        'Drilled side right Inlet side Cooling water temperature',
        'Drilled side right Exit side Cooling water temperature',
        'Mixer rotor left inlet side Coolant temperature',
        'Mixer rotor left output side Cooling water temperature',
        'Mixer rotor right inlet side Coolant temperature',
        'Mixer rotor right exit side Cooling water temperature',
        'Mixer body temperature',
        'Drilled side left Inlet side Cooling water flow rate',
        'Drilled side left Exit side Cooling water flow rate',
        'Drilled side right Inlet side Cooling water flow rate',
        'Drilled side right Exit side Cooling water flow rate',
        'Mixer rotor left inlet side Cooling water flow rate',
        'Mixer rotor left outlet side Cooling water flow rate',
        'Mixer rotor right inlet side Cooling water flow rate',
        'Mixer rotor right outlet side Cooling water flow rate',
        'temperature.1', 'humidity.1', 'idle time between batches',
        'spare.12', 'spare.13', 'spare.14', 'spare.15', 'spare.16', 'spare.17', 'spare.18',
        'Size No (INDEX No).4', 'discharge length', 'Hopper bank upper limit',
        'middle of hopper bank', 'Hopper bank lower limit', 'Hopper bank below lower limit',
        'Extruder rpm', 'Extruder current', 'Calendar rpm', ' Calendar current',
        'Calendar bank load', 'Calendar GAP Operation side',
        'Calendar GAP Opposite operation side', 'Residence time',
        'Screw operation side Inlet side Cooling water temperature',
        'Screw operation side Outlet side Cooling water temperature',
        'Screw Opposite operation side Inlet side Cooling water temperature',
        'Screw Opposite operation side Outlet side Cooling water temperature',
        'Calender roll Lower side Inlet side Cooling water temperature',
        'Calender roll Lower side Outlet side Cooling water temperature',
        'Calender roll upper side Inlet side Cooling water temperature',
        'Calender roll Upper side Outlet side Cooling water temperature',
        'Screw operation side Inlet side Cooling water flow rate',
        'Screw operation side Outlet side Cooling water flow rate',
        'Screw Opposite operation side Inlet side Cooling water flow rate',
        'Screw Opposite operation side Outlet side Cooling water flow rate',
        'Calender roll Lower side Inlet side Cooling water flow rate',
        'Calender roll Lower side Outlet side Cooling water flow rate',
        'Calender roll upper side Inlet side Cooling water flow rate',
        'Calender roll Upper side Outlet side Cooling water flow rate',
        'Extruder body temperature',
        'spare.19', 'spare.20', 'spare.21', 'spare.22', 'spare.23', 'spare.24', 'spare.25',
        'Size No (INDEX No).5', 'length passed through', 'Material detection.1',
        'Sheet temperature immediately after calendering', 'Withdrawal CV speed',
        'DUST CV\nspeed', 'spare.26', 'spare.27', 'spare.28',
        'Size No (INDEX No).6', 'length passed through.1', 'Material detection.2',
        'Seat temperature immediately after BOF', 'temperature.2', 'humidity.2',
        'spare.29', 'spare.30', 'spare.31', 'spare.32',
        'Size No (INDEX No).7', 'Setting length', 'length passed through(Integrated Value)',
        'Mass\n(Integrated Value)', 'Pallet No.', 'Loading completion flag',
        'spare.33', 'spare.34', 'spare.35', 'spare.36',
        'mixer cooling water', 'Under cooling water']
class ViscosityConstants:
    rubber_cols = [
        'Quantity using type1 bale', 'PO_type1', 'DIRT_type1', 'ASH_type1', 'VM_type1',
        'PRI_type1', 'NITROGEN_type1', 'Temperature during transportation_type1[℃]',
        'Humidity during transportation_type1[%]',
        'Quantity using type2 bale', 'PO_type2', 'DIRT_type1.1', 'ASH_type2', 'VM_type2',
        'PRI_type2', 'NITROGEN_type2', 'Temperature during transportation_type2[℃]',
        'Humidity during transportation__type2[%]']
    req_cols = [
        'Rubber No.', 'Batch No.', 'Index No', 'Chemical weight (g)',
        'Input rubber weight(0.1kg)', 'date', 'batch-date', 'Weight_type1', 'Weight_type2',
        'Weighted_PO_type', 'Weighted_DIRT_type', 'Weighted_ASH_type', 'Weighted_VM_type',
        'Weighted_PRI_type', 'Weighted_NITROGEN_type',
        'Weighted_Temperature during transportation_type[℃]',
        'Weighted_Humidity during transportation__type[%]',
        'Weighted Sum', 'viscosity', 'status']
class SheetConstants:
    sheet_supply_column = [
        'Time Stamp', 'Shipper size No.', 'Shipper No.1 DH', 'Shipper No.1 Pallet',
        'Shipper No.3 DH', 'Shipper No.2 Pallet', 'Shipper No.3 DH.1', 'Shipper No.3 Pallet',
        'Size No (INDEX No)', 'Weighing times', 'Process mass', 'Mass', 'Material detection',
        'Surface temperature (mixer side)', 'Surface temperature (center)',
        'Surface temperature (receiving side)', 'temperature', 'humidity',
        'Weighing command No.', 'spare', 'spare.1', 'spare.2', 'spare.3', 'spare.4']
    aggregation_dict = {
        "Surface temperature (mixer side)": "mean",
        "Surface temperature (center)": "std",
        "Surface temperature (receiving side)": "mean",
        "temperature": "mean",
        "humidity": "mean",
        'Process mass': 'mean',
    }
class MixerConstants:
    mixer_cols = [
        'Time Stamp', 'Size No (INDEX No).3', 'Size name', 'Mixing batch number',
        'Mixing Weight (Integrated Value)', 'Rotor actual rpm', 'Mixing timer value',
        'Temperature (DS side)', 'Temperature (WS side)', 'Electric power', 'Electric energy',
        'Mixing electric power average', 'Ram pressure', 'Ram rising', 'Ram down',
        'Ram position', 'front door open', 'Front door closed', 'lower door open',
        'lower door closed', 'Before mixer rotation detection', 'After mixer rotation detection',
        'Drilled side left Inlet side Cooling water temperature',
        'Drilled side left Exit side Cooling water temperature',
        'Drilled side right Inlet side Cooling water temperature',
        'Drilled side right Exit side Cooling water temperature',
        'Mixer rotor left inlet side Coolant temperature',
        'Mixer rotor left output side Cooling water temperature',
        'Mixer rotor right inlet side Coolant temperature',
        'Mixer rotor right exit side Cooling water temperature',
        'Mixer body temperature',
        'Drilled side left Inlet side Cooling water flow rate',
        'Drilled side left Exit side Cooling water flow rate',
        'Drilled side right Inlet side Cooling water flow rate',
        'Drilled side right Exit side Cooling water flow rate',
        'Mixer rotor left inlet side Cooling water flow rate',
        'Mixer rotor left outlet side Cooling water flow rate',
        'Mixer rotor right inlet side Cooling water flow rate',
        'Mixer rotor right outlet side Cooling water flow rate',
        'temperature.1', 'humidity.1', 'idle time between batches',
    ]
    aggregation_dict = {
        'Mixing timer value': 'max',
        'Temperature (DS side)': 'mean',
        'Temperature (WS side)': 'std',
        'Electric power': 'mean',
        'Electric energy': 'mean',
        'Mixing electric power average': 'mean',
        'Ram pressure': 'mean',
        # 'Ram rising': '',
        # 'Ram down': '',
        'Ram position': 'std',
        # 'front door open': '',
        # 'Front door closed': '',
        # 'lower door open': '',
        # 'lower door closed': '',
        # 'Before mixer rotation detection': '',
        # 'After mixer rotation detection': '',
        'Drilled side left Inlet side Cooling water temperature': 'std',
        'Drilled side left Exit side Cooling water temperature': 'mean',
        'Drilled side right Inlet side Cooling water temperature': 'mean',
        'Drilled side right Exit side Cooling water temperature': 'std',
        'Mixer rotor left inlet side Coolant temperature': 'std',
        'Mixer rotor left output side Cooling water temperature': 'mean',
        'Mixer rotor right inlet side Coolant temperature': 'mean',
        'Mixer rotor right exit side Cooling water temperature': 'std',
        'Mixer body temperature': 'mean',
        'Drilled side left Inlet side Cooling water flow rate': 'std',
        'Drilled side left Exit side Cooling water flow rate': 'mean',
        'Drilled side right Inlet side Cooling water flow rate': 'mean',
        'Drilled side right Exit side Cooling water flow rate': 'std',
        'Mixer rotor left inlet side Cooling water flow rate': 'std',
        'Mixer rotor left outlet side Cooling water flow rate': 'mean',
        'Mixer rotor right inlet side Cooling water flow rate': 'mean',
        'Mixer rotor right outlet side Cooling water flow rate': 'std',
        'temperature.1': 'mean',
        'humidity.1': 'mean',
        'idle time between batches': 'mean',
        'Mixing Weight (Integrated Value)_diff': 'max',  # any agg will work
        'max_rpm_count': 'max'  # any agg will work
    }
class ExtruderConstants:
    extruder_cols = [
        'Size No (INDEX No).4', 'discharge length', 'Hopper bank upper limit',
        'middle of hopper bank', 'Hopper bank lower limit', 'Hopper bank below lower limit',
        'Extruder rpm', 'Extruder current', 'Calendar rpm', ' Calendar current',
        'Calendar bank load', 'Calendar GAP Operation side',
        'Calendar GAP Opposite operation side', 'Residence time',
        'Screw operation side Inlet side Cooling water temperature',
        'Screw operation side Outlet side Cooling water temperature',
        'Screw Opposite operation side Inlet side Cooling water temperature',
        'Screw Opposite operation side Outlet side Cooling water temperature',
        'Calender roll Lower side Inlet side Cooling water temperature',
        'Calender roll Lower side Outlet side Cooling water temperature',
        'Calender roll upper side Inlet side Cooling water temperature',
        'Calender roll Upper side Outlet side Cooling water temperature',
        'Screw operation side Inlet side Cooling water flow rate',
        'Screw operation side Outlet side Cooling water flow rate',
        'Screw Opposite operation side Inlet side Cooling water flow rate',
        'Screw Opposite operation side Outlet side Cooling water flow rate',
        'Calender roll Lower side Inlet side Cooling water flow rate',
        'Calender roll Lower side Outlet side Cooling water flow rate',
        'Calender roll upper side Inlet side Cooling water flow rate',
        'Calender roll Upper side Outlet side Cooling water flow rate',
        'Extruder body temperature',
        'spare.19', 'spare.20', 'spare.21', 'spare.22', 'spare.23', 'spare.24', 'spare.25']
    aggregate_dict = {
        'discharge length': "max",
        'Extruder rpm': "mean",
        'Extruder current': "std",
        'Calendar rpm': "std",
        ' Calendar current': "mean",
        'Calendar bank load': "max",
        'Calendar GAP Operation side': "median",
        'Calendar GAP Opposite operation side': "std",
        'Residence time': "max",
        'Screw operation side Inlet side Cooling water temperature': "mean",
        'Screw operation side Outlet side Cooling water temperature': "std",
        'Screw Opposite operation side Inlet side Cooling water temperature': "mean",
        'Screw Opposite operation side Outlet side Cooling water temperature': "std",
        'Calender roll Lower side Inlet side Cooling water temperature': "mean",
        'Calender roll Lower side Outlet side Cooling water temperature': "std",
        'Calender roll upper side Inlet side Cooling water temperature': "mean",
        'Calender roll Upper side Outlet side Cooling water temperature': "std",
        'Screw operation side Inlet side Cooling water flow rate': "mean",
        'Screw operation side Outlet side Cooling water flow rate': "std",
        'Screw Opposite operation side Inlet side Cooling water flow rate': "mean",
        'Screw Opposite operation side Outlet side Cooling water flow rate': "std",
        'Calender roll Lower side Inlet side Cooling water flow rate': "mean",
        'Calender roll Lower side Outlet side Cooling water flow rate': "std",
        'Calender roll upper side Inlet side Cooling water flow rate': "mean",
        'Calender roll Upper side Outlet side Cooling water flow rate': "std",
        'Extruder body temperature': "mean"
    }
class PickupConstants:
    pick_cols = [
        'Size No (INDEX No).6', 'length passed through.1', 'Material detection.2',
        'Seat temperature immediately after BOF', 'temperature.2', 'humidity.2',
        'spare.29', 'spare.30', 'spare.31', 'spare.32']
    pick_imp_mixer_cols = [
        'Time Stamp', 'Size No (INDEX No).3', 'Size name', 'Mixing batch number',
        'idle time between batches',
    ]
    pick_imp_bof_cols = ['Time Stamp', 'Size No (INDEX No).5', 'bof_batch_number']
    pick_additional_cols = ['day', 'Time Stamp', 'length passed through', 'discharge length']
    pick_aggregate_dict = {
        'Seat temperature immediately after BOF': 'mean',
        'viscosity': 'mean'
    }
    pick_grouped_cols = ['batch-date']
class BofConstants:
    bof_cols = [
        'Size No (INDEX No).5', 'length passed through', 'Material detection.1',
        'Sheet temperature immediately after calendering', 'Withdrawal CV speed',
        'DUST CV\nspeed', 'spare.26', 'spare.27', 'spare.28', 'lower door open']
    bof_add_cols = ['Time Stamp', 'day', 'lower door open']
    bof_mixer_cols = [
        'Time Stamp', 'Size No (INDEX No).3', 'Size name', 'Mixing batch number',
        'idle time between batches']
    bof_aggregate_dict = aggregate_dict = {
        'Sheet temperature immediately after calendering': 'mean',
        'Withdrawal CV speed': 'mean',
        'DUST CV\nspeed': 'std'
    }
def model_trainer(df_grouped, index_no, model_path):
    # Despite the name, this function only evaluates an already-saved model on a hold-out
    # split; nothing is fitted here. An index_no other than 1250 or 3294 leaves cols_x and
    # saved_model as None and fails below.
    cols_x, cols_y, saved_model = None, None, None
    if index_no == 1250:
        cols_x = [
            'temperature_ws_side_std', '_calendar_current_mean', 'Weighted_NITROGEN_type',
            'ram_pressure_mean', 'electric_energy_mean',
            'screw_operation_side_outlet_side_cooling_water_flow_rate_std',
            'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean',
            'Weighted_VM_type', 'seat_temperature_immediately_after_bof_mean',
            'Weighted_DIRT_type', 'surface_temperature_center_std', 'residence_time_max',
            'drilled_side_left_exit_side_cooling_water_temperature_mean', 'Weighted_PRI_type',
            'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean',
            'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std',
            'Weighted_ASH_type', 'Weighted_PO_type',
            'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean',
            'drilled_side_right_exit_side_cooling_water_flow_rate_std',
            'Weighted_Humidity during transportation__type[%]']
        cols_y = "viscosity"
        saved_model = ModelLoader({"type": "mlflow.sklearn", "path": model_path}).load_model()
    elif index_no == 3294:
        cols_x = [
            'Weighted_ASH_type', 'Weighted_NITROGEN_type', 'electric_energy_mean',
            'drilled_side_left_inlet_side_cooling_water_temperature_std',
            'seat_temperature_immediately_after_bof_mean',
            'mixer_rotor_left_outlet_side_cooling_water_flow_rate_mean', 'humidity_mean',
            'drilled_side_left_exit_side_cooling_water_flow_rate_mean',
            'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean',
            'calendar_bank_load_max',
            'drilled_side_right_inlet_side_cooling_water_flow_rate_mean', 'Weighted_PRI_type',
            'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean',
            'temperature_ws_side_std', 'dust_cv\nspeed_std',
            'mixer_rotor_right_inlet_side_coolant_temperature_mean', 'ram_position_std',
            'drilled_side_right_exit_side_cooling_water_temperature_std',
            'calender_roll_upper_side__outlet__side_cooling_water_temperature_std',
            'Weighted_Temperature during transportation_type[℃]']
        cols_y = "viscosity"
        saved_model = ModelLoader({"type": "mlflow.sklearn", "path": model_path}).load_model()
    req_cols = cols_x + ['viscosity']
    features = df_grouped[cols_x]
    labels = df_grouped[cols_y]
    # df_grouped[req_cols].to_csv('final.csv')
    # Split the data into training and testing sets
    x_train, x_test, y_train, y_test = train_test_split(features, labels,
                                                        random_state=42, test_size=0.25)
    print(f'x_train shape - {x_train.shape}')
    print(f'x_test shape - {x_test.shape}')
    print(f'y_train shape - {y_train.shape}')
    print(f'y_test shape - {y_test.shape}')
    y_pred = saved_model.predict(x_test)
    predictions = [round(value, 2) for value in y_pred]
    metric_dictionary = dict()
    mae = metrics.mean_absolute_error(y_test, predictions)
    mse = metrics.mean_squared_error(y_test, predictions)
    mape = metrics.mean_absolute_percentage_error(y_test, predictions)
    explained_variance_score = metrics.explained_variance_score(y_test, predictions)
    max_error = metrics.max_error(y_test, predictions)
    r2_score = metrics.r2_score(y_test, predictions)
    median_absolute_error = metrics.median_absolute_error(y_test, predictions)
    mean_poisson_deviance = metrics.mean_poisson_deviance(y_test, predictions)
    mean_gamma_deviance = metrics.mean_gamma_deviance(y_test, predictions)
    metric_dictionary["Mean Absolute Error (MAE)"] = mae
    metric_dictionary["Mean Squared Error (MSE)"] = mse
    metric_dictionary["Root Mean Squared Error (RMSE)"] = np.sqrt(mse)
    metric_dictionary["Mean Absolute Percentage Error (MAPE)"] = mape
    metric_dictionary["Explained Variance Score"] = explained_variance_score
    metric_dictionary["Max Error"] = max_error
    metric_dictionary["Median Absolute Error"] = median_absolute_error
    metric_dictionary["R2 Score"] = r2_score
    metric_dictionary["Mean Gamma Deviance"] = mean_gamma_deviance
    metric_dictionary["Mean Poisson Deviance"] = mean_poisson_deviance
    print(metric_dictionary)
def read_raw_data(raw_path, raw_skip_rows):
    try:
        df = pd.read_excel(raw_path, skiprows=raw_skip_rows)
    except Exception as e:
        df = pd.read_csv(raw_path)
    if len(df.columns) == len(RawConstants.columns):
        logger.info(f"Total cols are {len(RawConstants.columns)} and are same as the df cols length")
        df.columns = RawConstants.columns
    else:
        missed_cols = RawConstants.columns[len(df.columns):]
        logger.info(f"missed cols are {missed_cols}")
        for col in missed_cols:
            df[col] = float('nan')
        df.columns = RawConstants.columns
    logger.info(f"Shape of df is {df.shape}")
    return df
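# Illustrative call (the file name is a placeholder): read_raw_data falls back to
# pd.read_csv when pd.read_excel raises, so the same entry point handles both .xlsx and
# .csv exports. Note that raw_skip_rows is only applied on the Excel path.
#   df = read_raw_data('raw_sensor_export.csv', 0)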
def merged_all_sections(sheet_df, mixer_df, extruder_df, bof_df, pickup_df, viscosity_df):
    merged_df = pd.merge(sheet_df, mixer_df, on='batch-date', how='left')
    merged_df = pd.merge(merged_df, extruder_df, on='batch-date', how='left')
    merged_df = pd.merge(merged_df, bof_df, on='batch-date', how='left')
    merged_df = pd.merge(merged_df, pickup_df, on='batch-date', how='left')
    df_grouped = pd.merge(merged_df, viscosity_df, on='batch-date', how='left')
    selected_cols = df_grouped.columns
    df_grouped = df_grouped[df_grouped['status'] == True]
    df_grouped = df_grouped[selected_cols]
    viscosity_rubber_cols = [
        'Weight_type1', 'Weight_type2', 'Weighted_PO_type', 'Weighted_DIRT_type',
        'Weighted_ASH_type', 'Weighted_VM_type', 'Weighted_PRI_type', 'Weighted_NITROGEN_type',
        'Weighted_Temperature during transportation_type[℃]',
        'Weighted_Humidity during transportation__type[%]', 'Weighted Sum', 'viscosity']
    # Replace 0 values with NaN
    for col in viscosity_rubber_cols:
        df_grouped[col] = df_grouped[col].replace(0, np.nan)
        df_grouped[col] = df_grouped[col].fillna(df_grouped[col].mean())
    # Extract batch number and date
    batch_number = df_grouped['batch-date'].str.extract(r'Batch_(\d+\.\d+)_')[0].astype(float)
    date = pd.to_datetime(df_grouped['batch-date'].str.extract(r'_(\d{4}-\d{2}-\d{2})$')[0])
    # Add extracted data as separate columns
    df_grouped['Batch Number'] = batch_number
    df_grouped['Date'] = date
    # Sort by 'Batch Number' and 'Date'
    df_grouped = df_grouped.sort_values(by=['Date', 'Batch Number'])
    df_grouped = round(df_grouped, 6)
    return df_grouped
def load_and_predict(df_grouped, index_no, model_path):
    if index_no == 1250:
        logger.info(f"Loading model for {index_no}")
        saved_model = ModelLoader({"type": "mlflow.sklearn", "path": model_path}).load_model()
        cols_x = [
            'temperature_ws_side_std', '_calendar_current_mean', 'Weighted_NITROGEN_type',
            'ram_pressure_mean', 'electric_energy_mean',
            'screw_operation_side_outlet_side_cooling_water_flow_rate_std',
            'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean',
            'Weighted_VM_type', 'seat_temperature_immediately_after_bof_mean',
            'Weighted_DIRT_type', 'surface_temperature_center_std', 'residence_time_max',
            'drilled_side_left_exit_side_cooling_water_temperature_mean', 'Weighted_PRI_type',
            'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean',
            'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std',
            'Weighted_ASH_type', 'Weighted_PO_type',
            'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean',
            'drilled_side_right_exit_side_cooling_water_flow_rate_std',
            'Weighted_Humidity during transportation__type[%]']
        cols_y = "viscosity"
        features = df_grouped[cols_x]
        labels = df_grouped[cols_y]
        y_pred_full = saved_model.predict(features)
        df_grouped['predicted_viscosity'] = y_pred_full
        final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']]
        final_df.to_csv(f'{index_no}_final_predicted_viscosity.csv')
    elif index_no == 3294:
        logger.info(f"Loading model for {index_no}")
        saved_model = ModelLoader({"type": "mlflow.sklearn", "path": model_path}).load_model()
        cols_x = [
            'Weighted_ASH_type', 'Weighted_NITROGEN_type', 'electric_energy_mean',
            'drilled_side_left_inlet_side_cooling_water_temperature_std',
            'seat_temperature_immediately_after_bof_mean',
            'mixer_rotor_left_outlet_side_cooling_water_flow_rate_mean', 'humidity_mean',
            'drilled_side_left_exit_side_cooling_water_flow_rate_mean',
            'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean',
            'calendar_bank_load_max',
            'drilled_side_right_inlet_side_cooling_water_flow_rate_mean', 'Weighted_PRI_type',
            'mixer_rotor_right_inlet_side_cooling_water_flow_rate_mean',
            'temperature_ws_side_std', 'dust_cv\nspeed_std',
            'mixer_rotor_right_inlet_side_coolant_temperature_mean', 'ram_position_std',
            'drilled_side_right_exit_side_cooling_water_temperature_std',
            'calender_roll_upper_side__outlet__side_cooling_water_temperature_std',
            'Weighted_Temperature during transportation_type[℃]']
        cols_y = "viscosity"
        features = df_grouped[cols_x]
        labels = df_grouped[cols_y]
        y_pred_full = saved_model.predict(features)
        df_grouped['predicted_viscosity'] = y_pred_full
        final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']]
        final_df.to_csv(f'{index_no}_final_predicted_viscosity.csv')
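# The two branches above are identical apart from the feature list; predictions for either
# index are written to f'{index_no}_final_predicted_viscosity.csv', e.g.
# '1250_final_predicted_viscosity.csv' or '3294_final_predicted_viscosity.csv'.
# Any other index_no falls through both branches and produces no output.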
def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosity_skip_rows):
    logger.info(f"Starting prediction for {index_no}")
    logger.info("Reading raw file data")
    df = read_raw_data(raw_path, raw_skip_rows)
    logger.info(f"Shape of raw df is {df.shape}")
    logger.info("Starting preprocessing material section")
    visc_df = pd.read_excel(viscosity_path, skiprows=viscosity_skip_rows)
    viscosity_df, raw_viscosity_df = preprocess_viscosity_section(visc_df, index_no)
    # viscosity_df.to_csv('viscosity-agg.csv')
    logger.info(f"The shape of the viscosity df is {viscosity_df.shape}")
    logger.info("Completed material section preprocessing")
    logger.info("Starting preprocessing sheet section")
    df_sheet_grouped = preprocess_sheet_section(df, index_no)
    logger.info(f"The shape of the Sheet df is {df_sheet_grouped.shape}")
    logger.info("Completed sheet section preprocessing")
    # df_sheet_grouped.to_csv('sheet-agg.csv')
    logger.info("Starting preprocessing mixer section")
    df_mixer_grouped = preprocess_mixer_section(df, index_no)
    logger.info(f"The shape of the Mixer df is {df_mixer_grouped.shape}")
    logger.info("Completed mixer section preprocessing")
    # df_mixer_grouped.to_csv('mixer-agg.csv')
    logger.info("Starting preprocessing extruder section")
    df_extruder_grouped = preprocess_extruder_section(df, index_no, raw_viscosity_df)
    logger.info(f"The shape of the Extruder df is {df_extruder_grouped.shape}")
    logger.info("Completed extruder section preprocessing")
    # df_extruder_grouped.to_csv('extruder-agg.csv')
    logger.info("Starting preprocessing bof section")
    df_bof_grouped = preprocess_bof_section(df, index_no, raw_viscosity_df)
    logger.info(f"The shape of the BOF df is {df_bof_grouped.shape}")
    logger.info("Completed bof section preprocessing")
    # df_bof_grouped.to_csv('bof-agg.csv')
    # bof_desc = df_bof_grouped.describe()
    # bof_desc.to_csv('bof-describe.csv')
    logger.info("Starting preprocessing pickup section")
    df_pickup_grouped = preprocess_pickup_section(df, index_no, raw_viscosity_df)
    logger.info(f"The shape of the Pickup df is {df_pickup_grouped.shape}")
    logger.info("Completed pickup section preprocessing")
    # df_pickup_grouped.to_csv('pickup-agg.csv')
    # df = pd.read_csv('pickup-agg.csv')
    # print(df.describe())
    df_grouped = merged_all_sections(df_sheet_grouped, df_mixer_grouped, df_extruder_grouped,
                                     df_bof_grouped, df_pickup_grouped, viscosity_df)
    # model_path is not a parameter here; it is the module-level global parsed in the
    # __main__ block below, so this function only resolves it when the script is run directly.
    load_and_predict(df_grouped, index_no, model_path)
    # model_trainer(df_grouped, index_no, model_path)
if __name__ == "__main__":
    try:
        raw_file_path = sys.argv[sys.argv.index('-r') + 1]
        viscosity_file_path = sys.argv[sys.argv.index('-v') + 1]
        raw_file_skip_rows = int(sys.argv[sys.argv.index('-sr') + 1])
        viscosity_file_skip_rows = int(sys.argv[sys.argv.index('-sv') + 1])
        index_number = int(sys.argv[sys.argv.index('-index') + 1])
        model_path = sys.argv[sys.argv.index('-m') + 1]
        start_prediction(raw_file_path, viscosity_file_path, index_number,
                         raw_file_skip_rows, viscosity_file_skip_rows)
    except Exception as e:
        logger.exception(f"Module failed because of error {e}")
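# Example invocation (file and model paths are placeholders; every flag is looked up
# positionally in sys.argv, so each one must be followed immediately by its value):
#   python main.py -r raw_data.xlsx -v viscosity_data.xlsx -sr 0 -sv 3 -index 1250 -m models/viscosity_1250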