Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
B
bsj-models-for-rubbers
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
CI / CD Analytics
Repository Analytics
Value Stream Analytics
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
dasharatha.vamshi
bsj-models-for-rubbers
Commits
648a1daf
Commit
648a1daf
authored
Dec 20, 2023
by
dasharatha.vamshi
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
tested the pipeline for Fy676A
parent
685bff63
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
103 additions
and
12 deletions
+103
-12
app.py
app.py
+78
-11
models/fy676a/MLmodel
models/fy676a/MLmodel
+11
-0
models/fy676a/conda.yaml
models/fy676a/conda.yaml
+10
-0
models/fy676a/model.pkl
models/fy676a/model.pkl
+0
-0
models/fy676a/requirements.txt
models/fy676a/requirements.txt
+3
-0
scripts/core/model_trainer.py
scripts/core/model_trainer.py
+1
-1
No files found.
app.py
View file @
648a1daf
import
warnings
import
warnings
from
loguru
import
logger
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
from
loguru
import
logger
from
scripts.constants.constants
import
RawConstants
from
scripts.constants.constants
import
RawConstants
from
scripts.core.model_loader
import
ModelLoader
from
scripts.section_utils.bof_section
import
preprocess_bof_section
from
scripts.section_utils.bof_section
import
preprocess_bof_section
from
scripts.section_utils.extruder_section
import
preprocess_extruder_section
from
scripts.section_utils.extruder_section
import
preprocess_extruder_section
from
scripts.section_utils.material_section
import
preprocess_viscosity_section
from
scripts.section_utils.material_section
import
preprocess_viscosity_section
...
@@ -28,6 +31,66 @@ def read_raw_data(raw_path, raw_skip_rows):
...
@@ -28,6 +31,66 @@ def read_raw_data(raw_path, raw_skip_rows):
return
df
return
df
def merged_all_sections(sheet_df, mixer_df, extruder_df, bof_df, pickup_df, viscosity_df):
    """Join the per-section frames into one batch-level table.

    Every argument is a DataFrame carrying a ``'batch-date'`` column. The
    sheet frame is the left-most table; each remaining frame is attached
    with a left join on ``'batch-date'``, so rows of ``sheet_df`` are never
    dropped.

    After merging:
      * in the viscosity/rubber columns listed below, zeros are treated as
        missing and imputed with the mean of the remaining values in that
        column;
      * ``'Batch Number'`` (float) and ``'Date'`` (datetime) are parsed out
        of the ``'batch-date'`` strings;
      * rows are ordered by ``'Date'`` and then ``'Batch Number'``;
      * numeric values are rounded to 6 decimal places.

    Returns:
        The merged, imputed, sorted and rounded DataFrame. The input frames
        are not modified.

    Raises:
        KeyError: if ``'batch-date'`` or any of the viscosity/rubber columns
            is absent from the merged result.
    """
    # Chain of left joins, always keyed on 'batch-date'.
    df_grouped = sheet_df
    for section_df in (mixer_df, extruder_df, bof_df, pickup_df, viscosity_df):
        df_grouped = pd.merge(df_grouped, section_df, on='batch-date', how='left')

    viscosity_rubber_cols = [
        'Weight_type1',
        'Weight_type2',
        'Weighted_PO_type',
        'Weighted_DIRT_type',
        'Weighted_ASH_type',
        'Weighted_VM_type',
        'Weighted_PRI_type',
        'Weighted_NITROGEN_type',
        'Weighted_Temperature during transportation_type[℃]',
        'Weighted_Humidity during transportation__type[%]',
        'Weighted Sum',
        'viscosity',
    ]

    # Replace 0 values with NaN, then fill every NaN with the column mean
    # (the mean is computed after the zeros have been blanked out).
    for col in viscosity_rubber_cols:
        df_grouped[col] = df_grouped[col].replace(0, np.nan)
        df_grouped[col] = df_grouped[col].fillna(df_grouped[col].mean())

    # Extract batch number and date from the 'batch-date' key.
    batch_key = df_grouped['batch-date'].str
    batch_number = batch_key.extract(r'Batch_(\d+\.\d+)_')[0].astype(float)
    date = pd.to_datetime(batch_key.extract(r'_(\d{4}-\d{2}-\d{2})$')[0])

    # Add extracted data as separate columns.
    df_grouped['Batch Number'] = batch_number
    df_grouped['Date'] = date

    # Sort chronologically first, then by batch number within a date.
    df_grouped = df_grouped.sort_values(by=['Date', 'Batch Number'])

    return df_grouped.round(6)
def load_and_predict(df_grouped, index_no):
    """Run the saved viscosity model for ``index_no`` on the merged batch table.

    For ``index_no == 1250`` the model stored under ``models/fy676a`` is
    loaded through ``ModelLoader``, applied to the feature columns listed
    below, and the predictions are:
      * stored on ``df_grouped`` as a new ``'predicted_viscosity'`` column
        (the caller's frame is modified in place), and
      * written, together with ``'Date'`` and ``'Batch Number'``, to
        ``final_predicted_viscosity.csv`` in the current working directory.

    For ``index_no == 3294`` only a log line is emitted; no model is loaded
    in this branch. Any other value logs a warning and does nothing.

    Args:
        df_grouped: merged batch-level DataFrame; must contain the feature
            columns plus ``'Date'`` and ``'Batch Number'``.
        index_no: selects which model to use.

    Returns:
        None.

    Raises:
        KeyError: if a required column is missing from ``df_grouped``.
    """
    if index_no == 1250:
        logger.info(f"Loading model for {index_no}")
        saved_model = ModelLoader({
            "type": "mlflow.sklearn",
            "path": "models/fy676a",
        }).load_model()

        # NOTE(review): presumably this order must match the order used when
        # the model was trained - confirm against the training script.
        cols_x = [
            'temperature_ws_side_std',
            'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean',
            '_calendar_current_mean',
            'electric_energy_mean',
            'seat_temperature_immediately_after_bof_mean',
            'Weighted_NITROGEN_type',
            'ram_pressure_mean',
            'surface_temperature_center_std',
            'drilled_side_left_exit_side_cooling_water_temperature_mean',
            'Weighted_VM_type',
            'screw_operation_side_outlet_side_cooling_water_flow_rate_std',
            'Weighted_DIRT_type',
            'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std',
            'residence_time_max',
            'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean',
            'Weighted_ASH_type',
            'Weighted_PO_type',
            'drilled_side_right_exit_side_cooling_water_flow_rate_std',
        ]

        # Only the features are needed for inference; the measured
        # 'viscosity' label is deliberately not read here.
        features = df_grouped[cols_x]
        df_grouped['predicted_viscosity'] = saved_model.predict(features)

        final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']]
        final_df.to_csv('final_predicted_viscosity.csv')
    elif index_no == 3294:
        # NOTE(review): this branch only logs; no model is loaded and no
        # prediction is produced yet.
        logger.info(f"Loading model for {index_no}")
    else:
        # Previously an unknown index fell through without any trace.
        logger.warning(f"No prediction model configured for {index_no}")
def
start_prediction
(
raw_path
,
viscosity_path
,
index_no
,
raw_skip_rows
,
viscosity_skip_rows
):
def
start_prediction
(
raw_path
,
viscosity_path
,
index_no
,
raw_skip_rows
,
viscosity_skip_rows
):
logger
.
info
(
"Reading raw file data"
)
logger
.
info
(
"Reading raw file data"
)
df
=
read_raw_data
(
raw_path
,
raw_skip_rows
)
df
=
read_raw_data
(
raw_path
,
raw_skip_rows
)
...
@@ -35,7 +98,7 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
...
@@ -35,7 +98,7 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
logger
.
info
(
"Starting preprocessing material section"
)
logger
.
info
(
"Starting preprocessing material section"
)
viscosity_df
,
raw_viscosity_df
=
preprocess_viscosity_section
(
viscosity_path
,
index_no
,
viscosity_skip_rows
)
viscosity_df
,
raw_viscosity_df
=
preprocess_viscosity_section
(
viscosity_path
,
index_no
,
viscosity_skip_rows
)
viscosity_df
.
to_csv
(
'viscosity-agg.csv'
)
#
viscosity_df.to_csv('viscosity-agg.csv')
logger
.
info
(
f
"The shape of the viscosity df is {viscosity_df.shape}"
)
logger
.
info
(
f
"The shape of the viscosity df is {viscosity_df.shape}"
)
logger
.
info
(
"Completed material section preprocessing"
)
logger
.
info
(
"Completed material section preprocessing"
)
...
@@ -43,35 +106,39 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
...
@@ -43,35 +106,39 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
df_sheet_grouped
=
preprocess_sheet_section
(
df
,
index_no
)
df_sheet_grouped
=
preprocess_sheet_section
(
df
,
index_no
)
logger
.
info
(
f
"The shape of the Sheet df is {df_sheet_grouped.shape}"
)
logger
.
info
(
f
"The shape of the Sheet df is {df_sheet_grouped.shape}"
)
logger
.
info
(
"Completed sheet section preprocessing"
)
logger
.
info
(
"Completed sheet section preprocessing"
)
df_sheet_grouped
.
to_csv
(
'sheet-agg.csv'
)
#
df_sheet_grouped.to_csv('sheet-agg.csv')
logger
.
info
(
"Starting preprocessing mixer section"
)
logger
.
info
(
"Starting preprocessing mixer section"
)
df_mixer_grouped
=
preprocess_mixer_section
(
df
,
index_no
)
df_mixer_grouped
=
preprocess_mixer_section
(
df
,
index_no
)
logger
.
info
(
f
"The shape of the Mixer df is {df_mixer_grouped.shape}"
)
logger
.
info
(
f
"The shape of the Mixer df is {df_mixer_grouped.shape}"
)
logger
.
info
(
"Completed mixer section preprocessing"
)
logger
.
info
(
"Completed mixer section preprocessing"
)
df_mixer_grouped
.
to_csv
(
'mixer-agg.csv'
)
#
df_mixer_grouped.to_csv('mixer-agg.csv')
logger
.
info
(
"Starting preprocessing extruder section"
)
logger
.
info
(
"Starting preprocessing extruder section"
)
df_extruder_grouped
=
preprocess_extruder_section
(
df
,
index_no
,
raw_viscosity_df
)
df_extruder_grouped
=
preprocess_extruder_section
(
df
,
index_no
,
raw_viscosity_df
)
logger
.
info
(
f
"The shape of the Extruder df is {df_extruder_grouped.shape}"
)
logger
.
info
(
f
"The shape of the Extruder df is {df_extruder_grouped.shape}"
)
logger
.
info
(
"Completed extruder section preprocessing"
)
logger
.
info
(
"Completed extruder section preprocessing"
)
df_extruder_grouped
.
to_csv
(
'extruder-agg.csv'
)
#
df_extruder_grouped.to_csv('extruder-agg.csv')
logger
.
info
(
"Starting preprocessing bof section"
)
logger
.
info
(
"Starting preprocessing bof section"
)
df_bof_grouped
=
preprocess_bof_section
(
df
,
index_no
,
raw_viscosity_df
)
df_bof_grouped
=
preprocess_bof_section
(
df
,
index_no
,
raw_viscosity_df
)
logger
.
info
(
f
"The shape of the BOF df is {df_bof_grouped.shape}"
)
logger
.
info
(
f
"The shape of the BOF df is {df_bof_grouped.shape}"
)
logger
.
info
(
"Completed bof section preprocessing"
)
logger
.
info
(
"Completed bof section preprocessing"
)
df_bof_grouped
.
to_csv
(
'bof-agg.csv'
)
#
df_bof_grouped.to_csv('bof-agg.csv')
bof_desc
=
df_bof_grouped
.
describe
()
#
bof_desc = df_bof_grouped.describe()
bof_desc
.
to_csv
(
'bof-describe.csv'
)
#
bof_desc.to_csv('bof-describe.csv')
logger
.
info
(
"Starting preprocessing pickup section"
)
logger
.
info
(
"Starting preprocessing pickup section"
)
df_pickup_grouped
=
preprocess_pickup_section
(
df
,
index_no
,
raw_viscosity_df
)
df_pickup_grouped
=
preprocess_pickup_section
(
df
,
index_no
,
raw_viscosity_df
)
logger
.
info
(
f
"The shape of the Extruder df is {df_pickup_grouped.shape}"
)
logger
.
info
(
f
"The shape of the Extruder df is {df_pickup_grouped.shape}"
)
logger
.
info
(
"Completed pickup section preprocessing"
)
logger
.
info
(
"Completed pickup section preprocessing"
)
df_pickup_grouped
.
to_csv
(
'pickup-agg.csv'
)
# df_pickup_grouped.to_csv('pickup-agg.csv')
df
=
pd
.
read_csv
(
'pickup-agg.csv'
)
# df = pd.read_csv('pickup-agg.csv')
print
(
df
.
describe
())
# print(df.describe())
df_grouped
=
merged_all_sections
(
df_sheet_grouped
,
df_mixer_grouped
,
df_extruder_grouped
,
df_bof_grouped
,
df_pickup_grouped
,
viscosity_df
)
load_and_predict
(
df_grouped
,
index_no
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
models/fy676a/MLmodel
0 → 100644
View file @
648a1daf
flavors:
python_function:
env: conda.yaml
loader_module: mlflow.sklearn
model_path: model.pkl
python_version: 3.10.13
sklearn:
pickled_model: model.pkl
serialization_format: cloudpickle
sklearn_version: 1.2.2
utc_time_created: '2023-12-20 06:24:57.321465'
models/fy676a/conda.yaml
0 → 100644
View file @
648a1daf
channels
:
-
conda-forge
dependencies
:
-
python=3.10.13
-
pip
-
pip
:
-
mlflow
-
cloudpickle==3.0.0
-
scikit-learn==1.2.2
name
:
mlflow-env
models/fy676a/model.pkl
0 → 100644
View file @
648a1daf
File added
models/fy676a/requirements.txt
0 → 100644
View file @
648a1daf
mlflow
cloudpickle==3.0.0
scikit-learn==1.2.2
\ No newline at end of file
scripts/core/model_trainer.py
View file @
648a1daf
...
@@ -33,7 +33,7 @@ def model_trainer():
...
@@ -33,7 +33,7 @@ def model_trainer():
merged_df
=
pd
.
merge
(
merged_df
,
bof_df
,
on
=
'batch-date'
,
how
=
'left'
)
merged_df
=
pd
.
merge
(
merged_df
,
bof_df
,
on
=
'batch-date'
,
how
=
'left'
)
merged_df
=
pd
.
merge
(
merged_df
,
pickup_df
,
on
=
'batch-date'
,
how
=
'left'
)
merged_df
=
pd
.
merge
(
merged_df
,
pickup_df
,
on
=
'batch-date'
,
how
=
'left'
)
df_grouped
=
pd
.
merge
(
merged_df
,
viscosity_df
,
on
=
'batch-date'
,
how
=
'left'
)
df_grouped
=
pd
.
merge
(
merged_df
,
viscosity_df
,
on
=
'batch-date'
,
how
=
'left'
)
print
(
df_grouped
.
columns
)
#
print(df_grouped.columns)
selected_cols
=
df_grouped
.
columns
selected_cols
=
df_grouped
.
columns
df_grouped
=
df_grouped
[
selected_cols
]
df_grouped
=
df_grouped
[
selected_cols
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment