Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
B
bsj-models-for-rubbers
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
CI / CD Analytics
Repository Analytics
Value Stream Analytics
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
dasharatha.vamshi
bsj-models-for-rubbers
Commits
648a1daf
Commit
648a1daf
authored
Dec 20, 2023
by
dasharatha.vamshi
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
tested the pipeline for Fy676A
parent
685bff63
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
103 additions
and
12 deletions
+103
-12
app.py
app.py
+78
-11
models/fy676a/MLmodel
models/fy676a/MLmodel
+11
-0
models/fy676a/conda.yaml
models/fy676a/conda.yaml
+10
-0
models/fy676a/model.pkl
models/fy676a/model.pkl
+0
-0
models/fy676a/requirements.txt
models/fy676a/requirements.txt
+3
-0
scripts/core/model_trainer.py
scripts/core/model_trainer.py
+1
-1
No files found.
app.py
View file @
648a1daf
import
warnings
from
loguru
import
logger
import
numpy
as
np
import
pandas
as
pd
from
loguru
import
logger
from
scripts.constants.constants
import
RawConstants
from
scripts.core.model_loader
import
ModelLoader
from
scripts.section_utils.bof_section
import
preprocess_bof_section
from
scripts.section_utils.extruder_section
import
preprocess_extruder_section
from
scripts.section_utils.material_section
import
preprocess_viscosity_section
...
...
@@ -28,6 +31,66 @@ def read_raw_data(raw_path, raw_skip_rows):
return
df
def merged_all_sections(sheet_df, mixer_df, extruder_df, bof_df, pickup_df, viscosity_df):
    """Join the per-section frames on 'batch-date', impute, and sort the result.

    Every frame must carry a 'batch-date' column. The sheet frame is the left
    side of each join, so the result has one row per sheet row (more if a
    right-hand frame repeats a 'batch-date' value).

    Args:
        sheet_df: Left-most frame; its rows define the result rows.
        mixer_df, extruder_df, bof_df, pickup_df, viscosity_df: Frames that
            are left-joined onto the running result, in this order.

    Returns:
        A new DataFrame with all joined columns plus 'Batch Number' (float)
        and 'Date' (datetime), sorted by 'Date' then 'Batch Number', with
        numeric columns rounded to 6 decimals. The inputs are not modified.

    Raises:
        KeyError: If 'batch-date' or any of the imputed columns is missing.
    """
    # Chain of left joins; each pd.merge returns a fresh frame, so the
    # column assignments below never write into the caller's inputs.
    df_grouped = sheet_df
    for section_df in (mixer_df, extruder_df, bof_df, pickup_df, viscosity_df):
        df_grouped = pd.merge(df_grouped, section_df, on='batch-date', how='left')

    viscosity_rubber_cols = [
        'Weight_type1',
        'Weight_type2',
        'Weighted_PO_type',
        'Weighted_DIRT_type',
        'Weighted_ASH_type',
        'Weighted_VM_type',
        'Weighted_PRI_type',
        'Weighted_NITROGEN_type',
        'Weighted_Temperature during transportation_type[℃]',
        'Weighted_Humidity during transportation__type[%]',
        'Weighted Sum',
        'viscosity',
    ]

    # Treat 0 as "missing" and fill every gap (0 or NaN) with the mean of the
    # remaining values. A column with no usable value stays NaN.
    for col in viscosity_rubber_cols:
        without_zeros = df_grouped[col].replace(0, np.nan)
        df_grouped[col] = without_zeros.fillna(without_zeros.mean())

    # 'batch-date' is parsed as 'Batch_<number with decimals>_<YYYY-MM-DD>';
    # rows whose key does not match get NaN / NaT.
    batch_key = df_grouped['batch-date']
    df_grouped['Batch Number'] = batch_key.str.extract(r'Batch_(\d+\.\d+)_')[0].astype(float)
    df_grouped['Date'] = pd.to_datetime(batch_key.str.extract(r'_(\d{4}-\d{2}-\d{2})$')[0])

    # Chronological order first, batch number as the tie-breaker within a day.
    df_grouped = df_grouped.sort_values(by=['Date', 'Batch Number'])
    return df_grouped.round(6)
def load_and_predict(df_grouped, index_no):
    """Predict viscosity for the merged frame with the model for `index_no`.

    For index 1250 the model under 'models/fy676a' is obtained through
    ModelLoader, its predictions are stored in a new 'predicted_viscosity'
    column on `df_grouped` (the caller's frame is modified in place), and
    the 'Date', 'Batch Number' and 'predicted_viscosity' columns are written
    to 'final_predicted_viscosity.csv' in the current working directory.

    For index 3294 only a log line is emitted; no model is loaded and no
    prediction is made. Any other index does nothing.

    Args:
        df_grouped: Frame holding the feature columns listed below together
            with 'Date' and 'Batch Number'.
        index_no: Index selecting which model to use.

    Returns:
        None. Results are delivered through the CSV file and the added column.

    Raises:
        KeyError: If a required column is missing from `df_grouped`.
    """
    if index_no == 1250:
        logger.info(f"Loading model for {index_no}")
        saved_model = ModelLoader({
            "type": "mlflow.sklearn",
            "path": "models/fy676a",
        }).load_model()

        # NOTE(review): presumably this list must match the columns, and their
        # order, used when the model was trained; confirm against the trainer.
        cols_x = [
            'temperature_ws_side_std',
            'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean',
            '_calendar_current_mean',
            'electric_energy_mean',
            'seat_temperature_immediately_after_bof_mean',
            'Weighted_NITROGEN_type',
            'ram_pressure_mean',
            'surface_temperature_center_std',
            'drilled_side_left_exit_side_cooling_water_temperature_mean',
            'Weighted_VM_type',
            'screw_operation_side_outlet_side_cooling_water_flow_rate_std',
            'Weighted_DIRT_type',
            'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std',
            'residence_time_max',
            'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean',
            'Weighted_ASH_type',
            'Weighted_PO_type',
            'drilled_side_right_exit_side_cooling_water_flow_rate_std',
        ]

        # Only the features are needed for inference; the measured
        # 'viscosity' column is intentionally not read here.
        df_grouped['predicted_viscosity'] = saved_model.predict(df_grouped[cols_x])

        final_df = df_grouped[['Date', 'Batch Number', 'predicted_viscosity']]
        final_df.to_csv('final_predicted_viscosity.csv')
    elif index_no == 3294:
        # Placeholder branch: the message is logged but no model is loaded yet.
        logger.info(f"Loading model for {index_no}")
def
start_prediction
(
raw_path
,
viscosity_path
,
index_no
,
raw_skip_rows
,
viscosity_skip_rows
):
logger
.
info
(
"Reading raw file data"
)
df
=
read_raw_data
(
raw_path
,
raw_skip_rows
)
...
...
@@ -35,7 +98,7 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
logger
.
info
(
"Starting preprocessing material section"
)
viscosity_df
,
raw_viscosity_df
=
preprocess_viscosity_section
(
viscosity_path
,
index_no
,
viscosity_skip_rows
)
viscosity_df
.
to_csv
(
'viscosity-agg.csv'
)
#
viscosity_df.to_csv('viscosity-agg.csv')
logger
.
info
(
f
"The shape of the viscosity df is {viscosity_df.shape}"
)
logger
.
info
(
"Completed material section preprocessing"
)
...
...
@@ -43,35 +106,39 @@ def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosit
df_sheet_grouped
=
preprocess_sheet_section
(
df
,
index_no
)
logger
.
info
(
f
"The shape of the Sheet df is {df_sheet_grouped.shape}"
)
logger
.
info
(
"Completed sheet section preprocessing"
)
df_sheet_grouped
.
to_csv
(
'sheet-agg.csv'
)
#
df_sheet_grouped.to_csv('sheet-agg.csv')
logger
.
info
(
"Starting preprocessing mixer section"
)
df_mixer_grouped
=
preprocess_mixer_section
(
df
,
index_no
)
logger
.
info
(
f
"The shape of the Mixer df is {df_mixer_grouped.shape}"
)
logger
.
info
(
"Completed mixer section preprocessing"
)
df_mixer_grouped
.
to_csv
(
'mixer-agg.csv'
)
#
df_mixer_grouped.to_csv('mixer-agg.csv')
logger
.
info
(
"Starting preprocessing extruder section"
)
df_extruder_grouped
=
preprocess_extruder_section
(
df
,
index_no
,
raw_viscosity_df
)
logger
.
info
(
f
"The shape of the Extruder df is {df_extruder_grouped.shape}"
)
logger
.
info
(
"Completed extruder section preprocessing"
)
df_extruder_grouped
.
to_csv
(
'extruder-agg.csv'
)
#
df_extruder_grouped.to_csv('extruder-agg.csv')
logger
.
info
(
"Starting preprocessing bof section"
)
df_bof_grouped
=
preprocess_bof_section
(
df
,
index_no
,
raw_viscosity_df
)
logger
.
info
(
f
"The shape of the BOF df is {df_bof_grouped.shape}"
)
logger
.
info
(
"Completed bof section preprocessing"
)
df_bof_grouped
.
to_csv
(
'bof-agg.csv'
)
bof_desc
=
df_bof_grouped
.
describe
()
bof_desc
.
to_csv
(
'bof-describe.csv'
)
#
df_bof_grouped.to_csv('bof-agg.csv')
#
bof_desc = df_bof_grouped.describe()
#
bof_desc.to_csv('bof-describe.csv')
logger
.
info
(
"Starting preprocessing pickup section"
)
df_pickup_grouped
=
preprocess_pickup_section
(
df
,
index_no
,
raw_viscosity_df
)
logger
.
info
(
f
"The shape of the Extruder df is {df_pickup_grouped.shape}"
)
logger
.
info
(
"Completed pickup section preprocessing"
)
df_pickup_grouped
.
to_csv
(
'pickup-agg.csv'
)
df
=
pd
.
read_csv
(
'pickup-agg.csv'
)
print
(
df
.
describe
())
# df_pickup_grouped.to_csv('pickup-agg.csv')
# df = pd.read_csv('pickup-agg.csv')
# print(df.describe())
df_grouped
=
merged_all_sections
(
df_sheet_grouped
,
df_mixer_grouped
,
df_extruder_grouped
,
df_bof_grouped
,
df_pickup_grouped
,
viscosity_df
)
load_and_predict
(
df_grouped
,
index_no
)
if
__name__
==
"__main__"
:
...
...
models/fy676a/MLmodel
0 → 100644
View file @
648a1daf
flavors:
python_function:
env: conda.yaml
loader_module: mlflow.sklearn
model_path: model.pkl
python_version: 3.10.13
sklearn:
pickled_model: model.pkl
serialization_format: cloudpickle
sklearn_version: 1.2.2
utc_time_created: '2023-12-20 06:24:57.321465'
models/fy676a/conda.yaml
0 → 100644
View file @
648a1daf
channels
:
-
conda-forge
dependencies
:
-
python=3.10.13
-
pip
-
pip
:
-
mlflow
-
cloudpickle==3.0.0
-
scikit-learn==1.2.2
name
:
mlflow-env
models/fy676a/model.pkl
0 → 100644
View file @
648a1daf
File added
models/fy676a/requirements.txt
0 → 100644
View file @
648a1daf
mlflow
cloudpickle==3.0.0
scikit-learn==1.2.2
\ No newline at end of file
scripts/core/model_trainer.py
View file @
648a1daf
...
...
@@ -33,7 +33,7 @@ def model_trainer():
merged_df
=
pd
.
merge
(
merged_df
,
bof_df
,
on
=
'batch-date'
,
how
=
'left'
)
merged_df
=
pd
.
merge
(
merged_df
,
pickup_df
,
on
=
'batch-date'
,
how
=
'left'
)
df_grouped
=
pd
.
merge
(
merged_df
,
viscosity_df
,
on
=
'batch-date'
,
how
=
'left'
)
print
(
df_grouped
.
columns
)
#
print(df_grouped.columns)
selected_cols
=
df_grouped
.
columns
df_grouped
=
df_grouped
[
selected_cols
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment