Commit a752455a authored by dasharatha.vamshi

added code for Fy676A

parent 402087d1
import warnings

from loguru import logger
import pandas as pd

from scripts.constants.constants import RawConstants
from scripts.section_utils.bof_section import preprocess_bof_section
from scripts.section_utils.extruder_section import preprocess_extruder_section
from scripts.section_utils.material_section import preprocess_viscosity_section
from scripts.section_utils.mixer_section import preprocess_mixer_section
from scripts.section_utils.pickup_section import preprocess_pickup_section
from scripts.section_utils.sheet_supply_section import preprocess_sheet_section

warnings.filterwarnings("ignore")


def read_raw_data(raw_path, raw_skip_rows):
    df = pd.read_excel(raw_path, skiprows=raw_skip_rows)
    if len(df.columns) == len(RawConstants.columns):
        logger.info(f"Total cols are {len(RawConstants.columns)} and match the df column count")
        df.columns = RawConstants.columns
    else:
        # Pad any trailing columns missing from the raw file with NaN before renaming
        missed_cols = RawConstants.columns[len(df.columns):]
        logger.info(f"Missed cols are {missed_cols}")
        for col in missed_cols:
            df[col] = float('nan')
        df.columns = RawConstants.columns
    logger.info(f"Shape of df is {df.shape}")
    return df


def start_prediction(raw_path, viscosity_path, index_no, raw_skip_rows, viscosity_skip_rows):
    logger.info("Reading raw file data")
    df = read_raw_data(raw_path, raw_skip_rows)
    logger.info(f"Shape of raw df is {df.shape}")

    logger.info("Starting preprocessing material section")
    viscosity_df, raw_viscosity_df = preprocess_viscosity_section(viscosity_path, index_no, viscosity_skip_rows)
    viscosity_df.to_csv('viscosity-agg.csv')
    logger.info(f"The shape of the Viscosity df is {viscosity_df.shape}")
    logger.info("Completed material section preprocessing")

    logger.info("Starting preprocessing sheet section")
    df_sheet_grouped = preprocess_sheet_section(df, index_no)
    logger.info(f"The shape of the Sheet df is {df_sheet_grouped.shape}")
    logger.info("Completed sheet section preprocessing")
    df_sheet_grouped.to_csv('sheet-agg.csv')

    logger.info("Starting preprocessing mixer section")
    df_mixer_grouped = preprocess_mixer_section(df, index_no)
    logger.info(f"The shape of the Mixer df is {df_mixer_grouped.shape}")
    logger.info("Completed mixer section preprocessing")
    df_mixer_grouped.to_csv('mixer-agg.csv')

    logger.info("Starting preprocessing extruder section")
    df_extruder_grouped = preprocess_extruder_section(df, index_no, raw_viscosity_df)
    logger.info(f"The shape of the Extruder df is {df_extruder_grouped.shape}")
    logger.info("Completed extruder section preprocessing")
    df_extruder_grouped.to_csv('extruder-agg.csv')

    logger.info("Starting preprocessing bof section")
    df_bof_grouped = preprocess_bof_section(df, index_no, raw_viscosity_df)
    logger.info(f"The shape of the BOF df is {df_bof_grouped.shape}")
    logger.info("Completed bof section preprocessing")
    df_bof_grouped.to_csv('bof-agg.csv')
    bof_desc = df_bof_grouped.describe()
    bof_desc.to_csv('bof-describe.csv')

    logger.info("Starting preprocessing pickup section")
    df_pickup_grouped = preprocess_pickup_section(df, index_no, raw_viscosity_df)
    logger.info(f"The shape of the Pickup df is {df_pickup_grouped.shape}")
    logger.info("Completed pickup section preprocessing")
    df_pickup_grouped.to_csv('pickup-agg.csv')
    df = pd.read_csv('pickup-agg.csv')
    print(df.describe())


if __name__ == "__main__":
    try:
        logger.info("Starting the model")
        index_number = 1250
        raw_file_path = 'FY676-A-WO_Visc.xlsx'
        raw_file_skip_rows = 0
        viscosity_file_path = 'viscosity_natural_rubber_data.xlsx'
        viscosity_file_skip_rows = 3
        start_prediction(raw_file_path, viscosity_file_path, index_number, raw_file_skip_rows,
                         viscosity_file_skip_rows)
    except Exception as e:
        logger.exception(f"Module failed because of error {e}")
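A quick sketch, for reference, of the column-padding branch in read_raw_data, with a hypothetical three-column schema standing in for RawConstants.columns:

import pandas as pd

expected_cols = ['a', 'b', 'c']  # hypothetical stand-in for RawConstants.columns
df = pd.DataFrame({'x': [1, 2]})  # raw file arrived with fewer columns than expected
missed_cols = expected_cols[len(df.columns):]  # ['b', 'c']
for col in missed_cols:
    df[col] = float('nan')  # pad the missing trailing columns with NaN
df.columns = expected_cols  # now safe to apply the full schema
print(df)  # column a holds 1, 2; b and c are NaN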
loguru==0.5.3
numpy==1.22.3
openpyxl==3.1.2
pandas==1.5.3
mlflow==1.20.2
protobuf==3.20.1
scikit-learn==1.2.2

loguru==0.5.3
mlflow==1.20.2
numpy==1.22.3
openpyxl==3.1.2
pandas==1.5.3
pytz==2023.3.post1
requests==2.31.0
scikit-base==0.5.2
scikit-learn==1.2.2
scikit-plot==0.3.7
scipy==1.10.1
six==1.16.0
statsmodels==0.14.1
xlrd==2.0.1
This diff is collapsed.
import warnings

from loguru import logger
import mlflow

warnings.filterwarnings("ignore")


class ModelLoader(object):
    def __init__(self, model_info):
        self.model_info = model_info

    def load_model(self):
        logger.info("Loading the Model")
        if self.model_info["type"] == "mlflow.sklearn":
            return self._load_mlflow_sklearn_model()
        else:
            # Unknown flavors fall through and return None
            logger.warning("Unsupported Model Type")

    def _load_mlflow_sklearn_model(self):
        try:
            _model = mlflow.sklearn.load_model(self.model_info["path"])
            logger.debug("Model loaded successfully!")
            return _model
        except Exception as e:
            logger.error(f"Error while loading mlflow.sklearn model: {e}")
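A minimal usage sketch; the dict keys mirror how model_trainer below constructs the loader, and models/fy676a is the path used there (features here is a hypothetical frame with the training columns):

loader = ModelLoader({"type": "mlflow.sklearn", "path": "models/fy676a"})
model = loader.load_model()  # returns None if the type is unsupported or loading fails
if model is not None:
    preds = model.predict(features)  # features: any DataFrame with the training feature columns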
import warnings

import mlflow
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import train_test_split

from scripts.core.model_loader import ModelLoader
from scripts.section_utils.mlflow_util import ModelLoaderSaver

warnings.filterwarnings("ignore")


def evaluate(y_test, y_pred):
    """Round predictions to 2 decimals and report a standard set of regression metrics."""
    predictions = [round(value, 2) for value in y_pred]
    mse = metrics.mean_squared_error(y_test, predictions)
    metric_dictionary = {
        "Mean Absolute Error (MAE)": metrics.mean_absolute_error(y_test, predictions),
        "Mean Squared Error (MSE)": mse,
        "Root Mean Squared Error (RMSE)": np.sqrt(mse),
        "Mean Absolute Percentage Error (MAPE)": metrics.mean_absolute_percentage_error(y_test, predictions),
        "Explained Variance Score": metrics.explained_variance_score(y_test, predictions),
        "Max Error": metrics.max_error(y_test, predictions),
        "Median Absolute Error": metrics.median_absolute_error(y_test, predictions),
        "R2 Score": metrics.r2_score(y_test, predictions),
        "Mean Gamma Deviance": metrics.mean_gamma_deviance(y_test, predictions),
        "Mean Poisson Deviance": metrics.mean_poisson_deviance(y_test, predictions),
    }
    print(metric_dictionary)
    return metric_dictionary


def model_trainer():
    sheet_df = pd.read_csv(r'D:\kalypso\bsj-model-inference\sheet-agg.csv')
    mixer_df = pd.read_csv(r'D:\kalypso\bsj-model-inference\mixer-agg.csv')
    extruder_df = pd.read_csv(r'D:\kalypso\bsj-model-inference\extruder-agg.csv')
    bof_df = pd.read_csv(r'D:\kalypso\bsj-model-inference\bof-agg.csv')
    pickup_df = pd.read_csv(r'D:\kalypso\bsj-model-inference\pickup-agg.csv')
    viscosity_df = pd.read_csv(r'D:\kalypso\bsj-model-inference\viscosity-agg.csv')
    # viscosity_df = viscosity_df[['batch-date', 'viscosity']]

    # Left-join every section onto the sheet data on the shared 'batch-date' key
    merged_df = pd.merge(sheet_df, mixer_df, on='batch-date', how='left')
    merged_df = pd.merge(merged_df, extruder_df, on='batch-date', how='left')
    merged_df = pd.merge(merged_df, bof_df, on='batch-date', how='left')
    merged_df = pd.merge(merged_df, pickup_df, on='batch-date', how='left')
    df_grouped = pd.merge(merged_df, viscosity_df, on='batch-date', how='left')
    print(df_grouped.columns)
    selected_cols = df_grouped.columns
    df_grouped = df_grouped[selected_cols]

    # Extract batch number and date from the 'batch-date' key
    batch_number = df_grouped['batch-date'].str.extract(r'Batch_(\d+\.\d+)_')[0].astype(float)
    date = pd.to_datetime(df_grouped['batch-date'].str.extract(r'_(\d{4}-\d{2}-\d{2})$')[0])
    # Add extracted data as separate columns and sort chronologically
    df_grouped['Batch Number'] = batch_number
    df_grouped['Date'] = date
    df_grouped = df_grouped.sort_values(by=['Date', 'Batch Number'])

    # NOTE: the merged frame is replaced here by a pre-aggregated test file
    df_grouped = pd.read_csv(r"D:\kalypso\bsj-model-inference\test-agg-data.csv")
    df_grouped = round(df_grouped, 6)
    df_grouped.to_csv('grouped.csv')

    cols_x = ['temperature_ws_side_std', 'Weighted_VM_type', 'electric_energy_mean',
              'calender_roll_upper_side_inlet_side_cooling_water_temperature_mean',
              '_calendar_current_mean', 'Weighted_NITROGEN_type', 'ram_pressure_mean',
              'seat_temperature_immediately_after_bof_mean', 'surface_temperature_center_std',
              'screw_operation_side_outlet_side_cooling_water_flow_rate_std', 'Weighted_DIRT_type',
              'drilled_side_left_exit_side_cooling_water_temperature_mean',
              'sheet_temperature_immediately_after_calendering_mean',
              'calender_roll_lower_side_inlet_side_cooling_water_temperature_mean', 'temperature_mean',
              'calender_roll_lower_side_inlet_side_cooling_water_flow_rate_mean',
              'screw_opposite_operation_side_outlet_side_cooling_water_temperature_std',
              'temperature_ds_side_mean', 'Weighted_PRI_type', 'residence_time_max']
    cols_y = "viscosity"
    req_cols = cols_x + ['viscosity']
    # df_grouped = round(df_grouped, 2)
    features = df_grouped[cols_x]
    # print(features.info())
    # print(features.describe().to_csv('feature.csv'))
    # print(df_grouped[req_cols].isnull().sum())
    # df_grouped = pd.read_csv(r'D:\kalypso\bsj-model-inference\test-gr.csv')
    labels = df_grouped[cols_y]
    # print(df_grouped[cols_y].describe())
    df_grouped[req_cols].to_csv('final.csv')

    # Split the data into training and testing sets
    x_train, x_test, y_train, y_test = train_test_split(features, labels, random_state=42, test_size=0.25)
    print(f'x_train shape - {x_train.shape}')
    print(f'x_test shape - {x_test.shape}')
    print(f'y_train shape - {y_train.shape}')
    print(f'y_test shape - {y_test.shape}')

    params = {'bootstrap': False,
              'ccp_alpha': 0.0,
              'criterion': 'squared_error',
              'max_depth': None,
              'max_features': 1.0,
              'max_leaf_nodes': None,
              'max_samples': None,
              'min_impurity_decrease': 0.0,
              'min_samples_leaf': 1,
              'min_samples_split': 2,
              'min_weight_fraction_leaf': 0.0,
              'n_estimators': 100,
              'n_jobs': -1,
              'oob_score': False,
              'random_state': 123,
              'verbose': 0,
              'warm_start': False}
    model = ExtraTreesRegressor(**params)
    model.fit(x_train, y_train)
    metric_dictionary = evaluate(y_test, model.predict(x_test))

    experiment_name = "BSJ-Models"
    parent_run_name = model_save_name = model_type = "fy676a"
    list_of_models = ['rf', 'xgboost', 'lr']
    obj = ModelLoaderSaver(None, metric_dictionary, params, experiment_name, parent_run_name,
                           model_save_name, model_type)
    new_model = obj.get_latest_model()
    evaluate(y_test, new_model.predict(x_test))

    # mlflow.sklearn.save_model(new_model, "models/fy676a")
    saved_model = ModelLoader({
        "type": "mlflow.sklearn",
        "path": "models/fy676a"
    }).load_model()
    evaluate(y_test, saved_model.predict(x_test))


model_trainer()
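The save call is left commented out in model_trainer; re-enabled, it would look like the sketch below, and it is what produces the MLmodel, conda.yaml and requirements artifacts that follow:

import mlflow.sklearn

# Persist the fitted estimator; mlflow writes model.pkl plus the
# MLmodel/conda.yaml/requirements.txt metadata shown below.
mlflow.sklearn.save_model(model, "models/fy676a")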
flavors:
  python_function:
    env: conda.yaml
    loader_module: mlflow.sklearn
    model_path: model.pkl
    python_version: 3.10.13
  sklearn:
    pickled_model: model.pkl
    serialization_format: cloudpickle
    sklearn_version: 1.2.2
utc_time_created: '2023-12-18 12:12:42.185881'
channels:
  - conda-forge
dependencies:
  - python=3.10.13
  - pip
  - pip:
      - mlflow
      - cloudpickle==3.0.0
      - scikit-learn==1.2.2
name: mlflow-env
mlflow
cloudpickle==3.0.0
scikit-learn==1.2.2
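The artifacts above (MLmodel, conda.yaml, requirements) make up a self-describing mlflow model directory. Since the MLmodel file declares a python_function flavor alongside sklearn, the directory can also be loaded flavor-agnostically; a small sketch, where features_df is hypothetical:

import mlflow.pyfunc

model = mlflow.pyfunc.load_model("models/fy676a")
# predict() accepts a pandas DataFrame with the training feature columns
predictions = model.predict(features_df)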
import warnings

import pandas as pd

from scripts.constants.constants import ViscosityConstants

warnings.filterwarnings("ignore")


def preprocess_viscosity_section(path, index_number, viscosity_skip_rows):
    viscosity_df = pd.read_excel(path, skiprows=viscosity_skip_rows)
    # Sort by mixing date and keep an untouched copy for the downstream sections
    viscosity_df = viscosity_df.sort_values(by='Mixing date')
    raw_viscosity_df = viscosity_df.sort_values(by='Mixing date')
    # Add date and batch-date key columns
    viscosity_df['date'] = viscosity_df['Mixing date'].dt.date
    viscosity_df['batch-date'] = ('Batch_' + viscosity_df['Batch No.'].astype(str)
                                  + '_' + viscosity_df['date'].astype(str))
    viscosity_df = viscosity_df[viscosity_df['Index No'] == index_number]

    rubber_cols = ViscosityConstants.rubber_cols
    # Replace '-' placeholders with 0 and coerce the rubber columns to numeric
    viscosity_df[rubber_cols] = viscosity_df[rubber_cols].replace('-', 0)
    viscosity_df[rubber_cols] = viscosity_df[rubber_cols].apply(pd.to_numeric, errors='coerce')
    # Identify numerical and float columns
    numerical_cols = viscosity_df.columns[
        viscosity_df.dtypes.apply(lambda x: pd.api.types.is_numeric_dtype(x) or pd.api.types.is_float_dtype(x))]
    integer_cols = viscosity_df.columns[viscosity_df.dtypes == 'int64']
    # Convert integer columns to float
    viscosity_df[integer_cols] = viscosity_df[integer_cols].astype(float)

    # Calculate blend weights from the bale quantities of the two rubber types
    viscosity_df['Weight_type1'] = round(viscosity_df['Quantity using type1 bale'] / (
        viscosity_df['Quantity using type1 bale'] + viscosity_df['Quantity using type2 bale']), 2)
    viscosity_df['Weight_type2'] = round(viscosity_df['Quantity using type2 bale'] / (
        viscosity_df['Quantity using type1 bale'] + viscosity_df['Quantity using type2 bale']), 2)
    viscosity_df['Weighted_PO_type'] = (
        viscosity_df['PO_type1'] * viscosity_df['Weight_type1']
        + viscosity_df['PO_type2'] * viscosity_df['Weight_type2'])
    # 'DIRT_type1.1' (pandas' name for a second 'DIRT_type1' column) stands in for the type2 reading
    viscosity_df['Weighted_DIRT_type'] = (
        viscosity_df['DIRT_type1'] * viscosity_df['Weight_type1']
        + viscosity_df['DIRT_type1.1'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_ASH_type'] = (
        viscosity_df['ASH_type1'] * viscosity_df['Weight_type1']
        + viscosity_df['ASH_type2'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_VM_type'] = (
        viscosity_df['VM_type1'] * viscosity_df['Weight_type1']
        + viscosity_df['VM_type2'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_PRI_type'] = (
        viscosity_df['PRI_type1'] * viscosity_df['Weight_type1']
        + viscosity_df['PRI_type2'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_NITROGEN_type'] = (
        viscosity_df['NITROGEN_type1'] * viscosity_df['Weight_type1']
        + viscosity_df['NITROGEN_type2'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_Temperature during transportation_type[℃]'] = (
        viscosity_df['Temperature during transportation_type1[℃]'] * viscosity_df['Weight_type1']
        + viscosity_df['Temperature during transportation_type2[℃]'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted_Humidity during transportation__type[%]'] = (
        viscosity_df['Humidity during transportation_type1[%]'] * viscosity_df['Weight_type1']
        + viscosity_df['Humidity during transportation__type2[%]'] * viscosity_df['Weight_type2'])
    viscosity_df['Weighted Sum'] = (viscosity_df['Weighted_PO_type'] + viscosity_df['Weighted_DIRT_type']
                                    + viscosity_df['Weighted_ASH_type'] + viscosity_df['Weighted_VM_type']
                                    + viscosity_df['Weighted_PRI_type'] + viscosity_df['Weighted_NITROGEN_type'])

    # Reorder columns so the 'viscosity' target sits last
    column_to_keep_at_end = 'viscosity'
    new_order = [col for col in viscosity_df.columns if col != column_to_keep_at_end] + [column_to_keep_at_end]
    viscosity_df = viscosity_df[new_order]

    req_cols = ViscosityConstants.req_cols
    final_viscosity_df = viscosity_df[req_cols]
    final_viscosity_df = round(final_viscosity_df, 6)
    return final_viscosity_df, raw_viscosity_df
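The blend weighting above reduces to a quantity-weighted average of the two bale types; a toy check with hypothetical numbers:

q1, q2 = 600.0, 400.0                        # bale quantities (hypothetical)
w1, w2 = q1 / (q1 + q2), q2 / (q1 + q2)      # 0.6 and 0.4
po_type1, po_type2 = 30.0, 40.0              # per-type PO readings (hypothetical)
weighted_po = po_type1 * w1 + po_type2 * w2  # 34.0, matching the Weighted_PO_type formula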
import warnings

import numpy as np
import pandas as pd
from loguru import logger

from scripts.constants.constants import MixerConstants

warnings.filterwarnings("ignore")


def preprocess(df):
    logger.info("Starting Preprocessing the Data")
    # Replace 'nan' strings with NaN
    df = df.replace('nan', np.nan)
    # Drop columns that are entirely missing
    missing_counts = df.isnull().sum()
    cols_to_remove = missing_counts[missing_counts == len(df)].index
    df = df.drop(cols_to_remove, axis=1)
    df = df.loc[df['Mixing batch number'] != 0]
    # Drop rows where the batch number is NaN
    df = df.dropna(subset=['Mixing batch number'])
    # Identify and drop constant columns
    constant_columns = df.columns[df.nunique() == 1]
    df.drop(columns=constant_columns, inplace=True)
    logger.info(f"Preprocessing completed and the final shape is {df.shape}")
    return df


def preprocess_mixer_section(df, index_number):
    mixer_cols = MixerConstants.mixer_cols
    mixer_df = df[mixer_cols]
    mixer_df['Time Stamp'] = pd.to_datetime(mixer_df['Time Stamp'])
    mixer_df = mixer_df.sort_values(by='Time Stamp')
    numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
    # Convert numeric columns to float
    mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
    mixer_df['day'] = mixer_df['Time Stamp'].dt.date
    mixer_df = mixer_df[mixer_df["Size No (INDEX No).3"] == index_number]
    mixer_df = mixer_df[mixer_df["Mixing batch number"] != 0]
    # Per-(day, batch) range of the integrated mixing weight
    mixer_df['Mixing Weight (Integrated Value)_diff'] = mixer_df.groupby(['day', 'Mixing batch number'])[
        'Mixing Weight (Integrated Value)'].transform(lambda x: x.max() - x.min())
    mixer_cleaned_df = preprocess(mixer_df)
    mixer_cleaned_df["day"] = mixer_cleaned_df['Time Stamp'].dt.date
    mixer_cleaned_df['mixer_on_or_off'] = mixer_cleaned_df['Mixing timer value'].apply(lambda x: 0 if x == 0 else 1)
    mixer_cleaned_df['batch-date'] = ('Batch_' + mixer_cleaned_df['Mixing batch number'].astype(str)
                                      + '_' + mixer_cleaned_df['day'].astype(str))
    mixer_cleaned_df = mixer_cleaned_df.sort_values(by='Time Stamp')

    # Group by 'batch-date' and add a new column 'rubber_addition'
    mixer_cleaned_df['rubber_addition'] = 0

    def apply_conditions(group):
        max_value_index = group['Mixing timer value'].idxmax()
        group.loc[group['Mixing timer value'] != group['Mixing timer value'].max(), 'rubber_addition'] = 1
        group.loc[max_value_index, 'rubber_addition'] = 1
        return group

    mixer_cleaned_df = mixer_cleaned_df.groupby('batch-date').apply(apply_conditions)
    # Flag rows where the mixer is running and rubber is being added
    mixer_cleaned_df['process_on_or_off'] = 0
    mixer_cleaned_df.loc[(mixer_cleaned_df['mixer_on_or_off'] == 1) &
                         (mixer_cleaned_df['rubber_addition'] == 1), 'process_on_or_off'] = 1
    process_on_df = mixer_cleaned_df[mixer_cleaned_df['process_on_or_off'] == 1]
    df_full = process_on_df[process_on_df.columns]
    # Cast every numeric column to float
    conversion_dict = {col: float for col in df_full.select_dtypes(include='number').columns}
    df_full = df_full.astype(conversion_dict)

    # Count how often the rotor sat at 60 rpm (treated as its maximum) within each batch
    rpm_count = df_full[df_full['Rotor actual rpm'] == 60.0].groupby('batch-date')['Rotor actual rpm'].count()
    df_full = df_full.merge(rpm_count, left_on='batch-date', right_index=True, suffixes=('', '_count'))
    df_full.rename(columns={'Rotor actual rpm_count': 'max_rpm_count'}, inplace=True)

    aggregation_dict = MixerConstants.aggregation_dict
    group_by = ['day', 'Mixing batch number']
    df_mixer_grouped = df_full.groupby(group_by).agg(aggregation_dict).reset_index()
    col_renamer = {}
    for col, col_agg in aggregation_dict.items():
        if col not in ['viscosity', 'time_min', 'time_max', 'Mixing Weight (Integrated Value)_diff', 'max_rpm_count']:
            renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
            col_renamer[col] = renamed_col
        else:
            col_renamer[col] = col
    df_mixer_grouped = df_mixer_grouped.rename(columns=col_renamer)
    df_mixer_grouped['batch-date'] = ('Batch_' + df_mixer_grouped['Mixing batch number'].astype(str)
                                      + '_' + df_mixer_grouped['day'].astype(str))
    df_mixer_grouped = round(df_mixer_grouped, 6)
    return df_mixer_grouped
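The Mixing Weight diff computed above is the max-minus-min range of the integrated weight within each (day, batch) group; the same transform on a toy frame, for reference:

import pandas as pd

toy = pd.DataFrame({'day': ['d1'] * 4, 'Mixing batch number': [1, 1, 2, 2],
                    'weight': [10.0, 25.0, 5.0, 9.0]})
toy['weight_diff'] = toy.groupby(['day', 'Mixing batch number'])['weight'].transform(
    lambda x: x.max() - x.min())  # 15.0, 15.0, 4.0, 4.0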
import warnings

import pandas as pd

from scripts.constants.constants import SheetConstants

warnings.filterwarnings("ignore")


def preprocess_sheet_section(df, index_number):
    sheet_supply_column = SheetConstants.sheet_supply_column
    sheet_supply_df = df[sheet_supply_column]
    sheet_supply_df['Time Stamp'] = pd.to_datetime(sheet_supply_df['Time Stamp'])
    sheet_supply_df = sheet_supply_df.sort_values(by='Time Stamp')
    numeric_cols = sheet_supply_df.select_dtypes(include=['int', 'float']).columns
    # Convert numeric columns to float
    sheet_supply_df[numeric_cols] = sheet_supply_df[numeric_cols].astype(float)
    sheet_supply_df['day'] = sheet_supply_df['Time Stamp'].dt.date
    sheet_supply_df['batch-date'] = ('Batch_' + sheet_supply_df['Weighing times'].astype(str)
                                     + '_' + sheet_supply_df['day'].astype(str))
    sheet_supply_df = sheet_supply_df[sheet_supply_df["Size No (INDEX No)"] == index_number]
    sheet_supply_df = sheet_supply_df[sheet_supply_df["Weighing times"] != 0]

    aggregation_dict = SheetConstants.aggregation_dict
    group_by = ['day', 'Weighing times']
    df_sheet_grouped = sheet_supply_df.groupby(group_by).agg(aggregation_dict).reset_index()
    col_renamer = {}
    for col, col_agg in aggregation_dict.items():
        if col not in ['viscosity', 'time_min', 'time_max']:
            renamed_col = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
            col_renamer[col] = renamed_col
        else:
            col_renamer[col] = col
    df_sheet_grouped = df_sheet_grouped.rename(columns=col_renamer)
    df_sheet_grouped['batch-date'] = ('Batch_' + df_sheet_grouped['Weighing times'].astype(str)
                                      + '_' + df_sheet_grouped['day'].astype(str))
    df_sheet_grouped = round(df_sheet_grouped, 6)
    return df_sheet_grouped
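The renaming loop above strips parentheses, snake_cases spaces, appends the aggregation name, and lowercases; one hypothetical column/aggregation pair traced through:

col, col_agg = 'Sheet Temp (C)', 'mean'  # hypothetical entry from aggregation_dict
renamed = f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
print(renamed)  # sheet_temp_c_mean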