import warnings

import numpy as np
import pandas as pd
from loguru import logger

from scripts.constants.constants import MixerConstants

# Runs at import time and installs an "ignore" filter for every warning
# category, process-wide.
# NOTE(review): this also hides pandas warnings raised by the functions
# below (chained assignment, deprecations) - consider narrowing the filter.
warnings.filterwarnings("ignore")


def preprocess(df):
    """Clean a raw frame by removing unusable rows and columns.

    Steps, in order:

    1. Replace the literal string ``'nan'`` with a real NaN.
    2. Drop columns in which every value is missing.
    3. Drop rows whose ``'Mixing batch number'`` is 0 or missing.
    4. Drop columns holding exactly one distinct non-null value.

    The input frame is never modified; a new frame is returned.

    Args:
        df: DataFrame that contains a ``'Mixing batch number'`` column.

    Returns:
        The cleaned DataFrame. A frame with zero rows is returned as an
        unchanged copy.

    Raises:
        KeyError: if ``'Mixing batch number'`` is absent, or was removed in
            step 2 because all of its values are missing.
    """
    logger.info("Starting Preprocessing the Data")

    if len(df) == 0:
        # With zero rows every column counts as "all missing", so step 2
        # would remove all columns and step 3 would fail with a KeyError.
        # There is nothing to clean, so hand back the empty frame as is.
        logger.info(f"Preprocessing completed and the final shape is {df.shape}")
        return df.copy()

    # Replace 'nan' with NaN
    df = df.replace('nan', np.nan)

    # Drop the columns that contain no data at all
    all_missing_cols = df.columns[df.isna().all()]
    df = df.drop(columns=all_missing_cols)

    # Keep only rows with a usable (non-zero, non-missing) batch number
    batch_number = df['Mixing batch number']
    df = df.loc[(batch_number != 0) & batch_number.notna()]

    # Drop constant columns (nunique ignores NaN, so "one value plus NaN"
    # also counts as constant)
    constant_cols = df.columns[df.nunique() == 1]
    df = df.drop(columns=constant_cols)

    logger.info(f"Preprocessing completed and the final shape is {df.shape}")
    return df


def _flag_rubber_addition(timer, batch_key):
    """Return a 0/1 Series of 'rubber_addition' flags, one per row.

    Within each batch a row is flagged 1 when its timer value differs from
    the batch maximum, or when it is the first row (in frame order) that
    reaches the maximum. Later rows repeating the maximum get 0.
    """
    batch_peak = timer.groupby(batch_key).transform('max')
    at_peak = timer == batch_peak
    # Running count of peak rows inside each batch; 1 marks the first one.
    peak_rank = at_peak.astype('int64').groupby(batch_key).cumsum()
    first_peak = at_peak & (peak_rank == 1)
    return (~at_peak | first_peak).astype('int64')


def _aggregated_column_names(aggregation_dict):
    """Map each aggregated column to its output name.

    Columns in the pass-through set keep their name; every other column
    becomes ``<name without parentheses, spaces as underscores>_<agg>`` in
    lower case.
    """
    passthrough = {'viscosity', 'time_min', 'time_max',
                   'Mixing Weight (Integrated Value)_diff', 'max_rpm_count'}
    return {
        col: col if col in passthrough
        else f'{col.replace("(", "").replace(")", "").replace(" ", "_")}_{col_agg}'.lower()
        for col, col_agg in aggregation_dict.items()
    }


def preprocess_mixer_section(df, index_number, max_rpm=60.0):
    """Build per-batch aggregated features for the mixer section.

    The function selects ``MixerConstants.mixer_cols`` from ``df``, keeps the
    rows whose ``"Size No (INDEX No).3"`` equals ``index_number`` and whose
    batch number is non-zero, cleans them with ``preprocess``, keeps only the
    rows where ``process_on_or_off`` is 1, and aggregates them per
    ``('day', 'Mixing batch number')`` with ``MixerConstants.aggregation_dict``.

    The caller's ``df`` is not modified.

    Args:
        df: Raw DataFrame containing at least ``MixerConstants.mixer_cols``.
        index_number: Value of ``"Size No (INDEX No).3"`` to keep.
        max_rpm: Rotor rpm value whose per-batch occurrences are counted into
            ``max_rpm_count``. Defaults to 60.0, the previously hard-coded
            value.

    Returns:
        DataFrame with one row per (day, batch), renamed aggregate columns,
        a ``'batch-date'`` label column, rounded to 6 decimals.
    """
    aggregation_dict = MixerConstants.aggregation_dict

    # Explicit copy: all later assignments act on our own frame, never on a
    # slice of the caller's data.
    mixer_df = df[MixerConstants.mixer_cols].copy()
    mixer_df['Time Stamp'] = pd.to_datetime(mixer_df['Time Stamp'])
    mixer_df = mixer_df.sort_values(by='Time Stamp')
    numeric_cols = mixer_df.select_dtypes(include=['int', 'float']).columns
    # Convert numeric columns to float
    mixer_df[numeric_cols] = mixer_df[numeric_cols].astype(float)
    mixer_df['day'] = mixer_df['Time Stamp'].dt.date

    wanted_rows = (mixer_df["Size No (INDEX No).3"] == index_number) & \
                  (mixer_df["Mixing batch number"] != 0)
    mixer_df = mixer_df[wanted_rows].copy()
    # Per (day, batch) range of the integrated mixing weight
    mixer_df['Mixing Weight (Integrated Value)_diff'] = mixer_df.groupby(['day', 'Mixing batch number'])[
        'Mixing Weight (Integrated Value)'].transform(lambda x: x.max() - x.min())

    cleaned = preprocess(mixer_df)
    # preprocess() drops constant and all-missing columns. With e.g. a single
    # batch that removes columns the rest of this function relies on, so put
    # back any needed column that existed before cleaning (index-aligned).
    needed_cols = ['Time Stamp', 'Mixing batch number', 'Mixing timer value',
                   'Rotor actual rpm', *aggregation_dict]
    for col in needed_cols:
        if col in mixer_df.columns and col not in cleaned.columns:
            cleaned[col] = mixer_df[col]

    cleaned['day'] = cleaned['Time Stamp'].dt.date
    cleaned['mixer_on_or_off'] = (cleaned['Mixing timer value'] != 0).astype('int64')
    cleaned['batch-date'] = 'Batch_' + cleaned['Mixing batch number'].astype(str) + '_' + \
                            cleaned['day'].astype(str)
    cleaned = cleaned.sort_values(by='Time Stamp')

    # Computed with vectorised group operations instead of groupby().apply():
    # on pandas >= 2.0 apply() with the default group_keys adds 'batch-date'
    # to the index, which makes the merge on 'batch-date' below ambiguous.
    cleaned['rubber_addition'] = _flag_rubber_addition(
        cleaned['Mixing timer value'], cleaned['batch-date'])

    # The process counts as "on" only when both flags are set
    cleaned['process_on_or_off'] = (
        (cleaned['mixer_on_or_off'] == 1) & (cleaned['rubber_addition'] == 1)
    ).astype('int64')
    process_on_df = cleaned[cleaned['process_on_or_off'] == 1]

    # Cast every numeric column to float before aggregating
    conversion_dict = {col: float for col in process_on_df.select_dtypes(include='number').columns}
    df_full = process_on_df.astype(conversion_dict)

    # Number of rows per batch at exactly max_rpm.
    # NOTE(review): the merge is an inner join, so batches with no row at
    # max_rpm are dropped from the result - confirm this is intended rather
    # than keeping them with a count of 0.
    at_max_rpm = df_full[df_full['Rotor actual rpm'] == max_rpm]
    rpm_count = at_max_rpm.groupby('batch-date')['Rotor actual rpm'].count().rename('max_rpm_count')
    df_full = df_full.merge(rpm_count, left_on='batch-date', right_index=True)

    group_by = ['day', 'Mixing batch number']
    df_mixer_grouped = df_full.groupby(group_by).agg(aggregation_dict).reset_index()
    df_mixer_grouped = df_mixer_grouped.rename(columns=_aggregated_column_names(aggregation_dict))
    df_mixer_grouped['batch-date'] = 'Batch_' + df_mixer_grouped['Mixing batch number'].astype(str) + '_' + \
                                     df_mixer_grouped['day'].astype(str)
    return df_mixer_grouped.round(6)