Commit 5f5eb49e authored by dasharatha.vamshi

cleanup

parent 88061537
-[TIMEZONE]
-required_tz=$REQUIRED_TZ
 [MLFLOW]
 mlflow_tracking_uri=$MLFLOW_TRACKING_URI
 mlflow_tracking_username=$MLFLOW_TRACKING_USERNAME
......
-REQUIRED_TZ=Asia/Kolkata
 MLFLOW_TRACKING_URI=https://qa.unifytwin.com/mlflow/
 MLFLOW_TRACKING_USERNAME=mlflow
 MLFLOW_TRACKING_PASSWORD=MlFlOwQA#4321
......
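Note that MLFLOW_TRACKING_USERNAME and MLFLOW_TRACKING_PASSWORD are also the environment variable names the MLflow client itself reads for HTTP basic auth, so exporting them is enough for the cleanup script to authenticate against the tracking server. A minimal sketch of that wiring (placeholder values, not the real credentials):

import os
import mlflow

# The MLflow client picks these env vars up automatically when the tracking
# server uses basic auth; values here are placeholders only.
os.environ["MLFLOW_TRACKING_USERNAME"] = "mlflow"    # placeholder
os.environ["MLFLOW_TRACKING_PASSWORD"] = "<secret>"  # placeholder, never hard-code real secrets

mlflow.set_tracking_uri("https://qa.unifytwin.com/mlflow/")
client = mlflow.tracking.MlflowClient()
print(mlflow.get_tracking_uri())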
...@@ -42,11 +42,6 @@ class Logging:
 # Configuration Variables
-REQUIRED_TZ = config['TIMEZONE']['required_tz']
 class MlflowMetaData:
     MLFLOW_TRACKING_URI = config['MLFLOW']['mlflow_tracking_uri']
     MLFLOW_TRACKING_USERNAME = config['MLFLOW']['mlflow_tracking_username']
......
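The config object used above is not shown in this commit; a plausible sketch of how an INI-style file with $VAR placeholders could be loaded and expanded from the environment is below. The file path and the load_config helper are assumptions, not part of this diff.

import os
from configparser import ConfigParser

def load_config(path="conf/application.conf"):  # hypothetical path
    parser = ConfigParser()
    parser.read(path)
    # Expand $MLFLOW_TRACKING_URI-style placeholders from the process environment.
    return {
        section: {key: os.path.expandvars(value) for key, value in parser.items(section)}
        for section in parser.sections()
    }

config = load_config()
MLFLOW_TRACKING_URI = config['MLFLOW']['mlflow_tracking_uri']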
 import os
-import re
-from datetime import datetime
 import mlflow
-import pandas as pd
-import pytz
-from dateutil import tz
-from loguru import logger
 from azure.storage.blob import BlobServiceClient
+from loguru import logger
-from scripts.constants.app_configuration import REQUIRED_TZ, MlflowMetaData
+from scripts.constants.app_configuration import MlflowMetaData
 from scripts.constants.app_constants import MODEL_NAME
 mlflow_tracking_uri = MlflowMetaData.MLFLOW_TRACKING_URI
...@@ -26,65 +21,6 @@ client = mlflow.tracking.MlflowClient()
 class MlFlowUtil:
-    @staticmethod
-    def get_last_run_time_diff(run_info):
-        try:
-            logger.info(f"Checking the time difference in days")
-            df_time = run_info.copy()
-            df_time['end_time'] = pd.to_datetime(df_time['end_time']).dt.tz_convert(REQUIRED_TZ)
-            to_zone = tz.gettz(REQUIRED_TZ)
-            df_time["days"] = df_time['end_time'].dt.date
-            df_time["hours"] = df_time['end_time'].dt.hour
-            last_model_time = list(df_time['end_time'])[0].to_pydatetime()
-            today = datetime.now(pytz.utc)
-            central_current = today.astimezone(to_zone)
-            time_diff = central_current - last_model_time
-            return int(time_diff.days)
-        except Exception as e:
-            logger.warning(f"Exception while checking the last run time of the model - {e}")
-            return 0
-    @staticmethod
-    def log_model(model, model_name):
-        try:
-            mlflow.sklearn.log_model(model, model_name)
-            logger.info("logged the model")
-            return True
-        except Exception as e:
-            logger.exception(str(e))
-    @staticmethod
-    def log_metrics(metrics):
-        try:
-            updated_metric = {}
-            for key, value in metrics.items():
-                key = re.sub(r"[([{})\]]", "", key)
-                updated_metric[key] = value
-            mlflow.log_metrics(updated_metric)
-            return True
-        except Exception as e:
-            logger.exception(str(e))
-    @staticmethod
-    def log_hyper_param(hyper_params):
-        try:
-            mlflow.log_params(hyper_params)
-            return True
-        except Exception as e:
-            logger.exception(str(e))
-    @staticmethod
-    def set_tag(child_run_id, key, value):
-        try:
-            client.set_tag(run_id=child_run_id, key=key, value=value)
-        except Exception as e:
-            logger.exception(f"Exception while setting the tag - {e}")
-    @staticmethod
-    def remove_file_if_exists(path):
-        if os.path.exists(path):
-            os.remove(path)
     @staticmethod
     def delete_artifact(run_id, parent_run_name, artifact_uri, file_path, model_name):
         logger.info(f"Deleting artifact for {run_id} under {parent_run_name}")
...@@ -167,20 +103,21 @@ class MlflowCleanUp:
         experiment_id = self.check_experiment()
         if experiment_id is not None:
             runs_df = self.check_runs_data(experiment_id)
-            if runs_df is not None:
-                run_id_list = list(runs_df['run_id'])
-                run_name_list = list(runs_df['tags.mlflow.runName'])
-                run_name_mapping = {}
-                for i in range(len(run_id_list)):
-                    run_name_mapping[run_id_list[i]] = run_name_list[i]
-                # getting runs who have a parent-id
-                df = runs_df[runs_df[self.model_parent_run_id_key].notna()]
-                # getting runs who have a model logged
-                f_df = df[df[self.model_history_key].notna()]
-                self.delete_run_model_data(f_df, run_name_mapping)
-            else:
-                logger.info('No runs found for experiment, so no cleanup')
-                return False
+            if self.model_parent_run_id_key in runs_df.columns:
+                if runs_df is not None:
+                    run_id_list = list(runs_df['run_id'])
+                    run_name_list = list(runs_df['tags.mlflow.runName'])
+                    run_name_mapping = {}
+                    for i in range(len(run_id_list)):
+                        run_name_mapping[run_id_list[i]] = run_name_list[i]
+                    # getting runs who have a parent-id
+                    df = runs_df[runs_df[self.model_parent_run_id_key].notna()]
+                    # getting runs who have a model logged
+                    f_df = df[df[self.model_history_key].notna()]
+                    self.delete_run_model_data(f_df, run_name_mapping)
+                else:
+                    logger.info('No runs found for experiment, so no cleanup')
+            else:
+                logger.info('No parent runs found for experiment, so no cleanup')
         else:
             logger.info("Not a valid experiment...")
-            return False
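The column guard added in this hunk matters because the runs DataFrame returned by MLflow only contains a tags.* column when at least one run actually carries that tag, so indexing self.model_parent_run_id_key on an experiment with no nested runs raises a KeyError. A hedged sketch of the same pattern outside the class, assuming check_runs_data() wraps mlflow.search_runs() and the key is 'tags.mlflow.parentRunId' (the experiment id is a placeholder):

import mlflow

runs_df = mlflow.search_runs(experiment_ids=["1"])  # placeholder experiment id
parent_key = "tags.mlflow.parentRunId"              # assumed value of model_parent_run_id_key

if parent_key in runs_df.columns:
    # keep only child runs that actually have a parent run id
    child_runs = runs_df[runs_df[parent_key].notna()]
    print(f"{len(child_runs)} child runs eligible for cleanup")
else:
    print("No parent runs found for experiment, so no cleanup")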