Commit 5f5eb49e authored by dasharatha.vamshi

cleanup

parent 88061537
-[TIMEZONE]
-required_tz=$REQUIRED_TZ
 [MLFLOW]
 mlflow_tracking_uri=$MLFLOW_TRACKING_URI
 mlflow_tracking_username=$MLFLOW_TRACKING_USERNAME
......
-REQUIRED_TZ=Asia/Kolkata
 MLFLOW_TRACKING_URI=https://qa.unifytwin.com/mlflow/
 MLFLOW_TRACKING_USERNAME=mlflow
 MLFLOW_TRACKING_PASSWORD=MlFlOwQA#4321
......
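Note that MLFLOW_TRACKING_USERNAME and MLFLOW_TRACKING_PASSWORD are also the environment variable names the MLflow client itself reads for HTTP basic auth, so exporting them is enough for the cleanup script to authenticate against the tracking server. A minimal sketch of that wiring (placeholder values, not the real credentials):

import os
import mlflow

# The MLflow client picks these env vars up automatically when the tracking
# server uses basic auth; values here are placeholders only.
os.environ["MLFLOW_TRACKING_USERNAME"] = "mlflow"    # placeholder
os.environ["MLFLOW_TRACKING_PASSWORD"] = "<secret>"  # placeholder, never hard-code real secrets

mlflow.set_tracking_uri("https://qa.unifytwin.com/mlflow/")
client = mlflow.tracking.MlflowClient()
print(mlflow.get_tracking_uri())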
...@@ -42,11 +42,6 @@ class Logging:
 # Configuration Variables
-REQUIRED_TZ = config['TIMEZONE']['required_tz']
 class MlflowMetaData:
     MLFLOW_TRACKING_URI = config['MLFLOW']['mlflow_tracking_uri']
     MLFLOW_TRACKING_USERNAME = config['MLFLOW']['mlflow_tracking_username']
......
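The config object used above is not shown in this commit; a plausible sketch of how an INI-style file with $VAR placeholders could be loaded and expanded from the environment is below. The file path and the load_config helper are assumptions, not part of this diff.

import os
from configparser import ConfigParser

def load_config(path="conf/application.conf"):  # hypothetical path
    parser = ConfigParser()
    parser.read(path)
    # Expand $MLFLOW_TRACKING_URI-style placeholders from the process environment.
    return {
        section: {key: os.path.expandvars(value) for key, value in parser.items(section)}
        for section in parser.sections()
    }

config = load_config()
MLFLOW_TRACKING_URI = config['MLFLOW']['mlflow_tracking_uri']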
 import os
-import re
-from datetime import datetime
 import mlflow
-import pandas as pd
-import pytz
-from dateutil import tz
-from loguru import logger
 from azure.storage.blob import BlobServiceClient
+from loguru import logger
-from scripts.constants.app_configuration import REQUIRED_TZ, MlflowMetaData
+from scripts.constants.app_configuration import MlflowMetaData
 from scripts.constants.app_constants import MODEL_NAME
 mlflow_tracking_uri = MlflowMetaData.MLFLOW_TRACKING_URI
...@@ -26,65 +21,6 @@ client = mlflow.tracking.MlflowClient()
 class MlFlowUtil:
-    @staticmethod
-    def get_last_run_time_diff(run_info):
-        try:
-            logger.info(f"Checking the time difference in days")
-            df_time = run_info.copy()
-            df_time['end_time'] = pd.to_datetime(df_time['end_time']).dt.tz_convert(REQUIRED_TZ)
-            to_zone = tz.gettz(REQUIRED_TZ)
-            df_time["days"] = df_time['end_time'].dt.date
-            df_time["hours"] = df_time['end_time'].dt.hour
-            last_model_time = list(df_time['end_time'])[0].to_pydatetime()
-            today = datetime.now(pytz.utc)
-            central_current = today.astimezone(to_zone)
-            time_diff = central_current - last_model_time
-            return int(time_diff.days)
-        except Exception as e:
-            logger.warning(f"Exception while checking the last run time of the model - {e}")
-            return 0
-    @staticmethod
-    def log_model(model, model_name):
-        try:
-            mlflow.sklearn.log_model(model, model_name)
-            logger.info("logged the model")
-            return True
-        except Exception as e:
-            logger.exception(str(e))
-    @staticmethod
-    def log_metrics(metrics):
-        try:
-            updated_metric = {}
-            for key, value in metrics.items():
-                key = re.sub(r"[([{})\]]", "", key)
-                updated_metric[key] = value
-            mlflow.log_metrics(updated_metric)
-            return True
-        except Exception as e:
-            logger.exception(str(e))
-    @staticmethod
-    def log_hyper_param(hyper_params):
-        try:
-            mlflow.log_params(hyper_params)
-            return True
-        except Exception as e:
-            logger.exception(str(e))
-    @staticmethod
-    def set_tag(child_run_id, key, value):
-        try:
-            client.set_tag(run_id=child_run_id, key=key, value=value)
-        except Exception as e:
-            logger.exception(f"Exception while setting the tag - {e}")
-    @staticmethod
-    def remove_file_if_exists(path):
-        if os.path.exists(path):
-            os.remove(path)
     @staticmethod
     def delete_artifact(run_id, parent_run_name, artifact_uri, file_path, model_name):
         logger.info(f"Deleting artifact for {run_id} under {parent_run_name}")
...@@ -167,20 +103,21 @@ class MlflowCleanUp:
         experiment_id = self.check_experiment()
         if experiment_id is not None:
             runs_df = self.check_runs_data(experiment_id)
-            if runs_df is not None:
-                run_id_list = list(runs_df['run_id'])
-                run_name_list = list(runs_df['tags.mlflow.runName'])
-                run_name_mapping = {}
-                for i in range(len(run_id_list)):
-                    run_name_mapping[run_id_list[i]] = run_name_list[i]
-                # getting runs who have a parent-id
-                df = runs_df[runs_df[self.model_parent_run_id_key].notna()]
-                # getting runs who have a model logged
-                f_df = df[df[self.model_history_key].notna()]
-                self.delete_run_model_data(f_df, run_name_mapping)
-            else:
-                logger.info('No runs found for experiment, so no cleanup')
-                return False
+            if self.model_parent_run_id_key in runs_df.columns:
+                if runs_df is not None:
+                    run_id_list = list(runs_df['run_id'])
+                    run_name_list = list(runs_df['tags.mlflow.runName'])
+                    run_name_mapping = {}
+                    for i in range(len(run_id_list)):
+                        run_name_mapping[run_id_list[i]] = run_name_list[i]
+                    # getting runs who have a parent-id
+                    df = runs_df[runs_df[self.model_parent_run_id_key].notna()]
+                    # getting runs who have a model logged
+                    f_df = df[df[self.model_history_key].notna()]
+                    self.delete_run_model_data(f_df, run_name_mapping)
+                else:
+                    logger.info('No runs found for experiment, so no cleanup')
+            else:
+                logger.info('No parent runs found for experiment, so no cleanup')
         else:
             logger.info("Not a valid experiment...")
-            return False
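The column guard added in this hunk matters because the runs DataFrame returned by MLflow only contains a tags.* column when at least one run actually carries that tag, so indexing self.model_parent_run_id_key on an experiment with no nested runs raises a KeyError. A hedged sketch of the same pattern outside the class, assuming check_runs_data() wraps mlflow.search_runs() and the key is 'tags.mlflow.parentRunId' (the experiment id is a placeholder):

import mlflow

runs_df = mlflow.search_runs(experiment_ids=["1"])  # placeholder experiment id
parent_key = "tags.mlflow.parentRunId"              # assumed value of model_parent_run_id_key

if parent_key in runs_df.columns:
    # keep only child runs that actually have a parent run id
    child_runs = runs_df[runs_df[parent_key].notna()]
    print(f"{len(child_runs)} child runs eligible for cleanup")
else:
    print("No parent runs found for experiment, so no cleanup")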