Commit 9a7dbd66 authored by dasharatha.vamshi's avatar dasharatha.vamshi

multi sites

parent b3ee4666
client,site,project_name,date,total_recommendations,followed,not_followed
Jubilant,PP2 Ac20 Reaction Section,Golden Batch for Ac20 Reaction,2022-08-01,85,85,0
Jubilant,PP2 Ac20 Reaction Section,Golden Batch for Ac20 Reaction,2022-08-02,93,93,0
This diff is collapsed.
client,site,project_name,date,total_recommendations,followed,not_followed
Jubilant,PP2 R5 Reaction Section,Golden Batch for R5 Reaction,2022-08-01,85,85,0
Jubilant,PP2 R5 Reaction Section,Golden Batch for R5 Reaction,2022-08-02,93,93,0
from scripts.constants.app_configuration import METADATA
from scripts.core.utils.compliance_util import Compliance
from scripts.core.utils.timestamp_util import get_timestamps
from loguru import logger
import pandas as pd
if __name__ == '__main__':
    # Generate a per-site compliance report for every configured site and
    # combine all sites into a single overall CSV.
    site_data = METADATA['site_data']
    tags_type = ['upper', 'lower', 'live']  # tag variants stored per parameter
    final_df_list = []
    for site, site_metadata in site_data.items():
        logger.info(f"Generating for site {site}")
        logger.debug(site_metadata)
        all_tags = site_metadata['tags']
        # Flatten {param: {upper/lower/live: tag}} into {f"{param}_{type}": tag}.
        tags_data = {}
        for param, param_data in all_tags.items():
            for param_data_type in tags_type:
                tags_data[f'{param}_{param_data_type}'] = param_data[param_data_type]
        # Invert so the raw tag names coming back from the DB map to readable
        # column names.
        column_renamer = {v: k for k, v in tags_data.items()}
        start_date = site_metadata['start_date']
        end_date = site_metadata['end_date']
        all_timestamps = get_timestamps(start_date, end_date)
        logger.debug(all_timestamps)
        # NOTE: METADATA['query'] is a shared dict; the tag list is overwritten
        # below before each site's pull, so reusing the same object is safe here.
        payload = METADATA['query']
        payload['metrics'][0]['tags']['c3'] = list(column_renamer.keys())
        obj = Compliance(site, payload, column_renamer, all_tags)
        day_df = obj.start_calculation(all_timestamps)
        final_df_list.append(day_df)
    # pd.concat raises ValueError on an empty list — guard against a run where
    # no site produced data.
    df = pd.concat(final_df_list) if final_df_list else pd.DataFrame()
    df.to_csv('overall-compliance.csv', index=False)
This diff is collapsed.
client,site,project_name,date,total_recommendations,followed,not_followed
Jubilant,PP2 R5 Reaction Section,Golden Batch for R5 Reaction,2022-08-01,85,85,0
Jubilant,PP2 R5 Reaction Section,Golden Batch for R5 Reaction,2022-08-02,93,93,0
Jubilant,PP2 Ac20 Reaction Section,Golden Batch for Ac20 Reaction,2022-08-01,85,85,0
Jubilant,PP2 Ac20 Reaction Section,Golden Batch for Ac20 Reaction,2022-08-02,93,93,0
This diff is collapsed.
This diff is collapsed.
......@@ -7,10 +7,12 @@ import pandas as pd
class Compliance:
def __init__(self, site, payload, column_rename, tags_data):
    """Set up a per-site compliance calculator.

    :param site: key into METADATA['site_data'] identifying the site.
    :param payload: query payload used by DataPuller to fetch raw tag data.
    :param column_rename: mapping of raw tag name -> readable column name.
    :param tags_data: per-parameter tag metadata ({param: {upper/lower/live: tag}}).
    """
    self.site = site
    self.payload = payload
    self.column_rename = column_rename
    self.tags_data = tags_data
    # Site-specific settings (client/site/project_name, compliance_percentage, ...)
    # looked up once so the rest of the class never touches global METADATA keys.
    self.site_metadata = METADATA['site_data'][self.site]
    self._dp_ = DataPuller(db_host=KAIROS_DB_HOST, payload=self.payload, column_rename=self.column_rename)
@staticmethod
......@@ -21,14 +23,13 @@ class Compliance:
logger.warning(f"Error adding compliance column - {e}")
return df
@staticmethod
def create_compliance_sheet(final_data_dict, compliance_cols, total_columns_criteria):
def create_compliance_sheet(self, final_data_dict, compliance_cols, total_columns_criteria):
logger.info("Calculating overall compliance...")
c_df = pd.DataFrame()
try:
c_df['client'] = [METADATA['client']] * len(final_data_dict)
c_df['site'] = [METADATA['site']] * len(final_data_dict)
c_df['project_name'] = [METADATA['project_name']] * len(final_data_dict)
c_df['client'] = [self.site_metadata['client']] * len(final_data_dict)
c_df['site'] = [self.site_metadata['site']] * len(final_data_dict)
c_df['project_name'] = [self.site_metadata['project_name']] * len(final_data_dict)
final_compliance_list = []
timestamp_data = []
for idx, value in final_data_dict.items():
......@@ -49,14 +50,32 @@ class Compliance:
logger.warning(f'Error - {e}')
return c_df
def generate_day_wise(self, final_df):
final_df['time'] = pd.to_datetime(final_df['time'])
final_df['day'] = final_df['time'].dt.date
new_df = final_df.groupby('day')['compliance'].value_counts().unstack(fill_value=0).reset_index()
new_df['date'] = pd.to_datetime(new_df['day'])
new_df = new_df.drop('day', axis=1)
if 0 not in list(new_df.columns):
new_df[0] = 0
if 1 not in list(new_df.columns):
new_df[1] = 0
new_df = new_df.rename(columns={0: 'followed', 1: 'not_followed'})
new_df['client'] = self.site_metadata['client']
new_df['site'] = self.site_metadata['site']
new_df['project_name'] = self.site_metadata['project_name']
new_df['total_recommendations'] = new_df['followed'] + new_df['not_followed']
new_df = new_df[['client', 'site', 'project_name', 'date', 'total_recommendations', 'followed', 'not_followed']]
return new_df
def start_calculation(self, all_timestamps):
all_dfs = []
parameter_wise_dfs = []
for i in all_timestamps:
start_timestamp = i['start']
end_timestamp = i['end']
start_time = datetime.fromtimestamp(start_timestamp//1000)
end_time = datetime.fromtimestamp(end_timestamp//1000)
start_time = datetime.fromtimestamp(start_timestamp // 1000)
end_time = datetime.fromtimestamp(end_timestamp // 1000)
logger.info(f"Calculating for {start_time} to {end_time}")
df = self._dp_.get_data(start_timestamp, end_timestamp)
total_cols = len(df.columns) - 2
......@@ -71,7 +90,7 @@ class Compliance:
compliance_cols.append(f'{column}_compliance')
# df.to_csv('r5-parameter-wise-compliance.csv', index=False)
parameter_wise_dfs.append(df)
total_columns_criteria = int((METADATA['compliance_percentage'] / 100) * len(compliance_cols))
total_columns_criteria = int((self.site_metadata['compliance_percentage'] / 100) * len(compliance_cols))
logger.info(f"Need {total_columns_criteria} from {len(compliance_cols)} columns to satisfy the "
f"compliance")
rq_cols = compliance_cols.copy()
......@@ -85,8 +104,11 @@ class Compliance:
if all_dfs:
logger.info("Combining the Data")
final_df = pd.concat(all_dfs)
final_df.to_csv('r5-overall-compliance.csv', index=False)
if parameter_wise_dfs:
params_df = pd.concat(parameter_wise_dfs)
params_df.to_csv('r5-parameter-wise-compliance.csv', index=False)
new_df = self.generate_day_wise(final_df)
new_df.to_csv(f'{self.site}-day-wise-compliance.csv', index=False)
final_df.to_csv(f'{self.site}-overall-compliance.csv', index=False)
return new_df
return pd.DataFrame()
# if parameter_wise_dfs:
# params_df = pd.concat(parameter_wise_dfs)
# params_df.to_csv('r5-parameter-wise-compliance.csv', index=False)
......@@ -9,6 +9,8 @@ def get_timestamps(start_date, end_date):
logger.info(f"Getting start and end time from {start_date} and {end_date}")
s_date = datetime.strptime(start_date, "%Y-%m-%d")
e_date = datetime.strptime(end_date, "%Y-%m-%d")
e_date = e_date + timedelta(days=1)
print(e_date)
if s_date > e_date:
raise DateError("Invalid Dates mentioned")
my_dates = [s_date + timedelta(days=x) for x in range((e_date - s_date).days + 1)]
......@@ -20,6 +22,7 @@ def get_timestamps(start_date, end_date):
for i in range(start, end, step):
x = i
t_list = my_dates[x:x + step]
print(t_list)
start = t_list[0]
end = t_list[-1] + timedelta(hours=24, minutes=0, seconds=0, milliseconds=0)
end = min(end, e_date)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment