Commit aee4d77e authored by Sikhin VC's avatar Sikhin VC

initial commit

parent 6962547c
# Default ignored files
/shelf/
/workspace.xml
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (training_pipeline)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/training_pipeline.iml" filepath="$PROJECT_DIR$/.idea/training_pipeline.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
FROM python:3.7
# Single apt layer: tzdata, vim, and the OpenCV runtime libraries.
RUN apt-get update && apt-get install -y tzdata vim ffmpeg libsm6 libxext6
RUN pip3 install --upgrade pip
# Specifiers containing '>' must be quoted so the shell does not treat them as output redirection.
RUN pip3 install pytz==2020.4 loguru~=0.6.0 scipy~=1.4.1 numpy~=1.19.5 pandas~=1.3.3 requests==2.26.0 pydantic==1.8.2 python-dotenv==0.19.0 PyYAML~=6.0 python-dateutil~=2.8.2 azure-core==1.26.3 azure-identity==1.12.0 azure-storage-blob==12.15.0 "Pillow>=7.1.2"
RUN pip3 install "opencv-python>=4.1.2" matplotlib seaborn
ADD . /app
WORKDIR /app
CMD [ "python","app.py" ]
\ No newline at end of file
import os.path

if __name__ == "__main__":
    from dotenv import load_dotenv

    load_dotenv(dotenv_path='config.env')

import yaml
import traceback
import warnings
from loguru import logger

from scripts.constants.app_configuration import master_configuration_file
from scripts.utils.blob_downloader import Blob_Downloader
from scripts.utils.data_augmentation import AugmentationManager
from scripts.utils.dataset_extraction import ExtractDataset

warnings.filterwarnings("ignore")

def main():
    with open(master_configuration_file, 'r') as _cf:
        master_conf = yaml.full_load(_cf)
    root_data_path = master_conf["master_config"]["data_path"]
    raw_dataset_path = os.path.join(root_data_path, "raw_dataset")
    extracted_dataset_path = os.path.join(root_data_path, "extracted_dataset")
    unaugmented_dataset_path = os.path.join(root_data_path, "unaugmented_dataset")
    augmented_dataset_path = os.path.join(root_data_path, "augmented_dataset")
    # annotation_directory = os.path.join("data", "dataset")
    # post_process_directory = os.path.join(root_data_path, "post")
    for path in (root_data_path, raw_dataset_path, extracted_dataset_path,
                 unaugmented_dataset_path, augmented_dataset_path):
        os.makedirs(path, exist_ok=True)
    augmentation_functions = {}
    logger.info("Starting Vision Data Acquisition Pipeline")
    # logger.info(f"Master Configuration Read, Found {len(master_conf['master_config']['rtsp_url'])} Streams with "
    #             f"check batch size of {master_conf['master_config']['check_batch_size']}")
    blob_list = master_conf['master_config']['blob_path']
    logger.info(f'Starting acquisition of blob {blob_list}')
    blob_util = Blob_Downloader(master_conf['master_config']['project'],
                                master_conf['master_config']['site'],
                                master_conf['master_config']['blob_path'],
                                raw_dataset_path)
    blob_download_status = blob_util.download()
    if blob_download_status:
        logger.info("Data Downloaded Successfully!")
    else:
        logger.warning("Failed To Download Data")
    extract = ExtractDataset()
    extract.extract_ds(os.path.join(raw_dataset_path, "dataset.zip"), extracted_dataset_path)
    extract.move_files(extracted_dataset_path, unaugmented_dataset_path)
    try:
        # os.walk() returns a generator of (dirpath, dirnames, filenames) tuples,
        # so take the first dirpath rather than indexing the generator itself.
        annotation_directory = next(os.walk("data"))[0]
        post_process_directory = annotation_directory
    except StopIteration:
        annotation_directory = "data"
        post_process_directory = "data"
    augmentation_list = master_conf['master_config']['augmentation_types']
    for augmentation in augmentation_list:
        augmentation_value = augmentation_list[augmentation]["value"]
        augmentation_functions[augmentation] = [{"property": augmentation, "value": augmentation_value}]
    logger.debug(augmentation_functions)
    augmentation_manager = AugmentationManager(augmentation_functions)
    augmentation_manager.process(annotation_directory=unaugmented_dataset_path,
                                 post_process_directory=augmented_dataset_path)

if __name__ == '__main__':
    logger.info("Attempting to Start Vision Data Acquisition Pipeline!")
    try:
        main()
    except Exception as e:
        traceback.print_exc()
        logger.error(f"Failed to Start Vision Data Acquisition Pipeline! : {str(e)}")
[BLOB_STORAGE]
conn_str=$CONN_STR
container_name=$CONTAINER_NAME
\ No newline at end of file
master_config:
  project: 'JK_Cements'
  site: 'Chittorgarh'
  blob_path:
    "JK_Cements/dataset2.zip"
  data_path:
    "jk_data"
  augmentation_types:
    blur:
      value: 0.3
    noise:
      value: 0.1
    horizontal_flip:
      value: 0
    grayscale:
      value: 0.4
    hue:
      value: 1
    saturation:
      value: 1
    brightness:
      value: 0.8
    exposure:
      value: 0.9
    vertical_flip:
      value: 0
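For reference, the loop in app.py reduces each entry of this augmentation_types block to a single {property, value} record. A minimal sketch of the augmentation_functions mapping it builds from the config above (the exact shape AugmentationManager expects is assumed, since its source is collapsed in this diff):

# Built by app.py from conf/master_config.yml; each key maps to a
# single-item list holding a {"property", "value"} dict.
augmentation_functions = {
    "blur":            [{"property": "blur", "value": 0.3}],
    "noise":           [{"property": "noise", "value": 0.1}],
    "horizontal_flip": [{"property": "horizontal_flip", "value": 0}],
    "grayscale":       [{"property": "grayscale", "value": 0.4}],
    "hue":             [{"property": "hue", "value": 1}],
    "saturation":      [{"property": "saturation", "value": 1}],
    "brightness":      [{"property": "brightness", "value": 0.8}],
    "exposure":        [{"property": "exposure", "value": 0.9}],
    "vertical_flip":   [{"property": "vertical_flip", "value": 0}],
}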
master_config:
  project: 'JK_Cements'
  site: 'Chittorgarh'
  rtsp_url:
    2023_02_28_15_45_00.mp4:
      line: 1
      camera: 1
      contour_area: 1000
  weight_file: JK_Cements/Chittorgarh/weights/weight.pt
  check_batch_size: 16
  packet_size: 20
  classes: assets/classes.txt
\ No newline at end of file
CONN_STR = AccountName=azrmlilensqa006382180551;AccountKey=tDGOKfiZ2svfoMvVmS0Fbpf0FTHfTq4wKYuDX7cAxlhve/3991QuzdvJHm9vWc+lo6mtC+x9yPSghWNR4+gacg==;EndpointSuffix=core.windows.net;DefaultEndpointsProtocol=https;
CONTAINER_NAME = mlflow-vm-container
\ No newline at end of file
# This is a sample Python script.

# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.


def print_hi(name):
    # Use a breakpoint in the code line below to debug your script.
    print(f'Hi, {name}')  # Press Ctrl+F8 to toggle the breakpoint.


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    print_hi('PyCharm')

# See PyCharm help at https://www.jetbrains.com/help/pycharm/
pytz==2020.4
loguru~=0.6.0
#scipy~=1.4.1
numpy~=1.19.5
pandas~=1.3.3
requests==2.26.0
pydantic==1.8.2
python-dotenv==0.19.0
PyYAML~=6.0
python-dateutil~=2.8.2
azure-core==1.26.3
azure-identity==1.12.0
azure-storage-blob==12.15.0
albumentations==1.1.0
\ No newline at end of file
import os
import sys
from configparser import ConfigParser, BasicInterpolation

master_configuration_file = r"./conf/master_config.yml"


class EnvInterpolation(BasicInterpolation):
    """
    Interpolation which expands environment variables in values.
    """

    def before_get(self, parser, section, option, value, defaults):
        value = super().before_get(parser, section, option, value, defaults)
        expanded = os.path.expandvars(value)
        # os.path.expandvars() leaves "$NAME" untouched when NAME is unset,
        # so return None rather than a dangling placeholder.
        if not expanded.startswith("$"):
            return expanded
        return None


try:
    config = ConfigParser(interpolation=EnvInterpolation())
    config.read("conf/application.conf")
except Exception as e:
    print(f"Error while loading the config: {e}")
    print("Failed to Load Configuration. Exiting!!!")
    sys.exit()


class Logging:
    level = config.get("LOGGING", "level", fallback="INFO")
    level = level or "INFO"
    tb_flag = config.getboolean("LOGGING", "traceback", fallback=True)
    tb_flag = tb_flag if tb_flag is not None else True


BLOB_CONN_STR = config["BLOB_STORAGE"]["conn_str"]
BLOB_CONTAINER_NAME = config["BLOB_STORAGE"]["container_name"]
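A minimal sketch of EnvInterpolation in action, assuming the script runs from the repository root (so conf/application.conf resolves) and using a dummy value in place of a real credential; note that importing this module also executes its module-level config loading:

import os

os.environ["CONN_STR"] = "dummy-connection-string"  # placeholder, not a real credential
os.environ["CONTAINER_NAME"] = "dummy-container"

from configparser import ConfigParser
from scripts.constants.app_configuration import EnvInterpolation

cfg = ConfigParser(interpolation=EnvInterpolation())
cfg.read("conf/application.conf")
print(cfg["BLOB_STORAGE"]["conn_str"])  # -> dummy-connection-string
# With CONN_STR unset, before_get() returns None instead of the literal "$CONN_STR".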
import logging
import os
from logging import StreamHandler
from logging.handlers import RotatingFileHandler, SocketHandler

import yaml

from scripts.constants.app_configuration import Logging


# Read the logging configuration from logger_conf.yml.
def read_configuration(file_name):
    """
    :param file_name: path to a YAML configuration file
    :return: the parsed configuration
    """
    with open(file_name, 'r') as stream:
        try:
            return yaml.safe_load(stream)
        except Exception as e:
            print(f"Failed to load Configuration. Error: {e}")


config = read_configuration("scripts/logging/logger_conf.yml")
logging_config = config["logger"]
logging_config["level"] = Logging.level
enable_traceback: bool = Logging.tb_flag
def get_logger():
    """
    Creates a rotating log
    """
    __logger__ = logging.getLogger('')
    __logger__.setLevel(logging_config["level"].upper())
    log_formatter = ('%(asctime)s - %(levelname)-6s - [%(threadName)5s:%(funcName)5s():'
                     '%(lineno)s] - %(message)s')
    time_format = "%Y-%m-%d %H:%M:%S"
    formatter = logging.Formatter(log_formatter, time_format)
    for each_handler in logging_config["handlers"]:
        if each_handler["type"] in ["RotatingFileHandler"]:
            # Each RotatingFileHandler entry carries its own file_path in logger_conf.yml.
            file_path = each_handler.get("file_path", "logs/")
            if not os.path.exists(file_path):
                os.makedirs(file_path)
            log_file = os.path.join(file_path, f"{logging_config['name']}.log")
            temp_handler = RotatingFileHandler(log_file,
                                               maxBytes=each_handler["max_bytes"],
                                               backupCount=each_handler["back_up_count"])
            temp_handler.setFormatter(formatter)
        elif each_handler["type"] in ["SocketHandler"]:
            temp_handler = SocketHandler(each_handler["host"], each_handler["port"])
        elif each_handler["type"] in ["StreamHandler"]:
            temp_handler = StreamHandler()
            temp_handler.setFormatter(formatter)
        else:
            # Unknown handler type; addHandler(None) would raise a TypeError.
            temp_handler = None
        if temp_handler is not None:
            __logger__.addHandler(temp_handler)
    return __logger__


logger = get_logger()
logger:
  name: jubilant_r5_golden_batch_yield
  level: DEBUG
  handlers:
    - type: RotatingFileHandler
      file_path: logs/
      max_bytes: 100000000
      back_up_count: 5
    - type: StreamHandler
      name: jubilant_r5_golden_batch_yield
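For orientation, a minimal sketch of the structure get_logger() iterates over once this file is parsed, assuming it is run from the repository root:

import yaml

with open("scripts/logging/logger_conf.yml") as f:
    conf = yaml.safe_load(f)["logger"]

print(conf["name"])                           # jubilant_r5_golden_batch_yield
print([h["type"] for h in conf["handlers"]])  # ['RotatingFileHandler', 'StreamHandler']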
import os
import traceback

from loguru import logger
from azure.storage.blob import BlobServiceClient

from scripts.constants.app_configuration import BLOB_CONN_STR, BLOB_CONTAINER_NAME


class Blob_Downloader:
    def __init__(self, project, site, blob_path, raw_ds_path):
        self.project = project
        self.site = site
        self.blob_path = blob_path
        # self.line = line
        # self.camera = camera
        self.blob_service_client = BlobServiceClient.from_connection_string(BLOB_CONN_STR)
        self.raw_ds_path = raw_ds_path
        self.container = BLOB_CONTAINER_NAME

    def download(self):
        try:
            download_file_path = os.path.join(self.raw_ds_path, "dataset.zip")
            blob_client = self.blob_service_client.get_blob_client(container=self.container,
                                                                   blob=self.blob_path)
            with open(download_file_path, "wb") as download_file:
                blob_client.download_blob().readinto(download_file)
            return True
        except Exception as e:
            traceback.print_exc()
            logger.error(f"Failed to Download Files from Blob Storage! : {str(e)}")
            return False
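Standalone use mirrors the call in app.py. A minimal sketch, assuming CONN_STR and CONTAINER_NAME are exported (for example via config.env) and using the project, site, blob_path, and data_path values from conf/master_config.yml:

import os

from scripts.utils.blob_downloader import Blob_Downloader

os.makedirs("jk_data/raw_dataset", exist_ok=True)
downloader = Blob_Downloader("JK_Cements", "Chittorgarh",
                             "JK_Cements/dataset2.zip", "jk_data/raw_dataset")
if downloader.download():
    print("dataset.zip written to jk_data/raw_dataset")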
(One file's diff is collapsed on the original page and not shown here.)
from zipfile import ZipFile
import os
import shutil


class ExtractDataset:
    def extract_ds(self, dataset_path, dest_path):
        self.dataset_path = dataset_path
        self.dest_path = dest_path
        print("Extracting dataset archive")
        with ZipFile(self.dataset_path, 'r') as zObject:
            zObject.extractall(path=dest_path)

    def move_files(self, dataset_path, dest_path):
        self.dataset_path = dataset_path
        self.dest_path = dest_path
        # Flatten the extracted tree: move every file directly into dest_path.
        for root, dirs, files in os.walk(self.dataset_path):
            for name in files:
                shutil.move(os.path.join(root, name), os.path.join(self.dest_path, name))
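On its own, the class reproduces the extract-then-flatten step from app.py. A minimal sketch, assuming dataset.zip was already downloaded to the raw dataset directory:

import os

from scripts.utils.dataset_extraction import ExtractDataset

os.makedirs("jk_data/extracted_dataset", exist_ok=True)
os.makedirs("jk_data/unaugmented_dataset", exist_ok=True)
extract = ExtractDataset()
extract.extract_ds("jk_data/raw_dataset/dataset.zip", "jk_data/extracted_dataset")
extract.move_files("jk_data/extracted_dataset", "jk_data/unaugmented_dataset")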