Commit aee4d77e authored by Sikhin VC's avatar Sikhin VC

initial commit

parent 6962547c
# Default ignored files
/shelf/
/workspace.xml
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (training_pipeline)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/training_pipeline.iml" filepath="$PROJECT_DIR$/.idea/training_pipeline.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
FROM python:3.7
# Single apt layer: tzdata, vim, and the OpenCV runtime libraries.
RUN apt-get update && apt-get install -y tzdata vim ffmpeg libsm6 libxext6
RUN pip3 install --upgrade pip
# Specifiers containing '>' must be quoted so the shell does not treat them as output redirection.
RUN pip3 install pytz==2020.4 loguru~=0.6.0 scipy~=1.4.1 numpy~=1.19.5 pandas~=1.3.3 requests==2.26.0 pydantic==1.8.2 python-dotenv==0.19.0 PyYAML~=6.0 python-dateutil~=2.8.2 azure-core==1.26.3 azure-identity==1.12.0 azure-storage-blob==12.15.0 "Pillow>=7.1.2"
RUN pip3 install "opencv-python>=4.1.2" matplotlib seaborn
ADD . /app
WORKDIR /app
CMD [ "python","app.py" ]
\ No newline at end of file
import os.path

if __name__ == "__main__":
    from dotenv import load_dotenv

    load_dotenv(dotenv_path='config.env')

import yaml
import traceback
import warnings
from loguru import logger

from scripts.constants.app_configuration import master_configuration_file
from scripts.utils.blob_downloader import Blob_Downloader
from scripts.utils.data_augmentation import AugmentationManager
from scripts.utils.dataset_extraction import ExtractDataset

warnings.filterwarnings("ignore")

def main():
    with open(master_configuration_file, 'r') as _cf:
        master_conf = yaml.full_load(_cf)
    root_data_path = master_conf["master_config"]["data_path"]
    raw_dataset_path = os.path.join(root_data_path, "raw_dataset")
    extracted_dataset_path = os.path.join(root_data_path, "extracted_dataset")
    unaugmented_dataset_path = os.path.join(root_data_path, "unaugmented_dataset")
    augmented_dataset_path = os.path.join(root_data_path, "augmented_dataset")
    # annotation_directory = os.path.join("data", "dataset")
    # post_process_directory = os.path.join(root_data_path, "post")
    for path in (root_data_path, raw_dataset_path, extracted_dataset_path,
                 unaugmented_dataset_path, augmented_dataset_path):
        os.makedirs(path, exist_ok=True)
    augmentation_functions = {}
    logger.info("Starting Vision Data Acquisition Pipeline")
    # logger.info(f"Master Configuration Read, Found {len(master_conf['master_config']['rtsp_url'])} Streams with "
    #             f"check batch size of {master_conf['master_config']['check_batch_size']}")
    blob_list = master_conf['master_config']['blob_path']
    logger.info(f'Starting acquisition of blob {blob_list}')
    blob_util = Blob_Downloader(master_conf['master_config']['project'],
                                master_conf['master_config']['site'],
                                master_conf['master_config']['blob_path'],
                                raw_dataset_path)
    blob_download_status = blob_util.download()
    if blob_download_status:
        logger.info("Data Downloaded Successfully!")
    else:
        logger.warning("Failed To Download Data")
    extract = ExtractDataset()
    extract.extract_ds(os.path.join(raw_dataset_path, "dataset.zip"), extracted_dataset_path)
    extract.move_files(extracted_dataset_path, unaugmented_dataset_path)
    try:
        # os.walk() returns a generator of (dirpath, dirnames, filenames) tuples,
        # so take the first dirpath rather than indexing the generator itself.
        annotation_directory = next(os.walk("data"))[0]
        post_process_directory = annotation_directory
    except StopIteration:
        annotation_directory = "data"
        post_process_directory = "data"
    augmentation_list = master_conf['master_config']['augmentation_types']
    for augmentation in augmentation_list:
        augmentation_value = augmentation_list[augmentation]["value"]
        augmentation_functions[augmentation] = [{"property": augmentation, "value": augmentation_value}]
    logger.debug(augmentation_functions)
    augmentation_manager = AugmentationManager(augmentation_functions)
    augmentation_manager.process(annotation_directory=unaugmented_dataset_path,
                                 post_process_directory=augmented_dataset_path)

if __name__ == '__main__':
    logger.info("Attempting to Start Vision Data Acquisition Pipeline!")
    try:
        main()
    except Exception as e:
        traceback.print_exc()
        logger.error(f"Failed to Start Vision Data Acquisition Pipeline! : {str(e)}")
[BLOB_STORAGE]
conn_str=$CONN_STR
container_name=$CONTAINER_NAME
\ No newline at end of file
master_config:
  project: 'JK_Cements'
  site: 'Chittorgarh'
  blob_path:
    "JK_Cements/dataset2.zip"
  data_path:
    "jk_data"
  augmentation_types:
    blur:
      value: 0.3
    noise:
      value: 0.1
    horizontal_flip:
      value: 0
    grayscale:
      value: 0.4
    hue:
      value: 1
    saturation:
      value: 1
    brightness:
      value: 0.8
    exposure:
      value: 0.9
    vertical_flip:
      value: 0
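For reference, the loop in app.py reduces each entry of this augmentation_types block to a single {property, value} record. A minimal sketch of the augmentation_functions mapping it builds from the config above (the exact shape AugmentationManager expects is assumed, since its source is collapsed in this diff):

# Built by app.py from conf/master_config.yml; each key maps to a
# single-item list holding a {"property", "value"} dict.
augmentation_functions = {
    "blur":            [{"property": "blur", "value": 0.3}],
    "noise":           [{"property": "noise", "value": 0.1}],
    "horizontal_flip": [{"property": "horizontal_flip", "value": 0}],
    "grayscale":       [{"property": "grayscale", "value": 0.4}],
    "hue":             [{"property": "hue", "value": 1}],
    "saturation":      [{"property": "saturation", "value": 1}],
    "brightness":      [{"property": "brightness", "value": 0.8}],
    "exposure":        [{"property": "exposure", "value": 0.9}],
    "vertical_flip":   [{"property": "vertical_flip", "value": 0}],
}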
master_config:
  project: 'JK_Cements'
  site: 'Chittorgarh'
  rtsp_url:
    2023_02_28_15_45_00.mp4:
      line: 1
      camera: 1
      contour_area: 1000
  weight_file: JK_Cements/Chittorgarh/weights/weight.pt
  check_batch_size: 16
  packet_size: 20
  classes: assets/classes.txt
\ No newline at end of file
CONN_STR = AccountName=azrmlilensqa006382180551;AccountKey=tDGOKfiZ2svfoMvVmS0Fbpf0FTHfTq4wKYuDX7cAxlhve/3991QuzdvJHm9vWc+lo6mtC+x9yPSghWNR4+gacg==;EndpointSuffix=core.windows.net;DefaultEndpointsProtocol=https;
CONTAINER_NAME = mlflow-vm-container
\ No newline at end of file
# This is a sample Python script.

# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.


def print_hi(name):
    # Use a breakpoint in the code line below to debug your script.
    print(f'Hi, {name}')  # Press Ctrl+F8 to toggle the breakpoint.


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    print_hi('PyCharm')

# See PyCharm help at https://www.jetbrains.com/help/pycharm/
pytz==2020.4
loguru~=0.6.0
#scipy~=1.4.1
numpy~=1.19.5
pandas~=1.3.3
requests==2.26.0
pydantic==1.8.2
python-dotenv==0.19.0
PyYAML~=6.0
python-dateutil~=2.8.2
azure-core==1.26.3
azure-identity==1.12.0
azure-storage-blob==12.15.0
albumentations==1.1.0
\ No newline at end of file
import os
import sys
from configparser import ConfigParser, BasicInterpolation

master_configuration_file = r"./conf/master_config.yml"


class EnvInterpolation(BasicInterpolation):
    """
    Interpolation which expands environment variables in values.
    """

    def before_get(self, parser, section, option, value, defaults):
        value = super().before_get(parser, section, option, value, defaults)
        expanded = os.path.expandvars(value)
        # os.path.expandvars() leaves "$NAME" untouched when NAME is unset,
        # so return None rather than a dangling placeholder.
        if not expanded.startswith("$"):
            return expanded
        return None


try:
    config = ConfigParser(interpolation=EnvInterpolation())
    config.read("conf/application.conf")
except Exception as e:
    print(f"Error while loading the config: {e}")
    print("Failed to Load Configuration. Exiting!!!")
    sys.exit()


class Logging:
    level = config.get("LOGGING", "level", fallback="INFO")
    level = level or "INFO"
    tb_flag = config.getboolean("LOGGING", "traceback", fallback=True)
    tb_flag = tb_flag if tb_flag is not None else True


BLOB_CONN_STR = config["BLOB_STORAGE"]["conn_str"]
BLOB_CONTAINER_NAME = config["BLOB_STORAGE"]["container_name"]
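A minimal sketch of EnvInterpolation in action, assuming the script runs from the repository root (so conf/application.conf resolves) and using a dummy value in place of a real credential; note that importing this module also executes its module-level config loading:

import os

os.environ["CONN_STR"] = "dummy-connection-string"  # placeholder, not a real credential
os.environ["CONTAINER_NAME"] = "dummy-container"

from configparser import ConfigParser
from scripts.constants.app_configuration import EnvInterpolation

cfg = ConfigParser(interpolation=EnvInterpolation())
cfg.read("conf/application.conf")
print(cfg["BLOB_STORAGE"]["conn_str"])  # -> dummy-connection-string
# With CONN_STR unset, before_get() returns None instead of the literal "$CONN_STR".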
import logging
import os
from logging import StreamHandler
from logging.handlers import RotatingFileHandler, SocketHandler

import yaml

from scripts.constants.app_configuration import Logging


# Read the logging configuration from logger_conf.yml.
def read_configuration(file_name):
    """
    :param file_name: path to a YAML configuration file
    :return: the parsed configuration
    """
    with open(file_name, 'r') as stream:
        try:
            return yaml.safe_load(stream)
        except Exception as e:
            print(f"Failed to load Configuration. Error: {e}")


config = read_configuration("scripts/logging/logger_conf.yml")
logging_config = config["logger"]
logging_config["level"] = Logging.level
enable_traceback: bool = Logging.tb_flag
def get_logger():
    """
    Creates a rotating log
    """
    __logger__ = logging.getLogger('')
    __logger__.setLevel(logging_config["level"].upper())
    log_formatter = ('%(asctime)s - %(levelname)-6s - [%(threadName)5s:%(funcName)5s():'
                     '%(lineno)s] - %(message)s')
    time_format = "%Y-%m-%d %H:%M:%S"
    formatter = logging.Formatter(log_formatter, time_format)
    for each_handler in logging_config["handlers"]:
        if each_handler["type"] in ["RotatingFileHandler"]:
            # Each RotatingFileHandler entry carries its own file_path in logger_conf.yml.
            file_path = each_handler.get("file_path", "logs/")
            if not os.path.exists(file_path):
                os.makedirs(file_path)
            log_file = os.path.join(file_path, f"{logging_config['name']}.log")
            temp_handler = RotatingFileHandler(log_file,
                                               maxBytes=each_handler["max_bytes"],
                                               backupCount=each_handler["back_up_count"])
            temp_handler.setFormatter(formatter)
        elif each_handler["type"] in ["SocketHandler"]:
            temp_handler = SocketHandler(each_handler["host"], each_handler["port"])
        elif each_handler["type"] in ["StreamHandler"]:
            temp_handler = StreamHandler()
            temp_handler.setFormatter(formatter)
        else:
            # Unknown handler type; addHandler(None) would raise a TypeError.
            temp_handler = None
        if temp_handler is not None:
            __logger__.addHandler(temp_handler)
    return __logger__


logger = get_logger()
logger:
  name: jubilant_r5_golden_batch_yield
  level: DEBUG
  handlers:
    - type: RotatingFileHandler
      file_path: logs/
      max_bytes: 100000000
      back_up_count: 5
    - type: StreamHandler
      name: jubilant_r5_golden_batch_yield
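For orientation, a minimal sketch of the structure get_logger() iterates over once this file is parsed, assuming it is run from the repository root:

import yaml

with open("scripts/logging/logger_conf.yml") as f:
    conf = yaml.safe_load(f)["logger"]

print(conf["name"])                           # jubilant_r5_golden_batch_yield
print([h["type"] for h in conf["handlers"]])  # ['RotatingFileHandler', 'StreamHandler']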
import os
import traceback

from loguru import logger
from azure.storage.blob import BlobServiceClient

from scripts.constants.app_configuration import BLOB_CONN_STR, BLOB_CONTAINER_NAME


class Blob_Downloader:
    def __init__(self, project, site, blob_path, raw_ds_path):
        self.project = project
        self.site = site
        self.blob_path = blob_path
        # self.line = line
        # self.camera = camera
        self.blob_service_client = BlobServiceClient.from_connection_string(BLOB_CONN_STR)
        self.raw_ds_path = raw_ds_path
        self.container = BLOB_CONTAINER_NAME

    def download(self):
        try:
            download_file_path = os.path.join(self.raw_ds_path, "dataset.zip")
            blob_client = self.blob_service_client.get_blob_client(container=self.container,
                                                                   blob=self.blob_path)
            with open(download_file_path, "wb") as download_file:
                blob_client.download_blob().readinto(download_file)
            return True
        except Exception as e:
            traceback.print_exc()
            logger.error(f"Failed to Download Files from Blob Storage! : {str(e)}")
            return False
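Standalone use mirrors the call in app.py. A minimal sketch, assuming CONN_STR and CONTAINER_NAME are exported (for example via config.env) and using the project, site, blob_path, and data_path values from conf/master_config.yml:

import os

from scripts.utils.blob_downloader import Blob_Downloader

os.makedirs("jk_data/raw_dataset", exist_ok=True)
downloader = Blob_Downloader("JK_Cements", "Chittorgarh",
                             "JK_Cements/dataset2.zip", "jk_data/raw_dataset")
if downloader.download():
    print("dataset.zip written to jk_data/raw_dataset")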
(One file's diff is collapsed on the original page and not shown here.)
from zipfile import ZipFile
import os
import shutil


class ExtractDataset:
    def extract_ds(self, dataset_path, dest_path):
        self.dataset_path = dataset_path
        self.dest_path = dest_path
        print("Extracting dataset archive")
        with ZipFile(self.dataset_path, 'r') as zObject:
            zObject.extractall(path=dest_path)

    def move_files(self, dataset_path, dest_path):
        self.dataset_path = dataset_path
        self.dest_path = dest_path
        # Flatten the extracted tree: move every file directly into dest_path.
        for root, dirs, files in os.walk(self.dataset_path):
            for name in files:
                shutil.move(os.path.join(root, name), os.path.join(self.dest_path, name))
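On its own, the class reproduces the extract-then-flatten step from app.py. A minimal sketch, assuming dataset.zip was already downloaded to the raw dataset directory:

import os

from scripts.utils.dataset_extraction import ExtractDataset

os.makedirs("jk_data/extracted_dataset", exist_ok=True)
os.makedirs("jk_data/unaugmented_dataset", exist_ok=True)
extract = ExtractDataset()
extract.extract_ds("jk_data/raw_dataset/dataset.zip", "jk_data/extracted_dataset")
extract.move_files("jk_data/extracted_dataset", "jk_data/unaugmented_dataset")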