Commit aee4d77e authored by Sikhin VC's avatar Sikhin VC

initial commit

parent 6962547c
# Default ignored files
/shelf/
/workspace.xml
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (training_pipeline)" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/training_pipeline.iml" filepath="$PROJECT_DIR$/.idea/training_pipeline.iml" />
    </modules>
  </component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
FROM python:3.7
# Merge the duplicated apt-get update/tzdata layers into one.
RUN apt-get update && apt-get install -y tzdata vim ffmpeg libsm6 libxext6
RUN pip3 install --upgrade pip
# Quote specifiers containing ">=" so the shell does not treat ">" as a redirect.
RUN pip3 install pytz==2020.4 loguru~=0.6.0 scipy~=1.4.1 numpy~=1.19.5 pandas~=1.3.3 requests==2.26.0 pydantic==1.8.2 python-dotenv==0.19.0 PyYAML~=6.0 python-dateutil~=2.8.2 azure-core==1.26.3 azure-identity==1.12.0 azure-storage-blob==12.15.0 "Pillow>=7.1.2"
RUN pip3 install "opencv-python>=4.1.2" matplotlib seaborn
ADD . /app
WORKDIR /app
CMD [ "python", "app.py" ]
import os.path

if __name__ == "__main__":
    # Load environment variables before importing modules that read them at import time.
    from dotenv import load_dotenv

    load_dotenv(dotenv_path='config.env')

import yaml
import traceback
import warnings
from loguru import logger
from scripts.constants.app_configuration import master_configuration_file
from scripts.utils.blob_downloader import Blob_Downloader
from scripts.utils.data_augmentation import AugmentationManager
from scripts.utils.dataset_extraction import ExtractDataset

warnings.filterwarnings("ignore")
def main():
    with open(master_configuration_file, 'r') as _cf:
        master_conf = yaml.full_load(_cf)
    root_data_path = master_conf["master_config"]["data_path"]
    raw_dataset_path = os.path.join(root_data_path, "raw_dataset")
    extracted_dataset_path = os.path.join(root_data_path, "extracted_dataset")
    unaugmented_dataset_path = os.path.join(root_data_path, "unaugmented_dataset")
    augmented_dataset_path = os.path.join(root_data_path, "augmented_dataset")
    for path in (root_data_path, raw_dataset_path, extracted_dataset_path,
                 unaugmented_dataset_path, augmented_dataset_path):
        os.makedirs(path, exist_ok=True)
    augmentation_functions = {}
    logger.info("Starting Vision Data Acquisition Pipeline")
    # logger.info(f"Master Configuration Read, Found {len(master_conf['master_config']['rtsp_url'])} Streams with "
    #             f"check batch size of {master_conf['master_config']['check_batch_size']}")
    blob_list = master_conf['master_config']['blob_path']
    logger.info(f'Starting acquisition of blob {blob_list}')
    blob_util = Blob_Downloader(master_conf['master_config']['project'],
                                master_conf['master_config']['site'],
                                master_conf['master_config']['blob_path'],
                                raw_dataset_path)
    blob_download_status = blob_util.download()
    if blob_download_status:
        logger.info("Data Downloaded Successfully!")
    else:
        logger.warning("Failed To Download Data")
    extract = ExtractDataset()
    extract.extract_ds(os.path.join(raw_dataset_path, "dataset.zip"), extracted_dataset_path)
    extract.move_files(extracted_dataset_path, unaugmented_dataset_path)
    # os.walk() returns a generator; take its first entry rather than indexing it.
    try:
        annotation_directory = next(os.walk("data"))[0]
        post_process_directory = annotation_directory
    except StopIteration:
        annotation_directory = "data"
        post_process_directory = "data"
    augmentation_list = master_conf['master_config']['augmentation_types']
    for augmentation in augmentation_list:
        augmentation_value = master_conf['master_config']['augmentation_types'][augmentation]["value"]
        augmentation_functions[augmentation] = [{"property": augmentation, "value": augmentation_value}]
    augmentation_manager = AugmentationManager(augmentation_functions)
    augmentation_manager.process(annotation_directory=unaugmented_dataset_path,
                                 post_process_directory=augmented_dataset_path)

if __name__ == '__main__':
    logger.info("Attempting to Start Vision Data Acquisition Pipeline!")
    try:
        main()
    except Exception as e:
        traceback.print_exc()
        logger.error(f"Failed to Start Vision Data Acquisition Pipeline! : {str(e)}")
[BLOB_STORAGE]
conn_str=$CONN_STR
container_name=$CONTAINER_NAME
master_config:
  project: 'JK_Cements'
  site: 'Chittorgarh'
  blob_path: "JK_Cements/dataset2.zip"
  data_path: "jk_data"
  augmentation_types:
    blur:
      value: 0.3
    noise:
      value: 0.1
    horizontal_flip:
      value: 0
    grayscale:
      value: 0.4
    hue:
      value: 1
    saturation:
      value: 1
    brightness:
      value: 0.8
    exposure:
      value: 0.9
    vertical_flip:
      value: 0

master_config:
  project: 'JK_Cements'
  site: 'Chittorgarh'
  rtsp_url:
    2023_02_28_15_45_00.mp4:
      line: 1
      camera: 1
      contour_area: 1000
  weight_file: JK_Cements/Chittorgarh/weights/weight.pt
  check_batch_size: 16
  packet_size: 20
  classes: assets/classes.txt
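
# A hypothetical validation sketch (not in the original repo): pydantic 1.8.2
# is already pinned in requirements.txt but unused, and could type-check the
# first master_config document above before the pipeline consumes it.
from typing import Dict
import yaml
from pydantic import BaseModel


class AugmentationType(BaseModel):
    value: float


class MasterConfig(BaseModel):
    project: str
    site: str
    blob_path: str
    data_path: str
    augmentation_types: Dict[str, AugmentationType]


with open("conf/master_config.yml") as _cf:
    conf = MasterConfig.parse_obj(yaml.safe_load(_cf)["master_config"])
print(conf.augmentation_types["blur"].value)  # 0.3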
CONN_STR = AccountName=azrmlilensqa006382180551;AccountKey=tDGOKfiZ2svfoMvVmS0Fbpf0FTHfTq4wKYuDX7cAxlhve/3991QuzdvJHm9vWc+lo6mtC+x9yPSghWNR4+gacg==;EndpointSuffix=core.windows.net;DefaultEndpointsProtocol=https;
CONTAINER_NAME = mlflow-vm-container
# This is a sample Python script.

# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.


def print_hi(name):
    # Use a breakpoint in the code line below to debug your script.
    print(f'Hi, {name}')  # Press Ctrl+F8 to toggle the breakpoint.


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    print_hi('PyCharm')

# See PyCharm help at https://www.jetbrains.com/help/pycharm/
pytz==2020.4
loguru~=0.6.0
#scipy~=1.4.1
numpy~=1.19.5
pandas~=1.3.3
requests==2.26.0
pydantic==1.8.2
python-dotenv==0.19.0
PyYAML~=6.0
python-dateutil~=2.8.2
azure-core==1.26.3
azure-identity==1.12.0
azure-storage-blob==12.15.0
albumentations==1.1.0
import os
import os.path
import sys
from configparser import ConfigParser, BasicInterpolation

master_configuration_file = r"./conf/master_config.yml"


class EnvInterpolation(BasicInterpolation):
    """
    Interpolation which expands environment variables in values.
    """

    def before_get(self, parser, section, option, value, defaults):
        value = super().before_get(parser, section, option, value, defaults)
        expanded = os.path.expandvars(value)
        # Unresolved variables still start with "$"; treat them as unset.
        if not expanded.startswith("$"):
            return expanded
        return None


try:
    config = ConfigParser(interpolation=EnvInterpolation())
    config.read("conf/application.conf")
except Exception as e:
    print(f"Error while loading the config: {e}")
    print("Failed to Load Configuration. Exiting!!!")
    sys.exit()


class PathToDirectory:
    # Assumed definition: the logging module imports PathToDirectory from here,
    # but it was missing; the path matches file_path in logger_conf.yml.
    LOGS_MODULE_PATH = "logs/"


class Logging:
    level = config.get("LOGGING", "level", fallback="INFO")
    level = level or "INFO"
    tb_flag = config.getboolean("LOGGING", "traceback", fallback=True)
    tb_flag = tb_flag if tb_flag is not None else True


BLOB_CONN_STR = config["BLOB_STORAGE"]["conn_str"]
BLOB_CONTAINER_NAME = config["BLOB_STORAGE"]["container_name"]
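
# A minimal standalone sketch (not in the original module) showing how
# EnvInterpolation resolves values: variables set in the environment are
# expanded, while unresolved "$VAR" references come back as None.
if __name__ == '__main__':
    os.environ["CONTAINER_NAME"] = "mlflow-vm-container"
    parser = ConfigParser(interpolation=EnvInterpolation())
    parser.read_string("[BLOB_STORAGE]\nconn_str=$CONN_STR\ncontainer_name=$CONTAINER_NAME")
    print(parser.get("BLOB_STORAGE", "container_name"))  # mlflow-vm-container
    print(parser.get("BLOB_STORAGE", "conn_str"))        # None (when CONN_STR is unset)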
import logging
import os
from logging import StreamHandler
from logging.handlers import RotatingFileHandler, SocketHandler

import yaml

from scripts.constants.app_configuration import PathToDirectory, Logging


def read_configuration(file_name):
    """
    Reads the logger configuration from the given YAML file.

    :param file_name: path to the YAML configuration file
    :return: the parsed configuration constants
    """
    with open(file_name, 'r') as stream:
        try:
            return yaml.safe_load(stream)
        except Exception as e:
            print(f"Failed to load Configuration. Error: {e}")


config = read_configuration("scripts/logging/logger_conf.yml")
logging_config = config["logger"]
logging_config["level"] = Logging.level
enable_traceback: bool = Logging.tb_flag
def get_logger():
    """
    Creates a rotating log
    """
    __logger__ = logging.getLogger('')
    __logger__.setLevel(logging_config["level"].upper())
    log_formatter = ('%(asctime)s - %(levelname)-6s - '
                     '[%(threadName)5s:%(funcName)5s():%(lineno)s] - %(message)s')
    time_format = "%Y-%m-%d %H:%M:%S"
    file_path = PathToDirectory.LOGS_MODULE_PATH
    formatter = logging.Formatter(log_formatter, time_format)
    for each_handler in logging_config["handlers"]:
        if each_handler["type"] == "RotatingFileHandler":
            if not os.path.exists(file_path):
                os.makedirs(file_path)
            log_file = os.path.join(file_path, f"{logging_config['name']}.log")
            temp_handler = RotatingFileHandler(log_file,
                                               maxBytes=each_handler["max_bytes"],
                                               backupCount=each_handler["back_up_count"])
            temp_handler.setFormatter(formatter)
        elif each_handler["type"] == "SocketHandler":
            temp_handler = SocketHandler(each_handler["host"], each_handler["port"])
        elif each_handler["type"] == "StreamHandler":
            temp_handler = StreamHandler()
            temp_handler.setFormatter(formatter)
        else:
            # Skip unknown handler types instead of registering None.
            continue
        __logger__.addHandler(temp_handler)
    return __logger__


logger = get_logger()
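
# A hypothetical smoke test (not part of the original module): with the
# shipped logger_conf.yml this emits one record through both configured
# handlers, i.e. to stdout and to logs/<name>.log.
if __name__ == '__main__':
    logger.info("logger initialised")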
logger:
  name: jubilant_r5_golden_batch_yield
  level: DEBUG
  handlers:
    - type: RotatingFileHandler
      file_path: logs/
      max_bytes: 100000000
      back_up_count: 5
    - type: StreamHandler
      name: jubilant_r5_golden_batch_yield
import os
import traceback

from azure.storage.blob import BlobServiceClient
from loguru import logger

from scripts.constants.app_configuration import BLOB_CONN_STR, BLOB_CONTAINER_NAME


class Blob_Downloader:
    def __init__(self, project, site, blob_path, raw_ds_path):
        self.project = project
        self.site = site
        self.blob_path = blob_path
        self.blob_service_client = BlobServiceClient.from_connection_string(BLOB_CONN_STR)
        self.raw_ds_path = raw_ds_path
        self.container = BLOB_CONTAINER_NAME

    def download(self):
        try:
            download_file_path = os.path.join(self.raw_ds_path, "dataset.zip")
            blob_client = self.blob_service_client.get_blob_client(container=self.container,
                                                                   blob=self.blob_path)
            with open(download_file_path, "wb") as download_file:
                blob_client.download_blob().readinto(download_file)
            return True
        except Exception as e:
            traceback.print_exc()
            logger.error(f"Failed to Download Files from Blob Storage! : {str(e)}")
            return False
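
# A minimal usage sketch (mirrors the call in app.py; the project, site and
# blob values come from conf/master_config.yml, and the target directory is
# assumed to exist already):
if __name__ == '__main__':
    blob_util = Blob_Downloader('JK_Cements', 'Chittorgarh',
                                'JK_Cements/dataset2.zip', 'jk_data/raw_dataset')
    if blob_util.download():
        print('dataset.zip saved under jk_data/raw_dataset/')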
import os
from pathlib import Path
import cv2
from uuid import uuid4
from multiprocessing.pool import ThreadPool as Pool
from albumentations import Compose, BboxParams
from albumentations.augmentations.geometric.rotate import Rotate
from albumentations.augmentations.geometric.transforms import Affine
from albumentations.augmentations.transforms import (
HorizontalFlip, VerticalFlip, ToGray, HueSaturationValue, RandomBrightnessContrast, Blur, GaussNoise)
class AugmentImage:
    def __init__(self, funs):
        self.transformations = list()
        if 'vertical_flip' in funs:
            assert isinstance(funs['vertical_flip'], list)
            for each_prop in funs['vertical_flip']:
                if each_prop['property'] == 'vertical_flip':
                    self.transformations.append(VerticalFlip(p=1))
        if 'horizontal_flip' in funs:
            assert isinstance(funs['horizontal_flip'], list)
            for each_prop in funs['horizontal_flip']:
                if each_prop['property'] == 'horizontal_flip':
                    self.transformations.append(HorizontalFlip(p=1))
        if 'rotation' in funs:
            assert isinstance(funs['rotation'], list)
            for each_prop in funs['rotation']:
                if each_prop['property'] == 'rotation':
                    self.transformations.append(Rotate(
                        limit=int(45 * each_prop['value']),
                        interpolation=1,
                        border_mode=0,
                        value=None,
                        mask_value=None,
                        always_apply=False,
                        p=0.7))
        if 'grayscale' in funs:
            assert isinstance(funs['grayscale'], list)
            for each_prop in funs['grayscale']:
                # app.py emits property == 'grayscale' (not 'probability');
                # matching on the old key meant ToGray was never added.
                if each_prop['property'] == 'grayscale':
                    self.transformations.append(
                        ToGray(p=each_prop['value']))
        if 'hue' in funs:
            assert isinstance(funs['hue'], list)
            for each_prop in funs['hue']:
                if each_prop['property'] == 'hue':
                    self.transformations.append(
                        HueSaturationValue(
                            hue_shift_limit=int(180 * each_prop['value']),
                            sat_shift_limit=0,
                            val_shift_limit=0,
                            always_apply=False,
                            p=0.7))
        if 'saturation' in funs:
            assert isinstance(funs['saturation'], list)
            for each_prop in funs['saturation']:
                if each_prop['property'] == 'saturation':
                    self.transformations.append(
                        HueSaturationValue(
                            hue_shift_limit=0,
                            sat_shift_limit=int(255 * each_prop['value']),
                            val_shift_limit=0,
                            always_apply=False,
                            p=0.7))
        if 'brightness' in funs:
            assert isinstance(funs['brightness'], list)
            for each_prop in funs['brightness']:
                if each_prop['property'] == 'brightness':
                    self.transformations.append(
                        RandomBrightnessContrast(
                            brightness_limit=int(30 * each_prop['value']),
                            contrast_limit=0,
                            brightness_by_max=True,
                            always_apply=True,
                            p=0.7))
        if 'exposure' in funs:
            assert isinstance(funs['exposure'], list)
            for each_prop in funs['exposure']:
                if each_prop['property'] == 'exposure':
                    self.transformations.append(
                        HueSaturationValue(
                            hue_shift_limit=0,
                            sat_shift_limit=0,
                            val_shift_limit=int(255 * each_prop['value']),
                            always_apply=False,
                            p=0.7))
        if 'blur' in funs:
            assert isinstance(funs['blur'], list)
            for each_prop in funs['blur']:
                if each_prop['property'] == 'blur':
                    self.transformations.append(
                        Blur(blur_limit=int(150 * each_prop['value']),
                             always_apply=False,
                             p=1))
        if 'noise' in funs:
            assert isinstance(funs['noise'], list)
            for each_prop in funs['noise']:
                if each_prop['property'] == 'noise':
                    self.transformations.append(
                        GaussNoise(
                            var_limit=int(50000 * each_prop['value']),
                            mean=0,
                            per_channel=True,
                            always_apply=True,
                            p=0.7))
        if 'horizontal_shear' in funs:
            assert isinstance(funs['horizontal_shear'], list)
            for each_prop in funs['horizontal_shear']:
                if each_prop['property'] == 'horizontal_shear':
                    # Shear both ways around the configured magnitude.
                    for sign in (-1, 1):
                        self.transformations.append(
                            Affine(
                                scale=None,
                                translate_percent=None,
                                translate_px=None,
                                rotate=None,
                                shear={"x": int(sign * 45 * each_prop['value']), "y": 0},
                                interpolation=0,
                                mask_interpolation=0,
                                cval=0,
                                cval_mask=0,
                                mode=0,
                                fit_output=False,
                                always_apply=False,
                                p=0.9))
        if 'vertical_shear' in funs:
            assert isinstance(funs['vertical_shear'], list)
            for each_prop in funs['vertical_shear']:
                if each_prop['property'] == 'vertical_shear':
                    for sign in (-1, 1):
                        self.transformations.append(
                            Affine(
                                scale=None,
                                translate_percent=None,
                                translate_px=None,
                                rotate=None,
                                shear={"x": 0, "y": int(sign * 45 * each_prop['value'])},
                                interpolation=0,
                                mask_interpolation=0,
                                cval=0,
                                cval_mask=0,
                                mode=0,
                                fit_output=False,
                                always_apply=False,
                                p=0.9))
        self.transformer = Compose(self.transformations,
                                   bbox_params=BboxParams(
                                       format='yolo',
                                       label_fields=['category_ids']))
    def __call__(self, image, bounding_boxes, category_ids):
        if self.transformations:
            transformed = self.transformer(
                image=image,
                bboxes=bounding_boxes,
                category_ids=category_ids)
            return [transformed['image'], transformed['bboxes'], category_ids]
        return image

    @staticmethod
    def conv_2_coco(img, *args):
        import numpy as np
        # Convert a normalised YOLO box (x_center, y_center, w, h) into a
        # COCO pixel box (x_min, y_min, width, height).
        x_cen, y_cen, w, h = args[0][0]
        ih, iw, _ = img.shape
        x_min = (x_cen - w / 2) * iw
        y_min = (y_cen - h / 2) * ih
        wid = w * iw
        hei = h * ih
        return np.array([(x_min, y_min, wid, hei)])
    BOX_COLOR = (255, 0, 0)  # Red
    TEXT_COLOR = (255, 255, 255)  # White

    @staticmethod
    def visualize_bbox(img, bbox, class_name, color=BOX_COLOR, thickness=2):
        """Visualizes a single COCO-format bounding box on the image."""
        x_min, y_min, w, h = bbox
        x_min, x_max, y_min, y_max = int(x_min), int(x_min + w), int(y_min), int(y_min + h)
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=color, thickness=thickness)
        ((text_width, text_height), _) = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 1)
        cv2.rectangle(img, (x_min, y_min - int(1.3 * text_height)), (x_min + text_width, y_min),
                      AugmentImage.BOX_COLOR, -1)
        cv2.putText(
            img,
            text=class_name,
            org=(x_min, y_min - int(0.3 * text_height)),
            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
            fontScale=0.35,
            color=AugmentImage.TEXT_COLOR,
            lineType=cv2.LINE_AA,
        )
        return img

    def visualize(self, image, bboxes, category_ids, category_id_to_name):
        img = image.copy()
        for bbox, category_id in zip(bboxes, category_ids):
            class_name = category_id_to_name[category_id]
            img = self.visualize_bbox(img, bbox, class_name)
        return img
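
# A small worked example (hypothetical values, not in the original module) of
# the YOLO -> COCO conversion in AugmentImage.conv_2_coco: on a 400x200 image,
# the normalised box (0.5, 0.5, 0.25, 0.5) maps to pixels (150, 50, 100, 100).
if __name__ == '__main__':
    import numpy as np
    frame = np.zeros((200, 400, 3), dtype=np.uint8)  # height 200, width 400
    print(AugmentImage.conv_2_coco(frame, [(0.5, 0.5, 0.25, 0.5)]))
    # -> [[150.  50. 100. 100.]]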
class ChainAugmentations:
    def __init__(self, functions):
        self.transformers = [AugmentImage({each_prop: functions[each_prop]}) for each_prop in functions]

    def __call__(self, image, bounding_boxes, category_ids):
        return [each_transformer(image, bounding_boxes, category_ids) for each_transformer in self.transformers]


class AugmentationManager:
    def __init__(self, functions=None):
        if functions is None:
            functions = {'blur': [{'property': 'blur', 'value': 0.3}],
                         'noise': [{'property': 'noise', 'value': 0.1}]}
        self.augment = ChainAugmentations(functions=functions)
        self.pool = Pool(12)
    def run_augmentations(self, annotation_directory, post_process_directory, filename, each_file):
        # Read the YOLO annotation file that accompanies the image.
        with open(filename + ".txt", 'r') as f:
            annotations = [e for e in f.read().split('\n') if e]
        bounding_boxes = list()
        category_ids = list()
        for each_annotation in annotations:
            split_annotation = each_annotation.split(' ')
            category_ids.append(split_annotation[0])
            bounding_boxes.append([float(e) for e in split_annotation[1:]])
        image = cv2.imread(os.path.join(annotation_directory, each_file))
        multi_images = self.augment(image, bounding_boxes, category_ids)
        for each_element in multi_images:
            image, bounding_boxes, category_id = each_element
            _file_name = Path(each_file).stem + str(uuid4())
            cv2.imwrite(os.path.join(post_process_directory, _file_name + ".jpg"), image)
            new_annotations = list()
            for _x in zip(category_id, bounding_boxes):
                lis = list(_x[1:][0])
                lis.insert(0, _x[0])
                lis = [str(e) for e in lis]
                new_annotations.append(' '.join(lis))
                new_annotations.append('\n')
            with open(os.path.join(post_process_directory, _file_name + '.txt'), 'w') as _f:
                _f.writelines(new_annotations)
    def process(self, annotation_directory, post_process_directory):
        assert os.path.exists(annotation_directory)
        if not os.path.exists(post_process_directory):
            print(f"Path: {post_process_directory} does not exist, creating it now!")
            os.mkdir(post_process_directory)
        for each_file in os.listdir(annotation_directory):
            filename, file_extension = os.path.splitext(os.path.join(annotation_directory, each_file))
            # Only augment images that have a matching YOLO annotation file.
            if file_extension in ['.jpg', '.jpeg', '.png']:
                if os.path.isfile(filename + ".txt"):
                    self.pool.apply_async(self.run_augmentations,
                                          (annotation_directory, post_process_directory, filename, each_file))
        self.pool.close()
        self.pool.join()
#
#
# if __name__ == '__main__':
# augment_manager = AugmentationManager(functions={
# "blur": [
# {
# "property": "blur",
# "value": 0.3
# }
# ],
# "noise": [
# {
# "property": "noise",
# "value": 0.1
# }
# ],
# "horizontal_flip": [
# {
# "property": "horizontal_flip",
# }
# ],
# "grayscale": [
# {
# "property": "grayscale",
# "value": 0.3
# }
# ],
# "hue": [
# {
# "property": "hue",
# "value": 1
# }
# ],
# "saturation": [
# {
# "property": "saturation",
# "value": 1
# }
# ],
# "brightness": [
# {
# "property": "brightness",
# "value": 0.8
# }
# ],
# "exposure": [
# {
# "property": "exposure",
# "value": 0.9
# }
# ],
# "vertical_flip": [
# {
# "property": "vertical_flip",
# "value": 0
# }
# ]
#
#
# })
# augment_manager.process(
# annotation_directory=r"C:\Users\sikhin.vc\PycharmProjects\training_pipeline\jk_data\unaugmented_dataset",
# post_process_directory=r"C:\Users\sikhin.vc\PycharmProjects\training_pipeline\jk_data\augmented_dataset")
from zipfile import ZipFile
import os
import shutil


class ExtractDataset:
    def extract_ds(self, dataset_path, dest_path):
        """Extracts the downloaded dataset archive into dest_path."""
        self.dataset_path = dataset_path
        self.dest_path = dest_path
        with ZipFile(self.dataset_path, 'r') as zObject:
            zObject.extractall(path=dest_path)

    def move_files(self, dataset_path, dest_path):
        """Flattens the extracted tree by moving every file into dest_path."""
        self.dataset_path = dataset_path
        self.dest_path = dest_path
        for root, dirs, files in os.walk(self.dataset_path):
            for name in files:
                shutil.move(os.path.join(root, name), os.path.join(self.dest_path, name))
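
# A minimal usage sketch mirroring app.py (the paths are the defaults
# produced by the "jk_data" data_path in conf/master_config.yml):
if __name__ == '__main__':
    extract = ExtractDataset()
    extract.extract_ds("jk_data/raw_dataset/dataset.zip", "jk_data/extracted_dataset")
    extract.move_files("jk_data/extracted_dataset", "jk_data/unaugmented_dataset")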