Commit aee4d77e authored by Sikhin VC's avatar Sikhin VC

initial commit

parent 6962547c
# Default ignored files
/shelf/
/workspace.xml
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (training_pipeline)" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/training_pipeline.iml" filepath="$PROJECT_DIR$/.idea/training_pipeline.iml" />
    </modules>
  </component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
FROM python:3.7
# Merge the duplicated apt-get update/tzdata layers into one.
RUN apt-get update && apt-get install -y tzdata vim ffmpeg libsm6 libxext6
RUN pip3 install --upgrade pip
# Quote specifiers containing ">=" so the shell does not treat ">" as a redirect.
RUN pip3 install pytz==2020.4 loguru~=0.6.0 scipy~=1.4.1 numpy~=1.19.5 pandas~=1.3.3 requests==2.26.0 pydantic==1.8.2 python-dotenv==0.19.0 PyYAML~=6.0 python-dateutil~=2.8.2 azure-core==1.26.3 azure-identity==1.12.0 azure-storage-blob==12.15.0 "Pillow>=7.1.2"
RUN pip3 install "opencv-python>=4.1.2" matplotlib seaborn
ADD . /app
WORKDIR /app
CMD [ "python", "app.py" ]
import os.path

if __name__ == "__main__":
    # Load environment variables before importing modules that read them at import time.
    from dotenv import load_dotenv

    load_dotenv(dotenv_path='config.env')

import yaml
import traceback
import warnings
from loguru import logger
from scripts.constants.app_configuration import master_configuration_file
from scripts.utils.blob_downloader import Blob_Downloader
from scripts.utils.data_augmentation import AugmentationManager
from scripts.utils.dataset_extraction import ExtractDataset

warnings.filterwarnings("ignore")
def main():
    with open(master_configuration_file, 'r') as _cf:
        master_conf = yaml.full_load(_cf)
    root_data_path = master_conf["master_config"]["data_path"]
    raw_dataset_path = os.path.join(root_data_path, "raw_dataset")
    extracted_dataset_path = os.path.join(root_data_path, "extracted_dataset")
    unaugmented_dataset_path = os.path.join(root_data_path, "unaugmented_dataset")
    augmented_dataset_path = os.path.join(root_data_path, "augmented_dataset")
    for path in (root_data_path, raw_dataset_path, extracted_dataset_path,
                 unaugmented_dataset_path, augmented_dataset_path):
        os.makedirs(path, exist_ok=True)
    augmentation_functions = {}
    logger.info("Starting Vision Data Acquisition Pipeline")
    # logger.info(f"Master Configuration Read, Found {len(master_conf['master_config']['rtsp_url'])} Streams with "
    #             f"check batch size of {master_conf['master_config']['check_batch_size']}")
    blob_list = master_conf['master_config']['blob_path']
    logger.info(f'Starting acquisition of blob {blob_list}')
    blob_util = Blob_Downloader(master_conf['master_config']['project'],
                                master_conf['master_config']['site'],
                                master_conf['master_config']['blob_path'],
                                raw_dataset_path)
    blob_download_status = blob_util.download()
    if blob_download_status:
        logger.info("Data Downloaded Successfully!")
    else:
        logger.warning("Failed To Download Data")
    extract = ExtractDataset()
    extract.extract_ds(os.path.join(raw_dataset_path, "dataset.zip"), extracted_dataset_path)
    extract.move_files(extracted_dataset_path, unaugmented_dataset_path)
    # os.walk() returns a generator; take its first entry rather than indexing it.
    try:
        annotation_directory = next(os.walk("data"))[0]
        post_process_directory = annotation_directory
    except StopIteration:
        annotation_directory = "data"
        post_process_directory = "data"
    augmentation_list = master_conf['master_config']['augmentation_types']
    for augmentation in augmentation_list:
        augmentation_value = master_conf['master_config']['augmentation_types'][augmentation]["value"]
        augmentation_functions[augmentation] = [{"property": augmentation, "value": augmentation_value}]
    augmentation_manager = AugmentationManager(augmentation_functions)
    augmentation_manager.process(annotation_directory=unaugmented_dataset_path,
                                 post_process_directory=augmented_dataset_path)

if __name__ == '__main__':
    logger.info("Attempting to Start Vision Data Acquisition Pipeline!")
    try:
        main()
    except Exception as e:
        traceback.print_exc()
        logger.error(f"Failed to Start Vision Data Acquisition Pipeline! : {str(e)}")
[BLOB_STORAGE]
conn_str=$CONN_STR
container_name=$CONTAINER_NAME
master_config:
  project: 'JK_Cements'
  site: 'Chittorgarh'
  blob_path: "JK_Cements/dataset2.zip"
  data_path: "jk_data"
  augmentation_types:
    blur:
      value: 0.3
    noise:
      value: 0.1
    horizontal_flip:
      value: 0
    grayscale:
      value: 0.4
    hue:
      value: 1
    saturation:
      value: 1
    brightness:
      value: 0.8
    exposure:
      value: 0.9
    vertical_flip:
      value: 0

master_config:
  project: 'JK_Cements'
  site: 'Chittorgarh'
  rtsp_url:
    2023_02_28_15_45_00.mp4:
      line: 1
      camera: 1
      contour_area: 1000
  weight_file: JK_Cements/Chittorgarh/weights/weight.pt
  check_batch_size: 16
  packet_size: 20
  classes: assets/classes.txt
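
# A hypothetical validation sketch (not in the original repo): pydantic 1.8.2
# is already pinned in requirements.txt but unused, and could type-check the
# first master_config document above before the pipeline consumes it.
from typing import Dict
import yaml
from pydantic import BaseModel


class AugmentationType(BaseModel):
    value: float


class MasterConfig(BaseModel):
    project: str
    site: str
    blob_path: str
    data_path: str
    augmentation_types: Dict[str, AugmentationType]


with open("conf/master_config.yml") as _cf:
    conf = MasterConfig.parse_obj(yaml.safe_load(_cf)["master_config"])
print(conf.augmentation_types["blur"].value)  # 0.3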
CONN_STR = AccountName=azrmlilensqa006382180551;AccountKey=tDGOKfiZ2svfoMvVmS0Fbpf0FTHfTq4wKYuDX7cAxlhve/3991QuzdvJHm9vWc+lo6mtC+x9yPSghWNR4+gacg==;EndpointSuffix=core.windows.net;DefaultEndpointsProtocol=https;
CONTAINER_NAME = mlflow-vm-container
# This is a sample Python script.

# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.


def print_hi(name):
    # Use a breakpoint in the code line below to debug your script.
    print(f'Hi, {name}')  # Press Ctrl+F8 to toggle the breakpoint.


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    print_hi('PyCharm')

# See PyCharm help at https://www.jetbrains.com/help/pycharm/
pytz==2020.4
loguru~=0.6.0
#scipy~=1.4.1
numpy~=1.19.5
pandas~=1.3.3
requests==2.26.0
pydantic==1.8.2
python-dotenv==0.19.0
PyYAML~=6.0
python-dateutil~=2.8.2
azure-core==1.26.3
azure-identity==1.12.0
azure-storage-blob==12.15.0
albumentations==1.1.0
import os
import os.path
import sys
from configparser import ConfigParser, BasicInterpolation

master_configuration_file = r"./conf/master_config.yml"


class EnvInterpolation(BasicInterpolation):
    """
    Interpolation which expands environment variables in values.
    """

    def before_get(self, parser, section, option, value, defaults):
        value = super().before_get(parser, section, option, value, defaults)
        expanded = os.path.expandvars(value)
        # Unresolved variables still start with "$"; treat them as unset.
        if not expanded.startswith("$"):
            return expanded
        return None


try:
    config = ConfigParser(interpolation=EnvInterpolation())
    config.read("conf/application.conf")
except Exception as e:
    print(f"Error while loading the config: {e}")
    print("Failed to Load Configuration. Exiting!!!")
    sys.exit()


class PathToDirectory:
    # Assumed definition: the logging module imports PathToDirectory from here,
    # but it was missing; the path matches file_path in logger_conf.yml.
    LOGS_MODULE_PATH = "logs/"


class Logging:
    level = config.get("LOGGING", "level", fallback="INFO")
    level = level or "INFO"
    tb_flag = config.getboolean("LOGGING", "traceback", fallback=True)
    tb_flag = tb_flag if tb_flag is not None else True


BLOB_CONN_STR = config["BLOB_STORAGE"]["conn_str"]
BLOB_CONTAINER_NAME = config["BLOB_STORAGE"]["container_name"]
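
# A minimal standalone sketch (not in the original module) showing how
# EnvInterpolation resolves values: variables set in the environment are
# expanded, while unresolved "$VAR" references come back as None.
if __name__ == '__main__':
    os.environ["CONTAINER_NAME"] = "mlflow-vm-container"
    parser = ConfigParser(interpolation=EnvInterpolation())
    parser.read_string("[BLOB_STORAGE]\nconn_str=$CONN_STR\ncontainer_name=$CONTAINER_NAME")
    print(parser.get("BLOB_STORAGE", "container_name"))  # mlflow-vm-container
    print(parser.get("BLOB_STORAGE", "conn_str"))        # None (when CONN_STR is unset)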
import logging
import os
from logging import StreamHandler
from logging.handlers import RotatingFileHandler, SocketHandler

import yaml

from scripts.constants.app_configuration import PathToDirectory, Logging


def read_configuration(file_name):
    """
    Reads the logger configuration from the given YAML file.

    :param file_name: path to the YAML configuration file
    :return: the parsed configuration constants
    """
    with open(file_name, 'r') as stream:
        try:
            return yaml.safe_load(stream)
        except Exception as e:
            print(f"Failed to load Configuration. Error: {e}")


config = read_configuration("scripts/logging/logger_conf.yml")
logging_config = config["logger"]
logging_config["level"] = Logging.level
enable_traceback: bool = Logging.tb_flag
def get_logger():
    """
    Creates a rotating log
    """
    __logger__ = logging.getLogger('')
    __logger__.setLevel(logging_config["level"].upper())
    log_formatter = ('%(asctime)s - %(levelname)-6s - '
                     '[%(threadName)5s:%(funcName)5s():%(lineno)s] - %(message)s')
    time_format = "%Y-%m-%d %H:%M:%S"
    file_path = PathToDirectory.LOGS_MODULE_PATH
    formatter = logging.Formatter(log_formatter, time_format)
    for each_handler in logging_config["handlers"]:
        if each_handler["type"] == "RotatingFileHandler":
            if not os.path.exists(file_path):
                os.makedirs(file_path)
            log_file = os.path.join(file_path, f"{logging_config['name']}.log")
            temp_handler = RotatingFileHandler(log_file,
                                               maxBytes=each_handler["max_bytes"],
                                               backupCount=each_handler["back_up_count"])
            temp_handler.setFormatter(formatter)
        elif each_handler["type"] == "SocketHandler":
            temp_handler = SocketHandler(each_handler["host"], each_handler["port"])
        elif each_handler["type"] == "StreamHandler":
            temp_handler = StreamHandler()
            temp_handler.setFormatter(formatter)
        else:
            # Skip unknown handler types instead of registering None.
            continue
        __logger__.addHandler(temp_handler)
    return __logger__


logger = get_logger()
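
# A hypothetical smoke test (not part of the original module): with the
# shipped logger_conf.yml this emits one record through both configured
# handlers, i.e. to stdout and to logs/<name>.log.
if __name__ == '__main__':
    logger.info("logger initialised")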
logger:
  name: jubilant_r5_golden_batch_yield
  level: DEBUG
  handlers:
    - type: RotatingFileHandler
      file_path: logs/
      max_bytes: 100000000
      back_up_count: 5
    - type: StreamHandler
      name: jubilant_r5_golden_batch_yield
import os
import traceback

from azure.storage.blob import BlobServiceClient
from loguru import logger

from scripts.constants.app_configuration import BLOB_CONN_STR, BLOB_CONTAINER_NAME


class Blob_Downloader:
    def __init__(self, project, site, blob_path, raw_ds_path):
        self.project = project
        self.site = site
        self.blob_path = blob_path
        self.blob_service_client = BlobServiceClient.from_connection_string(BLOB_CONN_STR)
        self.raw_ds_path = raw_ds_path
        self.container = BLOB_CONTAINER_NAME

    def download(self):
        try:
            download_file_path = os.path.join(self.raw_ds_path, "dataset.zip")
            blob_client = self.blob_service_client.get_blob_client(container=self.container,
                                                                   blob=self.blob_path)
            with open(download_file_path, "wb") as download_file:
                blob_client.download_blob().readinto(download_file)
            return True
        except Exception as e:
            traceback.print_exc()
            logger.error(f"Failed to Download Files from Blob Storage! : {str(e)}")
            return False
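
# A minimal usage sketch (mirrors the call in app.py; the project, site and
# blob values come from conf/master_config.yml, and the target directory is
# assumed to exist already):
if __name__ == '__main__':
    blob_util = Blob_Downloader('JK_Cements', 'Chittorgarh',
                                'JK_Cements/dataset2.zip', 'jk_data/raw_dataset')
    if blob_util.download():
        print('dataset.zip saved under jk_data/raw_dataset/')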
import os
from pathlib import Path
import cv2
from uuid import uuid4
from multiprocessing.pool import ThreadPool as Pool
from albumentations import Compose, BboxParams
from albumentations.augmentations.geometric.rotate import Rotate
from albumentations.augmentations.geometric.transforms import Affine
from albumentations.augmentations.transforms import (
HorizontalFlip, VerticalFlip, ToGray, HueSaturationValue, RandomBrightnessContrast, Blur, GaussNoise)
class AugmentImage:
    def __init__(self, funs):
        self.transformations = list()
        if 'vertical_flip' in funs:
            assert isinstance(funs['vertical_flip'], list)
            for each_prop in funs['vertical_flip']:
                if each_prop['property'] == 'vertical_flip':
                    self.transformations.append(VerticalFlip(p=1))
        if 'horizontal_flip' in funs:
            assert isinstance(funs['horizontal_flip'], list)
            for each_prop in funs['horizontal_flip']:
                if each_prop['property'] == 'horizontal_flip':
                    self.transformations.append(HorizontalFlip(p=1))
        if 'rotation' in funs:
            assert isinstance(funs['rotation'], list)
            for each_prop in funs['rotation']:
                if each_prop['property'] == 'rotation':
                    self.transformations.append(Rotate(
                        limit=int(45 * each_prop['value']),
                        interpolation=1,
                        border_mode=0,
                        value=None,
                        mask_value=None,
                        always_apply=False,
                        p=0.7))
        if 'grayscale' in funs:
            assert isinstance(funs['grayscale'], list)
            for each_prop in funs['grayscale']:
                # app.py emits property == 'grayscale' (not 'probability');
                # matching on the old key meant ToGray was never added.
                if each_prop['property'] == 'grayscale':
                    self.transformations.append(
                        ToGray(p=each_prop['value']))
        if 'hue' in funs:
            assert isinstance(funs['hue'], list)
            for each_prop in funs['hue']:
                if each_prop['property'] == 'hue':
                    self.transformations.append(
                        HueSaturationValue(
                            hue_shift_limit=int(180 * each_prop['value']),
                            sat_shift_limit=0,
                            val_shift_limit=0,
                            always_apply=False,
                            p=0.7))
        if 'saturation' in funs:
            assert isinstance(funs['saturation'], list)
            for each_prop in funs['saturation']:
                if each_prop['property'] == 'saturation':
                    self.transformations.append(
                        HueSaturationValue(
                            hue_shift_limit=0,
                            sat_shift_limit=int(255 * each_prop['value']),
                            val_shift_limit=0,
                            always_apply=False,
                            p=0.7))
        if 'brightness' in funs:
            assert isinstance(funs['brightness'], list)
            for each_prop in funs['brightness']:
                if each_prop['property'] == 'brightness':
                    self.transformations.append(
                        RandomBrightnessContrast(
                            brightness_limit=int(30 * each_prop['value']),
                            contrast_limit=0,
                            brightness_by_max=True,
                            always_apply=True,
                            p=0.7))
        if 'exposure' in funs:
            assert isinstance(funs['exposure'], list)
            for each_prop in funs['exposure']:
                if each_prop['property'] == 'exposure':
                    self.transformations.append(
                        HueSaturationValue(
                            hue_shift_limit=0,
                            sat_shift_limit=0,
                            val_shift_limit=int(255 * each_prop['value']),
                            always_apply=False,
                            p=0.7))
        if 'blur' in funs:
            assert isinstance(funs['blur'], list)
            for each_prop in funs['blur']:
                if each_prop['property'] == 'blur':
                    self.transformations.append(
                        Blur(blur_limit=int(150 * each_prop['value']),
                             always_apply=False,
                             p=1))
        if 'noise' in funs:
            assert isinstance(funs['noise'], list)
            for each_prop in funs['noise']:
                if each_prop['property'] == 'noise':
                    self.transformations.append(
                        GaussNoise(
                            var_limit=int(50000 * each_prop['value']),
                            mean=0,
                            per_channel=True,
                            always_apply=True,
                            p=0.7))
        if 'horizontal_shear' in funs:
            assert isinstance(funs['horizontal_shear'], list)
            for each_prop in funs['horizontal_shear']:
                if each_prop['property'] == 'horizontal_shear':
                    # Shear both ways around the configured magnitude.
                    for sign in (-1, 1):
                        self.transformations.append(
                            Affine(
                                scale=None,
                                translate_percent=None,
                                translate_px=None,
                                rotate=None,
                                shear={"x": int(sign * 45 * each_prop['value']), "y": 0},
                                interpolation=0,
                                mask_interpolation=0,
                                cval=0,
                                cval_mask=0,
                                mode=0,
                                fit_output=False,
                                always_apply=False,
                                p=0.9))
        if 'vertical_shear' in funs:
            assert isinstance(funs['vertical_shear'], list)
            for each_prop in funs['vertical_shear']:
                if each_prop['property'] == 'vertical_shear':
                    for sign in (-1, 1):
                        self.transformations.append(
                            Affine(
                                scale=None,
                                translate_percent=None,
                                translate_px=None,
                                rotate=None,
                                shear={"x": 0, "y": int(sign * 45 * each_prop['value'])},
                                interpolation=0,
                                mask_interpolation=0,
                                cval=0,
                                cval_mask=0,
                                mode=0,
                                fit_output=False,
                                always_apply=False,
                                p=0.9))
        self.transformer = Compose(self.transformations,
                                   bbox_params=BboxParams(
                                       format='yolo',
                                       label_fields=['category_ids']))
    def __call__(self, image, bounding_boxes, category_ids):
        if self.transformations:
            transformed = self.transformer(
                image=image,
                bboxes=bounding_boxes,
                category_ids=category_ids)
            return [transformed['image'], transformed['bboxes'], category_ids]
        return image

    @staticmethod
    def conv_2_coco(img, *args):
        import numpy as np
        # Convert a normalised YOLO box (x_center, y_center, w, h) into a
        # COCO pixel box (x_min, y_min, width, height).
        x_cen, y_cen, w, h = args[0][0]
        ih, iw, _ = img.shape
        x_min = (x_cen - w / 2) * iw
        y_min = (y_cen - h / 2) * ih
        wid = w * iw
        hei = h * ih
        return np.array([(x_min, y_min, wid, hei)])
    BOX_COLOR = (255, 0, 0)  # Red
    TEXT_COLOR = (255, 255, 255)  # White

    @staticmethod
    def visualize_bbox(img, bbox, class_name, color=BOX_COLOR, thickness=2):
        """Visualizes a single COCO-format bounding box on the image."""
        x_min, y_min, w, h = bbox
        x_min, x_max, y_min, y_max = int(x_min), int(x_min + w), int(y_min), int(y_min + h)
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=color, thickness=thickness)
        ((text_width, text_height), _) = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 1)
        cv2.rectangle(img, (x_min, y_min - int(1.3 * text_height)), (x_min + text_width, y_min),
                      AugmentImage.BOX_COLOR, -1)
        cv2.putText(
            img,
            text=class_name,
            org=(x_min, y_min - int(0.3 * text_height)),
            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
            fontScale=0.35,
            color=AugmentImage.TEXT_COLOR,
            lineType=cv2.LINE_AA,
        )
        return img

    def visualize(self, image, bboxes, category_ids, category_id_to_name):
        img = image.copy()
        for bbox, category_id in zip(bboxes, category_ids):
            class_name = category_id_to_name[category_id]
            img = self.visualize_bbox(img, bbox, class_name)
        return img
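
# A small worked example (hypothetical values, not in the original module) of
# the YOLO -> COCO conversion in AugmentImage.conv_2_coco: on a 400x200 image,
# the normalised box (0.5, 0.5, 0.25, 0.5) maps to pixels (150, 50, 100, 100).
if __name__ == '__main__':
    import numpy as np
    frame = np.zeros((200, 400, 3), dtype=np.uint8)  # height 200, width 400
    print(AugmentImage.conv_2_coco(frame, [(0.5, 0.5, 0.25, 0.5)]))
    # -> [[150.  50. 100. 100.]]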
class ChainAugmentations:
    def __init__(self, functions):
        self.transformers = [AugmentImage({each_prop: functions[each_prop]}) for each_prop in functions]

    def __call__(self, image, bounding_boxes, category_ids):
        return [each_transformer(image, bounding_boxes, category_ids) for each_transformer in self.transformers]


class AugmentationManager:
    def __init__(self, functions=None):
        if functions is None:
            functions = {'blur': [{'property': 'blur', 'value': 0.3}],
                         'noise': [{'property': 'noise', 'value': 0.1}]}
        self.augment = ChainAugmentations(functions=functions)
        self.pool = Pool(12)
    def run_augmentations(self, annotation_directory, post_process_directory, filename, each_file):
        # Read the YOLO annotation file that accompanies the image.
        with open(filename + ".txt", 'r') as f:
            annotations = [e for e in f.read().split('\n') if e]
        bounding_boxes = list()
        category_ids = list()
        for each_annotation in annotations:
            split_annotation = each_annotation.split(' ')
            category_ids.append(split_annotation[0])
            bounding_boxes.append([float(e) for e in split_annotation[1:]])
        image = cv2.imread(os.path.join(annotation_directory, each_file))
        multi_images = self.augment(image, bounding_boxes, category_ids)
        for each_element in multi_images:
            image, bounding_boxes, category_id = each_element
            _file_name = Path(each_file).stem + str(uuid4())
            cv2.imwrite(os.path.join(post_process_directory, _file_name + ".jpg"), image)
            new_annotations = list()
            for _x in zip(category_id, bounding_boxes):
                lis = list(_x[1:][0])
                lis.insert(0, _x[0])
                lis = [str(e) for e in lis]
                new_annotations.append(' '.join(lis))
                new_annotations.append('\n')
            with open(os.path.join(post_process_directory, _file_name + '.txt'), 'w') as _f:
                _f.writelines(new_annotations)
    def process(self, annotation_directory, post_process_directory):
        assert os.path.exists(annotation_directory)
        if not os.path.exists(post_process_directory):
            print(f"Path: {post_process_directory} does not exist, creating it now!")
            os.mkdir(post_process_directory)
        for each_file in os.listdir(annotation_directory):
            filename, file_extension = os.path.splitext(os.path.join(annotation_directory, each_file))
            # Only augment images that have a matching YOLO annotation file.
            if file_extension in ['.jpg', '.jpeg', '.png']:
                if os.path.isfile(filename + ".txt"):
                    self.pool.apply_async(self.run_augmentations,
                                          (annotation_directory, post_process_directory, filename, each_file))
        self.pool.close()
        self.pool.join()
#
#
# if __name__ == '__main__':
# augment_manager = AugmentationManager(functions={
# "blur": [
# {
# "property": "blur",
# "value": 0.3
# }
# ],
# "noise": [
# {
# "property": "noise",
# "value": 0.1
# }
# ],
# "horizontal_flip": [
# {
# "property": "horizontal_flip",
# }
# ],
# "grayscale": [
# {
# "property": "grayscale",
# "value": 0.3
# }
# ],
# "hue": [
# {
# "property": "hue",
# "value": 1
# }
# ],
# "saturation": [
# {
# "property": "saturation",
# "value": 1
# }
# ],
# "brightness": [
# {
# "property": "brightness",
# "value": 0.8
# }
# ],
# "exposure": [
# {
# "property": "exposure",
# "value": 0.9
# }
# ],
# "vertical_flip": [
# {
# "property": "vertical_flip",
# "value": 0
# }
# ]
#
#
# })
# augment_manager.process(
# annotation_directory=r"C:\Users\sikhin.vc\PycharmProjects\training_pipeline\jk_data\unaugmented_dataset",
# post_process_directory=r"C:\Users\sikhin.vc\PycharmProjects\training_pipeline\jk_data\augmented_dataset")
from zipfile import ZipFile
import os
import shutil


class ExtractDataset:
    def extract_ds(self, dataset_path, dest_path):
        """Extracts the downloaded dataset archive into dest_path."""
        self.dataset_path = dataset_path
        self.dest_path = dest_path
        with ZipFile(self.dataset_path, 'r') as zObject:
            zObject.extractall(path=dest_path)

    def move_files(self, dataset_path, dest_path):
        """Flattens the extracted tree by moving every file into dest_path."""
        self.dataset_path = dataset_path
        self.dest_path = dest_path
        for root, dirs, files in os.walk(self.dataset_path):
            for name in files:
                shutil.move(os.path.join(root, name), os.path.join(self.dest_path, name))
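
# A minimal usage sketch mirroring app.py (the paths are the defaults
# produced by the "jk_data" data_path in conf/master_config.yml):
if __name__ == '__main__':
    extract = ExtractDataset()
    extract.extract_ds("jk_data/raw_dataset/dataset.zip", "jk_data/extracted_dataset")
    extract.move_files("jk_data/extracted_dataset", "jk_data/unaugmented_dataset")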