Commit 0c59fda2 authored by sikhin.vc

initial commit

parent d1cbb1b3
# lpr_data_processing_and_model_training
This repo prepares a dataset and trains an LPR (license plate recognition) model using YOLOv8.
**Step 1: Annotation using labelImg**
**Step 2: Remove images without labels**
Store the images and labels in separate folders.
Set the image and label paths in "remove_images_without_labels.py" and execute it, as in the sketch below.
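For example (a minimal sketch; the two paths are set at the top of the script and are illustrative):

img_path = "data/images"
label_path = "data/labels"

Then run:

python remove_images_without_labels.py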
**Step 3: Split dataset**
Copy all images and labels into a single folder.
Run "split_dataset.py" as shown below:
python split_dataset.py \
    --datadir='data/all/' \
    --split=0.1 \
    --train_output='data/train/' \
    --test_output='data/test/' \
    --image_ext='jpeg'
Where,
datadir : directory containing all the images and labels
split : fraction of the data moved to the test folder
train_output : directory where the split train data is stored
test_output : directory where the split test data is stored
image_ext : image file extension (jpeg, jpg or png)
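For example, with 1000 images and --split=0.1, floor(1000 * 0.1) = 100 image/label pairs are moved to data/test/ and the remaining 900 to data/train/.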
**Step 4: Augment dataset**
Augmentation is performed only on the training images.
Move the images and labels under the train and test folders into corresponding "images" and "labels" subfolders.
Set the paths for images_loc, labels_loc, target_images_loc and target_labels_loc in "aug.py", then run it (see the example after this list).
Where,
images_loc : location of the training images
labels_loc : location of the training labels
target_images_loc : location where the generated images will be stored
target_labels_loc : location where the generated labels will be stored
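For example (illustrative paths; the "data/train_aug" layout is an assumption, use whatever output folders you prefer):

images_loc = "data/train/images"
labels_loc = "data/train/labels"
target_images_loc = "data/train_aug/images"
target_labels_loc = "data/train_aug/labels"

Then run:

python aug.py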
After augmentation, combine the unaugmented train images and labels with the augmented train images and labels.
Put the train and valid folders in one folder.
Add a data.yaml file to that folder.
Change the number of classes and the locations of the train and valid images in data.yaml (see the example below).
Compress the folder and upload it to Drive.
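A minimal data.yaml along the lines of the one included in this repo (the paths are illustrative; point them at your train and valid image folders):

train: train/images
val: valid/images

names:
  0: r1
  1: r2
  2: lp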
Use "yolov8_object_detection_custom_training.ipynb" file to train yolov8 model in colab
aug.py

import os
import random
from uuid import uuid4

import cv2
import imgaug as ia
import imgaug.augmenters as iaa
import pybboxes as pbx
def bb(x1, y1, x2, y2, label):
    return ia.BoundingBox(x1=x1, y1=y1, x2=x2, y2=y2, label=label)


def create_augmentation_params(coord):
    # Convert [class_id, (x1, y1, x2, y2)] pairs into imgaug BoundingBox objects.
    ann_list = []
    for cls, annotation in coord:
        ann_list.append(bb(annotation[0], annotation[1], annotation[2], annotation[3], cls))
    return ann_list
def read_images(img_list, label_list):
    for image_path, txt_loc in zip(img_list, label_list):
        img = cv2.imread(image_path)
        with open(txt_loc, "r") as f:
            lines = f.readlines()
        coco_annotations = []
        H, W, _ = img.shape
        for line in lines:
            splitted_line = line.strip().split(" ")
            class_id = splitted_line[0]
            coordinate_float = tuple(float(x) for x in splitted_line[1:])
            # YOLO labels are normalized (xc, yc, w, h); imgaug needs absolute
            # VOC-style (x1, y1, x2, y2) pixel coordinates.
            coordinate_coco = pbx.convert_bbox(coordinate_float, from_type="yolo", to_type="voc", image_size=(W, H))
            coco_annotations.append([class_id, coordinate_coco])
        aug_params = create_augmentation_params(coco_annotations)
        for seq in seq_list:
            # Apply each augmenter independently with probability 0.3.
            if random.randrange(0, 10) <= 6:
                continue
            images_aug, bbs_aug = seq(images=[img], bounding_boxes=aug_params)
            unique_id = uuid4()
            image_name = str(unique_id) + ".jpg"
            label_name = str(unique_id) + ".txt"
            try:
                cv2.imwrite(os.path.join(target_images_loc, image_name), images_aug[0])
                H, W, _ = images_aug[0].shape
                with open(os.path.join(target_labels_loc, label_name), "w") as f:
                    for b in bbs_aug:
                        x1, y1, x2, y2, cls = b.x1, b.y1, b.x2, b.y2, b.label
                        # Debug preview of the augmented boxes:
                        # cv2.rectangle(images_aug[0], (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2)
                        # Convert back to normalized YOLO format for the label file.
                        normalized_coord = pbx.convert_bbox((x1, y1, x2, y2), from_type="voc", to_type="yolo", image_size=(W, H))
                        f.write(f"{cls} {normalized_coord[0]} {normalized_coord[1]} {normalized_coord[2]} {normalized_coord[3]}\n")
            except Exception:
                # Skip augmentations whose boxes end up outside the image
                # (pybboxes rejects out-of-range coordinates).
                continue
# Other augmenters that were tried but are not part of seq_list:
# iaa.Affine(translate_px={"x": (1, 5)})
# iaa.AddElementwise((-40, 40))
# iaa.AdditiveGaussianNoise(scale=0.05 * 255)
# iaa.ChannelShuffle(0.35, channels=[0, 1])
# iaa.Sequential([iaa.Affine(rotate=(-45, 45))])
# iaa.Affine(
#     scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},  # scale to 80-120% of size, per axis
#     translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},  # translate by -20 to +20 percent, per axis
#     rotate=(-45, 45),  # rotate by -45 to +45 degrees
#     shear=(-16, 16),  # shear by -16 to +16 degrees
#     order=[0, 1],  # nearest-neighbour or bilinear interpolation (fast)
#     cval=(0, 255),  # if mode is constant, use a cval between 0 and 255
#     mode=ia.ALL,  # use any of scikit-image's warping modes
# )
# Pool of augmenters; read_images applies each one to an image with
# probability 0.3, writing one augmented copy per applied augmenter.
seq_list = [
    iaa.ChannelShuffle(0.35),
    iaa.Add((-100, 100)),
    iaa.AdditiveGaussianNoise(scale=(0, 0.2 * 255)),
    iaa.Multiply((0.5, 1.5)),
    iaa.Multiply((0.5, 1.5), per_channel=0.5),
    iaa.Dropout(p=(0, 0.2)),
    iaa.CoarseDropout((0.0, 0.05), size_percent=(0.02, 0.25)),
    iaa.Dropout2d(p=0.5),
    iaa.Cartoon(),
    iaa.GaussianBlur(sigma=(0.0, 3.0)),
    iaa.MotionBlur(k=15),
    iaa.ChangeColorTemperature((1100, 10000)),
    iaa.SigmoidContrast(gain=(3, 10), cutoff=(0.4, 0.6), per_channel=True),
    iaa.CLAHE(),
    iaa.Fliplr(1),
    iaa.Affine(scale=(0.5, 1.5)),
    iaa.PiecewiseAffine(scale=(0.01, 0.05)),
    iaa.PerspectiveTransform(scale=(0.01, 0.15)),
    iaa.ElasticTransformation(alpha=(0, 5.0), sigma=0.25),
    iaa.FastSnowyLandscape(lightness_threshold=140, lightness_multiplier=2.5),
    iaa.Clouds(),
    iaa.Fog(),
    iaa.Rain(speed=(0.1, 0.3)),
]
images_loc = "/home/shikhin/Downloads/ADNOC/UAE_CAR_COMPLETE_DATASET/Annotated_Dataset/Combined_dataset/train/images"
labels_loc = "/home/shikhin/Downloads/ADNOC/UAE_CAR_COMPLETE_DATASET/Annotated_Dataset/Combined_dataset/train/labels"
target_images_loc = "/home/shikhin/Downloads/ADNOC/UAE_CAR_COMPLETE_DATASET/Annotated_Dataset/Combined_dataset/Augmented_dataset/train/images"
target_labels_loc = "/home/shikhin/Downloads/ADNOC/UAE_CAR_COMPLETE_DATASET/Annotated_Dataset/Combined_dataset/Augmented_dataset/train/labels"

img_list = []
label_list = []
for img in os.listdir(images_loc):
    if img.endswith((".jpg", ".png", ".jpeg")):
        img_list.append(os.path.join(images_loc, img))
        # os.path.splitext handles both 3- and 4-character extensions
        # (img[:-4] would leave a stray dot for ".jpeg").
        label_list.append(os.path.join(labels_loc, os.path.splitext(img)[0] + ".txt"))

read_images(img_list=img_list, label_list=label_list)
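# A quick sanity check of the YOLO <-> VOC round trip used above (illustrative
# values, assuming a 1280x720 image; not part of the pipeline):
#   pbx.convert_bbox((0.5, 0.5, 0.2, 0.3), from_type="yolo", to_type="voc", image_size=(1280, 720))
#   # -> (512, 252, 768, 468)
#   pbx.convert_bbox((512, 252, 768, 468), from_type="voc", to_type="yolo", image_size=(1280, 720))
#   # -> back to (0.5, 0.5, 0.2, 0.3)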
# print("annotation dict: ", annotation_dict)
#
# # images = np.zeros((2, 128, 128, 3), dtype=np.uint8) # two example images
# # images[:, 64, 64, :] = 255
# bbs = [
# [ia.BoundingBox(x1=10.5, y1=15.5, x2=30.5, y2=50.5)],
# [ia.BoundingBox(x1=10.5, y1=20.5, x2=50.5, y2=50.5),
# ia.BoundingBox(x1=40.5, y1=75.5, x2=70.5, y2=100.5)]
# ]
#
# seq = iaa.Sequential([
# iaa.AdditiveGaussianNoise(scale=0.05*255),
# iaa.Affine(translate_px={"x": (1, 5)})
# ])
#
# images_aug, bbs_aug = seq(images=images, bounding_boxes=bbs)
#
data.yaml

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
# path: ../datasets/coco128  # dataset root dir
# NOTE: the train/val paths below are still the coco128 template defaults;
# point them at this dataset's train and valid image folders (README, Step 4).
train: images/train2017  # train images (relative to 'path')
val: images/train2017  # val images (relative to 'path')
# test:  # test images (optional)

# Classes
names:
  0: r1
  1: r2
  2: lp
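A sample label file from the dataset (YOLO format: one object per line, as "class x_center y_center width height" with coordinates normalized to the image size; class ids follow data.yaml, so 0 = r1, 1 = r2, 2 = lp):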
0 0.130729 0.474537 0.009375 0.015741
1 0.161719 0.466204 0.026562 0.026852
0 0.403385 0.604630 0.009896 0.020370
1 0.447135 0.582407 0.036979 0.046296
0 0.590104 0.684722 0.013542 0.026852
1 0.636719 0.654630 0.038021 0.050000
0 0.172656 0.509259 0.010937 0.025926
1 0.216927 0.503704 0.035937 0.033333
0 0.082812 0.542130 0.010417 0.032407
1 0.127344 0.531944 0.040104 0.037963
remove_images_without_labels.py

import os

img_path = "/home/shikhin/Downloads/ADNOC/UAE_CAR_COMPLETE_DATASET/Annotated_Dataset/Combined_dataset/images"
label_path = "/home/shikhin/Downloads/ADNOC/UAE_CAR_COMPLETE_DATASET/Annotated_Dataset/Combined_dataset/labels"

# Delete every image that has no matching .txt label file.
for img in os.listdir(img_path):
    # os.path.splitext handles both ".jpg" and ".jpeg".
    txt_file_path = os.path.join(label_path, os.path.splitext(img)[0] + ".txt")
    if not os.path.isfile(txt_file_path):
        print("Removing", img, "- no label found")
        os.remove(os.path.join(img_path, img))
split_dataset.py

import argparse
import os
import shutil
from math import floor
from random import shuffle

import pandas as pd

parser = argparse.ArgumentParser()
parser.add_argument('--datadir', help='Path to all the input data', type=str)
parser.add_argument('--split', help='Fraction of data moved to the test set (e.g. 0.1)', type=float, default=0.1)
parser.add_argument('--train_output', help='Path to output train data', type=str)
parser.add_argument('--test_output', help='Path to output test data', type=str)
parser.add_argument('--image_ext', help='jpeg, jpg or png', type=str, default='jpeg')
FLAGS = parser.parse_args()


def check_dir(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)
        print('Creating directory -', directory)
    else:
        print('Directory exists -', directory)


def get_file_list_from_dir(datadir):
    all_files = os.listdir(os.path.abspath(datadir))
    data_files = [f for f in all_files if f.endswith('.' + FLAGS.image_ext)]
    shuffle(data_files)
    all_images = pd.DataFrame({'imagepath': data_files})
    # Strip the extension so the matching .txt label can be located
    # (os.path.splitext also handles 4-character extensions like ".jpeg").
    all_images['filename'] = all_images['imagepath'].apply(lambda p: os.path.splitext(p)[0])
    return all_images


def get_training_and_testing_sets(file_list, split):
    split_index = floor(file_list.shape[0] * split)
    testing = file_list[:split_index]
    training = file_list[split_index:].reset_index(drop=True)
    return training, testing


def move_set(subset, datadir, output, set_name):
    print('Writing -', subset.shape[0], '-', set_name, 'data images at -', output)
    for name in subset['filename']:
        try:
            # Move the label file.
            shutil.move(os.path.join(datadir, name + '.txt'), os.path.join(output, name + '.txt'))
            # Move the image.
            image_file = name + '.' + FLAGS.image_ext
            shutil.move(os.path.join(datadir, image_file), os.path.join(output, image_file))
        except (FileNotFoundError, shutil.Error):
            print('Could not find {}'.format(name + '.txt'))


def main():
    check_dir(FLAGS.train_output)
    check_dir(FLAGS.test_output)
    file_list = get_file_list_from_dir(FLAGS.datadir)
    print('Read -', file_list.shape[0], '- files from the directory -', FLAGS.datadir)
    training, testing = get_training_and_testing_sets(file_list, FLAGS.split)
    move_set(training, FLAGS.datadir, FLAGS.train_output, 'Train')
    move_set(testing, FLAGS.datadir, FLAGS.test_output, 'Test')


if __name__ == '__main__':
    main()