Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
lpr_data_processing_and_model_training
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
CI / CD Analytics
Repository Analytics
Value Stream Analytics
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
sikhin.vc
lpr_data_processing_and_model_training
Commits
0c59fda2
Commit
0c59fda2
authored
Dec 20, 2023
by
sikhin.vc
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
initial commit
parent
d1cbb1b3
Changes
16
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
4617 additions
and
0 deletions
+4617
-0
README.md
README.md
+51
-0
aug.py
aug.py
+180
-0
data.yaml
data.yaml
+13
-0
data/images/637c72d98d8d2e1916aa82eb_jpeg.rf.18179e89c20d432d9b023b21474dd16e.jpg
...2e1916aa82eb_jpeg.rf.18179e89c20d432d9b023b21474dd16e.jpg
+0
-0
data/images/637c72d98d8d2e1916aa82f3_jpeg.rf.6463afa9200cc37075ab9594ccfaadfa.jpg
...2e1916aa82f3_jpeg.rf.6463afa9200cc37075ab9594ccfaadfa.jpg
+0
-0
data/images/637c72da8d8d2e1916aa82fa_jpeg.rf.21bc48bf34791fde0878b9312f98b09b.jpg
...2e1916aa82fa_jpeg.rf.21bc48bf34791fde0878b9312f98b09b.jpg
+0
-0
data/images/637c73aa8d8d2e1916aa862d_jpeg.rf.17abd2576af9529c313737084aa9927d.jpg
...2e1916aa862d_jpeg.rf.17abd2576af9529c313737084aa9927d.jpg
+0
-0
data/images/637c73da8d8d2e1916aa864d_jpeg.rf.ff6a706dd9970fa353d8c7c02ed4d98a.jpg
...2e1916aa864d_jpeg.rf.ff6a706dd9970fa353d8c7c02ed4d98a.jpg
+0
-0
data/labels/637c72d98d8d2e1916aa82eb_jpeg.rf.18179e89c20d432d9b023b21474dd16e.txt
...2e1916aa82eb_jpeg.rf.18179e89c20d432d9b023b21474dd16e.txt
+2
-0
data/labels/637c72d98d8d2e1916aa82f3_jpeg.rf.6463afa9200cc37075ab9594ccfaadfa.txt
...2e1916aa82f3_jpeg.rf.6463afa9200cc37075ab9594ccfaadfa.txt
+2
-0
data/labels/637c72da8d8d2e1916aa82fa_jpeg.rf.21bc48bf34791fde0878b9312f98b09b.txt
...2e1916aa82fa_jpeg.rf.21bc48bf34791fde0878b9312f98b09b.txt
+2
-0
data/labels/637c73aa8d8d2e1916aa862d_jpeg.rf.17abd2576af9529c313737084aa9927d.txt
...2e1916aa862d_jpeg.rf.17abd2576af9529c313737084aa9927d.txt
+2
-0
data/labels/637c73da8d8d2e1916aa864d_jpeg.rf.ff6a706dd9970fa353d8c7c02ed4d98a.txt
...2e1916aa864d_jpeg.rf.ff6a706dd9970fa353d8c7c02ed4d98a.txt
+2
-0
remove_images_without_labels.py
remove_images_without_labels.py
+17
-0
split_dataset.py
split_dataset.py
+97
-0
yolov8_object_detection_custom_training.ipynb
yolov8_object_detection_custom_training.ipynb
+4249
-0
No files found.
README.md
View file @
0c59fda2
# lpr_data_processing_and_model_training
This repo contains scripts to prepare a dataset and train an LPR model using YOLOv8.
**Step 1: Annotation using labelImg**
**Step 2: Remove images without labels**
Store images and labels in different folders.
Set the image and label folder paths (img_path, label_path) in "remove_images_without_labels.py" and run it. Images that have no matching label file are deleted.
**Step 3: Split dataset**
Copy all images and labels to a single folder
Run "split_dataset.py" file as shown below:
python split_dataset.py
\
--datadir='data/all/'
\
--split=0.1
\
--train_output='data/train/'
\
--test_output='data/test/'
\
--image_ext='jpeg'
Where,
datadir : directory where all images and labels are present
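split : fraction of the files moved to the test set (e.g. 0.1 = 10%); the remaining files go to the train set
train_output : directory where the train split is to be stored
test_output : directory where the test split is to be stored
image_ext : image file extension without the leading dot (jpeg, jpg or png)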
**Step 4: Augment dataset**
Augmentation is performed only for training images
After splitting, move the files inside the train and test folders into separate images and labels sub-folders.
Provide path for images_loc, labels_loc, target_images_loc, target_labels_loc in aug.py file
Where,
images_loc : location of training images
labels_loc : location of labels
target_images_loc : location where generated images will be stored
target_labels_loc : location where generated labels will be stored
After augmenting the dataset, combine the unaugmented train images and labels with the augmented train images and labels.
Put train and valid folder in one folder
Add data.yaml file in the folder
Change number of classes and location of train and valid images in data.yaml
Compress the folder and upload to drive.
Use "yolov8_object_detection_custom_training.ipynb" file to train yolov8 model in colab
aug.py
0 → 100644
View file @
0c59fda2
import
os
import
random
import
cv2
import
numpy
as
np
import
imgaug
as
ia
import
imgaug.augmenters
as
iaa
import
pybboxes
as
pbx
from
uuid
import
uuid4
def bb(x1, y1, x2, y2, label):
    """Return an imgaug ``BoundingBox`` for the given corners and label.

    ``x1``/``y1`` and ``x2``/``y2`` are passed straight through to
    ``ia.BoundingBox`` together with ``label``.
    """
    corners = {"x1": x1, "y1": y1, "x2": x2, "y2": y2}
    return ia.BoundingBox(label=label, **corners)
def create_augmentation_params(coord):
    """Convert annotation entries into a list of imgaug bounding boxes.

    Each entry of ``coord`` is indexed as ``entry[0]`` (the class label) and
    ``entry[1]`` (a sequence whose first four items are x1, y1, x2, y2).
    The boxes are returned in the same order as the entries.
    """
    return [
        bb(entry[1][0], entry[1][1], entry[1][2], entry[1][3], entry[0])
        for entry in coord
    ]
def _load_voc_annotations(txt_loc, width, height):
    """Read a YOLO label file and return ``[class_id, voc_box]`` pairs.

    Every non-blank line is split on whitespace: the first field is kept as
    the class id (a string) and the remaining fields are parsed as floats and
    converted from "yolo" to "voc" format with ``pbx.convert_bbox`` for an
    image of size ``(width, height)``.
    """
    annotations = []
    with open(txt_loc, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line, e.g. a trailing newline at the end of the file.
                continue
            class_id = fields[0]
            yolo_box = tuple(float(value) for value in fields[1:])
            voc_box = pbx.convert_bbox(
                yolo_box,
                from_type="yolo",
                to_type="voc",
                image_size=(width, height),
            )
            annotations.append([class_id, voc_box])
    return annotations


def read_images(img_list, label_list):
    """Augment every image/label pair and write the results to disk.

    ``img_list`` and ``label_list`` are parallel lists of image paths and
    YOLO label-file paths. For each pair, every augmenter in the module-level
    ``seq_list`` is applied with probability 0.3. Each augmented image is
    saved as ``<uuid>.jpg`` in ``target_images_loc`` and its boxes, converted
    back to YOLO format, as ``<uuid>.txt`` in ``target_labels_loc``.

    Unreadable images, missing label files and augmented samples whose boxes
    cannot be converted are reported and skipped. Labels are converted before
    anything is written, so a rejected sample leaves no orphan image or
    partial label file behind.
    """
    for image, txt_loc in zip(img_list, label_list):
        img = cv2.imread(image)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print("could not read image, skipping: ", image)
            continue
        if not os.path.isfile(txt_loc):
            print("label file not found, skipping: ", txt_loc)
            continue

        height, width = img.shape[:2]
        voc_annotations = _load_voc_annotations(txt_loc, width, height)
        aug_params = create_augmentation_params(voc_annotations)

        for seq in seq_list:
            # randrange yields 0..9; only 7, 8 and 9 pass, i.e. 3 draws in 10.
            if random.randrange(0, 10, 1) <= 6:
                continue

            images_aug, bbs_aug = seq(images=[img], bounding_boxes=aug_params)
            print("bbs aug: ", bbs_aug)
            aug_img = images_aug[0]
            aug_h, aug_w = aug_img.shape[:2]

            # Build all label lines first so that a failing conversion
            # (presumably a box pushed outside the image by the augmenter -
            # TODO confirm against pybboxes) aborts the sample before any
            # file is created.
            try:
                label_lines = []
                for b in bbs_aug:
                    normalized_coord = pbx.convert_bbox(
                        (b.x1, b.y1, b.x2, b.y2),
                        from_type="voc",
                        to_type="yolo",
                        image_size=(aug_w, aug_h),
                    )
                    label_lines.append(
                        f"{str(b.label)} {normalized_coord[0]} {normalized_coord[1]} {normalized_coord[2]} {normalized_coord[3]}\n"
                    )
            except Exception as exc:
                print("skipping augmented sample, bad bounding box: ", exc)
                continue

            unique_id = str(uuid4())
            image_name = unique_id + ".jpg"
            label_name = unique_id + ".txt"
            try:
                if not cv2.imwrite(os.path.join(target_images_loc, image_name), aug_img):
                    print("could not write image, skipping: ", image_name)
                    continue
                with open(os.path.join(target_labels_loc, label_name), "a") as f:
                    f.writelines(label_lines)
            except (OSError, cv2.error) as exc:
                print("could not write augmented sample: ", exc)
                continue
# iaa.Affine(translate_px={"x": (1, 5)}),
# ,
# iaa.AddElementwise((-40, 40))
# iaa.AdditiveGaussianNoise(scale=0.05*255)
# seq = iaa.Sequential([
# iaa.Affine(rotate=(-45, 45))
# ])
# iaa.ChannelShuffle(0.35, channels=[0, 1]),
# iaa.Affine(
# scale={"x": (0.8, 1.2), "y": (0.8, 1.2)}, # scale images to 80-120% of their size, individually per axis
# translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}, # translate by -20 to +20 percent (per axis)
# rotate=(-45, 45), # rotate by -45 to +45 degrees
# shear=(-16, 16), # shear by -16 to +16 degrees
# order=[0, 1], # use nearest neighbour or bilinear interpolation (fast)
# cval=(0, 255), # if mode is constant, use a cval between 0 and 255
# mode=ia.ALL # use any of scikit-image's warping modes (see 2nd image from the top for examples)
# )
# Augmenters that change pixel values only (colour, noise, dropout, blur,
# contrast).
_PIXEL_AUGMENTERS = [
    iaa.ChannelShuffle(0.35),
    iaa.Add((-100, 100)),
    iaa.AdditiveGaussianNoise(scale=(0, 0.2 * 255)),
    iaa.Multiply((0.5, 1.5)),
    iaa.Multiply((0.5, 1.5), per_channel=0.5),
    iaa.Dropout(p=(0, 0.2)),
    iaa.CoarseDropout((0.0, 0.05), size_percent=(0.02, 0.25)),
    iaa.Dropout2d(p=0.5),
    iaa.Cartoon(),
    iaa.GaussianBlur(sigma=(0.0, 3.0)),
    iaa.MotionBlur(k=15),
    iaa.ChangeColorTemperature((1100, 10000)),
    iaa.SigmoidContrast(gain=(3, 10), cutoff=(0.4, 0.6), per_channel=True),
    iaa.CLAHE(),
]

# Augmenters that move pixels, so the bounding boxes move with them.
_GEOMETRIC_AUGMENTERS = [
    iaa.Fliplr(1),
    iaa.Affine(scale=(0.5, 1.5)),
    iaa.PiecewiseAffine(scale=(0.01, 0.05)),
    iaa.PerspectiveTransform(scale=(0.01, 0.15)),
    iaa.ElasticTransformation(alpha=(0, 5.0), sigma=0.25),
]

# Weather-style effects.
_WEATHER_AUGMENTERS = [
    iaa.FastSnowyLandscape(lightness_threshold=140, lightness_multiplier=2.5),
    iaa.Clouds(),
    iaa.Fog(),
    iaa.Rain(speed=(0.1, 0.3)),
]

# Full pool of augmenters that read_images iterates over, one at a time.
seq_list = [
    *_PIXEL_AUGMENTERS,
    *_GEOMETRIC_AUGMENTERS,
    *_WEATHER_AUGMENTERS,
]
# Input and output locations - edit these before running the script.
images_loc = "/home/shikhin/Downloads/ADNOC/UAE_CAR_COMPLETE_DATASET/Annotated_Dataset/Combined_dataset/train/images"
labels_loc = "/home/shikhin/Downloads/ADNOC/UAE_CAR_COMPLETE_DATASET/Annotated_Dataset/Combined_dataset/train/labels"
target_images_loc = "/home/shikhin/Downloads/ADNOC/UAE_CAR_COMPLETE_DATASET/Annotated_Dataset/Combined_dataset/Augmented_dataset/train/images"
target_labels_loc = "/home/shikhin/Downloads/ADNOC/UAE_CAR_COMPLETE_DATASET/Annotated_Dataset/Combined_dataset/Augmented_dataset/train/labels"

# Make sure the output folders exist before any augmented sample is written.
os.makedirs(target_images_loc, exist_ok=True)
os.makedirs(target_labels_loc, exist_ok=True)

images = os.listdir(images_loc)
img_list = []
label_list = []
for img in images:
    if img.endswith((".jpg", ".png", ".jpeg")):
        img_list.append(os.path.join(images_loc, img))
        # splitext removes the real extension whatever its length; a fixed
        # four-character slice would turn "x.jpeg" into "x..txt".
        stem = os.path.splitext(img)[0]
        label_list.append(os.path.join(labels_loc, stem + ".txt"))

read_images(img_list=img_list, label_list=label_list)
# print("annotation dict: ", annotation_dict)
#
# # images = np.zeros((2, 128, 128, 3), dtype=np.uint8) # two example images
# # images[:, 64, 64, :] = 255
# bbs = [
# [ia.BoundingBox(x1=10.5, y1=15.5, x2=30.5, y2=50.5)],
# [ia.BoundingBox(x1=10.5, y1=20.5, x2=50.5, y2=50.5),
# ia.BoundingBox(x1=40.5, y1=75.5, x2=70.5, y2=100.5)]
# ]
#
# seq = iaa.Sequential([
# iaa.AdditiveGaussianNoise(scale=0.05*255),
# iaa.Affine(translate_px={"x": (1, 5)})
# ])
#
# images_aug, bbs_aug = seq(images=images, bounding_boxes=bbs)
#
data.yaml
0 → 100644
View file @
0c59fda2
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
#path: ../datasets/coco128 # dataset root dir
train
:
images/train2017
# train images (relative to 'path') 128 images
val
:
images/train2017
# val images (relative to 'path') 128 images
#test: # test images (optional)
# Classes
names
:
0
:
r1
1
:
r2
2
:
lp
data/images/637c72d98d8d2e1916aa82eb_jpeg.rf.18179e89c20d432d9b023b21474dd16e.jpg
0 → 100644
View file @
0c59fda2
369 KB
data/images/637c72d98d8d2e1916aa82f3_jpeg.rf.6463afa9200cc37075ab9594ccfaadfa.jpg
0 → 100644
View file @
0c59fda2
318 KB
data/images/637c72da8d8d2e1916aa82fa_jpeg.rf.21bc48bf34791fde0878b9312f98b09b.jpg
0 → 100644
View file @
0c59fda2
519 KB
data/images/637c73aa8d8d2e1916aa862d_jpeg.rf.17abd2576af9529c313737084aa9927d.jpg
0 → 100644
View file @
0c59fda2
427 KB
data/images/637c73da8d8d2e1916aa864d_jpeg.rf.ff6a706dd9970fa353d8c7c02ed4d98a.jpg
0 → 100644
View file @
0c59fda2
365 KB
data/labels/637c72d98d8d2e1916aa82eb_jpeg.rf.18179e89c20d432d9b023b21474dd16e.txt
0 → 100644
View file @
0c59fda2
0 0.130729 0.474537 0.009375 0.015741
1 0.161719 0.466204 0.026562 0.026852
data/labels/637c72d98d8d2e1916aa82f3_jpeg.rf.6463afa9200cc37075ab9594ccfaadfa.txt
0 → 100644
View file @
0c59fda2
0 0.403385 0.604630 0.009896 0.020370
1 0.447135 0.582407 0.036979 0.046296
data/labels/637c72da8d8d2e1916aa82fa_jpeg.rf.21bc48bf34791fde0878b9312f98b09b.txt
0 → 100644
View file @
0c59fda2
0 0.590104 0.684722 0.013542 0.026852
1 0.636719 0.654630 0.038021 0.050000
data/labels/637c73aa8d8d2e1916aa862d_jpeg.rf.17abd2576af9529c313737084aa9927d.txt
0 → 100644
View file @
0c59fda2
0 0.172656 0.509259 0.010937 0.025926
1 0.216927 0.503704 0.035937 0.033333
data/labels/637c73da8d8d2e1916aa864d_jpeg.rf.ff6a706dd9970fa353d8c7c02ed4d98a.txt
0 → 100644
View file @
0c59fda2
0 0.082812 0.542130 0.010417 0.032407
1 0.127344 0.531944 0.040104 0.037963
remove_images_without_labels.py
0 → 100644
View file @
0c59fda2
import
os
# Folders holding the images and their label files - edit before running.
img_path = "/home/shikhin/Downloads/ADNOC/UAE_CAR_COMPLETE_DATASET/Annotated_Dataset/Combined_dataset/images"
label_path = "/home/shikhin/Downloads/ADNOC/UAE_CAR_COMPLETE_DATASET/Annotated_Dataset/Combined_dataset/labels"

# Delete every file in img_path that has no "<stem>.txt" file in label_path.
img_list = os.listdir(img_path)
for img in img_list:
    img_file_path = os.path.join(img_path, img)
    if not os.path.isfile(img_file_path):
        # Sub-directories cannot be removed with os.remove; leave them alone.
        continue
    # splitext handles extensions of any length. A fixed four-character slice
    # would look for "x..txt" for "x.jpeg" and delete a labelled image.
    img_without_ext = os.path.splitext(img)[0]
    txt_file = img_without_ext + ".txt"
    txt_file_path = os.path.join(label_path, txt_file)
    if os.path.isfile(txt_file_path):
        print("yes")
    else:
        print("no")
        os.remove(img_file_path)
\ No newline at end of file
split_dataset.py
0 → 100644
View file @
0c59fda2
import
argparse
import
os
from
random
import
shuffle
import
pandas
as
pd
from
math
import
floor
import
shutil
# Command-line interface. FLAGS is read as a module-level global by the
# functions below.
parser = argparse.ArgumentParser()
parser.add_argument('--datadir', help='Path to the all input data', type=str, required=True)
# argparse %-formats help strings, so a literal percent sign must be written
# as "%%"; a bare "%" breaks help formatting.
parser.add_argument('--split', help='Split value - Test %%', type=float, default=0.1)
parser.add_argument('--train_output', help='Path to output train data', type=str, required=True)
parser.add_argument('--test_output', help='Path to output test data', type=str, required=True)
parser.add_argument('--image_ext', help='jpeg or jpg or png', type=str, default='jpeg')
FLAGS = parser.parse_args()
def check_dir(directory):
    """Create ``directory`` (with any missing parents) unless the path exists.

    Prints whether the directory was created or was already there.
    """
    if os.path.exists(directory):
        print('Directory exists -', directory)
        return
    os.makedirs(directory)
    print('Creating directory -', directory)
def get_file_list_from_dir(datadir):
    """List the images in ``datadir`` in shuffled order.

    Only file names ending in ``'.' + FLAGS.image_ext`` are kept. They are
    shuffled with ``randomize_files`` and returned as a DataFrame with two
    columns: ``imagepath`` (the file name) and ``filename`` (the file name
    with that suffix removed).
    """
    suffix = '.' + FLAGS.image_ext
    all_files = os.listdir(os.path.abspath(datadir))
    data_files = [name for name in all_files if name.endswith(suffix)]
    shuffled_files = randomize_files(data_files)
    image_table = pd.DataFrame({'imagepath': shuffled_files})
    # Strip exactly the matched suffix. A fixed four-character slice is wrong
    # for ".jpeg" and leaves a trailing dot, so the label/image pair would
    # never be found later. Building the column from the list also works when
    # no file matched.
    image_table['filename'] = [name[:-len(suffix)] for name in shuffled_files]
    return image_table
def randomize_files(file_list):
    """Shuffle ``file_list`` in place and return the same list object."""
    shuffle(file_list)  # reorders the caller's list; no copy is made
    return file_list
def get_training_and_testing_sets(file_list, split):
    """Split ``file_list`` into ``(training, testing)``.

    The first ``floor(rows * split)`` rows form the testing set and the
    remaining rows form the training set. Only the training set has its
    index reset.
    """
    row_count = file_list.shape[0]
    cut = floor(row_count * split)
    head, tail = file_list[:cut], file_list[cut:]
    return tail.reset_index(drop=True), head
def _move_pair(name, image_ext, src_dir, dst_dir):
    """Move ``name``'s label and image from ``src_dir`` to ``dst_dir``.

    Returns True when both files were moved. If either file is missing,
    nothing is moved, so a label is never separated from its image.
    """
    file_names = (name + '.txt', name + '.' + image_ext)
    missing = [
        file_name for file_name in file_names
        if not os.path.isfile(os.path.join(src_dir, file_name))
    ]
    if missing:
        print('Could not find {}'.format(', '.join(missing)))
        return False
    try:
        for file_name in file_names:
            shutil.move(os.path.join(src_dir, file_name),
                        os.path.join(dst_dir, file_name))
    except (OSError, shutil.Error) as err:
        print('Could not move {} - {}'.format(name, err))
        return False
    return True


def write_data(training, testing, datadir, train_output, test_output, image_ext=None):
    """Move the train and test label/image pairs out of ``datadir``.

    Args:
        training: table whose ``'filename'`` column lists the extension-less
            names destined for ``train_output``.
        testing: same, for ``test_output``.
        datadir: directory that currently holds every label and image.
        train_output: destination directory of the training pairs.
        test_output: destination directory of the testing pairs.
        image_ext: image extension without the leading dot. Defaults to
            ``FLAGS.image_ext`` when omitted.

    A pair with a missing file is reported and left untouched in ``datadir``.
    """
    if image_ext is None:
        image_ext = FLAGS.image_ext

    subsets = (
        (training, '- Train data images at -', train_output),
        (testing, '- Test data images at -', test_output),
    )
    for subset, banner, out_dir in subsets:
        print('Writing -', subset.shape[0], banner, out_dir)
        for name in subset['filename']:
            _move_pair(name, image_ext, datadir, out_dir)
def main():
    """Run the split: create the output folders, list, split and move files.

    All settings come from the module-level ``FLAGS``.
    """
    for out_dir in (FLAGS.train_output, FLAGS.test_output):
        check_dir(out_dir)

    source_dir = FLAGS.datadir
    file_list = get_file_list_from_dir(source_dir)
    print('Read -', file_list.shape[0], '- files from the directory -', source_dir)

    training, testing = get_training_and_testing_sets(file_list, FLAGS.split)
    write_data(training, testing, source_dir, FLAGS.train_output, FLAGS.test_output)


if __name__ == '__main__':
    main()
\ No newline at end of file
yolov8_object_detection_custom_training.ipynb
0 → 100644
View file @
0c59fda2
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment