Commit d4d8c1f6 authored by Riya Srivastava

Push preprocessing and inference code, along with the weight files

parent 8a6d74a1
# riya_daily_tasks
Link to `yolov8x_objectdetection_best.pt`, which identifies the ROI of a seven-segment display: https://drive.google.com/file/d/1Qu-2udd1y1WDwGZMHJsdCMeKBknzqtuL/view?usp=drive_link
Link to `yolov8n_objectdetection_best.pt`, which identifies the ROI of a seven-segment display: https://drive.google.com/file/d/1iCijEpwqgY1btPAoT7ctPeD32M_Ri434/view?usp=drive_link
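
A minimal sketch of loading either checkpoint with the `ultralytics` package to locate the display ROI (the weight filename matches the links above; the image path is a placeholder):

```python
from ultralytics import YOLO
import cv2

# Assumes the weight file from the links above has been downloaded locally.
model = YOLO("yolov8n_objectdetection_best.pt")

image = cv2.imread("meter.jpg")  # placeholder: any image containing a seven-segment display
results = model.predict(image)

# Each box is the (x1, y1, x2, y2) ROI of a detected display.
for box, conf in zip(results[0].boxes.xyxy, results[0].boxes.conf):
    x1, y1, x2, y2 = map(int, box)
    print(f"ROI ({float(conf):.2f}): ({x1}, {y1}) -> ({x2}, {y2})")
```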
import os

# Filter valid_list.txt down to the entries whose image file actually exists in crop_img/.
path = r"D:\Kalypso-robotics\seven_segment\model_v2"
img_list = os.listdir(os.path.join(path, "crop_img"))

new_img_list = []
with open(os.path.join(path, "valid_list.txt"), "r") as f:
    for line in f:
        # Each label line is "<image_name>\t<transcription>".
        image_name = line.split("\t")[0]
        if image_name in img_list:
            new_img_list.append(line)

# Write the filtered label lines to a new list file.
with open(os.path.join(path, "new_valid_list.txt"), "a") as f:
    for entry in new_img_list:
        f.write(entry)
import os

# Directory containing the text files to merge (set this path before running).
input_directory = r""
# Output file where the merged content will be saved.
output_file = r"D:\Kalypso-robotics\seven_segment\All_txt_files_v3_v5\merged_texts.txt"

# Open the output file in append mode and concatenate every .txt file in the directory.
merged_count = 0
with open(output_file, 'a') as merged_file:
    for filename in os.listdir(input_directory):
        # Only merge text files.
        if filename.endswith(".txt"):
            with open(os.path.join(input_directory, filename), 'r') as txt_file:
                merged_file.write(txt_file.read())
            # Add a newline to separate the contents of different files.
            merged_file.write("\n")
            merged_count += 1

print(f"Merged {merged_count} files into {output_file}")
import pyautogui as pg
import time

# Click the same screen position every 10 seconds (e.g. to keep the machine awake).
while True:
    time.sleep(10)
    pg.click(500, 500)
    time.sleep(10)
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
from PIL import Image
from paddleocr import PaddleOCR, draw_ocr

folder_path = r"C:\Users\vinod.baste\Downloads\crops (1)\content\runs\detect\exp\crops\text"

# PaddleOCR supports Chinese, English, French, German, Korean and Japanese.
# Set `lang` to `ch`, `en`, `french`, `german`, `korean`, or `japan` to switch
# the language model.
ocr = PaddleOCR(lang='en',
                use_angle_cls=True,
                use_gpu=False,
                det_db_thresh=0.2,
                rec_thresh=0.2,
                det_db_box_thresh=0.2,
                use_space_char=True,
                rec_char_info_thresh=0.2,
                max_text_length=45,
                rec_char_dict_path=r'C:\Users\vinod.baste\Desktop\TextDetectionOCR\en_dict.txt',
                det_db_unclip_ratio=2.0,
                use_dynamic_shape=False,
                det_visualize=False,
                drop_score=0.2,
                det_db_score_mode="fast",
                det_db_polynms_overlap_thresh=0.1,
                debug=False,
                det_rotate_crop=False,
                det_max_crop_size=1280,
                det_polygon=False,
                det_split_mode=True,
                rec_model_dir=r'C:\Users\vinod.baste\Desktop\TextDetectionOCR\en_PP-OCRv3_rec',
                rec_image_shape="3, 32, 320",
                rec_char_type="en",
                rec_batch_num=12,
                # det_model_dir=r"C:\Users\vinod.baste\Downloads\trained_model_output_det\content\PaddleOCR\output"
                #               r"\det_db_inference",
                det_limit_side_len=960,
                rec_image_dir=None,
                rec_batch_num1=18,
                rec_char_dict_path1=None,
                drop_score1=0.4,
                use_dilation=True,
                use_polygon_score=True,
                rec_algorithm='SVTR_LCNet',
                det_algorithm='DB'
                )  # needs to run only once to download and load the model into memory

# Iterate over each image file in the folder.
for filename in os.listdir(folder_path):
    # Process common image extensions only (extend the tuple if needed).
    if filename.lower().endswith((".jpg", ".png", ".bmp")):
        image_path = os.path.join(folder_path, filename)
        img = Image.open(image_path).convert('RGB')

        # Recognition-only OCR: det=False skips text detection.
        result = ocr.ocr(image_path, cls=True, rec=True, det=False)

        # Print the OCR result.
        for res in result:
            for line in res:
                print(line)

        # Draw and save the OCR result (only the first image's result is drawn).
        result = result[0]
        boxes = []  # no detection boxes when det=False
        txts = [line[0] for line in result]
        scores = [line[1] for line in result]
        im_show = draw_ocr(img, boxes, txts=txts, scores=scores, font_path='RobotoLight.ttf')
        im_show = Image.fromarray(im_show)
        new_folder = r"C:\Users\vinod.baste\Desktop\New folder (3)\crop_out - Copy"
        save_path = os.path.join(new_folder, "result_" + filename)
        im_show.save(save_path)
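
Note on the result structure: with `det=False`, `ocr.ocr` skips detection and treats each input as a single text line, so in this PaddleOCR 2.x-style API each per-image result is a list of `(text, confidence)` tuples rather than `(box, (text, score))` pairs; that is why `boxes` stays empty above. A small sketch of unpacking it (the crop filename is a placeholder):

```python
# Recognition-only inference on one crop (file name is a placeholder).
result = ocr.ocr("crop_0001.jpg", cls=True, rec=True, det=False)
for per_image in result:          # one entry per input image
    for text, confidence in per_image:
        print(f"{text} ({confidence:.2f})")
```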
import numpy as np
import cv2
import os

path = r"D:\Kalypso-robotics\seven_segment\All_cropimg_texts_v3_v5\All_crop_images_merged - Copy"
dest_path = r"D:\Kalypso-robotics\seven_segment\All_cropimg_texts_v3_v5\all_dilated_images"

img_list = os.listdir(path)
for img in img_list:
    img_join = os.path.join(path, img)
    image = cv2.imread(img_join)
    if image is None:
        continue

    # Convert the image to grayscale and blur it slightly.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (7, 7), 0)

    # Apply Otsu's automatic thresholding (inverted binary).
    (T, threshInv) = cv2.threshold(blurred, 0, 255,
                                   cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    cv2.imshow("Otsu Thresholding", threshInv)

    # Invert the binary image and dilate to thicken the segment strokes.
    kernel = np.ones((3, 3), np.uint8)
    invert = cv2.bitwise_not(threshInv)
    dilation = cv2.dilate(invert, kernel, iterations=1)

    # Close small gaps between the segments.
    closing = cv2.morphologyEx(dilation, cv2.MORPH_CLOSE, kernel, iterations=1)
    cv2.imshow("closed", closing)

    # Save the preprocessed crop; OCR on these images is handled by the
    # separate inference scripts.
    image_join = os.path.join(dest_path, img)
    print(image_join)
    cv2.imwrite(image_join, closing)
    cv2.waitKey(1)

# Close all open windows.
cv2.destroyAllWindows()
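
The same grayscale → blur → Otsu → invert → dilate → close chain reappears in the inference script further down; a minimal sketch of it as one reusable helper (the function name is my own, not part of the repo):

```python
import cv2
import numpy as np

def preprocess_ssd_crop(image, kernel_size=3):
    """Binarize a seven-segment display crop: grayscale, Gaussian blur,
    inverted Otsu threshold, then invert, dilate, and close to thicken segments."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (7, 7), 0)
    _, thresh_inv = cv2.threshold(blurred, 0, 255,
                                  cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    inverted = cv2.bitwise_not(thresh_inv)
    dilated = cv2.dilate(inverted, kernel, iterations=1)
    return cv2.morphologyEx(dilated, cv2.MORPH_CLOSE, kernel, iterations=1)
```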
import cv2
import os
import numpy as np
import uuid


def rotation(image, angle=10):
    """Rotate the image about its centre at 0.5 scale."""
    rows, cols, _ = image.shape
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 0.5)
    return cv2.warpAffine(image, M, (cols, rows))


def shearing(image):
    """Shear the image horizontally by a factor of 0.5."""
    rows, cols, _ = image.shape
    M = np.float32([[1, 0.5, 0], [0, 1, 0], [0, 0, 1]])
    return cv2.warpPerspective(image, M, (int(cols * 1.5), int(rows * 1.5)))


def noise(image, mean=0, stddev=180):
    """Add Gaussian noise to the image."""
    noise_img = np.zeros(image.shape, np.uint8)
    cv2.randn(noise_img, mean, stddev)
    return cv2.add(image, noise_img)


input_path = r"D:\Kalypso-robotics\seven_segment\All_cropimg_texts_v3_v5\all_dilated_images"
output_path = r"D:\Kalypso-robotics\seven_segment\All_cropimg_texts_v3_v5\dilated_augmented"
if not os.path.exists(output_path):
    os.makedirs(output_path)

with open(r"D:\Kalypso-robotics\seven_segment\All_cropimg_texts_v3_v5\All_txt_files_v3_v5\merged_texts.txt", "r") as f:
    lines = f.readlines()

for line in lines:
    # Each label line is "crop_img/<image>\t<transcription>\n".
    abc = line.split("\t")
    text = abc[1]
    image_name = abc[0].split("crop_img/")[1]
    print(image_name)

    image_path = os.path.join(input_path, image_name)
    img = cv2.imread(image_path)
    if img is None:
        print(f"Failed to load image: {image_path}")
        continue

    # Rotation augmentation.
    rotated_img = rotation(img, angle=10)
    cv2.imshow("image_rotation", rotated_img)
    new_img_name_rotation = str(uuid.uuid4()) + "_" + text[:-1] + "_rotation_.jpg"
    label_text_rotation = "crop_img/" + new_img_name_rotation + "\t" + text
    output_image_path = os.path.join(output_path, new_img_name_rotation)
    cv2.imwrite(output_image_path, rotated_img)
    print(f"Rotated and saved: {output_image_path}")

    # Shearing augmentation.
    sheared_img = shearing(img)
    cv2.imshow("image_shearing", sheared_img)
    new_img_name_shearing = str(uuid.uuid4()) + "_" + text[:-1] + "_shearing_.jpg"
    label_text_shearing = "crop_img/" + new_img_name_shearing + "\t" + text
    output_image_path = os.path.join(output_path, new_img_name_shearing)
    cv2.imwrite(output_image_path, sheared_img)
    print(f"Sheared and saved: {output_image_path}")

    # Noise augmentation.
    noisy_img = noise(img, mean=0, stddev=180)
    cv2.imshow("image_noise", noisy_img)
    new_img_name_noisy = str(uuid.uuid4()) + "_" + text[:-1] + "_noisy_.jpg"
    label_text_noisy = "crop_img/" + new_img_name_noisy + "\t" + text
    output_image_path = os.path.join(output_path, new_img_name_noisy)
    cv2.imwrite(output_image_path, noisy_img)
    print(f"Noised and saved: {output_image_path}")
    cv2.waitKey(1)

    # Append the three new label lines for this image's augmented copies.
    with open("merged_augmented_rec_gt.txt", "a") as q:
        q.write(label_text_rotation)
        q.write(label_text_shearing)
        q.write(label_text_noisy)
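
Several of these scripts split label lines on a tab, relying on PaddleOCR's recognition ground-truth format of one `image_path<TAB>transcription` pair per line. A small parsing sketch (the helper name is an assumption, not part of the repo):

```python
def parse_rec_label(line: str):
    """Split a PaddleOCR recognition label line such as
    'crop_img/abc.jpg\t12.5\n' into (image_path, transcription)."""
    image_path, transcription = line.rstrip("\n").split("\t", 1)
    return image_path, transcription

print(parse_rec_label("crop_img/abc.jpg\t12.5\n"))  # ('crop_img/abc.jpg', '12.5')
```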
from ultralytics import YOLO
from paddleocr import PaddleOCR
import cv2
import os

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Load the YOLO model that detects the seven-segment display ROI.
model = YOLO(r"C:\Users\riya.srivastava\PycharmProjects\paddleocr\yolo8n_object_detection.pt")
pth = r"C:\Users\riya.srivastava\PycharmProjects\paddleocr\meter.jpg"

# Load the PaddleOCR model.
folder_path = r"D:\sikhin_ssd_datasets\all_images_txt_files\all_images\paddleocr_datasets_600_1900 - Copy\crop_img"
# PaddleOCR supports Chinese, English, French, German, Korean and Japanese.
# Set `lang` to `ch`, `en`, `french`, `german`, `korean`, or `japan` to switch
# the language model.
ocr = PaddleOCR(lang='en',
                use_angle_cls=True,
                use_gpu=False,
                det_db_thresh=0.2,
                rec_thresh=0.2,
                det_db_box_thresh=0.2,
                use_space_char=True,
                rec_char_info_thresh=0.2,
                max_text_length=45,
                det_db_unclip_ratio=2.0,
                use_dynamic_shape=False,
                det_visualize=False,
                drop_score=0.2,
                det_db_score_mode="fast",
                det_db_polynms_overlap_thresh=0.1,
                debug=False,
                det_rotate_crop=False,
                det_max_crop_size=1280,
                det_polygon=False,
                det_split_mode=True,
                rec_model_dir=r'C:\Users\riya.srivastava\Downloads\trained_model_output\content\PaddleOCR\output\inference\en_PP-OCRv3_rec',
                rec_image_shape="3, 32, 320",
                rec_char_type="en",
                rec_batch_num=12,
                det_model_dir=None,
                det_limit_side_len=960,
                rec_image_dir=None,
                rec_batch_num1=18,
                rec_char_dict_path=r"C:\Users\riya.srivastava\PycharmProjects\paddleocr\PaddleOCR\ppocr\utils\en_dict_ssd.txt",
                drop_score1=0.4,
                use_dilation=True,
                use_polygon_score=True,
                rec_algorithm='SVTR_LCNet',
                det_algorithm='DB'
                )  # needs to run only once to download and load the model into memory

# Read the input image and detect the display ROI.
image = cv2.imread(pth)
results = model.predict(image)
result = results[0]

bbox_tensor = result.boxes.xyxy
cls_tensor = result.boxes.cls
conf_tensor = result.boxes.conf

for bbox, cls, conf in zip(bbox_tensor, cls_tensor, conf_tensor):
    x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
    class_index = int(cls)
    if conf > 0.7:
        confidence = float(conf)
        print(confidence)

        # Crop the detected display region.
        crop_img = image[y1:y2, x1:x2]
        gray = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (7, 7), 0)

        # Simple thresholding with a hard-coded value, shown for comparison.
        (T, threshInv) = cv2.threshold(blurred, 230, 255, cv2.THRESH_BINARY_INV)
        cv2.imshow("Simple Thresholding", threshInv)
        cv2.waitKey(0)

        # Otsu's automatic thresholding.
        (T, threshInv) = cv2.threshold(blurred, 0, 255,
                                       cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
        cv2.imshow("Otsu Thresholding", threshInv)
        cv2.waitKey(0)

        # Recognition-only OCR on the thresholded crop.
        result = ocr.ocr(threshInv, cls=True, rec=True, det=False)

        # Print the OCR result.
        for res in result:
            for line in res:
                print(line)

        result = result[0]  # only the first result
        boxes = []  # no detection boxes when det=False
        txts = [line[0] for line in result]
        print(txts)
import os
import random

input_path = r"D:\Kalypso-robotics\seven_segment\All_cropimg_texts_v3_v5\All_txt_files_v3_v5"

with open(os.path.join(input_path, 'merged_texts - Copy.txt'), 'r') as f:
    lines = f.readlines()

# Shuffle, then split 70/20/10 into train/valid/test; because int() truncates,
# the test slice takes whatever remainder is left.
random.shuffle(lines)
list_length = len(lines)
train_ratio = int(0.7 * list_length)
valid_ratio = int(0.2 * list_length)

train_list = lines[:train_ratio]
valid_list = lines[train_ratio: train_ratio + valid_ratio]
test_list = lines[train_ratio + valid_ratio:]

print(len(train_list))
print(len(valid_list))
print(len(test_list))

with open("train_list.txt", 'a') as f:
    for line in train_list:
        f.write(line)

with open("valid_list.txt", 'a') as f:
    for line in valid_list:
        f.write(line)

with open("test_list.txt", 'a') as f:
    for line in test_list:
        f.write(line)
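
Because `int()` truncates, 70% + 20% can fall short of the total; the test slice absorbs the remainder, so the three lists always cover every line exactly once. A quick sanity check one could append to the script above:

```python
# The three splits should partition the shuffled lines exactly.
total = len(train_list) + len(valid_list) + len(test_list)
assert total == len(lines), f"Split mismatch: {total} != {len(lines)}"
```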
from ultralytics import YOLO
import cv2

# Load the ROI detector and run it on a sample meter image.
model = YOLO(r"C:\Users\riya.srivastava\PycharmProjects\paddleocr\yolo8n_object_detection.pt")
pth = r"C:\Users\riya.srivastava\PycharmProjects\paddleocr\meter.jpg"

image = cv2.imread(pth)
results = model.predict(image)
result = results[0]

bbox_tensor = result.boxes.xyxy
cls_tensor = result.boxes.cls
conf_tensor = result.boxes.conf

# Draw every detected box on the image.
for bbox, cls, conf in zip(bbox_tensor, cls_tensor, conf_tensor):
    x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
    class_index = int(cls)
    confidence = float(conf)
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

image_resize = cv2.resize(image, (900, 600))
cv2.imshow('output', image_resize)
cv2.waitKey(0)