Commit d4d8c1f6 authored by Riya Srivastava

Push preprocessing and inference code, along with the weight files

parent 8a6d74a1
# riya_daily_tasks
Link to `yolov8x_objectdetection_best.pt`, which identifies the ROI of a seven-segment display: https://drive.google.com/file/d/1Qu-2udd1y1WDwGZMHJsdCMeKBknzqtuL/view?usp=drive_link
Link to `yolov8n_objectdetection_best.pt`, which identifies the ROI of a seven-segment display: https://drive.google.com/file/d/1iCijEpwqgY1btPAoT7ctPeD32M_Ri434/view?usp=drive_link
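
A minimal sketch of loading either checkpoint with the `ultralytics` package to locate the display ROI (the weight filename matches the links above; the image path is a placeholder):

```python
from ultralytics import YOLO
import cv2

# Assumes the weight file from the links above has been downloaded locally.
model = YOLO("yolov8n_objectdetection_best.pt")

image = cv2.imread("meter.jpg")  # placeholder: any image containing a seven-segment display
results = model.predict(image)

# Each box is the (x1, y1, x2, y2) ROI of a detected display.
for box, conf in zip(results[0].boxes.xyxy, results[0].boxes.conf):
    x1, y1, x2, y2 = map(int, box)
    print(f"ROI ({float(conf):.2f}): ({x1}, {y1}) -> ({x2}, {y2})")
```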
import os

# Filter valid_list.txt down to the entries whose image file actually exists in crop_img/.
path = r"D:\Kalypso-robotics\seven_segment\model_v2"
img_list = os.listdir(os.path.join(path, "crop_img"))

new_img_list = []
with open(os.path.join(path, "valid_list.txt"), "r") as f:
    for line in f:
        # Each label line is "<image_name>\t<transcription>".
        image_name = line.split("\t")[0]
        if image_name in img_list:
            new_img_list.append(line)

# Write the filtered label lines to a new list file.
with open(os.path.join(path, "new_valid_list.txt"), "a") as f:
    for entry in new_img_list:
        f.write(entry)
import os

# Directory containing the text files to merge (set this path before running).
input_directory = r""
# Output file where the merged content will be saved.
output_file = r"D:\Kalypso-robotics\seven_segment\All_txt_files_v3_v5\merged_texts.txt"

# Open the output file in append mode and concatenate every .txt file in the directory.
merged_count = 0
with open(output_file, 'a') as merged_file:
    for filename in os.listdir(input_directory):
        # Only merge text files.
        if filename.endswith(".txt"):
            with open(os.path.join(input_directory, filename), 'r') as txt_file:
                merged_file.write(txt_file.read())
            # Add a newline to separate the contents of different files.
            merged_file.write("\n")
            merged_count += 1

print(f"Merged {merged_count} files into {output_file}")
import pyautogui as pg
import time

# Click the same screen position every 10 seconds (e.g. to keep the machine awake).
while True:
    time.sleep(10)
    pg.click(500, 500)
    time.sleep(10)
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
from PIL import Image
from paddleocr import PaddleOCR, draw_ocr

folder_path = r"C:\Users\vinod.baste\Downloads\crops (1)\content\runs\detect\exp\crops\text"

# PaddleOCR supports Chinese, English, French, German, Korean and Japanese.
# Set `lang` to `ch`, `en`, `french`, `german`, `korean`, or `japan` to switch
# the language model.
ocr = PaddleOCR(lang='en',
                use_angle_cls=True,
                use_gpu=False,
                det_db_thresh=0.2,
                rec_thresh=0.2,
                det_db_box_thresh=0.2,
                use_space_char=True,
                rec_char_info_thresh=0.2,
                max_text_length=45,
                rec_char_dict_path=r'C:\Users\vinod.baste\Desktop\TextDetectionOCR\en_dict.txt',
                det_db_unclip_ratio=2.0,
                use_dynamic_shape=False,
                det_visualize=False,
                drop_score=0.2,
                det_db_score_mode="fast",
                det_db_polynms_overlap_thresh=0.1,
                debug=False,
                det_rotate_crop=False,
                det_max_crop_size=1280,
                det_polygon=False,
                det_split_mode=True,
                rec_model_dir=r'C:\Users\vinod.baste\Desktop\TextDetectionOCR\en_PP-OCRv3_rec',
                rec_image_shape="3, 32, 320",
                rec_char_type="en",
                rec_batch_num=12,
                # det_model_dir=r"C:\Users\vinod.baste\Downloads\trained_model_output_det\content\PaddleOCR\output"
                #               r"\det_db_inference",
                det_limit_side_len=960,
                rec_image_dir=None,
                rec_batch_num1=18,
                rec_char_dict_path1=None,
                drop_score1=0.4,
                use_dilation=True,
                use_polygon_score=True,
                rec_algorithm='SVTR_LCNet',
                det_algorithm='DB'
                )  # needs to run only once to download and load the model into memory

# Iterate over each image file in the folder.
for filename in os.listdir(folder_path):
    # Process common image extensions only (extend the tuple if needed).
    if filename.lower().endswith((".jpg", ".png", ".bmp")):
        image_path = os.path.join(folder_path, filename)
        img = Image.open(image_path).convert('RGB')

        # Recognition-only OCR: det=False skips text detection.
        result = ocr.ocr(image_path, cls=True, rec=True, det=False)

        # Print the OCR result.
        for res in result:
            for line in res:
                print(line)

        # Draw and save the OCR result (only the first image's result is drawn).
        result = result[0]
        boxes = []  # no detection boxes when det=False
        txts = [line[0] for line in result]
        scores = [line[1] for line in result]
        im_show = draw_ocr(img, boxes, txts=txts, scores=scores, font_path='RobotoLight.ttf')
        im_show = Image.fromarray(im_show)
        new_folder = r"C:\Users\vinod.baste\Desktop\New folder (3)\crop_out - Copy"
        save_path = os.path.join(new_folder, "result_" + filename)
        im_show.save(save_path)
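
Note on the result structure: with `det=False`, `ocr.ocr` skips detection and treats each input as a single text line, so in this PaddleOCR 2.x-style API each per-image result is a list of `(text, confidence)` tuples rather than `(box, (text, score))` pairs; that is why `boxes` stays empty above. A small sketch of unpacking it (the crop filename is a placeholder):

```python
# Recognition-only inference on one crop (file name is a placeholder).
result = ocr.ocr("crop_0001.jpg", cls=True, rec=True, det=False)
for per_image in result:          # one entry per input image
    for text, confidence in per_image:
        print(f"{text} ({confidence:.2f})")
```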
import numpy as np
import cv2
import os

path = r"D:\Kalypso-robotics\seven_segment\All_cropimg_texts_v3_v5\All_crop_images_merged - Copy"
dest_path = r"D:\Kalypso-robotics\seven_segment\All_cropimg_texts_v3_v5\all_dilated_images"

img_list = os.listdir(path)
for img in img_list:
    img_join = os.path.join(path, img)
    image = cv2.imread(img_join)
    if image is None:
        continue

    # Convert the image to grayscale and blur it slightly.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (7, 7), 0)

    # Apply Otsu's automatic thresholding (inverted binary).
    (T, threshInv) = cv2.threshold(blurred, 0, 255,
                                   cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    cv2.imshow("Otsu Thresholding", threshInv)

    # Invert the binary image and dilate to thicken the segment strokes.
    kernel = np.ones((3, 3), np.uint8)
    invert = cv2.bitwise_not(threshInv)
    dilation = cv2.dilate(invert, kernel, iterations=1)

    # Close small gaps between the segments.
    closing = cv2.morphologyEx(dilation, cv2.MORPH_CLOSE, kernel, iterations=1)
    cv2.imshow("closed", closing)

    # Save the preprocessed crop; OCR on these images is handled by the
    # separate inference scripts.
    image_join = os.path.join(dest_path, img)
    print(image_join)
    cv2.imwrite(image_join, closing)
    cv2.waitKey(1)

# Close all open windows.
cv2.destroyAllWindows()
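
The same grayscale → blur → Otsu → invert → dilate → close chain reappears in the inference script further down; a minimal sketch of it as one reusable helper (the function name is my own, not part of the repo):

```python
import cv2
import numpy as np

def preprocess_ssd_crop(image, kernel_size=3):
    """Binarize a seven-segment display crop: grayscale, Gaussian blur,
    inverted Otsu threshold, then invert, dilate, and close to thicken segments."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (7, 7), 0)
    _, thresh_inv = cv2.threshold(blurred, 0, 255,
                                  cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    inverted = cv2.bitwise_not(thresh_inv)
    dilated = cv2.dilate(inverted, kernel, iterations=1)
    return cv2.morphologyEx(dilated, cv2.MORPH_CLOSE, kernel, iterations=1)
```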
import cv2
import os
import numpy as np
import uuid


def rotation(image, angle=10):
    """Rotate the image about its centre at 0.5 scale."""
    rows, cols, _ = image.shape
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 0.5)
    return cv2.warpAffine(image, M, (cols, rows))


def shearing(image):
    """Shear the image horizontally by a factor of 0.5."""
    rows, cols, _ = image.shape
    M = np.float32([[1, 0.5, 0], [0, 1, 0], [0, 0, 1]])
    return cv2.warpPerspective(image, M, (int(cols * 1.5), int(rows * 1.5)))


def noise(image, mean=0, stddev=180):
    """Add Gaussian noise to the image."""
    noise_img = np.zeros(image.shape, np.uint8)
    cv2.randn(noise_img, mean, stddev)
    return cv2.add(image, noise_img)


input_path = r"D:\Kalypso-robotics\seven_segment\All_cropimg_texts_v3_v5\all_dilated_images"
output_path = r"D:\Kalypso-robotics\seven_segment\All_cropimg_texts_v3_v5\dilated_augmented"
if not os.path.exists(output_path):
    os.makedirs(output_path)

with open(r"D:\Kalypso-robotics\seven_segment\All_cropimg_texts_v3_v5\All_txt_files_v3_v5\merged_texts.txt", "r") as f:
    lines = f.readlines()

for line in lines:
    # Each label line is "crop_img/<image>\t<transcription>\n".
    abc = line.split("\t")
    text = abc[1]
    image_name = abc[0].split("crop_img/")[1]
    print(image_name)

    image_path = os.path.join(input_path, image_name)
    img = cv2.imread(image_path)
    if img is None:
        print(f"Failed to load image: {image_path}")
        continue

    # Rotation augmentation.
    rotated_img = rotation(img, angle=10)
    cv2.imshow("image_rotation", rotated_img)
    new_img_name_rotation = str(uuid.uuid4()) + "_" + text[:-1] + "_rotation_.jpg"
    label_text_rotation = "crop_img/" + new_img_name_rotation + "\t" + text
    output_image_path = os.path.join(output_path, new_img_name_rotation)
    cv2.imwrite(output_image_path, rotated_img)
    print(f"Rotated and saved: {output_image_path}")

    # Shearing augmentation.
    sheared_img = shearing(img)
    cv2.imshow("image_shearing", sheared_img)
    new_img_name_shearing = str(uuid.uuid4()) + "_" + text[:-1] + "_shearing_.jpg"
    label_text_shearing = "crop_img/" + new_img_name_shearing + "\t" + text
    output_image_path = os.path.join(output_path, new_img_name_shearing)
    cv2.imwrite(output_image_path, sheared_img)
    print(f"Sheared and saved: {output_image_path}")

    # Noise augmentation.
    noisy_img = noise(img, mean=0, stddev=180)
    cv2.imshow("image_noise", noisy_img)
    new_img_name_noisy = str(uuid.uuid4()) + "_" + text[:-1] + "_noisy_.jpg"
    label_text_noisy = "crop_img/" + new_img_name_noisy + "\t" + text
    output_image_path = os.path.join(output_path, new_img_name_noisy)
    cv2.imwrite(output_image_path, noisy_img)
    print(f"Noised and saved: {output_image_path}")
    cv2.waitKey(1)

    # Append the three new label lines for this image's augmented copies.
    with open("merged_augmented_rec_gt.txt", "a") as q:
        q.write(label_text_rotation)
        q.write(label_text_shearing)
        q.write(label_text_noisy)
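
Several of these scripts split label lines on a tab, relying on PaddleOCR's recognition ground-truth format of one `image_path<TAB>transcription` pair per line. A small parsing sketch (the helper name is an assumption, not part of the repo):

```python
def parse_rec_label(line: str):
    """Split a PaddleOCR recognition label line such as
    'crop_img/abc.jpg\t12.5\n' into (image_path, transcription)."""
    image_path, transcription = line.rstrip("\n").split("\t", 1)
    return image_path, transcription

print(parse_rec_label("crop_img/abc.jpg\t12.5\n"))  # ('crop_img/abc.jpg', '12.5')
```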
from ultralytics import YOLO
from paddleocr import PaddleOCR
import cv2
import os

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Load the YOLO model that detects the seven-segment display ROI.
model = YOLO(r"C:\Users\riya.srivastava\PycharmProjects\paddleocr\yolo8n_object_detection.pt")
pth = r"C:\Users\riya.srivastava\PycharmProjects\paddleocr\meter.jpg"

# Load the PaddleOCR model.
folder_path = r"D:\sikhin_ssd_datasets\all_images_txt_files\all_images\paddleocr_datasets_600_1900 - Copy\crop_img"
# PaddleOCR supports Chinese, English, French, German, Korean and Japanese.
# Set `lang` to `ch`, `en`, `french`, `german`, `korean`, or `japan` to switch
# the language model.
ocr = PaddleOCR(lang='en',
                use_angle_cls=True,
                use_gpu=False,
                det_db_thresh=0.2,
                rec_thresh=0.2,
                det_db_box_thresh=0.2,
                use_space_char=True,
                rec_char_info_thresh=0.2,
                max_text_length=45,
                det_db_unclip_ratio=2.0,
                use_dynamic_shape=False,
                det_visualize=False,
                drop_score=0.2,
                det_db_score_mode="fast",
                det_db_polynms_overlap_thresh=0.1,
                debug=False,
                det_rotate_crop=False,
                det_max_crop_size=1280,
                det_polygon=False,
                det_split_mode=True,
                rec_model_dir=r'C:\Users\riya.srivastava\Downloads\trained_model_output\content\PaddleOCR\output\inference\en_PP-OCRv3_rec',
                rec_image_shape="3, 32, 320",
                rec_char_type="en",
                rec_batch_num=12,
                det_model_dir=None,
                det_limit_side_len=960,
                rec_image_dir=None,
                rec_batch_num1=18,
                rec_char_dict_path=r"C:\Users\riya.srivastava\PycharmProjects\paddleocr\PaddleOCR\ppocr\utils\en_dict_ssd.txt",
                drop_score1=0.4,
                use_dilation=True,
                use_polygon_score=True,
                rec_algorithm='SVTR_LCNet',
                det_algorithm='DB'
                )  # needs to run only once to download and load the model into memory

# Read the input image and detect the display ROI.
image = cv2.imread(pth)
results = model.predict(image)
result = results[0]

bbox_tensor = result.boxes.xyxy
cls_tensor = result.boxes.cls
conf_tensor = result.boxes.conf

for bbox, cls, conf in zip(bbox_tensor, cls_tensor, conf_tensor):
    x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
    class_index = int(cls)
    if conf > 0.7:
        confidence = float(conf)
        print(confidence)

        # Crop the detected display region.
        crop_img = image[y1:y2, x1:x2]
        gray = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (7, 7), 0)

        # Simple thresholding with a hard-coded value, shown for comparison.
        (T, threshInv) = cv2.threshold(blurred, 230, 255, cv2.THRESH_BINARY_INV)
        cv2.imshow("Simple Thresholding", threshInv)
        cv2.waitKey(0)

        # Otsu's automatic thresholding.
        (T, threshInv) = cv2.threshold(blurred, 0, 255,
                                       cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
        cv2.imshow("Otsu Thresholding", threshInv)
        cv2.waitKey(0)

        # Recognition-only OCR on the thresholded crop.
        result = ocr.ocr(threshInv, cls=True, rec=True, det=False)

        # Print the OCR result.
        for res in result:
            for line in res:
                print(line)

        result = result[0]  # only the first result
        boxes = []  # no detection boxes when det=False
        txts = [line[0] for line in result]
        print(txts)
import os
import random

input_path = r"D:\Kalypso-robotics\seven_segment\All_cropimg_texts_v3_v5\All_txt_files_v3_v5"

with open(os.path.join(input_path, 'merged_texts - Copy.txt'), 'r') as f:
    lines = f.readlines()

# Shuffle, then split 70/20/10 into train/valid/test; because int() truncates,
# the test slice takes whatever remainder is left.
random.shuffle(lines)
list_length = len(lines)
train_ratio = int(0.7 * list_length)
valid_ratio = int(0.2 * list_length)

train_list = lines[:train_ratio]
valid_list = lines[train_ratio: train_ratio + valid_ratio]
test_list = lines[train_ratio + valid_ratio:]

print(len(train_list))
print(len(valid_list))
print(len(test_list))

with open("train_list.txt", 'a') as f:
    for line in train_list:
        f.write(line)

with open("valid_list.txt", 'a') as f:
    for line in valid_list:
        f.write(line)

with open("test_list.txt", 'a') as f:
    for line in test_list:
        f.write(line)
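
Because `int()` truncates, 70% + 20% can fall short of the total; the test slice absorbs the remainder, so the three lists always cover every line exactly once. A quick sanity check one could append to the script above:

```python
# The three splits should partition the shuffled lines exactly.
total = len(train_list) + len(valid_list) + len(test_list)
assert total == len(lines), f"Split mismatch: {total} != {len(lines)}"
```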
from ultralytics import YOLO
import cv2

# Load the ROI detector and run it on a sample meter image.
model = YOLO(r"C:\Users\riya.srivastava\PycharmProjects\paddleocr\yolo8n_object_detection.pt")
pth = r"C:\Users\riya.srivastava\PycharmProjects\paddleocr\meter.jpg"

image = cv2.imread(pth)
results = model.predict(image)
result = results[0]

bbox_tensor = result.boxes.xyxy
cls_tensor = result.boxes.cls
conf_tensor = result.boxes.conf

# Draw every detected box on the image.
for bbox, cls, conf in zip(bbox_tensor, cls_tensor, conf_tensor):
    x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
    class_index = int(cls)
    confidence = float(conf)
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

image_resize = cv2.resize(image, (900, 600))
cv2.imshow('output', image_resize)
cv2.waitKey(0)