|
import sys |
|
import os |
|
import torch |
|
from PIL import Image |
|
import cv2 |
|
import numpy as np |
|
|
|
|
|
|
|
from IndicPhotoOCR.script_identification.CLIP_identifier import CLIPidentifier |
|
from IndicPhotoOCR.recognition.parseq_recogniser import PARseqrecogniser |
|
import IndicPhotoOCR.detection.east_config as cfg |
|
from IndicPhotoOCR.detection.textbpn.textbpnpp_detector import TextBPNpp_detector |
|
|
|
|
|
class OCR:
    """Text pipeline: detect text boxes, identify their script, recognise the text.

    ``__init__`` builds the three collaborators used by the other methods:
    a ``TextBPNpp_detector`` (``self.detector``), a ``PARseqrecogniser``
    (``self.recogniser``) and a ``CLIPidentifier`` (``self.identifier``).
    """

    # Image modes that Pillow's JPEG writer accepts. Crops in any other mode
    # (e.g. RGBA or palette images) are converted to RGB before being saved.
    _JPEG_MODES = ("L", "RGB", "CMYK")

    # Directory, relative to the current working directory, that receives the
    # temporary crop files written by ``crop_and_identify_script``.
    _CROP_DIR = "IndicPhotoOCR/script_identification/images"

    def __init__(self, device='cuda:0', verbose=False):
        """Create the detector, recogniser and script identifier.

        Args:
            device (str): Device string stored on the instance and forwarded to
                the detector. The recogniser and identifier are constructed
                without it.
            verbose (bool): When true, the pipeline prints progress messages.
        """
        self.device = device
        self.verbose = verbose

        self.detector = TextBPNpp_detector(device=self.device)
        self.recogniser = PARseqrecogniser()
        self.identifier = CLIPidentifier()

    def detect(self, image_path):
        """Run the detector on ``image_path``.

        The detector's full result is kept on ``self.detections``; only its
        ``'detections'`` entry is returned.
        """
        self.detections = self.detector.detect(image_path)
        return self.detections['detections']

    def visualize_detection(self, image_path, detections, save_path=None, show=False):
        """Draw a green axis-aligned rectangle around each detection and save it.

        Args:
            image_path (str): Image to annotate; read with ``cv2.imread``.
            detections (iterable): Boxes, each convertible to a 2-D integer
                array whose first two columns are x and y coordinates.
            save_path (str, optional): Output path. Defaults to ``"test.png"``.
            show (bool): When true, also display the annotated image with
                matplotlib (imported lazily, only in that case).

        Raises:
            ValueError: If ``cv2.imread`` could not read ``image_path``.
        """
        path_to_save = save_path if save_path is not None else "test.png"

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError(f"Could not read image: {image_path}")

        # Make sure the output directory exists (a bare file name has none).
        directory = os.path.dirname(path_to_save)
        if directory and not os.path.exists(directory):
            os.makedirs(directory, exist_ok=True)
            print(f"Created directory: {directory}")

        for box in detections:
            points = np.array(box, np.int32)

            # Bounding rectangle of the points; cast to plain ints because
            # OpenCV's argument parsing can reject NumPy scalar types.
            x_min = int(np.min(points[:, 0]))
            y_min = int(np.min(points[:, 1]))
            x_max = int(np.max(points[:, 0]))
            y_max = int(np.max(points[:, 1]))

            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), color=(0, 255, 0), thickness=3)

        if show:
            # Imported here so matplotlib is only required when displaying.
            import matplotlib.pyplot as plt

            plt.figure(figsize=(10, 10))
            plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            plt.axis("off")
            plt.show()

        cv2.imwrite(path_to_save, image)
        print(f"Image saved at: {path_to_save}")

    def crop_and_identify_script(self, image, bbox):
        """Crop a text area from the image and identify its script.

        The crop is the axis-aligned bounding rectangle of ``bbox``. It is
        written to a JPEG file because the identifier is called with a path.

        Args:
            image (PIL.Image): The full image.
            bbox (list): Corner points, each an ``[x, y]`` pair.

        Returns:
            tuple: ``(script_lang, cropped_path)`` - the identifier's result
            and the path of the saved crop.
        """
        x_coords = [point[0] for point in bbox]
        y_coords = [point[1] for point in bbox]

        x_min, y_min = min(x_coords), min(y_coords)
        x_max, y_max = max(x_coords), max(y_coords)

        cropped_image = image.crop((x_min, y_min, x_max, y_max))
        if cropped_image.mode not in self._JPEG_MODES:
            # JPEG cannot store alpha or palette data; saving would raise.
            cropped_image = cropped_image.convert("RGB")

        os.makedirs(self._CROP_DIR, exist_ok=True)
        cropped_path = f'{self._CROP_DIR}/temp_crop_{x_min}_{y_min}.jpg'
        cropped_image.save(cropped_path)

        if self.verbose:
            print("Identifying script for the cropped area...")
        # NOTE(review): the second argument is always "hindi"; its meaning is
        # defined by CLIPidentifier.identify - confirm against that class.
        script_lang = self.identifier.identify(cropped_path, "hindi")

        return script_lang, cropped_path

    def recognise(self, cropped_image_path, script_lang):
        """Recognize text in a cropped image area using the identified script.

        Args:
            cropped_image_path (str): Path of the saved crop.
            script_lang: Script identified for the crop.

        Returns:
            Whatever ``self.recogniser.recognise`` returns for the crop.
        """
        if self.verbose:
            print("Recognizing text in detected area...")
        # NOTE(review): script_lang is passed as both the first and the third
        # positional argument - confirm against PARseqrecogniser.recognise.
        recognized_text = self.recogniser.recognise(script_lang, cropped_image_path, script_lang, self.verbose)

        return recognized_text

    def ocr(self, image_path):
        """Detect text areas, identify each area's script and recognise its text.

        Args:
            image_path (str): Path of the image to process.

        Returns:
            list: One recognition result per detection whose identified script
            is truthy, in detection order.
        """
        recognized_words = []

        # The context manager closes the underlying file once all crops exist.
        with Image.open(image_path) as image:
            detections = self.detect(image_path)

            for bbox in detections:
                script_lang, cropped_path = self.crop_and_identify_script(image, bbox)

                # Skip areas for which no script was identified.
                if script_lang:
                    recognized_word = self.recognise(cropped_path, script_lang)
                    recognized_words.append(recognized_word)

                    if self.verbose:
                        print(f"Recognized word: {recognized_word}")

        return recognized_words
|
|
|
if __name__ == '__main__':
    # Demo entry point: run the whole pipeline on one sample image path and
    # print the recognised words.
    sample_image_path = 'test_images/image_141.jpg'

    # Use the GPU when one is available; otherwise fall back to the CPU so the
    # demo does not fail on machines without CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    ocr = OCR(device=device, verbose=False)

    recognised_words = ocr.ocr(sample_image_path)
    print(recognised_words)