import gradio as gr from PIL import Image import os from IndicPhotoOCR.ocr import OCR # Ensure OCR class is saved in a file named ocr.py from IndicPhotoOCR.theme import Seafoam # Initialize the OCR object for text detection and recognition ocr = OCR(device="cpu", verbose=False) def process_image(image): """ Processes the uploaded image for text detection and recognition. - Detects bounding boxes in the image - Draws bounding boxes on the image and identifies script in each detected area - Recognizes text in each cropped region and returns the annotated image and recognized text Parameters: image (PIL.Image): The input image to be processed. Returns: tuple: A PIL.Image with bounding boxes and a string of recognized text. """ # Save the input image temporarily image_path = "input_image.jpg" image.save(image_path) # Detect bounding boxes on the image using OCR detections = ocr.detect(image_path) # Draw bounding boxes on the image and save it as output ocr.visualize_detection(image_path, detections, save_path="output_image.png") # Load the annotated image with bounding boxes drawn output_image = Image.open("output_image.png") # Initialize list to hold recognized text from each detected area recognized_texts = [] pil_image = Image.open(image_path) # Process each detected bounding box for script identification and text recognition for bbox in detections: # Identify the script and crop the image to this region script_lang, cropped_path = ocr.crop_and_identify_script(pil_image, bbox) if script_lang: # Only proceed if a script language is identified # Recognize text in the cropped area recognized_text = ocr.recognise(cropped_path, script_lang) recognized_texts.append(recognized_text) # Combine recognized texts into a single string for display recognized_texts_combined = " ".join(recognized_texts) return output_image, recognized_texts_combined # Custom HTML for interface header with logos and alignment interface_html = """