Spaces:

Bhashini-IITJ
/

IndicPhotoOCR

Running

App Files Community

anikde committed 26 days ago

Commit

83c2afb

1 Parent(s): ba402b9

added line breaks in visualization

Browse files

Files changed (1) hide show

app.py +22 -43

app.py CHANGED Viewed

@@ -5,10 +5,10 @@ from IndicPhotoOCR.ocr import OCR  # Ensure OCR class is saved in a file named o
 from IndicPhotoOCR.theme import Seafoam
 from IndicPhotoOCR.utils.helper import detect_para
-# Initialize the OCR object for text detection and recognition
-ocr = OCR(device="cpu", verbose=False)
-def process_image(image):
     """
     Processes the uploaded image for text detection and recognition.
     - Detects bounding boxes in the image
@@ -17,6 +17,7 @@ def process_image(image):
     Parameters:
     image (PIL.Image): The input image to be processed.
     Returns:
     tuple: A PIL.Image with bounding boxes and a string of recognized text.
@@ -25,6 +26,9 @@ def process_image(image):
     # Save the input image temporarily
     image_path = "input_image.jpg"
     image.save(image_path)
     # Detect bounding boxes on the image using OCR
     detections = ocr.detect(image_path)
@@ -35,39 +39,11 @@ def process_image(image):
     # Load the annotated image with bounding boxes drawn
     output_image = Image.open("output_image.png")
-    # Initialize list to hold recognized text from each detected area
-    recognized_texts = {}
-    pil_image = Image.open(image_path)
-    # # Process each detected bounding box for script identification and text recognition
-    # for bbox in detections:
-    #     # Identify the script and crop the image to this region
-    #     script_lang, cropped_path = ocr.crop_and_identify_script(pil_image, bbox)
-    #     if script_lang:  # Only proceed if a script language is identified
-    #         # Recognize text in the cropped area
-    #         recognized_text = ocr.recognise(cropped_path, script_lang)
-    #         recognized_texts.append(recognized_text)
-    for id, bbox in enumerate(detections):
-        # Identify the script and crop the image to this region
-        script_lang, cropped_path = ocr.crop_and_identify_script(pil_image, bbox)
-        # Calculate bounding box coordinates
-        x1 = min([bbox[i][0] for i in range(len(bbox))])
-        y1 = min([bbox[i][1] for i in range(len(bbox))])
-        x2 = max([bbox[i][0] for i in range(len(bbox))])
-        y2 = max([bbox[i][1] for i in range(len(bbox))])
-        if script_lang:
-            recognized_text = ocr.recognise(cropped_path, script_lang)
-            recognized_texts[f"img_{id}"] = {"txt": recognized_text, "bbox": [x1, y1, x2, y2]}
-    # Combine recognized texts into a single string for display
-    # recognized_texts_combined = " ".join(recognized_texts)
-    string = detect_para(recognized_texts)
-    recognized_texts_combined = '\n'.join([' '.join(line) for line in string])
-    return output_image, recognized_texts_combined
 # Custom HTML for interface header with logos and alignment
 interface_html = """
@@ -111,13 +87,17 @@ examples = [
 title = "<h1 style='text-align: center;'>Developed by IITJ</h1>"
-# Set up the Gradio Interface with the defined function and customizations
-demo = gr.Interface(
     fn=process_image,
-    inputs=gr.Image(type="pil", image_mode="RGB"),
     outputs=[
-        gr.Image(type="pil", label="Detected Bounding Boxes"),
-        gr.Textbox(label="Recognized Text", elem_classes="custom-textbox")
     ],
     title="IndicPhotoOCR - Indic Scene Text Recogniser Toolkit",
     description=title+interface_html+links_html,
@@ -125,11 +105,10 @@ demo = gr.Interface(
     css=custom_css,
     examples=examples
 )
 # # Server setup and launch configuration
 # if __name__ == "__main__":
 #     server = "0.0.0.0"  # IP address for server
 #     port = 7866  # Port to run the server on
-#     demo.launch(server_name=server, server_port=port, share=True)
-demo.launch()

 from IndicPhotoOCR.theme import Seafoam
 from IndicPhotoOCR.utils.helper import detect_para
+# Possible values for identifier_lang
+VALID_IDENTIFIER_LANGS = ["hindi", "assamese", "bengali", "gujarati", "kannada", "malayalam","odia", "punjabi", "tamil", "telugu", "auto"]  # Add more as needed
+def process_image(image, identifier_lang):
     """
     Processes the uploaded image for text detection and recognition.
     - Detects bounding boxes in the image
     Parameters:
     image (PIL.Image): The input image to be processed.
+    identifier_lang (str): The script identifier model to use.
     Returns:
     tuple: A PIL.Image with bounding boxes and a string of recognized text.
     # Save the input image temporarily
     image_path = "input_image.jpg"
     image.save(image_path)
+    # Initialize OCR with the selected identifier language
+    ocr = OCR(identifier_lang=identifier_lang, verbose=False)
     # Detect bounding boxes on the image using OCR
     detections = ocr.detect(image_path)
     # Load the annotated image with bounding boxes drawn
     output_image = Image.open("output_image.png")
+    # Recognize text from the detected areas
+    recognized_text = ocr.ocr(image_path)
+    recognized_text = '\n'.join([' '.join(line) for line in recognized_text])
+    return output_image, recognized_text
 # Custom HTML for interface header with logos and alignment
 interface_html = """
 title = "<h1 style='text-align: center;'>Developed by IITJ</h1>"
+# Define the Gradio interface
+iface = gr.Interface(
     fn=process_image,
+    inputs=[
+        gr.Image(type="pil", image_mode="RGB"),
+        gr.Dropdown(VALID_IDENTIFIER_LANGS, label="Identifier Language", value="hindi")
+    ],
     outputs=[
+        gr.Image(type="pil", label="Processed Image"),
+        gr.Textbox(label="Recognized Text")
     ],
     title="IndicPhotoOCR - Indic Scene Text Recogniser Toolkit",
     description=title+interface_html+links_html,
     css=custom_css,
     examples=examples
 )
 # # Server setup and launch configuration
 # if __name__ == "__main__":
 #     server = "0.0.0.0"  # IP address for server
 #     port = 7866  # Port to run the server on
+#     iface.launch(server_name=server, server_port=port, share=False)
+iface.launch()