Spaces:
Running
Running
added line breaks in visualization
Browse files
app.py
CHANGED
@@ -5,10 +5,10 @@ from IndicPhotoOCR.ocr import OCR # Ensure OCR class is saved in a file named o
|
|
5 |
from IndicPhotoOCR.theme import Seafoam
|
6 |
from IndicPhotoOCR.utils.helper import detect_para
|
7 |
|
8 |
-
#
|
9 |
-
|
10 |
|
11 |
-
def process_image(image):
|
12 |
"""
|
13 |
Processes the uploaded image for text detection and recognition.
|
14 |
- Detects bounding boxes in the image
|
@@ -17,6 +17,7 @@ def process_image(image):
|
|
17 |
|
18 |
Parameters:
|
19 |
image (PIL.Image): The input image to be processed.
|
|
|
20 |
|
21 |
Returns:
|
22 |
tuple: A PIL.Image with bounding boxes and a string of recognized text.
|
@@ -25,6 +26,9 @@ def process_image(image):
|
|
25 |
# Save the input image temporarily
|
26 |
image_path = "input_image.jpg"
|
27 |
image.save(image_path)
|
|
|
|
|
|
|
28 |
|
29 |
# Detect bounding boxes on the image using OCR
|
30 |
detections = ocr.detect(image_path)
|
@@ -35,39 +39,11 @@ def process_image(image):
|
|
35 |
# Load the annotated image with bounding boxes drawn
|
36 |
output_image = Image.open("output_image.png")
|
37 |
|
38 |
-
#
|
39 |
-
|
40 |
-
|
41 |
|
42 |
-
|
43 |
-
# for bbox in detections:
|
44 |
-
# # Identify the script and crop the image to this region
|
45 |
-
# script_lang, cropped_path = ocr.crop_and_identify_script(pil_image, bbox)
|
46 |
-
|
47 |
-
# if script_lang: # Only proceed if a script language is identified
|
48 |
-
# # Recognize text in the cropped area
|
49 |
-
# recognized_text = ocr.recognise(cropped_path, script_lang)
|
50 |
-
# recognized_texts.append(recognized_text)
|
51 |
-
for id, bbox in enumerate(detections):
|
52 |
-
# Identify the script and crop the image to this region
|
53 |
-
script_lang, cropped_path = ocr.crop_and_identify_script(pil_image, bbox)
|
54 |
-
|
55 |
-
# Calculate bounding box coordinates
|
56 |
-
x1 = min([bbox[i][0] for i in range(len(bbox))])
|
57 |
-
y1 = min([bbox[i][1] for i in range(len(bbox))])
|
58 |
-
x2 = max([bbox[i][0] for i in range(len(bbox))])
|
59 |
-
y2 = max([bbox[i][1] for i in range(len(bbox))])
|
60 |
-
|
61 |
-
if script_lang:
|
62 |
-
recognized_text = ocr.recognise(cropped_path, script_lang)
|
63 |
-
recognized_texts[f"img_{id}"] = {"txt": recognized_text, "bbox": [x1, y1, x2, y2]}
|
64 |
-
|
65 |
-
# Combine recognized texts into a single string for display
|
66 |
-
# recognized_texts_combined = " ".join(recognized_texts)
|
67 |
-
string = detect_para(recognized_texts)
|
68 |
-
recognized_texts_combined = '\n'.join([' '.join(line) for line in string])
|
69 |
-
|
70 |
-
return output_image, recognized_texts_combined
|
71 |
|
72 |
# Custom HTML for interface header with logos and alignment
|
73 |
interface_html = """
|
@@ -111,13 +87,17 @@ examples = [
|
|
111 |
|
112 |
title = "<h1 style='text-align: center;'>Developed by IITJ</h1>"
|
113 |
|
114 |
-
|
115 |
-
|
|
|
116 |
fn=process_image,
|
117 |
-
inputs=
|
|
|
|
|
|
|
118 |
outputs=[
|
119 |
-
gr.Image(type="pil", label="
|
120 |
-
gr.Textbox(label="Recognized Text"
|
121 |
],
|
122 |
title="IndicPhotoOCR - Indic Scene Text Recogniser Toolkit",
|
123 |
description=title+interface_html+links_html,
|
@@ -125,11 +105,10 @@ demo = gr.Interface(
|
|
125 |
css=custom_css,
|
126 |
examples=examples
|
127 |
)
|
128 |
-
|
129 |
# # Server setup and launch configuration
|
130 |
# if __name__ == "__main__":
|
131 |
# server = "0.0.0.0" # IP address for server
|
132 |
# port = 7866 # Port to run the server on
|
133 |
-
#
|
134 |
|
135 |
-
|
|
|
5 |
from IndicPhotoOCR.theme import Seafoam
|
6 |
from IndicPhotoOCR.utils.helper import detect_para
|
7 |
|
8 |
+
# Possible values for identifier_lang
|
9 |
+
VALID_IDENTIFIER_LANGS = [
    "hindi",
    "assamese",
    "bengali",
    "gujarati",
    "kannada",
    "malayalam",
    "odia",
    "punjabi",
    "tamil",
    "telugu",
    "auto",
    # Add more as needed
]
|
10 |
|
11 |
+
def process_image(image, identifier_lang):
|
12 |
"""
|
13 |
Processes the uploaded image for text detection and recognition.
|
14 |
- Detects bounding boxes in the image
|
|
|
17 |
|
18 |
Parameters:
|
19 |
image (PIL.Image): The input image to be processed.
|
20 |
+
identifier_lang (str): The script identifier model to use.
|
21 |
|
22 |
Returns:
|
23 |
tuple: A PIL.Image with bounding boxes and a string of recognized text.
|
|
|
26 |
# Save the input image temporarily
|
27 |
image_path = "input_image.jpg"
|
28 |
image.save(image_path)
|
29 |
+
|
30 |
+
# Initialize OCR with the selected identifier language
|
31 |
+
ocr = OCR(identifier_lang=identifier_lang, verbose=False)
|
32 |
|
33 |
# Detect bounding boxes on the image using OCR
|
34 |
detections = ocr.detect(image_path)
|
|
|
39 |
# Load the annotated image with bounding boxes drawn
|
40 |
output_image = Image.open("output_image.png")
|
41 |
|
42 |
+
# Run OCR on the saved image, then join each recognized line's words with spaces and separate lines with newlines
|
43 |
+
recognized_text = ocr.ocr(image_path)
|
44 |
+
recognized_text = '\n'.join([' '.join(line) for line in recognized_text])
|
45 |
|
46 |
+
return output_image, recognized_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
# Custom HTML for interface header with logos and alignment
|
49 |
interface_html = """
|
|
|
87 |
|
88 |
title = "<h1 style='text-align: center;'>Developed by IITJ</h1>"
|
89 |
|
90 |
+
|
91 |
+
# Define the Gradio interface
|
92 |
+
iface = gr.Interface(
|
93 |
fn=process_image,
|
94 |
+
inputs=[
|
95 |
+
gr.Image(type="pil", image_mode="RGB"),
|
96 |
+
gr.Dropdown(VALID_IDENTIFIER_LANGS, label="Identifier Language", value="hindi")
|
97 |
+
],
|
98 |
outputs=[
|
99 |
+
gr.Image(type="pil", label="Processed Image"),
|
100 |
+
gr.Textbox(label="Recognized Text")
|
101 |
],
|
102 |
title="IndicPhotoOCR - Indic Scene Text Recogniser Toolkit",
|
103 |
description=title+interface_html+links_html,
|
|
|
105 |
css=custom_css,
|
106 |
examples=examples
|
107 |
)
|
|
|
108 |
# # Server setup and launch configuration
|
109 |
# if __name__ == "__main__":
|
110 |
# server = "0.0.0.0" # IP address for server
|
111 |
# port = 7866 # Port to run the server on
|
112 |
+
# iface.launch(server_name=server, server_port=port, share=False)
|
113 |
|
114 |
+
iface.launch()
|