anikde committed on
Commit
83c2afb
·
1 Parent(s): ba402b9

added line breaks in visualization

Browse files
Files changed (1) hide show
  1. app.py +22 -43
app.py CHANGED
@@ -5,10 +5,10 @@ from IndicPhotoOCR.ocr import OCR # Ensure OCR class is saved in a file named o
5
  from IndicPhotoOCR.theme import Seafoam
6
  from IndicPhotoOCR.utils.helper import detect_para
7
 
8
- # Initialize the OCR object for text detection and recognition
9
- ocr = OCR(device="cpu", verbose=False)
10
 
11
- def process_image(image):
12
  """
13
  Processes the uploaded image for text detection and recognition.
14
  - Detects bounding boxes in the image
@@ -17,6 +17,7 @@ def process_image(image):
17
 
18
  Parameters:
19
  image (PIL.Image): The input image to be processed.
 
20
 
21
  Returns:
22
  tuple: A PIL.Image with bounding boxes and a string of recognized text.
@@ -25,6 +26,9 @@ def process_image(image):
25
  # Save the input image temporarily
26
  image_path = "input_image.jpg"
27
  image.save(image_path)
 
 
 
28
 
29
  # Detect bounding boxes on the image using OCR
30
  detections = ocr.detect(image_path)
@@ -35,39 +39,11 @@ def process_image(image):
35
  # Load the annotated image with bounding boxes drawn
36
  output_image = Image.open("output_image.png")
37
 
38
- # Initialize list to hold recognized text from each detected area
39
- recognized_texts = {}
40
- pil_image = Image.open(image_path)
41
 
42
- # # Process each detected bounding box for script identification and text recognition
43
- # for bbox in detections:
44
- # # Identify the script and crop the image to this region
45
- # script_lang, cropped_path = ocr.crop_and_identify_script(pil_image, bbox)
46
-
47
- # if script_lang: # Only proceed if a script language is identified
48
- # # Recognize text in the cropped area
49
- # recognized_text = ocr.recognise(cropped_path, script_lang)
50
- # recognized_texts.append(recognized_text)
51
- for id, bbox in enumerate(detections):
52
- # Identify the script and crop the image to this region
53
- script_lang, cropped_path = ocr.crop_and_identify_script(pil_image, bbox)
54
-
55
- # Calculate bounding box coordinates
56
- x1 = min([bbox[i][0] for i in range(len(bbox))])
57
- y1 = min([bbox[i][1] for i in range(len(bbox))])
58
- x2 = max([bbox[i][0] for i in range(len(bbox))])
59
- y2 = max([bbox[i][1] for i in range(len(bbox))])
60
-
61
- if script_lang:
62
- recognized_text = ocr.recognise(cropped_path, script_lang)
63
- recognized_texts[f"img_{id}"] = {"txt": recognized_text, "bbox": [x1, y1, x2, y2]}
64
-
65
- # Combine recognized texts into a single string for display
66
- # recognized_texts_combined = " ".join(recognized_texts)
67
- string = detect_para(recognized_texts)
68
- recognized_texts_combined = '\n'.join([' '.join(line) for line in string])
69
-
70
- return output_image, recognized_texts_combined
71
 
72
  # Custom HTML for interface header with logos and alignment
73
  interface_html = """
@@ -111,13 +87,17 @@ examples = [
111
 
112
  title = "<h1 style='text-align: center;'>Developed by IITJ</h1>"
113
 
114
- # Set up the Gradio Interface with the defined function and customizations
115
- demo = gr.Interface(
 
116
  fn=process_image,
117
- inputs=gr.Image(type="pil", image_mode="RGB"),
 
 
 
118
  outputs=[
119
- gr.Image(type="pil", label="Detected Bounding Boxes"),
120
- gr.Textbox(label="Recognized Text", elem_classes="custom-textbox")
121
  ],
122
  title="IndicPhotoOCR - Indic Scene Text Recogniser Toolkit",
123
  description=title+interface_html+links_html,
@@ -125,11 +105,10 @@ demo = gr.Interface(
125
  css=custom_css,
126
  examples=examples
127
  )
128
-
129
  # # Server setup and launch configuration
130
  # if __name__ == "__main__":
131
  # server = "0.0.0.0" # IP address for server
132
  # port = 7866 # Port to run the server on
133
- # demo.launch(server_name=server, server_port=port, share=True)
134
 
135
- demo.launch()
 
5
  from IndicPhotoOCR.theme import Seafoam
6
  from IndicPhotoOCR.utils.helper import detect_para
7
 
8
+ # Possible values for identifier_lang
9
+ VALID_IDENTIFIER_LANGS = ["hindi", "assamese", "bengali", "gujarati", "kannada", "malayalam","odia", "punjabi", "tamil", "telugu", "auto"] # Add more as needed
10
 
11
+ def process_image(image, identifier_lang):
12
  """
13
  Processes the uploaded image for text detection and recognition.
14
  - Detects bounding boxes in the image
 
17
 
18
  Parameters:
19
  image (PIL.Image): The input image to be processed.
20
+ identifier_lang (str): The script identifier model to use.
21
 
22
  Returns:
23
  tuple: A PIL.Image with bounding boxes and a string of recognized text.
 
26
  # Save the input image temporarily
27
  image_path = "input_image.jpg"
28
  image.save(image_path)
29
+
30
+ # Initialize OCR with the selected identifier language
31
+ ocr = OCR(identifier_lang=identifier_lang, verbose=False)
32
 
33
  # Detect bounding boxes on the image using OCR
34
  detections = ocr.detect(image_path)
 
39
  # Load the annotated image with bounding boxes drawn
40
  output_image = Image.open("output_image.png")
41
 
42
+ # Recognize text from the detected areas
43
+ recognized_text = ocr.ocr(image_path)
44
+ recognized_text = '\n'.join([' '.join(line) for line in recognized_text])
45
 
46
+ return output_image, recognized_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  # Custom HTML for interface header with logos and alignment
49
  interface_html = """
 
87
 
88
  title = "<h1 style='text-align: center;'>Developed by IITJ</h1>"
89
 
90
+
91
+ # Define the Gradio interface
92
+ iface = gr.Interface(
93
  fn=process_image,
94
+ inputs=[
95
+ gr.Image(type="pil", image_mode="RGB"),
96
+ gr.Dropdown(VALID_IDENTIFIER_LANGS, label="Identifier Language", value="hindi")
97
+ ],
98
  outputs=[
99
+ gr.Image(type="pil", label="Processed Image"),
100
+ gr.Textbox(label="Recognized Text")
101
  ],
102
  title="IndicPhotoOCR - Indic Scene Text Recogniser Toolkit",
103
  description=title+interface_html+links_html,
 
105
  css=custom_css,
106
  examples=examples
107
  )
 
108
  # # Server setup and launch configuration
109
  # if __name__ == "__main__":
110
  # server = "0.0.0.0" # IP address for server
111
  # port = 7866 # Port to run the server on
112
+ # iface.launch(server_name=server, server_port=port, share=False)
113
 
114
+ iface.launch()