yashbyname committed on
Commit
4322a12
·
verified ·
1 Parent(s): 0a1dda6

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +108 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Final WebApp using Gradio.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1a5-p_KZd9Hk0tsKZ_JoqoYeRD3XOQtRK
8
+
9
+ # **Task 2 - Web App Development with Gradio**
10
+
11
+ ## **Gradio Interface for OCR Application**
12
+
13
+ In this notebook, I created an interactive web application using Gradio to facilitate the OCR process and allow users to perform keyword searches on the extracted text.
14
+ """
15
+
16
# The Colab notebook installed its dependencies with IPython shell magics
# (`!pip install gradio`, `!pip install -q tiktoken verovio`,
# `!pip install pytesseract`). Those lines are not valid Python in a plain
# .py file, so the packages must be installed before this script is run,
# e.g. via requirements.txt.
# NOTE(review): tiktoken and verovio were installed in the notebook but are
# not listed in requirements.txt -- confirm whether they need to be added.
19
+
20
+ """**Library Imports**:
21
+
22
+ - In addition to libraries from the first notebook, I imported `gradio` to build the user interface for the application.
23
+ """
24
+
25
+ import cv2
26
+ from pytesseract import pytesseract
27
+ from transformers import AutoModel, AutoTokenizer
28
+ import gradio as gr
29
+
30
+ """**Model and Tesseract Configuration**:
31
+ - Similar to the first notebook, I loaded the GOT-OCR2.0 model for English text and configured Tesseract for Hindi text.
32
+ """
33
+
34
# English OCR: tokenizer and model are both loaded from the same hub
# checkpoint. trust_remote_code=True lets transformers execute the model
# code shipped with that checkpoint.
tokenizer_eng = AutoTokenizer.from_pretrained(
    'ucaslcl/GOT-OCR2_0',
    trust_remote_code=True,
)
model_eng = AutoModel.from_pretrained(
    'ucaslcl/GOT-OCR2_0',
    trust_remote_code=True,
)
# Switch to inference mode and move the weights to the GPU
# (this requires a CUDA device to be available).
model_eng = model_eng.eval().cuda()

# Hindi OCR: point pytesseract at the Tesseract binary and select the
# Hindi language data ("-l hin") with OCR engine mode 3 / page segmentation
# mode 6.
pytesseract.tesseract_cmd = '/usr/bin/tesseract'
tesseract_config = '--oem 3 --psm 6 -l hin'
39
+
40
+ """**Perform OCR Function**:
41
+ - The `perform_ocr` function was adapted to handle image input from the Gradio interface. This function processes the uploaded image based on the selected language and returns the extracted English and Hindi texts.
42
+ """
43
+
44
def perform_ocr(img, language):
    """Run OCR on an uploaded image for the selected language(s).

    Args:
        img: The uploaded image. It must provide a ``save(path)`` method
            (the Gradio image input in this file is configured with
            ``type="pil"``). ``None`` is accepted and yields empty results.
        language: ``"English"``, ``"Hindi"`` or ``"Both"``. Any other value
            (including ``None``) selects no OCR engine.

    Returns:
        A ``(english_text, hindi_text)`` tuple. Each element is an empty
        string when the corresponding language was not requested.
    """
    import os
    import tempfile

    wants_english = language in ("English", "Both")
    wants_hindi = language in ("Hindi", "Both")

    # Nothing to do without an image or without a recognised language.
    if img is None or not (wants_english or wants_hindi):
        return "", ""

    res_eng = ""
    res_hin = ""

    # Both OCR back ends read the image from disk. A per-call temporary
    # directory keeps concurrent requests from overwriting each other's
    # upload (the previous fixed /tmp path was shared by every request)
    # and removes the file once OCR is finished.
    with tempfile.TemporaryDirectory() as tmp_dir:
        img_path = os.path.join(tmp_dir, "uploaded_image.png")
        img.save(img_path)

        if wants_english:
            res_eng = model_eng.chat(tokenizer_eng, img_path, ocr_type='ocr')

        if wants_hindi:
            img_cv = cv2.imread(img_path)
            res_hin = pytesseract.image_to_string(img_cv, config=tesseract_config)

    return res_eng, res_hin
60
+
61
+ """**Keyword Search Functionality**:
62
+ - A new function, `ocr_and_search`, was implemented to allow users to search for keywords within the extracted text. It checks for keyword matches in both English and Hindi texts, providing appropriate feedback.
63
+ """
64
+
65
def ocr_and_search(image, language, keyword):
    """Run OCR on ``image`` and report whether ``keyword`` occurs in the text.

    Args:
        image: The uploaded image, forwarded to ``perform_ocr``. ``None``
            (no image supplied) is handled without running OCR.
        language: ``"English"``, ``"Hindi"`` or ``"Both"``.
        keyword: Text to look for in the OCR output. Leading and trailing
            whitespace is ignored; an empty keyword performs no search.

    Returns:
        A ``(english_output, hindi_output, search_output)`` tuple of display
        strings for the three result text boxes.
    """
    if image is None:
        # Submitting without an image would otherwise fail inside
        # perform_ocr when it tries to save the image to disk.
        return (
            "No English text extracted.",
            "No Hindi text extracted.",
            "Please upload an image first.",
        )

    english_text, hindi_text = perform_ocr(image, language)

    extracted_english = f"Extracted English Text:\n{english_text}" if english_text else "No English text extracted."
    extracted_hindi = f"Extracted Hindi Text:\n{hindi_text}" if hindi_text else "No Hindi text extracted."

    # Search for the keyword in the extracted text. The keyword is stripped
    # so that a whitespace-only entry is not reported as a match, and
    # casefold() gives a case-insensitive comparison.
    search_results = []
    needle = (keyword or "").strip()
    if needle:
        folded = needle.casefold()
        # Text for a language that was not selected is empty, so it can
        # never contain a non-empty keyword.
        for label, text in (("English", english_text), ("Hindi", hindi_text)):
            if text and folded in text.casefold():
                search_results.append(f"Keyword '{needle}' found in {label} text.")

    search_output = "\n".join(search_results) if search_results else "No matches found."

    return extracted_english, extracted_hindi, search_output
85
+
86
+ """**Gradio Interface Setup**:
87
+ - The user interface is constructed using Gradio's Blocks API, allowing users to upload images, select the desired language for OCR, and enter a keyword for search.
88
+ - The outputs are displayed in separate text boxes for extracted English text, extracted Hindi text, and search results.
89
+ """
90
+
91
# Gradio UI: components are declared in the order they appear on the page.
with gr.Blocks() as app:
    gr.Markdown("### OCR Application")

    # Inputs: the image to read, the OCR language(s) and an optional keyword.
    image_input = gr.Image(type="pil", label="Upload Image")
    language_selection = gr.Radio(
        choices=["English", "Hindi", "Both"],
        label="Select Language",
    )
    keyword_input = gr.Textbox(
        placeholder="Enter keyword to search",
        label="Keyword Search",
    )

    # Read-only outputs, one per value returned by ocr_and_search.
    output_english = gr.Textbox(label="Extracted English Text", interactive=False)
    output_hindi = gr.Textbox(label="Extracted Hindi Text", interactive=False)
    output_search = gr.Textbox(label="Search Results", interactive=False)

    # Clicking Submit runs OCR plus the keyword search and fills the outputs.
    submit_button = gr.Button("Submit")
    submit_button.click(
        fn=ocr_and_search,
        inputs=[image_input, language_selection, keyword_input],
        outputs=[output_english, output_hindi, output_search],
    )
103
+
104
+ """**Application Launch**:
105
+ - Finally, the Gradio app is launched, making the OCR application accessible for user interaction. This enables real-time testing and usability of the OCR functionalities implemented in the previous notebook.
106
+ """
107
+
108
# Start the web server only when this file is executed as a script, so that
# importing the module (e.g. to reuse `app` or the OCR helpers) does not
# launch the app as a side effect.
if __name__ == "__main__":
    app.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ opencv-python
3
+ pytesseract
4
+ transformers
5
+ langdetect