RufusRubin777 committed on
Commit
c8267f0
·
verified ·
1 Parent(s): 3dcf506

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -0
app.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spaces
3
+ from transformers import AutoModel, AutoTokenizer
4
+ import os
5
+ import base64
6
+ import io
7
+ import uuid
8
+ import time
9
+ import shutil
10
+ from pathlib import Path
11
+ import re
12
+ import easyocr
13
+
14
# OCR models, all loaded on CPU.
# GOT-OCR 2.0 (CPU build) is used for English; EasyOCR is used for Hindi.
# A CUDA deployment would instead load 'ucaslcl/GOT-OCR2_0' with
# device_map='cuda' and move the model with `.cuda()` after `.eval()`.
tokenizer = AutoTokenizer.from_pretrained(
    'RufusRubin777/GOT-OCR2_0_CPU',
    trust_remote_code=True,
    device_map='cpu',
)
model = AutoModel.from_pretrained(
    'RufusRubin777/GOT-OCR2_0_CPU',
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    device_map='cpu',
    use_safetensors=True,
).eval()  # inference only: switch the model to evaluation mode right away
# EasyOCR reader configured for the single language code 'hi' (Hindi).
reader = easyocr.Reader(['hi'])
21
+
22
# Working directories. UPLOAD_FOLDER receives a per-request copy of each
# uploaded image; RESULTS_FOLDER is created and swept by the cleanup routine
# but is not written to by the code visible in this file.
UPLOAD_FOLDER = "./uploads"
RESULTS_FOLDER = "./results"

# exist_ok=True replaces the separate os.path.exists() check, which could race
# with another process creating the same directory between check and create.
for folder in (UPLOAD_FOLDER, RESULTS_FOLDER):
    os.makedirs(folder, exist_ok=True)
28
+
29
def image_to_base64(image):
    """Encode *image* as PNG and return the PNG bytes as base64 text.

    Args:
        image: Object exposing ``save(fileobj, format=...)``; presumably a
            PIL image (TODO confirm, no caller is visible in this file).

    Returns:
        The base64 representation of the PNG data, decoded to ``str``.
    """
    png_buffer = io.BytesIO()
    image.save(png_buffer, format="PNG")
    encoded = base64.b64encode(png_buffer.getvalue())
    return encoded.decode()
33
+
34
def _collect_easyocr_text(detections):
    """Join the text field (index 1) of every EasyOCR detection, one per line."""
    return ''.join(entry[1] + '\n' for entry in detections)


# OCR processing of the image uploaded by the user.
# NOTE: the `@spaces.GPU` decorator is left disabled (it was commented out).
def run_GOT(image, language):
    """Extract text from an uploaded image with the OCR engine(s) for *language*.

    Args:
        image: Filesystem path of the uploaded image (the UI passes a
            ``filepath``); may be ``None`` when nothing was uploaded.
        language: ``"English"`` runs the GOT model, ``"Hindi"`` runs EasyOCR,
            and any other value runs both and concatenates the results
            (English first, then Hindi).

    Returns:
        The extracted text, or a single ``"Error: ..."`` string on failure.
        Always one string, because the click event feeds exactly one output
        component.
    """
    if not image:
        # Submitting without an upload would otherwise fail inside shutil.copy.
        return "Error: no image was uploaded"

    # GOT reads from a private copy so the temporary file can always be removed.
    image_path = os.path.join(UPLOAD_FOLDER, f"{uuid.uuid4()}.png")

    try:
        # Copy inside the try so a failed copy is reported like any other error.
        shutil.copy(image, image_path)

        if language == "English":
            return model.chat(tokenizer, image_path, ocr_type='ocr')

        if language == "Hindi":
            return _collect_easyocr_text(reader.readtext(image))

        english_extraction = model.chat(tokenizer, image_path, ocr_type='ocr')
        hindi_extract = _collect_easyocr_text(reader.readtext(image))
        return english_extraction + '\n' + hindi_extract
    except Exception as e:  # UI boundary: show the failure instead of crashing
        return f"Error: {str(e)}"
    finally:
        if os.path.exists(image_path):
            os.remove(image_path)
64
+
65
# Search functionality
def search_keyword(text, keyword):
    """Return an HTML fragment of *text* with each *keyword* hit highlighted.

    Matching is case-insensitive and literal: the keyword is regex-escaped,
    so characters such as ``(`` or ``.`` are searched for verbatim. Both the
    text and the matched pieces are HTML-escaped before being embedded,
    because they come from OCR output and free-form user input.

    Args:
        text: The text to search (``None`` is treated as empty).
        keyword: The phrase to highlight (``None`` is treated as empty).

    Returns:
        ``<h3>...</h3>`` markup with every occurrence wrapped in ``<mark>``.
        A centered "Keyword not found" heading is returned when there is no
        occurrence, and the unhighlighted text when the keyword is empty.
    """
    import html  # local import keeps this block self-contained

    text = text or ""
    keyword = keyword or ""

    if not keyword:
        # An empty pattern would match at every position and emit empty marks.
        return '<h3>' + html.escape(text) + '</h3>'

    # Search the original text with IGNORECASE rather than a lower-cased copy,
    # so the reported offsets always index into `text` itself.
    pattern = re.compile(re.escape(keyword), re.IGNORECASE)

    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append('<mark>' + html.escape(match.group(0)) + '</mark>')
        cursor = match.end()

    if not pieces:
        return '<h3 style="text-align: center;">' + "Keyword not found" + '</h3>'

    pieces.append(html.escape(text[cursor:]))
    return '<h3>' + ''.join(pieces) + '</h3>'
85
+
86
def cleanup_old_files(folders=None, max_age_seconds=3600):
    """Delete stale files from the working folders.

    Only regular files directly inside each folder are considered;
    sub-directories are left alone, since ``Path.unlink`` cannot remove them.
    A file that disappears while the sweep is running is skipped.

    Args:
        folders: Iterable of directory paths to sweep. Defaults to the
            module's ``UPLOAD_FOLDER`` and ``RESULTS_FOLDER``.
        max_age_seconds: Files whose modification time is older than this many
            seconds are removed. Defaults to 3600 (one hour).
    """
    if folders is None:
        folders = (UPLOAD_FOLDER, RESULTS_FOLDER)

    current_time = time.time()
    for folder in folders:
        for file_path in Path(folder).glob('*'):
            try:
                if not file_path.is_file():
                    continue
                if current_time - file_path.stat().st_mtime > max_age_seconds:
                    file_path.unlink()
            except FileNotFoundError:
                # Removed by someone else between listing and deletion.
                continue
92
+
93
# Static HTML fragments rendered by the Gradio UI.

# Page heading and a short description of the two OCR engines.
title_html = """
<h1> <span class="gradient-text" id="text">Scan Master</span></h1>
<p>Scan Master uses General OCR Theory (GOT), a 580M end-to-end OCR 2.0 model for English optical character recognition and EASYOCR for Hindi optical character recognition. It supports plain text ocr.</p>
"""

# Links to the upstream GOT-OCR 2.0 model, paper and repository.
# The emoji were mis-decoded (mojibake) and are restored to real characters.
acknowledgement_html = """
<h3>Acknowledgement</h3>
<a href="https://huggingface.co/ucaslcl/GOT-OCR2_0">[😊 Hugging Face]</a>
<a href="https://arxiv.org/abs/2409.01704">[📜 Paper]</a>
<a href="https://github.com/Ucas-HaoranWei/GOT-OCR2.0/">[🌟 GitHub]</a>
"""

# Author credit shown at the bottom of the page.
aboutme_html = """
<h3>About Me</h3>
<p>Name : Satvik Chandrakar</p>
<a href="https://github.com/Satvik-ai">[🌟 GitHub]</a> """
109
+
110
+
111
# Gradio front end for Scan Master: an OCR panel on top and a keyword-search
# panel beneath it, followed by the acknowledgement and author sections.
with gr.Blocks() as scan_master_web_app:
    gr.HTML(title_html)
    gr.Markdown(
        '\nYou need to upload your image below and choose appropriate language, '
        'then click "Submit" to run the model. '
        'More characters will result in longer wait times.'
    )

    # OCR panel: inputs on the left, extracted text on the right.
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="filepath", label="Upload your image")
            gr.Markdown(
                "If your image contains only English text, then choose English option in the language. "
                "If it contains only Hindi text, then choose Hindi option in the language. "
                "If it contains both the language, then choose the third option."
            )
            lang_dropdown = gr.Dropdown(
                choices=["English", "Hindi", "English + Hindi"],
                label="Choose language",
                value="English",
            )
            submit_button = gr.Button("Submit")

        with gr.Column():
            ocr_result = gr.Textbox(label="GOT output")

    # Search panel: keyword entry on the left, highlighted result on the right.
    with gr.Row():
        with gr.Column():
            keyword = gr.Textbox(label="Search a keyword in the extracted text")
            search_button = gr.Button("Search")

        with gr.Column():
            search_result = gr.HTML(label="Search result")

    gr.HTML(acknowledgement_html)
    gr.HTML(aboutme_html)

    # Event wiring: "Submit" runs OCR, "Search" highlights the keyword in the
    # text currently shown in the OCR output box.
    submit_button.click(
        run_GOT,
        inputs=[image_input, lang_dropdown],
        outputs=[ocr_result],
    )
    search_button.click(
        search_keyword,
        inputs=[ocr_result, keyword],
        outputs=[search_result],
    )
157
+
158
def main():
    """Sweep stale working files, then start the Gradio application."""
    cleanup_old_files()
    scan_master_web_app.launch()


if __name__ == "__main__":
    main()