tdnathmlenthusiast
committed on
Commit
•
a5d0684
1
Parent(s):
728ab01
required files for OCR tool
Browse files- .gitattributes +35 -35
- README.md +14 -12
- app.py +111 -0
- requirements.txt +11 -0
.gitattributes
CHANGED
@@ -1,35 +1,35 @@
|
|
1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,12 +1,14 @@
|
|
1 |
-
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 4.44.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Ocr Gradio(Developed by Tirtha Debnath, Bangladesh)
|
3 |
+
emoji: 🐢
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: red
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 4.44.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: mit
|
11 |
+
short_description: IIT Roorkee
|
12 |
+
---
|
13 |
+
|
14 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# import all required libraries after doing research
|
2 |
+
import gradio as gr
|
3 |
+
from PIL import Image
|
4 |
+
from surya.ocr import run_ocr # Surya OCR, used here for the Hindi text
|
5 |
+
from surya.model.detection.model import load_model as load_det_model, load_processor as load_det_processor
|
6 |
+
from surya.model.recognition.model import load_model as load_rec_model
|
7 |
+
from surya.model.recognition.processor import load_processor as load_rec_processor
|
8 |
+
import re # recognized hindi encoded pattern
|
9 |
+
from transformers import AutoModel, AutoTokenizer
|
10 |
+
import torch
|
11 |
+
import tempfile
|
12 |
+
import os
|
13 |
+
|
14 |
+
# Run on the GPU when one is available, otherwise fall back to the CPU.
|
15 |
+
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
16 |
+
print(device)
|
17 |
+
# Hugging Face Hub id of the GOT OCR checkpoint loaded below.
|
18 |
+
got_model_name = 'tdnathmlenthusiast/got-ocr-2.0-modified-hindi-version'
|
19 |
+
|
20 |
+
det_processor, det_model = load_det_processor(), load_det_model()
|
21 |
+
det_model.to(device)
|
22 |
+
rec_model, rec_processor = load_rec_model(), load_rec_processor()
|
23 |
+
rec_model.to(device)
|
24 |
+
|
25 |
+
# Load the tokenizer and model for GOT OCR; trust_remote_code=True runs the model code shipped with the checkpoint.
|
26 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
27 |
+
got_model_name, trust_remote_code=True, device_map=device)
|
28 |
+
got_model = AutoModel.from_pretrained(
|
29 |
+
got_model_name, trust_remote_code=True, low_cpu_mem_usage=True, device_map=device, use_safetensors=True)
|
30 |
+
got_model = got_model.eval().to(device)  # inference only: switch to eval mode on the chosen device
|
31 |
+
|
32 |
+
|
33 |
+
# function to extract only the Hindi (Devanagari) text
|
34 |
+
def extract_hindi(text):
    """Return the Devanagari runs found in *text*, joined by single spaces.

    Anything outside the Devanagari Unicode block (U+0900-U+097F), including
    whitespace and Latin characters, acts as a separator and is dropped.
    """
    return ' '.join(re.findall(r'[\u0900-\u097F]+', text))
|
39 |
+
|
40 |
+
|
41 |
+
def process_image(image):
    """Run Surya (Hindi) and GOT (English) OCR on an image and return HTML.

    Args:
        image: PIL image supplied by the Gradio image input, or ``None``
            when the button is clicked before anything was uploaded.

    Returns:
        An HTML string containing the Hindi text recognised by Surya
        followed by the GOT OCR result, or a short prompt when ``image``
        is ``None``.
    """
    if image is None:
        return "<p>Please upload an image first.</p>"

    # Normalise the mode first so both engines see the same RGB pixels and
    # the PNG save below cannot fail on a mode PNG does not support.
    image = image.convert("RGB")

    langs = ["hi"]
    surya_predictions = run_ocr(
        [image], [langs], det_model, det_processor, rec_model, rec_processor)

    # Read the recognised lines from the result object itself. Scraping
    # str(prediction) with a regex loses any line whose text contains an
    # apostrophe and breaks whenever the repr format changes.
    surya_lines = [line.text for line in surya_predictions[0].text_lines]
    surya_text = extract_hindi('\n'.join(surya_lines))

    # got_model.chat is given a file path, so the image goes through a
    # temporary PNG. The handle is closed before saving so the path can be
    # reopened on every platform.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
    temp_file_path = temp_file.name
    temp_file.close()
    try:
        image.save(temp_file_path)
        got_res = got_model.chat(tokenizer, temp_file_path, ocr_type='ocr')
    finally:
        # Always remove the temporary file, even when OCR raises.
        try:
            os.remove(temp_file_path)
        except FileNotFoundError:
            pass

    # NOTE(review): the OCR text is inserted into HTML unescaped, exactly as
    # before; consider html.escape if '<' or '&' in results must render
    # literally.
    return f"<h2> Hindi Text (Surya OCR) </h2> <br>{surya_text}<br> <br> <h2> English Text (GOT OCR) </h2> <br> {got_res}"
|
65 |
+
|
66 |
+
# highlight every occurrence of a search query in the extracted text
|
67 |
+
|
68 |
+
|
69 |
+
def highlight_search(text, query):
    """Wrap each case-insensitive occurrence of *query* in a highlight span.

    *text* may contain HTML. Only the text between tags is searched, so a
    query such as ``h2`` or ``span`` cannot corrupt the markup. Highlights
    left by an earlier call are removed first, so repeated searches do not
    accumulate or nest.

    Args:
        text: The (possibly HTML) string to search; may be empty or ``None``.
        query: The literal string to look for; regex metacharacters are
            matched literally.

    Returns:
        *text* with every match wrapped in a lime-green ``<span>``, or
        *text* unchanged when either argument is empty or ``None``.
    """
    if not query or not text:
        return text

    open_tag = "<span style='background-color: limegreen;'>"
    close_tag = "</span>"

    # Undo highlights produced by a previous search before applying new ones.
    text = re.sub(
        re.escape(open_tag) + r"(.*?)" + re.escape(close_tag),
        r"\1", text, flags=re.DOTALL)

    needle = re.compile(re.escape(query), re.IGNORECASE)

    def mark(match):
        return f"{open_tag}{match.group(0)}{close_tag}"

    # Splitting on a capturing group keeps the tags in the result:
    # even-indexed pieces are plain text, odd-indexed pieces are tags.
    pieces = re.split(r"(</?[A-Za-z][^<>]*>)", text)
    return ''.join(
        piece if index % 2 else needle.sub(mark, piece)
        for index, piece in enumerate(pieces))
|
76 |
+
|
77 |
+
|
78 |
+
with gr.Blocks() as ocr_interface:  # build the Gradio UI for the OCR app
|
79 |
+
gr.Markdown("# OCR Application for Hindi & English")
|
80 |
+
gr.Markdown(
|
81 |
+
"Upload an image for OCR processing.(Takes a little bit time or sometimes a lot due to the limitation of the resources)")
|
82 |
+
|
83 |
+
with gr.Row():
|
84 |
+
with gr.Column():
|
85 |
+
image_input = gr.Image(  # type="pil": the callback receives the upload as a PIL image
|
86 |
+
type="pil", label="Upload an Image(Hindi/English/Hindi+English)")
|
87 |
+
run_ocr_button = gr.Button("Run OCR")
|
88 |
+
|
89 |
+
with gr.Column():
|
90 |
+
output_text = gr.HTML(label="Extracted Text in Hindi & English")  # shows the HTML string returned by the callbacks
|
91 |
+
query_input = gr.Textbox(
|
92 |
+
label="Search in extracted text", placeholder="Type to search...")
|
93 |
+
search_button = gr.Button("Search")
|
94 |
+
|
95 |
+
def process_and_display(image):  # "Run OCR" click handler: delegates to process_image
|
96 |
+
combined_text = process_image(image)
|
97 |
+
return combined_text
|
98 |
+
|
99 |
+
def search_text(combined_text, query):  # "Search" click handler: delegates to highlight_search
|
100 |
+
highlighted = highlight_search(combined_text, query)
|
101 |
+
return highlighted
|
102 |
+
|
103 |
+
run_ocr_button.click(fn=process_and_display,
|
104 |
+
inputs=image_input, outputs=output_text)
|
105 |
+
|
106 |
+
search_button.click(fn=search_text, inputs=[  # reads the currently displayed HTML and writes the highlighted version back
|
107 |
+
output_text, query_input], outputs=output_text)
|
108 |
+
|
109 |
+
ocr_interface.launch()  # start serving the interface
|
110 |
+
|
111 |
+
# Developed by Tirtha Debnath.
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
Pillow
|
3 |
+
surya-ocr
|
4 |
+
torch
|
5 |
+
transformers
|
6 |
+
tiktoken
|
7 |
+
torchvision
|
8 |
+
verovio
|
9 |
+
accelerate
|
10 |
+
rapidfuzz
|
11 |
+
gradio
|