Spaces:
Sleeping
Sleeping
UniquePratham
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
import io
|
2 |
import streamlit as st
|
3 |
from transformers import AutoModel, AutoTokenizer, Qwen2VLForConditionalGeneration, AutoProcessor
|
4 |
from PIL import Image
|
@@ -6,7 +5,6 @@ import torch
|
|
6 |
import os
|
7 |
import re
|
8 |
import json
|
9 |
-
import io
|
10 |
import base64
|
11 |
from groq import Groq
|
12 |
from st_keyup import st_keyup
|
@@ -26,7 +24,7 @@ def init_got_model():
|
|
26 |
tokenizer = AutoTokenizer.from_pretrained(
|
27 |
'srimanth-d/GOT_CPU', trust_remote_code=True)
|
28 |
model = AutoModel.from_pretrained(
|
29 |
-
'srimanth-d/GOT_CPU', trust_remote_code=True,
|
30 |
return model.eval(), tokenizer
|
31 |
|
32 |
|
@@ -35,7 +33,7 @@ def init_got_gpu_model():
|
|
35 |
tokenizer = AutoTokenizer.from_pretrained(
|
36 |
'ucaslcl/GOT-OCR2_0', trust_remote_code=True)
|
37 |
model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True,
|
38 |
-
device_map='cuda',
|
39 |
return model.eval().cuda(), tokenizer
|
40 |
|
41 |
# Load Qwen Model
|
@@ -60,7 +58,7 @@ def clean_extracted_text(text):
|
|
60 |
|
61 |
|
62 |
def polish_text_with_ai(cleaned_text):
|
63 |
-
prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces, keeping it as close to the original as possible, along with relevant details or insights that an AI can provide about the extracted text.
|
64 |
client = Groq(
|
65 |
api_key="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")
|
66 |
chat_completion = client.chat.completions.create(
|
@@ -129,13 +127,12 @@ model_choice = st.sidebar.selectbox(
|
|
129 |
|
130 |
# Upload Section
|
131 |
uploaded_file = st.sidebar.file_uploader(
|
132 |
-
"Choose An Image
|
133 |
|
134 |
# Input from clipboard
|
135 |
# Paste image button
|
136 |
clipboard_use = False
|
137 |
-
image_data = paste(
|
138 |
-
label="Paste From Clipboard", key="image_clipboard")
|
139 |
if image_data is not None:
|
140 |
clipboard_use = True
|
141 |
header, encoded = image_data.split(",", 1)
|
@@ -144,7 +141,7 @@ if image_data is not None:
|
|
144 |
uploaded_file = img_stream
|
145 |
|
146 |
# Input from camera
|
147 |
-
camera_file = st.sidebar.camera_input("Capture From Camera
|
148 |
if camera_file:
|
149 |
uploaded_file = camera_file
|
150 |
|
@@ -157,12 +154,6 @@ col1, col2 = st.columns([2, 1])
|
|
157 |
cleaned_text = ""
|
158 |
polished_text = ""
|
159 |
|
160 |
-
# Display extracted text
|
161 |
-
if 'cleaned_text' not in st.session_state:
|
162 |
-
st.session_state.cleaned_text = ""
|
163 |
-
if 'polished_text' not in st.session_state:
|
164 |
-
st.session_state.polished_text = ""
|
165 |
-
|
166 |
# Display image preview
|
167 |
if uploaded_file:
|
168 |
image = Image.open(uploaded_file)
|
@@ -184,6 +175,12 @@ if uploaded_file:
|
|
184 |
result_path = os.path.join(
|
185 |
results_dir, "temp_file_result.json" if clipboard_use else f"{uploaded_file.name}_result.json")
|
186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
# Handle predictions
|
188 |
if predict_button:
|
189 |
if os.path.exists(result_path):
|
@@ -209,20 +206,17 @@ if uploaded_file:
|
|
209 |
extracted_text = extract_text_qwen(
|
210 |
image_path, qwen_model, qwen_processor)
|
211 |
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
polished_text = polish_text_with_ai(cleaned_text) if model_choice in [
|
216 |
-
"GOT_CPU", "GOT_GPU"] else cleaned_text
|
217 |
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
json.dump(result_data, f)
|
224 |
|
225 |
-
|
226 |
st.session_state.cleaned_text = cleaned_text
|
227 |
st.session_state.polished_text = polished_text
|
228 |
|
@@ -232,15 +226,30 @@ if st.session_state.cleaned_text:
|
|
232 |
st.markdown(st.session_state.cleaned_text, unsafe_allow_html=True)
|
233 |
if st.session_state.polished_text:
|
234 |
st.markdown(st.session_state.polished_text, unsafe_allow_html=True)
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
from transformers import AutoModel, AutoTokenizer, Qwen2VLForConditionalGeneration, AutoProcessor
|
3 |
from PIL import Image
|
|
|
5 |
import os
|
6 |
import re
|
7 |
import json
|
|
|
8 |
import base64
|
9 |
from groq import Groq
|
10 |
from st_keyup import st_keyup
|
|
|
24 |
tokenizer = AutoTokenizer.from_pretrained(
|
25 |
'srimanth-d/GOT_CPU', trust_remote_code=True)
|
26 |
model = AutoModel.from_pretrained(
|
27 |
+
'srimanth-d/GOT_CPU', trust_remote_code=True, pad_token_id=tokenizer.eos_token_id)
|
28 |
return model.eval(), tokenizer
|
29 |
|
30 |
|
|
|
33 |
tokenizer = AutoTokenizer.from_pretrained(
|
34 |
'ucaslcl/GOT-OCR2_0', trust_remote_code=True)
|
35 |
model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True,
|
36 |
+
device_map='cuda', pad_token_id=tokenizer.eos_token_id)
|
37 |
return model.eval().cuda(), tokenizer
|
38 |
|
39 |
# Load Qwen Model
|
|
|
58 |
|
59 |
|
60 |
def polish_text_with_ai(cleaned_text):
|
61 |
+
prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces, keeping it as close to the original as possible, along with relevant details or insights that an AI can provide about the extracted text. Extracted Text: {cleaned_text}"
|
62 |
client = Groq(
|
63 |
api_key="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")
|
64 |
chat_completion = client.chat.completions.create(
|
|
|
127 |
|
128 |
# Upload Section
|
129 |
uploaded_file = st.sidebar.file_uploader(
|
130 |
+
"Choose An Image:", type=["png", "jpg", "jpeg"])
|
131 |
|
132 |
# Input from clipboard
|
133 |
# Paste image button
|
134 |
clipboard_use = False
|
135 |
+
image_data = paste(label="Paste From Clipboard", key="image_clipboard")
|
|
|
136 |
if image_data is not None:
|
137 |
clipboard_use = True
|
138 |
header, encoded = image_data.split(",", 1)
|
|
|
141 |
uploaded_file = img_stream
|
142 |
|
143 |
# Input from camera
|
144 |
+
camera_file = st.sidebar.camera_input("Capture From Camera:")
|
145 |
if camera_file:
|
146 |
uploaded_file = camera_file
|
147 |
|
|
|
154 |
cleaned_text = ""
|
155 |
polished_text = ""
|
156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
# Display image preview
|
158 |
if uploaded_file:
|
159 |
image = Image.open(uploaded_file)
|
|
|
175 |
result_path = os.path.join(
|
176 |
results_dir, "temp_file_result.json" if clipboard_use else f"{uploaded_file.name}_result.json")
|
177 |
|
178 |
+
# Display extracted text
|
179 |
+
if 'cleaned_text' not in st.session_state:
|
180 |
+
st.session_state.cleaned_text = ""
|
181 |
+
if 'polished_text' not in st.session_state:
|
182 |
+
st.session_state.polished_text = ""
|
183 |
+
|
184 |
# Handle predictions
|
185 |
if predict_button:
|
186 |
if os.path.exists(result_path):
|
|
|
206 |
extracted_text = extract_text_qwen(
|
207 |
image_path, qwen_model, qwen_processor)
|
208 |
|
209 |
+
cleaned_text = clean_extracted_text(extracted_text)
|
210 |
+
polished_text = polish_text_with_ai(cleaned_text) if model_choice in [
|
211 |
+
"GOT_CPU", "GOT_GPU"] else cleaned_text
|
|
|
|
|
212 |
|
213 |
+
# Save results to JSON file
|
214 |
+
result_data = {"extracted_text": extracted_text,
|
215 |
+
"cleaned_text": cleaned_text, "polished_text": polished_text}
|
216 |
+
with open(result_path, 'w') as f:
|
217 |
+
json.dump(result_data, f)
|
|
|
218 |
|
219 |
+
# Save results to session state
|
220 |
st.session_state.cleaned_text = cleaned_text
|
221 |
st.session_state.polished_text = polished_text
|
222 |
|
|
|
226 |
st.markdown(st.session_state.cleaned_text, unsafe_allow_html=True)
|
227 |
if st.session_state.polished_text:
|
228 |
st.markdown(st.session_state.polished_text, unsafe_allow_html=True)
|
229 |
+
|
230 |
+
# Input search term
|
231 |
+
search_term = st.text_input("Search Keywords (Update live):")
|
232 |
+
|
233 |
+
# Highlight search results in real-time
|
234 |
+
if search_term and st.session_state.cleaned_text:
|
235 |
+
search_keywords = search_term.split()
|
236 |
+
for keyword in search_keywords:
|
237 |
+
# Find all matches of the keyword in the text and apply highlighting
|
238 |
+
matches = re.finditer(re.escape(keyword),
|
239 |
+
st.session_state.cleaned_text, re.IGNORECASE)
|
240 |
+
for match in matches:
|
241 |
+
start, end = match.span()
|
242 |
+
highlight_text(st.session_state.cleaned_text, start, end)
|
243 |
+
|
244 |
+
# Display the highlighted text in the output section
|
245 |
+
col2.subheader("Highlighted Text with Keywords")
|
246 |
+
highlighted_text = text_highlighter(
|
247 |
+
text=st.session_state.cleaned_text,
|
248 |
+
labels=[("KEYWORD", "#ffcc00")], # Color for the highlight
|
249 |
+
annotations=[
|
250 |
+
{"start": match.start(), "end": match.end(), "tag": "KEYWORD"}
|
251 |
+
for keyword in search_keywords
|
252 |
+
for match in re.finditer(re.escape(keyword), st.session_state.cleaned_text, re.IGNORECASE)
|
253 |
+
],
|
254 |
+
)
|
255 |
+
col2.write(highlighted_text, unsafe_allow_html=True)
|