UniquePratham committed on
Commit
bc75943
·
verified ·
1 Parent(s): acb8143

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -39
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import io
2
  import streamlit as st
3
  from transformers import AutoModel, AutoTokenizer, Qwen2VLForConditionalGeneration, AutoProcessor
4
  from PIL import Image
@@ -6,7 +5,6 @@ import torch
6
  import os
7
  import re
8
  import json
9
- import io
10
  import base64
11
  from groq import Groq
12
  from st_keyup import st_keyup
@@ -26,7 +24,7 @@ def init_got_model():
26
  tokenizer = AutoTokenizer.from_pretrained(
27
  'srimanth-d/GOT_CPU', trust_remote_code=True)
28
  model = AutoModel.from_pretrained(
29
- 'srimanth-d/GOT_CPU', trust_remote_code=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
30
  return model.eval(), tokenizer
31
 
32
 
@@ -35,7 +33,7 @@ def init_got_gpu_model():
35
  tokenizer = AutoTokenizer.from_pretrained(
36
  'ucaslcl/GOT-OCR2_0', trust_remote_code=True)
37
  model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True,
38
- device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
39
  return model.eval().cuda(), tokenizer
40
 
41
  # Load Qwen Model
@@ -60,7 +58,7 @@ def clean_extracted_text(text):
60
 
61
 
62
  def polish_text_with_ai(cleaned_text):
63
- prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces, keeping it as close to the original as possible, along with relevant details or insights that an AI can provide about the extracted text. Extracted Text : {cleaned_text}"
64
  client = Groq(
65
  api_key="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")
66
  chat_completion = client.chat.completions.create(
@@ -129,13 +127,12 @@ model_choice = st.sidebar.selectbox(
129
 
130
  # Upload Section
131
  uploaded_file = st.sidebar.file_uploader(
132
- "Choose An Image : ", type=["png", "jpg", "jpeg"])
133
 
134
  # Input from clipboard
135
  # Paste image button
136
  clipboard_use = False
137
- image_data = paste(
138
- label="Paste From Clipboard", key="image_clipboard")
139
  if image_data is not None:
140
  clipboard_use = True
141
  header, encoded = image_data.split(",", 1)
@@ -144,7 +141,7 @@ if image_data is not None:
144
  uploaded_file = img_stream
145
 
146
  # Input from camera
147
- camera_file = st.sidebar.camera_input("Capture From Camera : ")
148
  if camera_file:
149
  uploaded_file = camera_file
150
 
@@ -157,12 +154,6 @@ col1, col2 = st.columns([2, 1])
157
  cleaned_text = ""
158
  polished_text = ""
159
 
160
- # Display extracted text
161
- if 'cleaned_text' not in st.session_state:
162
- st.session_state.cleaned_text = ""
163
- if 'polished_text' not in st.session_state:
164
- st.session_state.polished_text = ""
165
-
166
  # Display image preview
167
  if uploaded_file:
168
  image = Image.open(uploaded_file)
@@ -184,6 +175,12 @@ if uploaded_file:
184
  result_path = os.path.join(
185
  results_dir, "temp_file_result.json" if clipboard_use else f"{uploaded_file.name}_result.json")
186
 
 
 
 
 
 
 
187
  # Handle predictions
188
  if predict_button:
189
  if os.path.exists(result_path):
@@ -209,20 +206,17 @@ if uploaded_file:
209
  extracted_text = extract_text_qwen(
210
  image_path, qwen_model, qwen_processor)
211
 
212
- # Clean and polish extracted text
213
- if not cleaned_text and polished_text:
214
- cleaned_text = clean_extracted_text(extracted_text)
215
- polished_text = polish_text_with_ai(cleaned_text) if model_choice in [
216
- "GOT_CPU", "GOT_GPU"] else cleaned_text
217
 
218
- # Save results to JSON file
219
- if not os.path.exists(result_path):
220
- result_data = {"extracted_text": extracted_text,
221
- "cleaned_text": cleaned_text, "polished_text": polished_text}
222
- with open(result_path, 'w') as f:
223
- json.dump(result_data, f)
224
 
225
- # Save results to session state
226
  st.session_state.cleaned_text = cleaned_text
227
  st.session_state.polished_text = polished_text
228
 
@@ -232,15 +226,30 @@ if st.session_state.cleaned_text:
232
  st.markdown(st.session_state.cleaned_text, unsafe_allow_html=True)
233
  if st.session_state.polished_text:
234
  st.markdown(st.session_state.polished_text, unsafe_allow_html=True)
235
- # Input search term with real-time update on key press
236
- search_query = st_keyup("Search in extracted text:")
237
-
238
- if search_query:
239
- index = st.session_state.cleaned_text.find(search_query)
240
- start = index
241
- len = len(search_query)
242
- end = index + len
243
- if index != -1:
244
- highlight_text(st.session_state.cleaned_text, start, end)
245
- else:
246
- st.write("No Search Found.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  from transformers import AutoModel, AutoTokenizer, Qwen2VLForConditionalGeneration, AutoProcessor
3
  from PIL import Image
 
5
  import os
6
  import re
7
  import json
 
8
  import base64
9
  from groq import Groq
10
  from st_keyup import st_keyup
 
24
  tokenizer = AutoTokenizer.from_pretrained(
25
  'srimanth-d/GOT_CPU', trust_remote_code=True)
26
  model = AutoModel.from_pretrained(
27
+ 'srimanth-d/GOT_CPU', trust_remote_code=True, pad_token_id=tokenizer.eos_token_id)
28
  return model.eval(), tokenizer
29
 
30
 
 
33
  tokenizer = AutoTokenizer.from_pretrained(
34
  'ucaslcl/GOT-OCR2_0', trust_remote_code=True)
35
  model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True,
36
+ device_map='cuda', pad_token_id=tokenizer.eos_token_id)
37
  return model.eval().cuda(), tokenizer
38
 
39
  # Load Qwen Model
 
58
 
59
 
60
  def polish_text_with_ai(cleaned_text):
61
+ prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces, keeping it as close to the original as possible, along with relevant details or insights that an AI can provide about the extracted text. Extracted Text: {cleaned_text}"
62
  client = Groq(
63
  api_key="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")
64
  chat_completion = client.chat.completions.create(
 
127
 
128
  # Upload Section
129
  uploaded_file = st.sidebar.file_uploader(
130
+ "Choose An Image:", type=["png", "jpg", "jpeg"])
131
 
132
  # Input from clipboard
133
  # Paste image button
134
  clipboard_use = False
135
+ image_data = paste(label="Paste From Clipboard", key="image_clipboard")
 
136
  if image_data is not None:
137
  clipboard_use = True
138
  header, encoded = image_data.split(",", 1)
 
141
  uploaded_file = img_stream
142
 
143
  # Input from camera
144
+ camera_file = st.sidebar.camera_input("Capture From Camera:")
145
  if camera_file:
146
  uploaded_file = camera_file
147
 
 
154
  cleaned_text = ""
155
  polished_text = ""
156
 
 
 
 
 
 
 
157
  # Display image preview
158
  if uploaded_file:
159
  image = Image.open(uploaded_file)
 
175
  result_path = os.path.join(
176
  results_dir, "temp_file_result.json" if clipboard_use else f"{uploaded_file.name}_result.json")
177
 
178
+ # Display extracted text
179
+ if 'cleaned_text' not in st.session_state:
180
+ st.session_state.cleaned_text = ""
181
+ if 'polished_text' not in st.session_state:
182
+ st.session_state.polished_text = ""
183
+
184
  # Handle predictions
185
  if predict_button:
186
  if os.path.exists(result_path):
 
206
  extracted_text = extract_text_qwen(
207
  image_path, qwen_model, qwen_processor)
208
 
209
+ cleaned_text = clean_extracted_text(extracted_text)
210
+ polished_text = polish_text_with_ai(cleaned_text) if model_choice in [
211
+ "GOT_CPU", "GOT_GPU"] else cleaned_text
 
 
212
 
213
+ # Save results to JSON file
214
+ result_data = {"extracted_text": extracted_text,
215
+ "cleaned_text": cleaned_text, "polished_text": polished_text}
216
+ with open(result_path, 'w') as f:
217
+ json.dump(result_data, f)
 
218
 
219
+ # Save results to session state
220
  st.session_state.cleaned_text = cleaned_text
221
  st.session_state.polished_text = polished_text
222
 
 
226
  st.markdown(st.session_state.cleaned_text, unsafe_allow_html=True)
227
  if st.session_state.polished_text:
228
  st.markdown(st.session_state.polished_text, unsafe_allow_html=True)
229
+
230
+ # Input search term
231
+ search_term = st.text_input("Search Keywords (Update live):")
232
+
233
+ # Highlight search results in real-time
234
+ if search_term and st.session_state.cleaned_text:
235
+ search_keywords = search_term.split()
236
+ for keyword in search_keywords:
237
+ # Find all matches of the keyword in the text and apply highlighting
238
+ matches = re.finditer(re.escape(keyword),
239
+ st.session_state.cleaned_text, re.IGNORECASE)
240
+ for match in matches:
241
+ start, end = match.span()
242
+ highlight_text(st.session_state.cleaned_text, start, end)
243
+
244
+ # Display the highlighted text in the output section
245
+ col2.subheader("Highlighted Text with Keywords")
246
+ highlighted_text = text_highlighter(
247
+ text=st.session_state.cleaned_text,
248
+ labels=[("KEYWORD", "#ffcc00")], # Color for the highlight
249
+ annotations=[
250
+ {"start": match.start(), "end": match.end(), "tag": "KEYWORD"}
251
+ for keyword in search_keywords
252
+ for match in re.finditer(re.escape(keyword), st.session_state.cleaned_text, re.IGNORECASE)
253
+ ],
254
+ )
255
+ col2.write(highlighted_text, unsafe_allow_html=True)