Spaces:
Sleeping
Sleeping
UniquePratham
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
import io
|
2 |
import streamlit as st
|
3 |
from transformers import AutoModel, AutoTokenizer, Qwen2VLForConditionalGeneration, AutoProcessor
|
4 |
from PIL import Image
|
@@ -6,7 +5,6 @@ import torch
|
|
6 |
import os
|
7 |
import re
|
8 |
import json
|
9 |
-
import io
|
10 |
import base64
|
11 |
from groq import Groq
|
12 |
from st_keyup import st_keyup
|
@@ -26,7 +24,7 @@ def init_got_model():
|
|
26 |
tokenizer = AutoTokenizer.from_pretrained(
|
27 |
'srimanth-d/GOT_CPU', trust_remote_code=True)
|
28 |
model = AutoModel.from_pretrained(
|
29 |
-
'srimanth-d/GOT_CPU', trust_remote_code=True,
|
30 |
return model.eval(), tokenizer
|
31 |
|
32 |
|
@@ -35,7 +33,7 @@ def init_got_gpu_model():
|
|
35 |
tokenizer = AutoTokenizer.from_pretrained(
|
36 |
'ucaslcl/GOT-OCR2_0', trust_remote_code=True)
|
37 |
model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True,
|
38 |
-
device_map='cuda',
|
39 |
return model.eval().cuda(), tokenizer
|
40 |
|
41 |
# Load Qwen Model
|
@@ -60,7 +58,7 @@ def clean_extracted_text(text):
|
|
60 |
|
61 |
|
62 |
def polish_text_with_ai(cleaned_text):
|
63 |
-
prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces, keeping it as close to the original as possible, along with relevant details or insights that an AI can provide about the extracted text.
|
64 |
client = Groq(
|
65 |
api_key="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")
|
66 |
chat_completion = client.chat.completions.create(
|
@@ -129,13 +127,12 @@ model_choice = st.sidebar.selectbox(
|
|
129 |
|
130 |
# Upload Section
|
131 |
uploaded_file = st.sidebar.file_uploader(
|
132 |
-
"Choose An Image
|
133 |
|
134 |
# Input from clipboard
|
135 |
# Paste image button
|
136 |
clipboard_use = False
|
137 |
-
image_data = paste(
|
138 |
-
label="Paste From Clipboard", key="image_clipboard")
|
139 |
if image_data is not None:
|
140 |
clipboard_use = True
|
141 |
header, encoded = image_data.split(",", 1)
|
@@ -144,7 +141,7 @@ if image_data is not None:
|
|
144 |
uploaded_file = img_stream
|
145 |
|
146 |
# Input from camera
|
147 |
-
camera_file = st.sidebar.camera_input("Capture From Camera
|
148 |
if camera_file:
|
149 |
uploaded_file = camera_file
|
150 |
|
@@ -157,12 +154,6 @@ col1, col2 = st.columns([2, 1])
|
|
157 |
cleaned_text = ""
|
158 |
polished_text = ""
|
159 |
|
160 |
-
# Display extracted text
|
161 |
-
if 'cleaned_text' not in st.session_state:
|
162 |
-
st.session_state.cleaned_text = ""
|
163 |
-
if 'polished_text' not in st.session_state:
|
164 |
-
st.session_state.polished_text = ""
|
165 |
-
|
166 |
# Display image preview
|
167 |
if uploaded_file:
|
168 |
image = Image.open(uploaded_file)
|
@@ -184,6 +175,12 @@ if uploaded_file:
|
|
184 |
result_path = os.path.join(
|
185 |
results_dir, "temp_file_result.json" if clipboard_use else f"{uploaded_file.name}_result.json")
|
186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
# Handle predictions
|
188 |
if predict_button:
|
189 |
if os.path.exists(result_path):
|
@@ -209,20 +206,17 @@ if uploaded_file:
|
|
209 |
extracted_text = extract_text_qwen(
|
210 |
image_path, qwen_model, qwen_processor)
|
211 |
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
polished_text = polish_text_with_ai(cleaned_text) if model_choice in [
|
216 |
-
"GOT_CPU", "GOT_GPU"] else cleaned_text
|
217 |
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
json.dump(result_data, f)
|
224 |
|
225 |
-
|
226 |
st.session_state.cleaned_text = cleaned_text
|
227 |
st.session_state.polished_text = polished_text
|
228 |
|
@@ -232,15 +226,30 @@ if st.session_state.cleaned_text:
|
|
232 |
st.markdown(st.session_state.cleaned_text, unsafe_allow_html=True)
|
233 |
if st.session_state.polished_text:
|
234 |
st.markdown(st.session_state.polished_text, unsafe_allow_html=True)
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
from transformers import AutoModel, AutoTokenizer, Qwen2VLForConditionalGeneration, AutoProcessor
|
3 |
from PIL import Image
|
|
|
5 |
import os
|
6 |
import re
|
7 |
import json
|
|
|
8 |
import base64
|
9 |
from groq import Groq
|
10 |
from st_keyup import st_keyup
|
|
|
24 |
tokenizer = AutoTokenizer.from_pretrained(
|
25 |
'srimanth-d/GOT_CPU', trust_remote_code=True)
|
26 |
model = AutoModel.from_pretrained(
|
27 |
+
'srimanth-d/GOT_CPU', trust_remote_code=True, pad_token_id=tokenizer.eos_token_id)
|
28 |
return model.eval(), tokenizer
|
29 |
|
30 |
|
|
|
33 |
tokenizer = AutoTokenizer.from_pretrained(
|
34 |
'ucaslcl/GOT-OCR2_0', trust_remote_code=True)
|
35 |
model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True,
|
36 |
+
device_map='cuda', pad_token_id=tokenizer.eos_token_id)
|
37 |
return model.eval().cuda(), tokenizer
|
38 |
|
39 |
# Load Qwen Model
|
|
|
58 |
|
59 |
|
60 |
def polish_text_with_ai(cleaned_text):
|
61 |
+
prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces, keeping it as close to the original as possible, along with relevant details or insights that an AI can provide about the extracted text. Extracted Text: {cleaned_text}"
|
62 |
client = Groq(
|
63 |
api_key="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")
|
64 |
chat_completion = client.chat.completions.create(
|
|
|
127 |
|
128 |
# Upload Section
|
129 |
uploaded_file = st.sidebar.file_uploader(
|
130 |
+
"Choose An Image:", type=["png", "jpg", "jpeg"])
|
131 |
|
132 |
# Input from clipboard
|
133 |
# Paste image button
|
134 |
clipboard_use = False
|
135 |
+
image_data = paste(label="Paste From Clipboard", key="image_clipboard")
|
|
|
136 |
if image_data is not None:
|
137 |
clipboard_use = True
|
138 |
header, encoded = image_data.split(",", 1)
|
|
|
141 |
uploaded_file = img_stream
|
142 |
|
143 |
# Input from camera
|
144 |
+
camera_file = st.sidebar.camera_input("Capture From Camera:")
|
145 |
if camera_file:
|
146 |
uploaded_file = camera_file
|
147 |
|
|
|
154 |
cleaned_text = ""
|
155 |
polished_text = ""
|
156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
# Display image preview
|
158 |
if uploaded_file:
|
159 |
image = Image.open(uploaded_file)
|
|
|
175 |
result_path = os.path.join(
|
176 |
results_dir, "temp_file_result.json" if clipboard_use else f"{uploaded_file.name}_result.json")
|
177 |
|
178 |
+
# Display extracted text
|
179 |
+
if 'cleaned_text' not in st.session_state:
|
180 |
+
st.session_state.cleaned_text = ""
|
181 |
+
if 'polished_text' not in st.session_state:
|
182 |
+
st.session_state.polished_text = ""
|
183 |
+
|
184 |
# Handle predictions
|
185 |
if predict_button:
|
186 |
if os.path.exists(result_path):
|
|
|
206 |
extracted_text = extract_text_qwen(
|
207 |
image_path, qwen_model, qwen_processor)
|
208 |
|
209 |
+
cleaned_text = clean_extracted_text(extracted_text)
|
210 |
+
polished_text = polish_text_with_ai(cleaned_text) if model_choice in [
|
211 |
+
"GOT_CPU", "GOT_GPU"] else cleaned_text
|
|
|
|
|
212 |
|
213 |
+
# Save results to JSON file
|
214 |
+
result_data = {"extracted_text": extracted_text,
|
215 |
+
"cleaned_text": cleaned_text, "polished_text": polished_text}
|
216 |
+
with open(result_path, 'w') as f:
|
217 |
+
json.dump(result_data, f)
|
|
|
218 |
|
219 |
+
# Save results to session state
|
220 |
st.session_state.cleaned_text = cleaned_text
|
221 |
st.session_state.polished_text = polished_text
|
222 |
|
|
|
226 |
st.markdown(st.session_state.cleaned_text, unsafe_allow_html=True)
|
227 |
if st.session_state.polished_text:
|
228 |
st.markdown(st.session_state.polished_text, unsafe_allow_html=True)
|
229 |
+
|
230 |
+
# Input search term
|
231 |
+
search_term = st.text_input("Search Keywords (Update live):")
|
232 |
+
|
233 |
+
# Highlight search results in real-time
|
234 |
+
if search_term and st.session_state.cleaned_text:
|
235 |
+
search_keywords = search_term.split()
|
236 |
+
for keyword in search_keywords:
|
237 |
+
# Find all matches of the keyword in the text and apply highlighting
|
238 |
+
matches = re.finditer(re.escape(keyword),
|
239 |
+
st.session_state.cleaned_text, re.IGNORECASE)
|
240 |
+
for match in matches:
|
241 |
+
start, end = match.span()
|
242 |
+
highlight_text(st.session_state.cleaned_text, start, end)
|
243 |
+
|
244 |
+
# Display the highlighted text in the output section
|
245 |
+
col2.subheader("Highlighted Text with Keywords")
|
246 |
+
highlighted_text = text_highlighter(
|
247 |
+
text=st.session_state.cleaned_text,
|
248 |
+
labels=[("KEYWORD", "#ffcc00")], # Color for the highlight
|
249 |
+
annotations=[
|
250 |
+
{"start": match.start(), "end": match.end(), "tag": "KEYWORD"}
|
251 |
+
for keyword in search_keywords
|
252 |
+
for match in re.finditer(re.escape(keyword), st.session_state.cleaned_text, re.IGNORECASE)
|
253 |
+
],
|
254 |
+
)
|
255 |
+
col2.write(highlighted_text, unsafe_allow_html=True)
|