Spaces:

RufusRubin777
/

Qwen2VL-OCR_CPU

Runtime error

App Files Community

RufusRubin777 committed on Sep 27

Commit

d89f6ab

•

1 Parent(s): c39c19e

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -33

app.py CHANGED Viewed

@@ -5,12 +5,12 @@ from byaldi import RAGMultiModalModel
 from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 import torch
 # Load models
 def load_models():
     RAG = RAGMultiModalModel.from_pretrained("vidore/colpali")
-    model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct",
-                                                            trust_remote_code=True, torch_dtype=torch.float32)  # float32 for CPU
     processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True)
     return RAG, model, processor
@@ -18,7 +18,6 @@ RAG, model, processor = load_models()
 # Function for OCR and search
 def ocr_and_search(image, keyword):
     text_query = "Extract all the text in Sanskrit and English from the image."
     # Prepare message for Qwen model
@@ -42,53 +41,49 @@ def ocr_and_search(image, keyword):
         padding=True,
         return_tensors="pt",
     ).to("cpu")  # Use CPU
     # Generate text
     with torch.no_grad():
         generated_ids = model.generate(**inputs, max_new_tokens=2000)
-        generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
-        extracted_text = processor.batch_decode(
-            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-        )[0]
-    # Save extracted text to JSON
-    output_json = {"query": text_query, "extracted_text": extracted_text}
-    # json_output = json.dumps(output_json, ensure_ascii=False, indent=4)
-    gr.Textbox(label= extracted_text)
-    # Perform keyword search
     keyword_lower = keyword.lower()
     sentences = extracted_text.split('. ')
-    matched_sentences = [sentence for sentence in sentences if keyword_lower in sentence.lower()]
-    gr.Textbox(label= matched_sentences)
-    return extracted_text, matched_sentences #, json_output
-# Gradio App
 def app(image, keyword):
     extracted_text, search_results = ocr_and_search(image, keyword)
-    search_results_str = "\n".join(search_results) if search_results else "No matches found."
-    return extracted_text, search_results_str #, json_output
 # Gradio Interface
 iface = gr.Interface(
-    fn=app,
     inputs=[
-        gr.Image(type="pil", label="Upload an Image"),
         gr.Textbox(label="Enter keyword to search in extracted text", placeholder="Keyword")
-    ],
     outputs=[
         gr.Textbox(label="Extracted Text"),
-        gr.Textbox(label="Search Results"),
-        # gr.JSON(label="JSON Output")
     ],
     title="OCR and Keyword Search in Images",
 )

 from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 import torch
+import re
 # Load models
 def load_models():
     RAG = RAGMultiModalModel.from_pretrained("vidore/colpali")
+    model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True, torch_dtype=torch.float32)  # float32 for CPU
     processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True)
     return RAG, model, processor
 # Function for OCR and search
 def ocr_and_search(image, keyword):
     text_query = "Extract all the text in Sanskrit and English from the image."
     # Prepare message for Qwen model
         padding=True,
         return_tensors="pt",
     ).to("cpu")  # Use CPU
     # Generate text
     with torch.no_grad():
         generated_ids = model.generate(**inputs, max_new_tokens=2000)
+    generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
+    extracted_text = processor.batch_decode(
+        generated_ids_trimmed,
+        skip_special_tokens=True,
+        clean_up_tokenization_spaces=False
+    )[0]
+    # Perform keyword search with highlighting
     keyword_lower = keyword.lower()
     sentences = extracted_text.split('. ')
+    matched_sentences = []
+    for sentence in sentences:
+        if keyword_lower in sentence.lower():
+            highlighted_sentence = re.sub(
+                f'({re.escape(keyword)})',
+                r'<mark>\1</mark>',
+                sentence,
+                flags=re.IGNORECASE
+            )
+            matched_sentences.append(highlighted_sentence)
+    return extracted_text, matched_sentences
+# Gradio App
 def app(image, keyword):
     extracted_text, search_results = ocr_and_search(image, keyword)
+    search_results_str = "<br>".join(search_results) if search_results else "No matches found."
+    return extracted_text, search_results_str
 # Gradio Interface
 iface = gr.Interface(
+    fn=app,
     inputs=[
+        gr.Image(type="pil", label="Upload an Image"),
         gr.Textbox(label="Enter keyword to search in extracted text", placeholder="Keyword")
+    ],
     outputs=[
         gr.Textbox(label="Extracted Text"),
+        gr.HTML(label="Search Results"),
     ],
     title="OCR and Keyword Search in Images",
 )