Swekerr committed on
Commit
f6aa2ce
1 Parent(s): f782070

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -6
app.py CHANGED
@@ -10,16 +10,15 @@ import torch
10
  def load_models():
11
  RAG = RAGMultiModalModel.from_pretrained("vidore/colpali")
12
  model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct",
13
- trust_remote_code=True, torch_dtype=torch.float32) # Change to float32 for CPU
14
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True)
15
  return RAG, model, processor
16
 
17
  RAG, model, processor = load_models()
18
 
19
  # Function for OCR and search
20
- # Skip RAG search and use Qwen2VL for direct OCR
21
  def ocr_and_search(image, keyword):
22
- # Hardcoded query to extract text in English, Sanskrit, and Hindi
23
  text_query = "Extract all the text in Sanskrit and English from the image."
24
 
25
  # Prepare message for Qwen model
@@ -64,9 +63,9 @@ def ocr_and_search(image, keyword):
64
  return extracted_text, matched_sentences, json_output
65
 
66
 
67
- # Gradio App function
68
  def app(image, keyword):
69
- # Call OCR and search function
70
  extracted_text, search_results, json_output = ocr_and_search(image, keyword)
71
 
72
  search_results_str = "\n".join(search_results) if search_results else "No matches found."
@@ -77,7 +76,7 @@ def app(image, keyword):
77
  iface = gr.Interface(
78
  fn=app,
79
  inputs=[
80
- gr.Image(type="pil", label="Upload an Image"), # Corrected to gr.Image
81
  gr.Textbox(label="Enter keyword to search in extracted text", placeholder="Keyword")
82
  ],
83
  outputs=[
 
10
  def load_models():
11
  RAG = RAGMultiModalModel.from_pretrained("vidore/colpali")
12
  model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct",
13
+ trust_remote_code=True, torch_dtype=torch.float32) # float32 for CPU
14
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True)
15
  return RAG, model, processor
16
 
17
  RAG, model, processor = load_models()
18
 
19
  # Function for OCR and search
 
20
  def ocr_and_search(image, keyword):
21
+
22
  text_query = "Extract all the text in Sanskrit and English from the image."
23
 
24
  # Prepare message for Qwen model
 
63
  return extracted_text, matched_sentences, json_output
64
 
65
 
66
+ # Gradio App
67
  def app(image, keyword):
68
+
69
  extracted_text, search_results, json_output = ocr_and_search(image, keyword)
70
 
71
  search_results_str = "\n".join(search_results) if search_results else "No matches found."
 
76
  iface = gr.Interface(
77
  fn=app,
78
  inputs=[
79
+ gr.Image(type="pil", label="Upload an Image"),
80
  gr.Textbox(label="Enter keyword to search in extracted text", placeholder="Keyword")
81
  ],
82
  outputs=[