Update app.py
app.py (CHANGED)
@@ -10,16 +10,15 @@ import torch
 def load_models():
     RAG = RAGMultiModalModel.from_pretrained("vidore/colpali")
     model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct",
-                                                            trust_remote_code=True, torch_dtype=torch.float32) #
+                                                            trust_remote_code=True, torch_dtype=torch.float32) # float32 for CPU
     processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True)
     return RAG, model, processor
 
 RAG, model, processor = load_models()
 
 # Function for OCR and search
-# Skip RAG search and use Qwen2VL for direct OCR
 def ocr_and_search(image, keyword):
-
+
     text_query = "Extract all the text in Sanskrit and English from the image."
 
     # Prepare message for Qwen model
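Note for context: the body of ocr_and_search past the "Prepare message for Qwen model" comment is not shown in this hunk. The sketch below is a minimal reconstruction of that step following the Qwen2-VL-2B-Instruct model card, not this Space's actual code; the run_qwen_ocr name, the max_new_tokens budget, and the process_vision_info helper from the qwen-vl-utils package are all assumptions.

# Hedged sketch: typical Qwen2-VL inference, runs on CPU with the float32 weights
# loaded above. `image` is a PIL.Image, `text_query` is the prompt from the diff.
from qwen_vl_utils import process_vision_info  # assumption: helper from qwen-vl-utils

def run_qwen_ocr(image, text_query, model, processor):
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": text_query},
        ],
    }]
    # Render the chat template and gather the vision inputs
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(text=[text], images=image_inputs, videos=video_inputs,
                       padding=True, return_tensors="pt")
    # max_new_tokens is an assumed budget for a page of extracted text
    generated_ids = model.generate(**inputs, max_new_tokens=512)
    # Strip the prompt tokens so only the generated continuation is decoded
    trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
    return processor.batch_decode(trimmed, skip_special_tokens=True,
                                  clean_up_tokenization_spaces=False)[0]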
@@ -64,9 +63,9 @@ def ocr_and_search(image, keyword):
     return extracted_text, matched_sentences, json_output
 
 
-# Gradio App
+# Gradio App
 def app(image, keyword):
-
+
     extracted_text, search_results, json_output = ocr_and_search(image, keyword)
 
     search_results_str = "\n".join(search_results) if search_results else "No matches found."
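The hunk above shows ocr_and_search returning extracted_text, matched_sentences, and json_output, but the keyword-matching step itself is outside the diff context. A plausible sketch of that step is below; the sentence-splitting rule and the JSON schema are both assumed for illustration, not taken from the Space.

import json
import re

def search_extracted_text(extracted_text, keyword):
    # Naive sentence split on terminal punctuation; the Space's actual
    # delimiter logic is unknown
    sentences = re.split(r"(?<=[.!?])\s+", extracted_text)
    matched_sentences = [s for s in sentences if keyword.lower() in s.lower()]
    # Assumed schema for the JSON payload returned as the third value
    json_output = json.dumps(
        {"extracted_text": extracted_text, "keyword": keyword, "matches": matched_sentences},
        ensure_ascii=False, indent=2)
    return matched_sentences, json_output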
@@ -77,7 +76,7 @@ def app(image, keyword):
 iface = gr.Interface(
     fn=app,
     inputs=[
-        gr.Image(type="pil", label="Upload an Image"),
+        gr.Image(type="pil", label="Upload an Image"),
     gr.Textbox(label="Enter keyword to search in extracted text", placeholder="Keyword")
     ],
     outputs=[
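The last hunk cuts off at outputs=[. For a runnable picture of the interface, here is a minimal self-contained sketch with the same two inputs; the stub app function, the three output components, and the launch() call are assumptions, chosen only to match app's three return values.

import gradio as gr

# Hypothetical stand-in for the Space's app() so this sketch runs on its own
def app(image, keyword):
    return "extracted text...", "matched sentences...", "{}"

iface = gr.Interface(
    fn=app,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Textbox(label="Enter keyword to search in extracted text", placeholder="Keyword"),
    ],
    # Assumed outputs: one component per value returned by app()
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Textbox(label="Search Results"),
        gr.JSON(label="JSON Output"),
    ],
)

if __name__ == "__main__":
    iface.launch()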
|