Multimodal-OCR

Paused

App Files Community

prithivMLmods committed on Jan 28

Commit

91cda81

verified ·

1 Parent(s): 5d63d59

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -52

app.py CHANGED Viewed

@@ -14,31 +14,28 @@ MODEL_OPTIONS = {
     "Text Analogy Ocrtest": "prithivMLmods/Qwen2-VL-Ocrtest-2B-Instruct"
 }
-# Global variables for model and processor
-model = None
-processor = None
-# Function to load the selected model
-def load_model(model_name):
-    global model, processor
-    model_id = MODEL_OPTIONS[model_name]
-    print(f"Loading model: {model_id}")
-    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
-    model = Qwen2VLForConditionalGeneration.from_pretrained(
-        model_id,
-        trust_remote_code=True,
-        torch_dtype=torch.float16
-    ).to("cuda").eval()
-    print(f"Model {model_id} loaded successfully!")
-    return f"Model {model_name} loaded!"
 @spaces.GPU
-def model_inference(input_dict, history, model_choice):
     global model, processor
-    # Load the selected model if not already loaded
-    if model is None or processor is None:
-        load_model(model_choice)
     text = input_dict["text"]
     files = input_dict["files"]
@@ -107,35 +104,21 @@ examples = [
     [{"text": "Where do the severe droughts happen according to this diagram?", "files": ["example_images/examples_weather_events.png"]}],
 ]
-# Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("# **Qwen2.5-VL-3B-Instruct**")
-    # Model selection dropdown
-    model_choice = gr.Dropdown(
-        label="Model Selection",
-        choices=list(MODEL_OPTIONS.keys()),
-        value="Latex OCR"
-    )
-    # Load model button
-    load_model_btn = gr.Button("Load Model")
-    load_model_output = gr.Textbox(label="Model Load Status")
-    # Chat interface
-    chat_interface = gr.ChatInterface(
-        fn=model_inference,
-        description="Interact with the selected Qwen2-VL model.",
-        examples=examples,
-        textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
-        stop_btn="Stop Generation",
-        multimodal=True,
-        cache_examples=False,
-        additional_inputs=[model_choice]  # Pass model_choice as an additional input
-    )
-    # Link the load model button to the load_model function
-    load_model_btn.click(load_model, inputs=model_choice, outputs=load_model_output)
-# Launch the demo
 demo.launch(debug=True)

     "Text Analogy Ocrtest": "prithivMLmods/Qwen2-VL-Ocrtest-2B-Instruct"
 }
+# Default model setup
+current_model_id = MODEL_OPTIONS["Latex OCR"]
+processor = AutoProcessor.from_pretrained(current_model_id, trust_remote_code=True)
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+    current_model_id,
+    trust_remote_code=True,
+    torch_dtype=torch.float16
+).to("cuda").eval()
 @spaces.GPU
+def model_inference(input_dict, history, model_id):
     global model, processor
+    # Reload the model and processor if the model selection changes
+    if model_id != current_model_id:
+        current_model_id = model_id
+        processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+        model = Qwen2VLForConditionalGeneration.from_pretrained(
+            model_id,
+            trust_remote_code=True,
+            torch_dtype=torch.float16
+        ).to("cuda").eval()
     text = input_dict["text"]
     files = input_dict["files"]
     [{"text": "Where do the severe droughts happen according to this diagram?", "files": ["example_images/examples_weather_events.png"]}],
 ]
+# Gradio components
+model_choice = gr.Dropdown(
+    label="Model Selection",
+    choices=list(MODEL_OPTIONS.keys()),
+    value="Latex OCR"
+)
+demo = gr.ChatInterface(
+    fn=lambda inputs, history: model_inference(inputs, history, MODEL_OPTIONS[model_choice.value]),
+    description="# **Qwen2.5-VL-3B-Instruct**",
+    examples=examples,
+    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
+    stop_btn="Stop Generation",
+    multimodal=True,
+    cache_examples=False,
+)
 demo.launch(debug=True)