tuandunghcmut committed · Commit 398fce5 · 1 Parent(s): 412dc28
Files changed (2)
  1. app.py +124 -22
  2. models.py +49 -0
app.py CHANGED
@@ -8,6 +8,7 @@ import spaces
import cv2
import numpy as np
from PIL import Image
+ from models import get_model_list, get_model_info, DEFAULT_GENERATION_PARAMS

def progress_bar_html(label: str) -> str:
    """
@@ -54,16 +55,49 @@ def downsample_video(video_path):
    vidcap.release()
    return frames

- MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"  # Alternatively: "Qwen/Qwen2.5-VL-3B-Instruct"
- processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
- model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-     MODEL_ID,
-     trust_remote_code=True,
-     torch_dtype=torch.bfloat16
- ).to("cuda").eval()
+ # Initial model will be loaded when the first request comes in
+ processor = None
+ model = None
+ current_model_name = None
+
+ def load_model(model_name):
+     """
+     Loads the model and processor based on the model name.
+     Returns the model and processor.
+     """
+     global processor, model, current_model_name
+
+     # If the model is already loaded, return it
+     if model is not None and current_model_name == model_name:
+         return model, processor
+
+     # Get model info
+     model_info = get_model_info(model_name)
+     MODEL_ID = model_info["id"]
+
+     # Set dtype based on model info
+     dtype = getattr(torch, model_info["dtype"])
+
+     # Load processor and model
+     processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
+     model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+         MODEL_ID,
+         trust_remote_code=True,
+         torch_dtype=dtype
+     ).to(model_info["device"]).eval()
+
+     # Update current model name
+     current_model_name = model_name
+
+     return model, processor

@spaces.GPU
- def model_inference(input_dict, history):
+ def model_inference(input_dict, history, model_name, temperature=DEFAULT_GENERATION_PARAMS["temperature"],
+                     top_p=DEFAULT_GENERATION_PARAMS["top_p"], top_k=DEFAULT_GENERATION_PARAMS["top_k"],
+                     max_new_tokens=DEFAULT_GENERATION_PARAMS["max_new_tokens"]):
+     # Load the selected model
+     model, processor = load_model(model_name)
+
    text = input_dict["text"]
    files = input_dict["files"]

@@ -102,11 +136,18 @@ def model_inference(input_dict, history):
        ).to("cuda")
        # Set up streaming generation.
        streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-         generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
+         generation_kwargs = dict(
+             inputs,
+             streamer=streamer,
+             max_new_tokens=max_new_tokens,
+             temperature=temperature,
+             top_p=top_p,
+             top_k=top_k
+         )
        thread = Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()
        buffer = ""
-         yield progress_bar_html("Processing video with Qwen2.5VL Model")
+         yield progress_bar_html(f"Processing video with {model_name}")
        for new_text in streamer:
            buffer += new_text
            time.sleep(0.01)
@@ -144,11 +185,18 @@ def model_inference(input_dict, history):
        padding=True,
    ).to("cuda")
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
+     generation_kwargs = dict(
+         inputs,
+         streamer=streamer,
+         max_new_tokens=max_new_tokens,
+         temperature=temperature,
+         top_p=top_p,
+         top_k=top_k
+     )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    buffer = ""
-     yield progress_bar_html("Processing with Qwen2.5VL Model")
+     yield progress_bar_html(f"Processing with {model_name}")
    for new_text in streamer:
        buffer += new_text
        time.sleep(0.01)
@@ -161,15 +209,69 @@ examples = [
    [{"text": "@video-infer Explain the content of the video.", "files": ["example_images/sky.mp4"]}],
]

- demo = gr.ChatInterface(
-     fn=model_inference,
-     description="# **Qwen2.5 Series (add `@video-infer` for video understanding)**",
-     examples=examples,
-     fill_height=True,
-     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple"),
-     stop_btn="Stop Generation",
-     multimodal=True,
-     cache_examples=False,
- )
+ def create_interface():
+     # Get the list of available models
+     model_options = get_model_list()
+
+     with gr.Blocks() as demo:
+         gr.Markdown("# **Qwen2.5 Series (add `@video-infer` for video understanding)**")
+
+         with gr.Accordion("Model Settings", open=True):
+             with gr.Row():
+                 model_dropdown = gr.Dropdown(
+                     choices=model_options,
+                     value=model_options[0],
+                     label="Select Model"
+                 )
+
+             with gr.Row():
+                 temperature = gr.Slider(
+                     minimum=0.0,
+                     maximum=2.0,
+                     value=DEFAULT_GENERATION_PARAMS["temperature"],
+                     step=0.1,
+                     label="Temperature",
+                     info="Higher values produce more diverse outputs"
+                 )
+                 top_p = gr.Slider(
+                     minimum=0.0,
+                     maximum=1.0,
+                     value=DEFAULT_GENERATION_PARAMS["top_p"],
+                     step=0.05,
+                     label="Top P",
+                     info="Nucleus sampling: limit sampling to top P% of probability mass"
+                 )
+
+             with gr.Row():
+                 top_k = gr.Slider(
+                     minimum=1,
+                     maximum=100,
+                     value=DEFAULT_GENERATION_PARAMS["top_k"],
+                     step=1,
+                     label="Top K",
+                     info="Limit sampling to top K most likely tokens"
+                 )
+                 max_tokens = gr.Slider(
+                     minimum=64,
+                     maximum=2048,
+                     value=DEFAULT_GENERATION_PARAMS["max_new_tokens"],
+                     step=64,
+                     label="Max New Tokens",
+                     info="Maximum number of tokens to generate"
+                 )
+
+         chatbot = gr.ChatInterface(
+             fn=model_inference,
+             additional_inputs=[model_dropdown, temperature, top_p, top_k, max_tokens],
+             examples=examples,
+             fill_height=True,
+             textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple"),
+             stop_btn="Stop Generation",
+             multimodal=True,
+             cache_examples=False,
+         )
+
+     return demo

+ demo = create_interface()
demo.launch(debug=True)
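
Note: gr.ChatInterface forwards the current values of the components listed in additional_inputs to the handler after the message and history, so with this change a single chat turn corresponds roughly to the call sketched below. This is a minimal sketch, not part of the commit: the prompt, history, and parameter values are illustrative, and in the running Space the call is made by Gradio on a GPU worker rather than directly.

# Illustrative sketch of how Gradio invokes the new handler signature.
for update in model_inference(
    {"text": "@video-infer Explain the content of the video.", "files": ["example_images/sky.mp4"]},  # message
    [],                          # chat history
    "Qwen2.5-VL-7B-Instruct",    # model_dropdown
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    max_new_tokens=1024,
):
    print(update)  # progress HTML first, then the growing response text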
models.py ADDED
@@ -0,0 +1,49 @@
+ """
+ Module containing model recommendations and configurations for the Qwen2.5 VL application.
+ """
+
+ # Dictionary of recommended models with their specifications
+ RECOMMENDED_MODELS = {
+     "Qwen2.5-VL-7B-Instruct": {
+         "id": "Qwen/Qwen2.5-VL-7B-Instruct",
+         "description": "7B parameter vision-language model with instruction tuning",
+         "dtype": "bfloat16",
+         "device": "cuda"
+     },
+     "Qwen2.5-VL-3B-Instruct": {
+         "id": "Qwen/Qwen2.5-VL-3B-Instruct",
+         "description": "3B parameter vision-language model with instruction tuning",
+         "dtype": "bfloat16",
+         "device": "cuda"
+     }
+ }
+
+ # Default generation parameters
+ DEFAULT_GENERATION_PARAMS = {
+     "max_new_tokens": 1024,
+     "temperature": 0.7,
+     "top_p": 0.9,
+     "top_k": 50,
+     "repetition_penalty": 1.0
+ }
+
+ def get_model_info(model_name):
+     """
+     Returns the model information for a given model name.
+
+     Args:
+         model_name (str): Name of the model
+
+     Returns:
+         dict: Model specifications
+     """
+     return RECOMMENDED_MODELS.get(model_name, RECOMMENDED_MODELS["Qwen2.5-VL-7B-Instruct"])
+
+ def get_model_list():
+     """
+     Returns a list of available models for selection.
+
+     Returns:
+         list: List of model names
+     """
+     return list(RECOMMENDED_MODELS.keys())
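
The two getters and the defaults above are what app.py imports. A quick standalone check of the new module (a sketch, assuming models.py is importable from the working directory) behaves like this:

from models import get_model_list, get_model_info, DEFAULT_GENERATION_PARAMS

# List the selectable models and their specs, as the dropdown will show them.
for name in get_model_list():
    info = get_model_info(name)
    print(f"{name}: {info['id']} ({info['dtype']}, {info['device']})")

# Unknown names fall back to the 7B entry instead of raising a KeyError.
print(get_model_info("not-a-model")["id"])          # Qwen/Qwen2.5-VL-7B-Instruct
print(DEFAULT_GENERATION_PARAMS["max_new_tokens"])  # 1024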