Spaces:

prithivMLmods
/

Qwen3-VL-Outpost

Running on Zero

App Files Files Community

prithivMLmods committed on 7 days ago

Commit

38b4e1b

verified ·

1 Parent(s): c5713d0

update app

Browse files

Files changed (1) hide show

app.py +6 -31

app.py CHANGED Viewed

@@ -102,24 +102,6 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-# Load Qwen2.5-VL-7B-Instruct
-MODEL_ID_M = "Qwen/Qwen2.5-VL-7B-Instruct"
-processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
-model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    MODEL_ID_M,
-    trust_remote_code=True,
-    torch_dtype=torch.float16
-).to(device).eval()
-# Load Qwen2.5-VL-3B-Instruct
-MODEL_ID_X = "Qwen/Qwen2.5-VL-3B-Instruct"
-processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
-model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    MODEL_ID_X,
-    trust_remote_code=True,
-    torch_dtype=torch.float16
-).to(device).eval()
 # Load Qwen3-VL-4B-Instruct
 MODEL_ID_Q = "Qwen/Qwen3-VL-4B-Instruct"
 processor_q = AutoProcessor.from_pretrained(MODEL_ID_Q, trust_remote_code=True)
@@ -179,11 +161,8 @@ def generate_image(model_name: str, text: str, image: Image.Image,
     """
     Generates responses using the selected model for image input.
     """
-    if model_name == "Qwen2.5-VL-7B-Instruct":
-        processor, model = processor_m, model_m
-    elif model_name == "Qwen2.5-VL-3B-Instruct":
-        processor, model = processor_x, model_x
-    elif model_name == "Qwen3-VL-4B-Instruct":
         processor, model = processor_q, model_q
     elif model_name == "Qwen3-VL-8B-Instruct":
         processor, model = processor_y, model_y
@@ -221,11 +200,7 @@ def generate_video(model_name: str, text: str, video_path: str,
     """
     Generates responses using the selected model for video input.
     """
-    if model_name == "Qwen2.5-VL-7B-Instruct":
-        processor, model = processor_m, model_m
-    elif model_name == "Qwen2.5-VL-3B-Instruct":
-        processor, model = processor_x, model_x
-    elif model_name == "Qwen3-VL-4B-Instruct":
         processor, model = processor_q, model_q
     elif model_name == "Qwen3-VL-8B-Instruct":
         processor, model = processor_y, model_y
@@ -264,7 +239,7 @@ def generate_video(model_name: str, text: str, video_path: str,
     buffer = ""
     for new_text in streamer:
         buffer += new_text
-        buffer = buffer.replace("<|im_end|>", "")
         time.sleep(0.01)
         yield buffer, buffer
@@ -325,9 +300,9 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
                 markdown_output = gr.Markdown()
             model_choice = gr.Radio(
-                choices=["Qwen3-VL-2B-Instruct", "Qwen3-VL-4B-Instruct", "Qwen2.5-VL-7B-Instruct", "Qwen3-VL-8B-Instruct", "Qwen2.5-VL-3B-Instruct"],
                 label="Select Model",
-                value="Qwen2.5-VL-7B-Instruct"
             )
     image_submit.click(

 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 # Load Qwen3-VL-4B-Instruct
 MODEL_ID_Q = "Qwen/Qwen3-VL-4B-Instruct"
 processor_q = AutoProcessor.from_pretrained(MODEL_ID_Q, trust_remote_code=True)
     """
     Generates responses using the selected model for image input.
     """
+    if model_name == "Qwen3-VL-4B-Instruct":
         processor, model = processor_q, model_q
     elif model_name == "Qwen3-VL-8B-Instruct":
         processor, model = processor_y, model_y
     """
     Generates responses using the selected model for video input.
     """
+    if model_name == "Qwen3-VL-4B-Instruct":
         processor, model = processor_q, model_q
     elif model_name == "Qwen3-VL-8B-Instruct":
         processor, model = processor_y, model_y
     buffer = ""
     for new_text in streamer:
         buffer += new_text
+        #buffer = buffer.replace("<|im_end|>", "")
         time.sleep(0.01)
         yield buffer, buffer
                 markdown_output = gr.Markdown()
             model_choice = gr.Radio(
+                choices=["Qwen3-VL-4B-Instruct", "Qwen3-VL-2B-Instruct", "Qwen3-VL-8B-Instruct"],
                 label="Select Model",
+                value="Qwen3-VL-4B-Instruct"
             )
     image_submit.click(