prithivMLmods committed on
Commit
6ff172b
·
verified ·
1 Parent(s): c661b6b

update app

Browse files
Files changed (1) hide show
  1. app.py +10 -11
app.py CHANGED
@@ -88,22 +88,14 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
88
  DTYPE = "auto"
89
 
90
  qwen_model = Qwen3VLForConditionalGeneration.from_pretrained(
91
- "Qwen/Qwen3-VL-8B-Instruct",
92
  dtype=DTYPE,
93
  device_map=DEVICE,
94
  ).eval()
95
  qwen_processor = Qwen3VLProcessor.from_pretrained(
96
- "Qwen/Qwen3-VL-8B-Instruct",
97
  )
98
 
99
- CATEGORIES = ["Query", "Caption", "Point", "Detect"]
100
- PLACEHOLDERS = {
101
- "Query": "What's in this image?",
102
- "Caption": "Enter caption length: short, normal, or long",
103
- "Point": "Enter the object for keypoint detection (e.g., 'the person's face')",
104
- "Detect": "Enter the object to detect (e.g., 'the person')",
105
- }
106
-
107
  def safe_parse_json(text: str):
108
  text = text.strip()
109
  text = re.sub(r"^```(json)?", "", text)
@@ -245,7 +237,6 @@ def on_category_change(category):
245
  interactive=True,
246
  )
247
 
248
-
249
  def process_inputs(image, category, prompt):
250
  if image is None:
251
  raise gr.Error("Please upload an image.")
@@ -259,6 +250,14 @@ def process_inputs(image, category, prompt):
259
 
260
  return qwen_annotated_image, qwen_text
261
 
 
 
 
 
 
 
 
 
262
  css="""
263
  #col-container {
264
  margin: 0 auto;
 
88
  DTYPE = "auto"
89
 
90
  qwen_model = Qwen3VLForConditionalGeneration.from_pretrained(
91
+ "Qwen/Qwen3-VL-4B-Instruct",
92
  dtype=DTYPE,
93
  device_map=DEVICE,
94
  ).eval()
95
  qwen_processor = Qwen3VLProcessor.from_pretrained(
96
+ "Qwen/Qwen3-VL-4B-Instruct",
97
  )
98
 
 
 
 
 
 
 
 
 
99
  def safe_parse_json(text: str):
100
  text = text.strip()
101
  text = re.sub(r"^```(json)?", "", text)
 
237
  interactive=True,
238
  )
239
 
 
240
  def process_inputs(image, category, prompt):
241
  if image is None:
242
  raise gr.Error("Please upload an image.")
 
250
 
251
  return qwen_annotated_image, qwen_text
252
 
253
+ CATEGORIES = ["Query", "Caption", "Point", "Detect"]
254
+ PLACEHOLDERS = {
255
+ "Query": "What's in this image?",
256
+ "Caption": "Enter caption length: short, normal, or long",
257
+ "Point": "Enter the object for keypoint detection (e.g., 'the person's face')",
258
+ "Detect": "Enter the object to detect (e.g., 'the person')",
259
+ }
260
+
261
  css="""
262
  #col-container {
263
  margin: 0 auto;