Spaces:

prithivMLmods
/

Qwen3-VL-HF-Demo

Running on Zero

App Files Files Community

prithivMLmods committed on 5 days ago

Commit

c7a140a

verified ·

1 Parent(s): ceece79

Update app.py

Browse files

Files changed (1) hide show

app.py +2 -9

app.py CHANGED Viewed

@@ -173,18 +173,14 @@ def annotate_image(image: Image.Image, result: dict):
 @GPU
 def process_qwen(image: Image.Image, category: str, prompt: str):
-    """Processes the input based on the selected category using the Qwen model."""
     if category == "Query":
         return run_qwen_inference(image, prompt), {}
     elif category == "Caption":
         full_prompt = f"Provide a {prompt} length caption for the image."
         return run_qwen_inference(image, full_prompt), {}
     elif category == "Point":
         full_prompt = (
-            f"Provide 2d point coordinates for {prompt}. Report in JSON format like "
-            `[{"point_2d": [x, y]}]` " where coordinates are from 0 to 1000."
         )
         output_text = run_qwen_inference(image, full_prompt)
         parsed_json = safe_parse_json(output_text)
@@ -195,11 +191,9 @@ def process_qwen(image: Image.Image, category: str, prompt: str):
                     x, y = item["point_2d"]
                     points_result["points"].append({"x": x / 1000.0, "y": y / 1000.0})
         return json.dumps(points_result, indent=2), points_result
     elif category == "Detect":
         full_prompt = (
-            f"Provide bounding box coordinates for {prompt}. Report in JSON format like "
-            `[{"bbox_2d": [xmin, ymin, xmax, ymax]}]` " where coordinates are from 0 to 1000."
         )
         output_text = run_qwen_inference(image, full_prompt)
         parsed_json = safe_parse_json(output_text)
@@ -217,7 +211,6 @@ def process_qwen(image: Image.Image, category: str, prompt: str):
                         }
                     )
         return json.dumps(objects_result, indent=2), objects_result
     return "Invalid category", {}

 @GPU
 def process_qwen(image: Image.Image, category: str, prompt: str):
     if category == "Query":
         return run_qwen_inference(image, prompt), {}
     elif category == "Caption":
         full_prompt = f"Provide a {prompt} length caption for the image."
         return run_qwen_inference(image, full_prompt), {}
     elif category == "Point":
         full_prompt = (
+            f"Provide 2d point coordinates for {prompt}. Report in JSON format."
         )
         output_text = run_qwen_inference(image, full_prompt)
         parsed_json = safe_parse_json(output_text)
                     x, y = item["point_2d"]
                     points_result["points"].append({"x": x / 1000.0, "y": y / 1000.0})
         return json.dumps(points_result, indent=2), points_result
     elif category == "Detect":
         full_prompt = (
+            f"Provide bounding box coordinates for {prompt}. Report in JSON format."
         )
         output_text = run_qwen_inference(image, full_prompt)
         parsed_json = safe_parse_json(output_text)
                         }
                     )
         return json.dumps(objects_result, indent=2), objects_result
     return "Invalid category", {}