Spaces:
Running
on
Zero
Running
on
Zero
update app
Browse files
app.py
CHANGED
|
@@ -88,22 +88,14 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
| 88 |
DTYPE = "auto"
|
| 89 |
|
| 90 |
qwen_model = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 91 |
-
"Qwen/Qwen3-VL-
|
| 92 |
dtype=DTYPE,
|
| 93 |
device_map=DEVICE,
|
| 94 |
).eval()
|
| 95 |
qwen_processor = Qwen3VLProcessor.from_pretrained(
|
| 96 |
-
"Qwen/Qwen3-VL-
|
| 97 |
)
|
| 98 |
|
| 99 |
-
CATEGORIES = ["Query", "Caption", "Point", "Detect"]
|
| 100 |
-
PLACEHOLDERS = {
|
| 101 |
-
"Query": "What's in this image?",
|
| 102 |
-
"Caption": "Enter caption length: short, normal, or long",
|
| 103 |
-
"Point": "Enter the object for keypoint detection (e.g., 'the person's face')",
|
| 104 |
-
"Detect": "Enter the object to detect (e.g., 'the person')",
|
| 105 |
-
}
|
| 106 |
-
|
| 107 |
def safe_parse_json(text: str):
|
| 108 |
text = text.strip()
|
| 109 |
text = re.sub(r"^```(json)?", "", text)
|
|
@@ -245,7 +237,6 @@ def on_category_change(category):
|
|
| 245 |
interactive=True,
|
| 246 |
)
|
| 247 |
|
| 248 |
-
|
| 249 |
def process_inputs(image, category, prompt):
|
| 250 |
if image is None:
|
| 251 |
raise gr.Error("Please upload an image.")
|
|
@@ -259,6 +250,14 @@ def process_inputs(image, category, prompt):
|
|
| 259 |
|
| 260 |
return qwen_annotated_image, qwen_text
|
| 261 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
css="""
|
| 263 |
#col-container {
|
| 264 |
margin: 0 auto;
|
|
|
|
| 88 |
DTYPE = "auto"
|
| 89 |
|
| 90 |
qwen_model = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 91 |
+
"Qwen/Qwen3-VL-4B-Instruct",
|
| 92 |
dtype=DTYPE,
|
| 93 |
device_map=DEVICE,
|
| 94 |
).eval()
|
| 95 |
qwen_processor = Qwen3VLProcessor.from_pretrained(
|
| 96 |
+
"Qwen/Qwen3-VL-4B-Instruct",
|
| 97 |
)
|
| 98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
def safe_parse_json(text: str):
|
| 100 |
text = text.strip()
|
| 101 |
text = re.sub(r"^```(json)?", "", text)
|
|
|
|
| 237 |
interactive=True,
|
| 238 |
)
|
| 239 |
|
|
|
|
| 240 |
def process_inputs(image, category, prompt):
|
| 241 |
if image is None:
|
| 242 |
raise gr.Error("Please upload an image.")
|
|
|
|
| 250 |
|
| 251 |
return qwen_annotated_image, qwen_text
|
| 252 |
|
| 253 |
+
CATEGORIES = ["Query", "Caption", "Point", "Detect"]
|
| 254 |
+
PLACEHOLDERS = {
|
| 255 |
+
"Query": "What's in this image?",
|
| 256 |
+
"Caption": "Enter caption length: short, normal, or long",
|
| 257 |
+
"Point": "Enter the object for keypoint detection (e.g., 'the person's face')",
|
| 258 |
+
"Detect": "Enter the object to detect (e.g., 'the person')",
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
css="""
|
| 262 |
#col-container {
|
| 263 |
margin: 0 auto;
|