prithivMLmods committed on
Commit
6ff172b
·
verified ·
1 Parent(s): c661b6b

update app

Browse files
Files changed (1) hide show
  1. app.py +10 -11
app.py CHANGED
@@ -88,22 +88,14 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
88
  DTYPE = "auto"
89
 
90
  qwen_model = Qwen3VLForConditionalGeneration.from_pretrained(
91
- "Qwen/Qwen3-VL-8B-Instruct",
92
  dtype=DTYPE,
93
  device_map=DEVICE,
94
  ).eval()
95
  qwen_processor = Qwen3VLProcessor.from_pretrained(
96
- "Qwen/Qwen3-VL-8B-Instruct",
97
  )
98
 
99
- CATEGORIES = ["Query", "Caption", "Point", "Detect"]
100
- PLACEHOLDERS = {
101
- "Query": "What's in this image?",
102
- "Caption": "Enter caption length: short, normal, or long",
103
- "Point": "Enter the object for keypoint detection (e.g., 'the person's face')",
104
- "Detect": "Enter the object to detect (e.g., 'the person')",
105
- }
106
-
107
  def safe_parse_json(text: str):
108
  text = text.strip()
109
  text = re.sub(r"^```(json)?", "", text)
@@ -245,7 +237,6 @@ def on_category_change(category):
245
  interactive=True,
246
  )
247
 
248
-
249
  def process_inputs(image, category, prompt):
250
  if image is None:
251
  raise gr.Error("Please upload an image.")
@@ -259,6 +250,14 @@ def process_inputs(image, category, prompt):
259
 
260
  return qwen_annotated_image, qwen_text
261
 
 
 
 
 
 
 
 
 
262
  css="""
263
  #col-container {
264
  margin: 0 auto;
 
88
  DTYPE = "auto"
89
 
90
  qwen_model = Qwen3VLForConditionalGeneration.from_pretrained(
91
+ "Qwen/Qwen3-VL-4B-Instruct",
92
  dtype=DTYPE,
93
  device_map=DEVICE,
94
  ).eval()
95
  qwen_processor = Qwen3VLProcessor.from_pretrained(
96
+ "Qwen/Qwen3-VL-4B-Instruct",
97
  )
98
 
 
 
 
 
 
 
 
 
99
  def safe_parse_json(text: str):
100
  text = text.strip()
101
  text = re.sub(r"^```(json)?", "", text)
 
237
  interactive=True,
238
  )
239
 
 
240
  def process_inputs(image, category, prompt):
241
  if image is None:
242
  raise gr.Error("Please upload an image.")
 
250
 
251
  return qwen_annotated_image, qwen_text
252
 
253
+ CATEGORIES = ["Query", "Caption", "Point", "Detect"]
254
+ PLACEHOLDERS = {
255
+ "Query": "What's in this image?",
256
+ "Caption": "Enter caption length: short, normal, or long",
257
+ "Point": "Enter the object for keypoint detection (e.g., 'the person's face')",
258
+ "Detect": "Enter the object to detect (e.g., 'the person')",
259
+ }
260
+
261
  css="""
262
  #col-container {
263
  margin: 0 auto;