aiqtech committed on
Commit
bf957c3
1 Parent(s): 36163a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -39
app.py CHANGED
@@ -5,16 +5,27 @@ import torch
5
  import gradio as gr
6
  from threading import Thread
7
  from PIL import Image
8
- from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
9
- from qwen_vl_utils import process_vision_info
10
 
11
  # Model and processor initialization
12
- model = Qwen2VLForConditionalGeneration.from_pretrained(
13
- "Qwen/QVQ-72B-Preview",
14
- torch_dtype="auto",
15
- device_map="auto"
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  )
17
- processor = AutoProcessor.from_pretrained("Qwen/QVQ-72B-Preview")
18
 
19
  # Footer
20
  footer = """
@@ -38,47 +49,21 @@ def process_image(image, text_input=None):
38
  messages = [
39
  {
40
  "role": "system",
41
- "content": [
42
- {"type": "text", "text": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."}
43
- ],
44
  },
45
  {
46
  "role": "user",
47
  "content": [
48
- {"type": "image", "image": image},
49
- {"type": "text", "text": text_input}
50
- ],
51
  }
52
  ]
53
 
54
  # Process inputs
55
- text = processor.apply_chat_template(
56
- messages,
57
- tokenize=False,
58
- add_generation_prompt=True
59
- )
60
- image_inputs, video_inputs = process_vision_info(messages)
61
- inputs = processor(
62
- text=[text],
63
- images=image_inputs,
64
- videos=video_inputs,
65
- padding=True,
66
- return_tensors="pt",
67
- )
68
- inputs = inputs.to("cuda")
69
-
70
- # Generate response
71
- generated_ids = model.generate(**inputs, max_new_tokens=8192)
72
- generated_ids_trimmed = [
73
- out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
74
- ]
75
- output_text = processor.batch_decode(
76
- generated_ids_trimmed,
77
- skip_special_tokens=True,
78
- clean_up_tokenization_spaces=False
79
- )[0]
80
 
81
- return output_text
82
  except Exception as e:
83
  return f"Error processing image: {str(e)}"
84
 
 
5
  import gradio as gr
6
  from threading import Thread
7
  from PIL import Image
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor
 
9
 
10
  # Model and processor initialization
11
+ model_name = "Qwen/QVQ-72B-Preview"
12
+
13
+ model = AutoModelForCausalLM.from_pretrained(
14
+ model_name,
15
+ trust_remote_code=True,
16
+ device_map="auto",
17
+ torch_dtype=torch.float16
18
+ )
19
+
20
+ tokenizer = AutoTokenizer.from_pretrained(
21
+ model_name,
22
+ trust_remote_code=True
23
+ )
24
+
25
+ processor = AutoProcessor.from_pretrained(
26
+ model_name,
27
+ trust_remote_code=True
28
  )
 
29
 
30
  # Footer
31
  footer = """
 
49
  messages = [
50
  {
51
  "role": "system",
52
+ "content": "You are a helpful and harmless assistant."
 
 
53
  },
54
  {
55
  "role": "user",
56
  "content": [
57
+ {"image": image},
58
+ {"text": text_input}
59
+ ]
60
  }
61
  ]
62
 
63
  # Process inputs
64
+ response = model.chat(tokenizer, messages)
65
+ return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
 
67
  except Exception as e:
68
  return f"Error processing image: {str(e)}"
69