davanstrien HF staff commited on
Commit
3f53d8e
β€’
1 Parent(s): 2b7d2f5
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -1,3 +1,10 @@
 
 
 
 
 
 
 
1
  import spaces
2
  import gradio as gr
3
 
@@ -11,6 +18,7 @@ from typing import Tuple
11
 
12
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
13
 
 
14
  model = Qwen2VLForConditionalGeneration.from_pretrained(
15
  "Qwen/Qwen2-VL-7B-Instruct",
16
  torch_dtype=torch.bfloat16,
@@ -93,14 +101,13 @@ def _prep_data_for_input(image):
93
 
94
  image_inputs, video_inputs = process_vision_info(messages)
95
 
96
- inputs = processor(
97
  text=[text],
98
  images=image_inputs,
99
  videos=video_inputs,
100
  padding=True,
101
  return_tensors="pt",
102
  )
103
- return inputs
104
 
105
 
106
  @spaces.GPU
@@ -120,8 +127,7 @@ def generate_response(image):
120
  clean_up_tokenization_spaces=False,
121
  )
122
  try:
123
- data = json.loads(output_text[0])
124
- return data
125
  except Exception:
126
  return {}
127
 
 
# HF Spaces workaround: flash-attn needs CUDA at build time, which is not
# available when the Space image is built, so it is installed at runtime
# with the CUDA build step skipped (a prebuilt wheel is used instead).
import os
import subprocess  # 🥲

subprocess.run(
    "pip install flash-attn --no-build-isolation",
    # Merge with the current environment instead of replacing it: passing a
    # bare one-key dict as `env` would drop PATH/HOME etc., which can make
    # `pip` unresolvable and breaks pip's env-dependent behavior.
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
    check=False,  # best-effort: the app can still run without flash-attn
)
8
  import spaces
9
  import gradio as gr
10
 
 
18
 
19
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
20
 
21
+
22
  model = Qwen2VLForConditionalGeneration.from_pretrained(
23
  "Qwen/Qwen2-VL-7B-Instruct",
24
  torch_dtype=torch.bfloat16,
 
101
 
102
  image_inputs, video_inputs = process_vision_info(messages)
103
 
104
+ return processor(
105
  text=[text],
106
  images=image_inputs,
107
  videos=video_inputs,
108
  padding=True,
109
  return_tensors="pt",
110
  )
 
111
 
112
 
113
  @spaces.GPU
 
127
  clean_up_tokenization_spaces=False,
128
  )
129
  try:
130
+ return json.loads(output_text[0])
 
131
  except Exception:
132
  return {}
133