dwb2023 committed on
Commit
99b770e
1 Parent(s): 1bf5f66

Update app.py

Browse files

Update device placement from GPU to CPU

Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -8,18 +8,19 @@ from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausal
8
  import subprocess
9
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
10
 
 
 
 
11
  model_id = "vikhyatk/moondream2"
12
  revision = "2024-05-08"
13
  tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
14
  moondream = AutoModelForCausalLM.from_pretrained(
15
- model_id, trust_remote_code=True, revision=revision,
16
- torch_dtype=torch.bfloat16, device_map={"": "cuda"},
17
- attn_implementation="flash_attention_2"
18
- )
19
  moondream.eval()
20
 
21
 
22
- @spaces.GPU(duration=10)
23
  def answer_question(img, prompt):
24
  image_embeds = moondream.encode_image(img)
25
  streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
 
8
  import subprocess
9
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
10
 
11
+ device = torch.device("cpu")
12
+ dtype = torch.float32
13
+
14
  model_id = "vikhyatk/moondream2"
15
  revision = "2024-05-08"
16
  tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
17
  moondream = AutoModelForCausalLM.from_pretrained(
18
+ model_id, trust_remote_code=True, revision=revision
19
+ ).to(device=device, dtype=dtype)
 
 
20
  moondream.eval()
21
 
22
 
23
+ @spaces.CPU(duration=10)
24
  def answer_question(img, prompt):
25
  image_embeds = moondream.encode_image(img)
26
  streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)