Csplk committed
Commit 790209e • 1 Parent(s): 3a1e471

Update app.py

Files changed (1):
  app.py +11 -7
app.py CHANGED
@@ -5,17 +5,21 @@ import gradio as gr
 from threading import Thread
 from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
 
-import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+#import subprocess
+#subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+
+if torch.cuda.is_available():
+    device, dtype = "cuda", torch.float16
+else:
+    device, dtype = "cpu", torch.float32
+
 
 model_id = "vikhyatk/moondream2"
 revision = "2024-04-02"
 tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
 moondream = AutoModelForCausalLM.from_pretrained(
-    model_id, trust_remote_code=True, revision=revision,
-    torch_dtype=torch.bfloat16, device_map={"": "cuda"},
-    attn_implementation="flash_attention_2"
-)
+    model_id, trust_remote_code=True, revision=revision
+).to(device=device, dtype=dtype)
 moondream.eval()
 
 
@@ -56,4 +60,4 @@ with gr.Blocks() as demo:
 submit.click(answer_question, [img, prompt], output)
 prompt.submit(answer_question, [img, prompt], output)
 
-demo.queue().launch()
+demo.queue().launch()
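Net effect of the first hunk: the startup-time flash-attn install is commented out, and the bfloat16 / flash_attention_2 / CUDA-only load is replaced by a device-aware one, presumably so the Space also runs on CPU-only hardware (flash-attn has no CPU kernel, and device_map={"": "cuda"} would fail without a GPU). A minimal sketch of the resulting model setup after this patch, just the loading path with the Gradio UI omitted:

    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    # fp16 on GPU, fp32 on CPU; flash-attn is no longer required either way.
    if torch.cuda.is_available():
        device, dtype = "cuda", torch.float16
    else:
        device, dtype = "cpu", torch.float32

    model_id = "vikhyatk/moondream2"
    revision = "2024-04-02"
    tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

    # trust_remote_code=True: moondream2 ships its own modeling code on the Hub.
    moondream = AutoModelForCausalLM.from_pretrained(
        model_id, trust_remote_code=True, revision=revision
    ).to(device=device, dtype=dtype)
    moondream.eval()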
 
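The second hunk only re-emits the final launch line (likely a whitespace or end-of-file change), but the wiring above it shows that both submit.click and prompt.submit stream answer_question into output. That handler sits in the unchanged middle of app.py and is not part of this diff; the sketch below is an assumption, reconstructed from the file's imports (Thread, TextIteratorStreamer) and from moondream2's remote-code API (encode_image and answer_question are taken from the model repo, not from this commit):

    def answer_question(img, prompt):
        # Encode the image once, then run generation on a background thread so
        # this generator can drain the streamer and update the UI incrementally.
        image_embeds = moondream.encode_image(img)
        streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
        thread = Thread(
            target=moondream.answer_question,
            kwargs={
                "image_embeds": image_embeds,
                "question": prompt,
                "tokenizer": tokenizer,
                "streamer": streamer,
            },
        )
        thread.start()

        buffer = ""
        for new_text in streamer:
            buffer += new_text
            yield buffer  # each yield re-renders the `output` component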