vikhyatk committed on
Commit
1322687
1 Parent(s): f9bc591

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -1,19 +1,20 @@
1
  import spaces
2
- import argparse
3
  import torch
4
  import re
5
  import gradio as gr
6
  from threading import Thread
7
  from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
8
 
9
- parser = argparse.ArgumentParser()
 
10
 
11
  model_id = "vikhyatk/moondream2"
12
  revision = "2024-04-02"
13
  tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
14
  moondream = AutoModelForCausalLM.from_pretrained(
15
  model_id, trust_remote_code=True, revision=revision,
16
- torch_dtype=torch.float32
 
17
  )
18
  moondream.eval()
19
 
@@ -48,7 +49,7 @@ with gr.Blocks() as demo:
48
  """
49
  )
50
  with gr.Row():
51
- prompt = gr.Textbox(label="Input", placeholder="Type here...", scale=4)
52
  submit = gr.Button("Submit")
53
  with gr.Row():
54
  img = gr.Image(type="pil", label="Upload an Image")
 
1
  import spaces
 
2
  import torch
3
  import re
4
  import gradio as gr
5
  from threading import Thread
6
  from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
7
 
8
+ import subprocess
9
+ subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
10
 
11
  model_id = "vikhyatk/moondream2"
12
  revision = "2024-04-02"
13
  tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
14
  moondream = AutoModelForCausalLM.from_pretrained(
15
  model_id, trust_remote_code=True, revision=revision,
16
+ torch_dtype=torch.bfloat16, device_map={"": "cuda"},
17
+ attn_implementation="flash_attention_2"
18
  )
19
  moondream.eval()
20
 
 
49
  """
50
  )
51
  with gr.Row():
52
+ prompt = gr.Textbox(label="Input", value="Describe this image.", scale=4)
53
  submit = gr.Button("Submit")
54
  with gr.Row():
55
  img = gr.Image(type="pil", label="Upload an Image")