hanzla committed on
Commit
b23fbc0
1 Parent(s): 1014254

chat interface

Browse files
Files changed (1) hide show
  1. app.py +34 -24
app.py CHANGED
@@ -1,21 +1,18 @@
1
  import spaces
2
  import torch
 
3
  import gradio as gr
4
  from threading import Thread
5
- from transformers import AutoTokenizer, AutoModelForCausalLM
6
 
7
- # Install the necessary package for the model
8
  import subprocess
 
9
 
10
- subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
11
- shell=True)
12
-
13
- # Initialize the tokenizer and model
14
  model_id = "vikhyatk/moondream2"
15
  revision = "2024-04-02"
16
  tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
17
  moondream = AutoModelForCausalLM.from_pretrained(
18
- model_id, revision=revision, trust_remote_code=True,
19
  torch_dtype=torch.bfloat16, device_map={"": "cuda"},
20
  attn_implementation="flash_attention_2"
21
  )
@@ -23,28 +20,41 @@ moondream.eval()
23
 
24
 
25
  @spaces.GPU(duration=10)
26
- def chatbot_response(img, text_input):
27
- # Here we assume an encoded image processing if needed
28
  image_embeds = moondream.encode_image(img)
29
- inputs = tokenizer.encode(text_input, return_tensors="pt")
30
- outputs = moondream.generate(inputs, max_length=200)
31
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
32
- return response
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
 
35
- # Setting up Gradio Interface
36
  with gr.Blocks(theme="Monochrome") as demo:
37
- gr.Markdown("# AskMoondream Chatbot")
38
- with gr.Row():
39
- img = gr.Image(type="pil", label="Upload an Image")
40
- text_input = gr.Textbox(label="Ask a question or describe an image", placeholder="Type here...")
 
 
 
41
  with gr.Row():
 
42
  submit = gr.Button("Submit")
43
- response = gr.TextArea(label="Response", placeholder="Moondream's response will appear here...")
44
-
45
- # Define what happens when the user interacts with the interface
46
- submit.click(chatbot_response, inputs=[img, text_input], outputs=response)
47
- text_input.submit(chatbot_response, inputs=[img, text_input], outputs=response)
48
 
49
- # Launch the demo
50
  demo.queue().launch()
 
1
  import spaces
2
  import torch
3
+ import re
4
  import gradio as gr
5
  from threading import Thread
6
+ from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
7
 
 
8
  import subprocess
9
import os

# Install flash-attn at startup, before the model is loaded with
# attn_implementation="flash_attention_2" below.
# FLASH_ATTENTION_SKIP_CUDA_BUILD is set for the pip process only (presumably
# it makes flash-attn's installer skip compiling CUDA kernels -- confirm
# against the flash-attn documentation).
# The variable is layered on top of the current environment: passing a bare
# dict as `env` replaces the child's whole environment, dropping PATH, HOME,
# proxy settings, etc., which makes it ambiguous which `pip` the shell finds.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)

# Checkpoint to load and the revision it is pinned to. The same revision is
# used for both the tokenizer and the model so they stay in sync.
model_id = "vikhyatk/moondream2"
revision = "2024-04-02"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
# trust_remote_code=True executes modelling code shipped with the checkpoint;
# the whole model is placed on the CUDA device in bfloat16.
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    revision=revision,
    torch_dtype=torch.bfloat16,
    device_map={"": "cuda"},
    attn_implementation="flash_attention_2",
)
 
20
 
21
 
22
@spaces.GPU(duration=10)
def answer_question(img, prompt):
    """Stream Moondream's answer to ``prompt`` about ``img``.

    The image is encoded once, then generation runs on a background thread
    that feeds a ``TextIteratorStreamer``. This generator yields the
    accumulated answer every time the streamer produces a new chunk, so the
    caller can display the response incrementally.

    Args:
        img: Image to ask about; passed to ``moondream.encode_image``.
            ``None`` (no image supplied) is rejected.
        prompt: Question text forwarded to ``moondream.answer_question``.

    Yields:
        str: The answer generated so far, with surrounding whitespace
        stripped.

    Raises:
        gr.Error: If no image was supplied, or if generation failed on the
            background thread.
    """
    if img is None:
        # Fail with a readable message instead of an error from inside
        # the image encoder.
        raise gr.Error("Please upload an image before submitting.")

    image_embeds = moondream.encode_image(img)
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
    failures = []

    def _generate():
        """Run generation; on failure, record it and unblock the consumer."""
        try:
            moondream.answer_question(
                image_embeds=image_embeds,
                question=prompt,
                tokenizer=tokenizer,
                streamer=streamer,
            )
        except Exception as exc:  # thread boundary: re-raised by the consumer
            failures.append(exc)
            # Without the end signal the loop below would wait on the
            # streamer's queue forever.
            streamer.end()

    thread = Thread(target=_generate)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer.strip()

    # Make sure the worker has fully finished before this call returns.
    thread.join()
    if failures:
        raise gr.Error("Moondream failed to generate an answer.") from failures[0]
41
 
42
 
 
43
with gr.Blocks(theme="Monochrome") as demo:
    # Page header.
    gr.Markdown(
        """
        # AskMoondream: Moondream 2 Demonstration Space
        Moondream2 is a 1.86B parameter model initialized with weights from SigLIP and Phi 1.5.
        Modularity AI presents this open source huggingface space for running fast experimental inferences on Moondream2.
        """
    )

    # First row: the question box (pre-filled with a default prompt) and the
    # submit button.
    with gr.Row():
        prompt = gr.Textbox(label="Input", value="Describe this image.", scale=4)
        submit = gr.Button("Submit")

    # Second row: the image input next to the streamed response.
    with gr.Row():
        img = gr.Image(type="pil", label="Upload an Image")
        output = gr.TextArea(label="Response")

    # Clicking the button and submitting the textbox both run the same
    # handler with the same inputs and output.
    for register in (submit.click, prompt.submit):
        register(fn=answer_question, inputs=[img, prompt], outputs=output)

# Enable request queuing and start the app.
demo.queue().launch()