# moondream2 / app.py
import spaces
import torch
import gradio as gr
from threading import Thread
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM

import subprocess
# Optional: install flash-attn at startup for faster attention kernels (left disabled here).
# subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
device = torch.device("cuda")
dtype = torch.float32

model_id = "vikhyatk/moondream2"
revision = "2024-05-08"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
moondream = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision
).to(device=device, dtype=dtype)
moondream.eval()
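
# Direct (non-streaming) usage sketch, assuming a PIL image bound to the
# hypothetical name `pil_img`; mirrors the calls made in answer_question below:
#   image_embeds = moondream.encode_image(pil_img)
#   print(moondream.answer_question(image_embeds, "Describe this image.", tokenizer))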

@spaces.GPU(duration=60)
def answer_question(img, prompt):
    # Encode the image once, then run generation in a background thread so
    # tokens can be streamed back to the UI as they are produced.
    image_embeds = moondream.encode_image(img)
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
    thread = Thread(
        target=moondream.answer_question,
        kwargs={
            "image_embeds": image_embeds,
            "question": prompt,
            "tokenizer": tokenizer,
            "streamer": streamer,
        },
    )
    thread.start()

    # Yield the accumulated text on every new token; Gradio re-renders the
    # output component with each yield.
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer.strip()

with gr.Blocks() as demo:
    gr.Markdown(
        """
        # 🌔 moondream2
        A tiny vision language model. [GitHub](https://github.com/vikhyat/moondream)
        """
    )
    with gr.Row():
        prompt = gr.Textbox(label="Input", value="Describe this image.", scale=4)
        submit = gr.Button("Submit")
    with gr.Row():
        img = gr.Image(type="pil", label="Upload an Image")
        output = gr.TextArea(label="Response")

    # Both clicking Submit and pressing Enter in the textbox stream the answer
    # into the response box.
    submit.click(answer_question, [img, prompt], output)
    prompt.submit(answer_question, [img, prompt], output)

demo.queue().launch()