import spaces
import torch
import gradio as gr
from threading import Thread
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM

import subprocess

# Install FlashAttention at startup; FLASH_ATTENTION_SKIP_CUDA_BUILD skips
# compiling the CUDA extensions during the install.
subprocess.run(
    'pip install flash-attn --no-build-isolation',
    env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True,
)
model_id = "vikhyatk/moondream2"
revision = "2024-05-20"

# Load the moondream2 tokenizer and model, pinned to a specific revision,
# in bfloat16 on the GPU with FlashAttention 2 enabled.
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    revision=revision,
    torch_dtype=torch.bfloat16,
    device_map={"": "cuda"},
    attn_implementation="flash_attention_2",
)
moondream.eval()


@spaces.GPU(duration=10)
def answer_question(img, prompt):
    """Stream the model's answer to `prompt` about the uploaded image."""
    if img is None:
        raise gr.Error("Please upload an image.")

    image_embeds = moondream.encode_image(img)
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    # Run generation in a background thread so tokens can be yielded to the UI
    # as they arrive on the streamer.
    thread = Thread(
        target=moondream.answer_question,
        kwargs={
            "image_embeds": image_embeds,
            "question": prompt,
            "tokenizer": tokenizer,
            "streamer": streamer,
        },
    )
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer.strip()


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # myAI - AMI Vision Module
        A lightweight vision-language model by @vikhyat - 🌔 [moondream2](https://github.com/vikhyat/moondream)
        """
    )
    with gr.Row():
        prompt = gr.Textbox(label="Input", value="Identify people in this image", scale=4)
        submit = gr.Button("Submit")
    with gr.Row():
        img = gr.Image(type="pil", label="Upload an Image")
        output = gr.TextArea(label="Response")

    # Both the Submit button and pressing Enter in the textbox stream the answer.
    submit.click(answer_question, [img, prompt], output)
    prompt.submit(answer_question, [img, prompt], output)

demo.queue().launch()