# Csplk's picture
# Rename app.py to batchapp.py
# 51972f7 verified
# raw
# history blame
# 1.5 kB
import spaces
import argparse
import torch
import re
import gradio as gr
from threading import Thread
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
from PIL import Image
# Command-line parser created at import time; no arguments are registered
# and nothing is parsed in this part of the file.
parser = argparse.ArgumentParser()

# Hugging Face repository of the model and the revision it is pinned to, so
# the tokenizer and the weights always come from the same snapshot.
model_id = "vikhyat/moondream2"
revision = "2024-04-02"

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

# trust_remote_code is needed because the model class is loaded from code
# shipped with the repository; the weights are requested in float32.
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    revision=revision,
    trust_remote_code=True,
    torch_dtype=torch.float32,
)

# Inference only: put the module in evaluation mode.
moondream.eval()
@spaces.GPU(duration=10)
def answer_question(images, prompts):
    """Answer one prompt per image with the moondream model.

    Args:
        images: A single PIL image or an iterable of images accepted by
            ``moondream.encode_image``. ``None`` or an empty iterable
            produces no answers.
        prompts: A single prompt string or an iterable of prompt strings.
            When exactly one prompt is given for several images, it is
            reused for every image.

    Returns:
        A list of the answers returned by ``moondream.batch_answer``; an
        empty list when there is nothing to answer.
    """
    if images is None or prompts is None:
        return []

    # A Gradio image component hands over one bare PIL image, which is not
    # iterable; wrap it so the batch code path below still applies.
    if isinstance(images, Image.Image):
        images = [images]
    else:
        images = list(images)

    # A bare string would otherwise be treated as a sequence of
    # one-character prompts.
    if isinstance(prompts, str):
        prompts = [prompts]
    else:
        prompts = list(prompts)

    # torch.cat raises on an empty list, so bail out early.
    if not images or not prompts:
        return []

    # One question asked about several images: repeat it per image.
    if len(prompts) == 1 and len(images) > 1:
        prompts = prompts * len(images)

    image_embeds = torch.cat(
        [moondream.encode_image(img) for img in images],
        dim=0,
    )

    # NOTE(review): the pre-computed embeddings are passed as ``images``;
    # confirm that this model revision's ``batch_answer`` accepts
    # embeddings here rather than raw images.
    answers = moondream.batch_answer(
        images=image_embeds,
        prompts=prompts,
        tokenizer=tokenizer,
    )
    return list(answers)
def _respond(image, prompt):
    """Bridge the single-image UI to the batch-oriented ``answer_question``.

    The image and textbox components each deliver one value, while
    ``answer_question`` works on sequences and returns a list. Wrap the
    inputs in one-element lists and join the answers into the single string
    the response textbox displays.

    Args:
        image: The uploaded PIL image, or ``None`` when nothing is uploaded.
        prompt: The text typed into the input box.

    Returns:
        The answers joined by newlines, or an empty string when no image
        has been provided.
    """
    if image is None:
        return ""
    return "\n".join(str(answer) for answer in answer_question([image], [prompt]))


with gr.Blocks() as demo:
    # Page heading; the emoji was previously stored as mis-decoded bytes.
    gr.Markdown(
        "\n"
        "# 🌔 moondream2\n"
        "A tiny vision language model. [GitHub](https://github.com/vikhyat/moondream)\n"
    )
    with gr.Row():
        prompts = gr.Textbox(label="Input", placeholder="Type here...", scale=4)
        submit = gr.Button("Submit")
    with gr.Row():
        # Neither gr.Image nor gr.Textbox defines a ``multiple`` parameter,
        # so it is not passed.
        images = gr.Image(type="pil", label="Upload Images")
        output = gr.Textbox(label="Response")

    # Both the button and pressing Enter in the textbox run the model.
    submit.click(_respond, [images, prompts], output)
    prompts.submit(_respond, [images, prompts], output)

demo.queue().launch()