ImageToText / app.py
Kartik2503's picture
remove use via API button
92d7bf9 verified
import spaces
import argparse
import torch
import re
import gradio as gr
from threading import Thread
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
# The parser is created but no arguments are registered or parsed in this section.
parser = argparse.ArgumentParser()

# Use float16 on a CUDA device when one is available, otherwise float32 on CPU.
device, dtype = (
    ("cuda", torch.float16) if torch.cuda.is_available() else ("cpu", torch.float32)
)

# Tokenizer and model weights are loaded from the same pinned revision.
model_id = "vikhyatk/moondream2"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision="2024-03-06")
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    revision="2024-03-06",
)
moondream = moondream.to(device=device, dtype=dtype)
moondream.eval()  # put the module in evaluation mode; this script only runs inference
# Matches a dangling "<" or "<END" at the very end of a streamed chunk.
_TRAILING_END_MARKER = re.compile(r"<$|<END$")


@spaces.GPU(duration=10)
def answer_question(img, prompt):
    """Stream the model's answer to ``prompt`` about ``img``.

    The image is encoded once, then ``moondream.answer_question`` runs in a
    background thread and pushes decoded text into a ``TextIteratorStreamer``
    while this generator relays the growing answer to the caller.

    Args:
        img: Image handed to ``moondream.encode_image``. The Gradio image
            input passes ``None`` when nothing has been uploaded.
        prompt: Question text forwarded to the model.

    Yields:
        str: The answer accumulated so far, extended with each streamed chunk.

    Raises:
        gr.Error: If ``img`` is ``None``, so the UI shows a readable message
            instead of an internal failure from the image encoder.
    """
    if img is None:
        raise gr.Error("Please upload an image before submitting.")

    image_embeds = moondream.encode_image(img)
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    # Generation blocks until it finishes, so it runs on a worker thread and
    # this generator consumes the streamer concurrently.
    thread = Thread(
        target=moondream.answer_question,
        kwargs={
            "image_embeds": image_embeds,
            "question": prompt,
            "tokenizer": tokenizer,
            "streamer": streamer,
        },
    )
    thread.start()

    buffer = ""
    for new_text in streamer:
        # Drop a partial end marker from the tail of the chunk before showing it.
        buffer += _TRAILING_END_MARKER.sub("", new_text)
        yield buffer

    # The stream is exhausted, so generation is done; wait for the worker to
    # exit before this function returns.
    thread.join()
# Page layout: the first row holds the question box and the submit button,
# the second row holds the image input and the response area.
with gr.Blocks() as demo:
    with gr.Row():
        prompt = gr.Textbox(label="Input", placeholder="Type here...", scale=4)
        submit = gr.Button("Submit")
    with gr.Row():
        img = gr.Image(type="pil", label="Upload an Image")
        output = gr.TextArea(label="Response")

    # Clicking the button and submitting the textbox both run the same handler.
    submit.click(
        fn=answer_question,
        inputs=[img, prompt],
        outputs=output,
    )
    prompt.submit(
        fn=answer_question,
        inputs=[img, prompt],
        outputs=output,
    )

# Queue requests and launch with the API closed and the API page hidden.
demo.queue(api_open=False).launch(show_api=False)