Build

Paused

App Files Files Community

Build / app1.py

ManishThota

Rename app.py to app1.py

cf454cd verified 8 months ago

raw

history blame contribute delete

1.81 kB

	import spaces
	import argparse
	import torch
	import re
	import gradio as gr
	from threading import Thread
	from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM

	# NOTE(review): the parser is created here, but the visible code never adds
	# arguments to it or parses anything with it.
	parser = argparse.ArgumentParser()

	# Half precision when CUDA is available, full precision on the CPU fallback.
	device, dtype = (
	    ("cuda", torch.float16)
	    if torch.cuda.is_available()
	    else ("cpu", torch.float32)
	)

	model_id = "vikhyatk/moondream2"
	# Tokenizer and weights are both pinned to the same repository revision.
	tokenizer = AutoTokenizer.from_pretrained(model_id, revision="2024-03-06")
	moondream = AutoModelForCausalLM.from_pretrained(
	    model_id, trust_remote_code=True, revision="2024-03-06"
	)
	# Move the loaded model to the selected device and dtype, then put it in
	# evaluation mode.
	moondream = moondream.to(device=device, dtype=dtype)
	moondream.eval()


	# Matches a dangling "<" or "<END" at the very end of a streamed chunk
	# (presumably pieces of the model's end-of-answer marker -- confirm against
	# the model's output). The unescaped "|" is regex alternation.
	_STOP_FRAGMENT_RE = re.compile(r"<$|<END$")


	@spaces.GPU(duration=10)
	def answer_question(img, prompt):
	    """Stream an answer to ``prompt`` about ``img`` as it is generated.

	    The image is encoded once, then ``moondream.answer_question`` runs on a
	    background thread and pushes decoded text into a
	    ``TextIteratorStreamer`` that this generator drains.

	    Args:
	        img: Image handed to ``moondream.encode_image``. ``None`` is
	            rejected up front.
	        prompt: Question text forwarded to the model.

	    Yields:
	        The whole answer accumulated so far (not just the newest chunk).

	    Raises:
	        gr.Error: If ``img`` is ``None``.
	    """
	    if img is None:
	        # Fail with a readable message instead of an error from inside the
	        # image encoder.
	        raise gr.Error("Please upload an image before submitting.")

	    image_embeds = moondream.encode_image(img)
	    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
	    thread = Thread(
	        target=moondream.answer_question,
	        kwargs={
	            "image_embeds": image_embeds,
	            "question": prompt,
	            "tokenizer": tokenizer,
	            "streamer": streamer,
	        },
	    )
	    thread.start()

	    buffer = ""
	    for new_text in streamer:
	        # Drop a trailing "<" / "<END" fragment before showing the chunk.
	        buffer += _STOP_FRAGMENT_RE.sub("", new_text)
	        yield buffer

	    # The streamer has been fully drained; wait for the worker thread so it
	    # is not left dangling after the generator returns.
	    thread.join()


	with gr.Blocks() as demo:
	    # Logo image followed by the title banner.
	    gr.Image("data/redhen.ico")
	    gr.Markdown(
	        """
	        # Super Rapid Annotator - Multimodal vision tool to annotate videos with LLaVA framework
	        """
	    )

	    # First row: the question box and its submit button.
	    with gr.Row():
	        prompt = gr.Textbox(label="Input", placeholder="Type here...", scale=4)
	        submit = gr.Button("Submit")

	    # Second row: the image upload next to the response area.
	    with gr.Row():
	        img = gr.Image(type="pil", label="Upload an Image")
	        output = gr.TextArea(label="Response")

	    # The button's click event and the textbox's submit event both run the
	    # same handler with the same inputs and output.
	    for trigger in (submit.click, prompt.submit):
	        trigger(fn=answer_question, inputs=[img, prompt], outputs=output)

	# Enable the request queue and start the app.
	demo.queue().launch()