import spaces
import torch
import gradio as gr
from threading import Thread
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM

import subprocess

# Install FlashAttention at startup; FLASH_ATTENTION_SKIP_CUDA_BUILD skips
# compiling the CUDA extensions during the install.
subprocess.run(
    'pip install flash-attn --no-build-isolation',
    env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True,
)
model_id = "vikhyatk/moondream2"
revision = "2024-05-20"

# Load the moondream2 tokenizer and model, pinned to a specific revision,
# in bfloat16 on the GPU with FlashAttention 2 enabled.
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    revision=revision,
    torch_dtype=torch.bfloat16,
    device_map={"": "cuda"},
    attn_implementation="flash_attention_2",
)
moondream.eval()


@spaces.GPU(duration=10)
def answer_question(img, prompt):
    """Stream the model's answer to `prompt` about the uploaded image."""
    if img is None:
        raise gr.Error("Please upload an image.")

    image_embeds = moondream.encode_image(img)
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    # Run generation in a background thread so tokens can be yielded to the UI
    # as they arrive on the streamer.
    thread = Thread(
        target=moondream.answer_question,
        kwargs={
            "image_embeds": image_embeds,
            "question": prompt,
            "tokenizer": tokenizer,
            "streamer": streamer,
        },
    )
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer.strip()


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # myAI - AMI Vision Module
        A lightweight vision-language model by @vikhyat - 🌔 [moondream2](https://github.com/vikhyat/moondream)
        """
    )
    with gr.Row():
        prompt = gr.Textbox(label="Input", value="Identify people in this image", scale=4)
        submit = gr.Button("Submit")
    with gr.Row():
        img = gr.Image(type="pil", label="Upload an Image")
        output = gr.TextArea(label="Response")

    # Both the Submit button and pressing Enter in the textbox stream the answer.
    submit.click(answer_question, [img, prompt], output)
    prompt.submit(answer_question, [img, prompt], output)

demo.queue().launch()