import torch
import re
import os
import gradio as gr
from threading import Thread
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
from PIL import Image
from io import BytesIO
import base64
SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
# Regex pattern to match data URI scheme
data_uri_pattern = re.compile(r'data:image/(png|jpeg|jpg|webp);base64,')
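# Illustrative example (hypothetical payload): the pattern matches the prefix of
# "data:image/png;base64,iVBORw0..." so that sub() leaves only the raw base64 payload.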
def readb64(b64):
    # Remove any data URI scheme prefix with regex
    b64 = data_uri_pattern.sub("", b64)
    # Decode and open the image with PIL
    img = Image.open(BytesIO(base64.b64decode(b64)))
    return img
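# Quick round-trip sketch (illustrative only, not executed here): encode a tiny
# PIL image into a data URI and decode it back through readb64.
#   buf = BytesIO()
#   Image.new("RGB", (8, 8)).save(buf, format="PNG")
#   uri = "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()
#   assert readb64(uri).size == (8, 8)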
#
# This version works in the official demo, but not when I fork it, and I'm not sure why.
#
# import subprocess
# subprocess.run('pip3 install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
#
# model_id = "vikhyatk/moondream2"
# revision = "2024-04-02"
# tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
# moondream = AutoModelForCausalLM.from_pretrained(
#     model_id, trust_remote_code=True, revision=revision,
#     torch_dtype=torch.bfloat16, device_map={"": "cuda"},
#     attn_implementation="flash_attention_2"
# )
# moondream.eval()
#
# So let's use an older revision instead.
if torch.cuda.is_available():
    device, dtype = "cuda", torch.float16
else:
    device, dtype = "cpu", torch.float32
model_id = "vikhyatk/moondream2"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision="2024-03-06")
moondream = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision="2024-03-06"
).to(device=device, dtype=dtype)
moondream.eval()
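# Non-streaming usage sketch, for reference (the handler below streams instead;
# "example.jpg" is a placeholder path):
#   img = Image.open("example.jpg")
#   image_embeds = moondream.encode_image(img)
#   print(moondream.answer_question(image_embeds, "Describe this image.", tokenizer))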
def answer_question(secret_token, image_base64, prompt):
    if secret_token != SECRET_TOKEN:
        raise gr.Error(
            'Invalid secret token. Please fork the original space if you want to use it for yourself.')
    img = readb64(image_base64)
    image_embeds = moondream.encode_image(img)
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
    # Run generation on a background thread so we can consume the streamer here
    thread = Thread(
        target=moondream.answer_question,
        kwargs={
            "image_embeds": image_embeds,
            "question": prompt,
            "tokenizer": tokenizer,
            "streamer": streamer,
        },
    )
    thread.start()
    buffer = ""
    for new_text in streamer:
        # Strip stray end-of-sequence markers the model can emit mid-stream
        clean_text = re.sub("<$|<END$", "", new_text)
        buffer += clean_text
    return buffer.strip()
with gr.Blocks() as demo:
    gr.HTML("""
    <div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
        <div style="text-align: center; color: black;">
            <p style="color: black;">This space is a headless component of the cloud rendering engine used by AiTube.</p>
            <p style="color: black;">It is not available for public use, but you can use the <a href="https://huggingface.co/spaces/vikhyatk/moondream2" target="_blank">original space</a>.</p>
        </div>
    </div>""")

    # Headless API surface: secret token, base64 image, and prompt in; answer out
    token = gr.Textbox()
    image_input = gr.Textbox()
    prompt = gr.Textbox()
    submit = gr.Button()
    output = gr.Textbox()

    submit.click(answer_question, [token, image_input, prompt], output)

demo.queue().launch()
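# Calling the space programmatically, a sketch assuming the gradio_client package
# (the three positional inputs map to token, base64 image, and prompt above):
#   from gradio_client import Client
#   client = Client("http://localhost:7860/")
#   result = client.predict(
#       "default_secret",              # must match SECRET_TOKEN
#       "data:image/png;base64,...",   # base64-encoded image (elided)
#       "Describe this image.",
#       fn_index=0,
#   )
#   print(result)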