jbilcke-hf's picture
jbilcke-hf HF staff
Update app.py
9cdab82 verified
raw history blame
No virus
2.78 kB
import torch
import re
import os
import gradio as gr
from threading import Thread
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
from PIL import Image
from io import BytesIO
import base64
# Shared secret that callers must present; taken from the SECRET_TOKEN
# environment variable, with a placeholder value when it is unset.
SECRET_TOKEN = os.environ.get('SECRET_TOKEN', 'default_secret')

# Matches the "data:image/<type>;base64," prefix of a data URI for the
# png, jpeg, jpg and webp image types.
data_uri_pattern = re.compile(r'data:image/(png|jpeg|jpg|webp);base64,')
def readb64(b64):
    """Decode a base64-encoded image string and open it as a PIL image.

    Every occurrence of a ``data:image/...;base64,`` prefix matched by
    ``data_uri_pattern`` is removed before the remaining text is decoded.

    Args:
        b64: Base64 text, optionally preceded by a data-URI prefix.

    Returns:
        The image object returned by ``PIL.Image.open``.
    """
    payload = data_uri_pattern.sub("", b64)
    raw_bytes = base64.b64decode(payload)
    return Image.open(BytesIO(raw_bytes))
# Disabled: runtime install of flash-attn, kept for reference (original note: "not sure why").
#import subprocess
#subprocess.run('pip3 install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
# Hugging Face model repository and the dated revision to load.
model_id = "vikhyatk/moondream2"
revision = "2024-04-02"

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

# Loading options: bfloat16 weights, whole model mapped to the "cuda" device,
# FlashAttention-2 attention. trust_remote_code=True is required here because
# the model class is provided by the repository itself.
_model_load_options = {
    "trust_remote_code": True,
    "revision": revision,
    "torch_dtype": torch.bfloat16,
    "device_map": {"": "cuda"},
    "attn_implementation": "flash_attention_2",
}
moondream = AutoModelForCausalLM.from_pretrained(model_id, **_model_load_options)

# Switch to evaluation mode; this app only runs inference.
moondream.eval()
def answer_question(secret_token, input, prompt):
    """Answer a question about a base64-encoded image with the moondream model.

    Args:
        secret_token: Token supplied by the caller; must equal ``SECRET_TOKEN``.
        input: Base64 image text, optionally with a data-URI prefix (decoded
            by ``readb64``). The name shadows the builtin but is kept so that
            existing callers are not broken.
        prompt: Question to ask about the image.

    Returns:
        The generated answer with leading and trailing whitespace removed.

    Raises:
        gr.Error: If ``secret_token`` does not match ``SECRET_TOKEN``.
    """
    import hmac

    # Constant-time comparison so response timing does not leak how much of
    # the token matched. Both sides are encoded to bytes because
    # compare_digest rejects non-ASCII str arguments.
    token_ok = isinstance(secret_token, str) and hmac.compare_digest(
        secret_token.encode("utf-8"), SECRET_TOKEN.encode("utf-8")
    )
    if not token_ok:
        raise gr.Error(
            'Invalid secret token. Please fork the original space if you want to use it for yourself.')

    img = readb64(input)
    image_embeds = moondream.encode_image(img)

    # Generation runs in a worker thread and pushes decoded text into the
    # streamer, which is consumed below on the calling thread.
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
    thread = Thread(
        target=moondream.answer_question,
        kwargs={
            "image_embeds": image_embeds,
            "question": prompt,
            "tokenizer": tokenizer,
            "streamer": streamer,
        },
    )
    thread.start()

    # Collect every streamed chunk into one string.
    answer = "".join(streamer)

    # Wait for the worker so no thread outlives the request.
    thread.join()

    # str.strip() returns a new string; the original discarded that result
    # and returned the unstripped buffer.
    return answer.strip()
# Full-page overlay telling visitors that this space is a headless backend
# and pointing them at the original public space.
_HEADLESS_NOTICE_HTML = """
<div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
<div style="text-align: center; color: black;">
<p style="color: black;">This space is a headless component of the cloud rendering engine used by AiTube.</p>
<p style="color: black;">It is not available for public use, but you can use the <a href="https://huggingface.co/spaces/vikhyatk/moondream2" target="_blank">original space</a>.</p>
</div>
</div>"""

with gr.Blocks() as demo:
    gr.HTML(_HEADLESS_NOTICE_HTML)

    # Input widgets, in the order answer_question expects its arguments.
    token = gr.Textbox()
    input = gr.Textbox()
    prompt = gr.Textbox()
    submit = gr.Button()
    output = gr.Textbox()

    # Clicking the button calls answer_question and shows its return value.
    submit.click(
        fn=answer_question,
        inputs=[token, input, prompt],
        outputs=output,
    )

# Enable request queuing, then start the server.
demo.queue().launch()