import torch import re import os import gradio as gr from threading import Thread from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM from PIL import Image from io import BytesIO import base64 SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret') # Regex pattern to match data URI scheme data_uri_pattern = re.compile(r'data:image/(png|jpeg|jpg|webp);base64,') def readb64(b64): # Remove any data URI scheme prefix with regex b64 = data_uri_pattern.sub("", b64) # Decode and open the image with PIL img = Image.open(BytesIO(base64.b64decode(b64))) return img # not sure why #import subprocess #subprocess.run('pip3 install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True) model_id = "vikhyatk/moondream2" revision = "2024-04-02" tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision) moondream = AutoModelForCausalLM.from_pretrained( model_id, trust_remote_code=True, revision=revision, torch_dtype=torch.bfloat16, device_map={"": "cuda"}, attn_implementation="flash_attention_2" ) moondream.eval() def answer_question(secret_token, input, prompt): if secret_token != SECRET_TOKEN: raise gr.Error( f'Invalid secret token. Please fork the original space if you want to use it for yourself.') img = readb64(input) image_embeds = moondream.encode_image(img) streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True) thread = Thread( target=moondream.answer_question, kwargs={ "image_embeds": image_embeds, "question": prompt, "tokenizer": tokenizer, "streamer": streamer, }, ) thread.start() buffer = "" for new_text in streamer: buffer += new_text buffer.strip() return buffer with gr.Blocks() as demo: gr.HTML("""
This space is a headless component of the cloud rendering engine used by AiTube.
It is not available for public use, but you can use the original space.