import torch
import re
import os
import gradio as gr
from threading import Thread
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
from PIL import Image
from io import BytesIO
import base64

SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')

# Regex pattern to match a data URI scheme prefix
data_uri_pattern = re.compile(r'data:image/(png|jpeg|jpg|webp);base64,')

def readb64(b64):
    # Remove any data URI scheme prefix with the regex
    b64 = data_uri_pattern.sub("", b64)

    # Decode and open the image with PIL
    img = Image.open(BytesIO(base64.b64decode(b64)))
    return img

# This version works in the official demo but not in my fork, and I'm not sure why:
#
#import subprocess
#subprocess.run('pip3 install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
#
#model_id = "vikhyatk/moondream2"
#revision = "2024-04-02"
#tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
#moondream = AutoModelForCausalLM.from_pretrained(
#    model_id, trust_remote_code=True, revision=revision,
#    torch_dtype=torch.bfloat16, device_map={"": "cuda"},
#    attn_implementation="flash_attention_2"
#)
#moondream.eval()

# So let's use an older revision instead.
if torch.cuda.is_available():
    device, dtype = "cuda", torch.float16
else:
    device, dtype = "cpu", torch.float32

model_id = "vikhyatk/moondream2"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision="2024-03-06")
moondream = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision="2024-03-06"
).to(device=device, dtype=dtype)
moondream.eval()

def answer_question(secret_token, input, prompt):
    if secret_token != SECRET_TOKEN:
        raise gr.Error(
            'Invalid secret token. Please fork the original space if you want to use it for yourself.')

    img = readb64(input)

    image_embeds = moondream.encode_image(img)

    # Run generation in a background thread; tokens come back through the streamer
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
    thread = Thread(
        target=moondream.answer_question,
        kwargs={
            "image_embeds": image_embeds,
            "question": prompt,
            "tokenizer": tokenizer,
            "streamer": streamer,
        },
    )
    thread.start()

    buffer = ""
    for new_text in streamer:
        # do we really need this? (strips trailing fragments of the "<END" stop token)
        clean_text = re.sub("<$|END$", "", new_text)
        buffer += clean_text
        yield buffer

with gr.Blocks() as demo:
    gr.Markdown("""

This space is a headless component of the cloud rendering engine used by AiTube.

It is not available for public use, but you can use the original space.

""") token = gr.Textbox() input = gr.Textbox() prompt = gr.Textbox() submit = gr.Button() output = gr.Textbox() submit.click(answer_question, [token, input, prompt], output) demo.queue().launch()