File size: 2,775 Bytes
5781b89
 
4a5cedb
5781b89
 
 
f6b324c
4a5cedb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9cdab82
 
 
5781b89
 
b35f285
12e7969
5781b89
60e7a28
1322687
 
bac7d5d
5781b89
 
4a5cedb
 
 
 
 
 
 
5781b89
4a5cedb
5781b89
4a5cedb
5781b89
 
 
 
 
 
 
 
 
 
 
 
 
6b26249
4a5cedb
 
5781b89
4a5cedb
5781b89
 
4a5cedb
 
 
 
 
 
 
 
 
 
 
 
 
5781b89
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import torch
import re
import os
import gradio as gr
from threading import Thread
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM

from PIL import Image
from io import BytesIO
import base64

# Shared secret that API callers must present; configured via the SECRET_TOKEN
# environment variable (falls back to a placeholder for local development).
SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')

# Matches the "data:image/<fmt>;base64," prefix of a data URI so it can be
# stripped before base64-decoding the image payload (used by readb64).
data_uri_pattern = re.compile(r'data:image/(png|jpeg|jpg|webp);base64,')

def readb64(b64):
    """Decode a base64-encoded image string into a PIL Image.

    A leading ``data:image/...;base64,`` URI prefix, if present, is
    stripped first, so both raw base64 payloads and full data URIs are
    accepted.
    """
    payload = data_uri_pattern.sub("", b64)
    raw_bytes = base64.b64decode(payload)
    return Image.open(BytesIO(raw_bytes))


# NOTE(review): leftover attempt at installing flash-attn at runtime, kept
# disabled. The flash_attention_2 implementation requested below presumably
# comes from a pre-installed wheel instead — confirm in the deployment image.
#import subprocess
#subprocess.run('pip3 install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)

# Load the moondream2 vision-language model pinned to a fixed revision so
# upstream changes to the remote code cannot silently alter behavior.
model_id = "vikhyatk/moondream2"
revision = "2024-04-02"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
# bfloat16 weights placed entirely on one CUDA device, with the
# flash-attention-2 kernel (requires a compatible GPU + flash-attn package).
moondream = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision,
    torch_dtype=torch.bfloat16, device_map={"": "cuda"},
    attn_implementation="flash_attention_2"
)
# Inference only: switch off training-time behavior such as dropout.
moondream.eval()

def answer_question(secret_token, input, prompt):
    """Answer *prompt* about a base64-encoded image, gated by a shared secret.

    Parameters
    ----------
    secret_token : str
        Must equal SECRET_TOKEN, otherwise a gr.Error is raised.
    input : str
        Base64-encoded image, optionally prefixed with a data URI scheme.
    prompt : str
        The question to ask the model about the image.

    Returns
    -------
    str
        The model's complete answer with surrounding whitespace removed.

    Raises
    ------
    gr.Error
        If the provided secret token does not match SECRET_TOKEN.
    """
    if secret_token != SECRET_TOKEN:
        # Was an f-string with no placeholders; a plain literal is equivalent.
        raise gr.Error(
            'Invalid secret token. Please fork the original space if you want to use it for yourself.')

    img = readb64(input)
    image_embeds = moondream.encode_image(img)

    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    # Generation runs in a worker thread; the streamer yields text chunks
    # here on the main thread as they are produced.
    thread = Thread(
        target=moondream.answer_question,
        kwargs={
            "image_embeds": image_embeds,
            "question": prompt,
            "tokenizer": tokenizer,
            "streamer": streamer,
        },
    )
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text

    # Ensure the generation thread has fully finished before returning.
    thread.join()

    # BUG FIX: str.strip() returns a new string; the original discarded the
    # result and returned the unstripped buffer.
    return buffer.strip()

# Headless Gradio UI: a full-screen overlay tells human visitors that the
# space is private, while the widgets underneath stay reachable via the API.
# The top-level name `demo` is kept — Hugging Face Spaces discovers it.
with gr.Blocks() as demo:
    gr.HTML("""
        <div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
        <div style="text-align: center; color: black;">
        <p style="color: black;">This space is a headless component of the cloud rendering engine used by AiTube.</p>
        <p style="color: black;">It is not available for public use, but you can use the <a href="https://huggingface.co/spaces/vikhyatk/moondream2" target="_blank">original space</a>.</p>
        </div>
        </div>""")
    secret_box = gr.Textbox()
    image_b64_box = gr.Textbox()
    prompt_box = gr.Textbox()
    run_button = gr.Button()
    answer_box = gr.Textbox()
    run_button.click(
        answer_question,
        [secret_box, image_b64_box, prompt_box],
        answer_box,
    )

demo.queue().launch()