myAssistant_moondream_v2

Sleeping

File size: 1,926 Bytes

fda8dae
5781b89
 
 
 
 
1322687
a5ebb82
1322687
5781b89
 
6d0cb8a
12e7969
5781b89
a5ebb82
 
 
 
 
1322687
bac7d5d
5781b89
 
fda8dae
5781b89
a5ebb82
 
 
5781b89
 
 
 
 
 
 
 
 
 
 
 
 
 
6b26249
 
5781b89
 
 
 
485e277
a5ebb82
5781b89
 
 
a5ebb82
5781b89
 
 
 
a5ebb82
5781b89
 
 
a5ebb82

import os
import re
import subprocess
import sys
from threading import Thread

# NOTE(review): `spaces` is kept ahead of `torch`, as in the original ordering —
# presumably it needs to be imported before CUDA-touching libraries; confirm.
import spaces
import gradio as gr
import torch
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM

# Install flash-attn at startup (best effort: a failed install is not fatal here).
# FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE is handed to the installer — presumably so
# the CUDA extension is not compiled locally; confirm against flash-attn's setup.
# The variable is layered on top of the current environment instead of replacing
# it, so pip still sees PATH, HOME, proxy and virtualenv settings, and pip is
# invoked through the running interpreter so the package lands in this environment.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,
)

# Model repository and the pinned revision used for both tokenizer and weights.
model_id = "vikhyatk/moondream2"
revision = "2024-05-20"

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

# Load the model in bfloat16 with every module mapped to "cuda", using the
# flash-attention-2 attention implementation, then switch it to eval mode.
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    revision=revision,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
    device_map={"": "cuda"},
).eval()

@spaces.GPU(duration=10)
def answer_question(img, prompt):
    """Stream moondream's answer to a question about an image.

    Generation runs on a background thread that feeds a
    ``TextIteratorStreamer``; this generator drains the streamer and, after
    each new chunk, yields the whitespace-stripped text accumulated so far.

    Args:
        img: Image to ask about, passed to ``moondream.encode_image``.
            ``None`` means nothing was uploaded.
        prompt: Question text forwarded to ``moondream.answer_question``.

    Yields:
        The whitespace-stripped answer text received so far.

    Raises:
        gr.Error: If ``img`` is ``None``.
    """
    if img is None:
        raise gr.Error("Please upload an image.")

    embeddings = moondream.encode_image(img)
    token_stream = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    generation_kwargs = dict(
        image_embeds=embeddings,
        question=prompt,
        tokenizer=tokenizer,
        streamer=token_stream,
    )
    # Generate on a worker thread so this generator can consume the streamer
    # while tokens are still being produced.
    worker = Thread(target=moondream.answer_question, kwargs=generation_kwargs)
    worker.start()

    chunks = []
    for chunk in token_stream:
        chunks.append(chunk)
        yield "".join(chunks).strip()

# Build the UI: a header, a row with the question box and submit button, and a
# row with the image upload next to the response area.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # myAI - AMI Vision Module
        A lightweight Computer Vision model by @vikhyat - 🌔 [moondream2](https://github.com/vikhyat/moondream)
        """
    )

    with gr.Row():
        prompt = gr.Textbox(
            label="Input",
            value="Identify people in this image",
            scale=4,
        )
        submit = gr.Button("Submit")

    with gr.Row():
        img = gr.Image(type="pil", label="Upload an Image")
        output = gr.TextArea(label="Response")

    # The button's click event and the textbox's submit event share one handler.
    for register in (submit.click, prompt.submit):
        register(fn=answer_question, inputs=[img, prompt], outputs=output)

# Enable request queuing, then start the app.
demo.queue().launch()