File size: 3,263 Bytes
2ac2001
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cff67c8
2ac2001
 
 
 
babdbe7
143381d
2ac2001
 
babdbe7
 
2ac2001
 
 
 
586fe20
babdbe7
2ac2001
 
 
2bf6cea
104d077
2ac2001
 
 
 
 
 
8ea9610
babdbe7
 
 
b111794
babdbe7
 
2ac2001
 
 
02ca315
818cb74
2ac2001
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import gradio as gr
import os
import time
import spaces
import torch
import re
import gradio as gr
from threading import Thread
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
from PIL import Image

import subprocess
import sys

# Install flash-attn at startup. FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE is passed
# to the installer through the environment. The variable is merged into a copy
# of the current environment: passing a bare dict as ``env`` would replace the
# whole environment, dropping PATH, HOME, proxy settings, etc.
# The command is given as an argument list (no shell) and run through the
# current interpreter so the package lands in the environment this script uses.
_flash_attn_install = subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,  # best effort, as before: a failed install does not abort here
)
if _flash_attn_install.returncode != 0:
    print(
        "Warning: installing flash-attn failed with exit code "
        f"{_flash_attn_install.returncode}"
    )

# Checkpoint to load, pinned to a fixed revision.
model_id = "vikhyatk/moondream2"
revision = "2024-04-02"

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

# Load the model in bfloat16 on the CUDA device using the flash-attention-2
# attention implementation, then switch it to evaluation mode.
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    revision=revision,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
    device_map={"": "cuda"},
).eval()
def print_like_dislike(x: gr.LikeData):
    """Print a like/dislike event to stdout.

    Writes the event's ``index``, ``value`` and ``liked`` attributes on one
    line, separated by spaces.
    """
    feedback = (x.index, x.value, x.liked)
    print(*feedback)

def add_message(history, message):
    """Record the submitted files and text as new rows of the chat history.

    Args:
        history: List of chat rows; it is extended in place.
        message: Mapping with a ``"files"`` entry (iterable of file paths)
            and a ``"text"`` entry (string or ``None``).

    Returns:
        A pair of the updated history and a ``gr.MultimodalTextbox`` with its
        value cleared and interaction disabled.
    """
    # Each uploaded file becomes its own row: a one-element tuple holding the
    # path, with no reply yet.
    attachments = message["files"]
    if attachments:
        history.extend(((path,), None) for path in attachments)

    # The text (when present) goes last, so it is the newest row.
    text = message["text"]
    if text is not None:
        history.append((text, None))

    locked_box = gr.MultimodalTextbox(value=None, interactive=False)
    return history, locked_box
@spaces.GPU(duration=10)
def bot(history):
    """Answer the newest chat message with Moondream and stream the reply.

    The last five history rows are scanned oldest to newest. The most recent
    image row (first element is a tuple whose first item is a string path)
    and the most recent text row (first element is a string) are kept. The
    question is answered against that image; when no image is found an
    all-zero embedding tensor is passed instead.

    Args:
        history: List of ``[user_content, bot_reply]`` rows. The last row's
            reply is overwritten with the generated answer.

    Yields:
        The history after each character is appended to the last row's
        reply. An empty history is yielded once, unchanged.
    """
    if not history:
        # Nothing to answer; indexing history[-1] below would raise.
        yield history
        return

    image_path = None
    last_message = None
    # Forward scan: later rows overwrite earlier ones, so the newest image
    # and the newest text among the last five rows win.
    for message in history[-5:]:
        content = message[0]
        if isinstance(content, tuple) and content and isinstance(content[0], str):
            image_path = content[0]
        elif isinstance(content, str):
            last_message = content

    if image_path:
        try:
            # The context manager closes the underlying file once the image
            # has been encoded, instead of leaking the handle.
            with Image.open(image_path) as image:
                image_embeds = moondream.encode_image(image)
            print(image_embeds.shape)
            response = moondream.answer_question(image_embeds, last_message, tokenizer)
        except OSError:
            response = "Failed to open image. Please check the image path or file permissions."
    else:
        # No image in the recent rows: use a zero tensor as the embedding.
        # NOTE(review): (1, 729, 2048) presumably matches the shape returned
        # by encode_image for this revision - confirm before changing models.
        image_embeds = torch.zeros(1, 729, 2048, dtype=torch.bfloat16, device='cuda')
        response = moondream.answer_question(image_embeds, last_message, tokenizer)

    # Rebuild the last row as a list: add_message appends rows as tuples,
    # which do not support item assignment.
    history[-1] = [history[-1][0], ""]
    for character in response:
        history[-1][1] += character
        yield history

# Build the chat interface: a heading, the chatbot pane and a multimodal
# input box, wired so that each submission is recorded, answered, and the
# input box is re-enabled afterwards.
with gr.Blocks(
    theme=gr.themes.Default(primary_hue="green", secondary_hue="emerald"),
) as demo:
    gr.Markdown(
        """
        # AskMoondream: Moondream 2 Demonstration Space
        ## Modularity AI presents this open source huggingface space for running fast experimental inferences on Moondream2.
        """
    )
    chatbot = gr.Chatbot(
        value=[],
        elem_id="chatbot",
        bubble_full_width=False,
        height=550,
    )

    chat_input = gr.MultimodalTextbox(
        interactive=True,
        file_types=["image"],
        placeholder="Enter message or upload file...",
        show_label=False,
    )

    # Step 1: store the user's turn (add_message also returns a disabled box).
    chat_msg = chat_input.submit(
        fn=add_message,
        inputs=[chatbot, chat_input],
        outputs=[chatbot, chat_input],
    )
    # Step 2: stream the model's reply into the chatbot.
    bot_msg = chat_msg.then(
        fn=bot,
        inputs=chatbot,
        outputs=chatbot,
        api_name="bot_response",
    )
    # Step 3: make the input box interactive again.
    bot_msg.then(
        fn=lambda: gr.MultimodalTextbox(interactive=True),
        inputs=None,
        outputs=[chat_input],
    )

    chatbot.like(fn=print_like_dislike, inputs=None, outputs=None)

demo.queue().launch()