File size: 2,657 Bytes
b9696cd
477ae0a
 
b9696cd
b729344
b9696cd
477ae0a
 
 
 
 
b9696cd
 
b729344
477ae0a
 
 
 
 
 
 
 
314c413
 
 
 
 
 
 
477ae0a
 
 
 
 
 
 
 
 
b9696cd
477ae0a
 
 
 
 
 
b9696cd
477ae0a
 
b9696cd
314c413
 
b9696cd
 
477ae0a
 
 
b9696cd
477ae0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b9696cd
477ae0a
b9696cd
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import gradio as gr
import torch
from transformers import AutoProcessor, AutoModelForVision2Seq

from web_data import custom_css, header, footer

# Single source of truth for the checkpoint id, so the processor and the
# model are always loaded from the same repository.
_CHECKPOINT = "HuggingFaceTB/SmolVLM-256M-Instruct"

# Both objects are created once at import time and shared by every request.
processor = AutoProcessor.from_pretrained(_CHECKPOINT)
model = AutoModelForVision2Seq.from_pretrained(_CHECKPOINT, torch_dtype=torch.bfloat16)


def _text_message(role, text):
    """Build a chat message whose content is a single typed text part."""
    return {"role": role, "content": [{"type": "text", "text": text}]}


def respond(message, history: list[tuple[str, str]], image, system_message):
    """Generate the assistant's reply for one chat turn.

    Args:
        message: Text of the user's current turn.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs. A
            falsy entry on either side of a pair is skipped.
        image: Optional image attached to the current turn, or ``None``.
            It is resized with ``image.resize`` before being handed to the
            processor.
        system_message: Text placed in the leading system message.

    Yields:
        The reply text, exactly once.
    """
    messages = [_text_message("system", system_message)]

    # Every message uses the same typed-parts layout. Earlier turns used to be
    # appended with a bare string as ``content``, which is inconsistent with
    # the system/current-user messages and is not rendered as text by chat
    # templates that iterate over typed content parts.
    for user_text, assistant_text in history:
        if user_text:
            messages.append(_text_message("user", user_text))
        if assistant_text:
            messages.append(_text_message("assistant", assistant_text))

    has_image = image is not None
    user_message = _text_message("user", message)
    if has_image:
        # Placeholder part; the actual pixels are passed to the processor below.
        user_message["content"].append({"type": "image"})
    messages.append(user_message)

    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    if has_image:
        # NOTE(review): 32x32 discards most visual detail -- presumably a
        # latency trade-off for a small demo; confirm before changing.
        resized_image = image.resize((32, 32))
        inputs = processor(text=prompt, images=[resized_image], return_tensors="pt")
    else:
        inputs = processor(text=prompt, return_tensors="pt")

    generated_ids = model.generate(**inputs, max_new_tokens=500)

    # Decode only the tokens produced after the prompt. Splitting the full
    # decoded text on 'Assistant:' would cut off any reply that happens to
    # contain that marker itself.
    prompt_length = inputs["input_ids"].shape[1]
    reply_ids = generated_ids[:, prompt_length:]
    generated_texts = processor.batch_decode(reply_ids, skip_special_tokens=True)

    response = generated_texts[0].strip()
    yield response


# ====== UI layout (the code below this line was produced by vibe coding) ======
# Two-column page: a narrow control panel (system prompt + image upload) on the
# left and the chat itself on the right, framed by the HTML header and footer.
with gr.Blocks(theme=gr.themes.Glass(), css=custom_css) as demo:
    gr.HTML(header)

    with gr.Row(variant="panel"):
        with gr.Column(scale=1):
            # Labels previously contained mojibake (UTF-8 emoji bytes decoded
            # with a single-byte code page); the intended emoji are restored.
            with gr.Accordion("🌀 CONTROL PANEL", open=True):
                system_message = gr.Textbox(
                    value="You're a cool AI that speaks Gen-Z slang 😎",
                    label="🤖 BOT PERSONALITY",
                    lines=2,
                    max_lines=4,
                    elem_classes="cyber-input"
                )
                image_input = gr.Image(
                    type="pil",
                    label="📸 UPLOAD PIC",
                    height=200,
                    elem_classes="glow-border"
                )

        with gr.Column(scale=3):
            # The order of additional_inputs must match the extra parameters of
            # respond(message, history, image, system_message).
            chat_interface = gr.ChatInterface(
                respond,
                additional_inputs=[image_input, system_message],
                submit_btn="📤 SEND",
            )

    gr.HTML(footer)

if __name__ == "__main__":
    demo.launch()