File size: 4,014 Bytes
5e3a4bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99252ee
 
5e3a4bc
 
 
0332d7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e3a4bc
99252ee
 
5e3a4bc
 
562a177
 
5e3a4bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba5d9a3
5e3a4bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bcbabd5
5e3a4bc
 
 
 
 
 
 
 
 
 
 
 
 
14ef04c
5e3a4bc
 
 
 
 
562a177
0332d7d
5e3a4bc
 
 
 
 
 
62e19d9
5e3a4bc
 
 
 
 
 
 
 
 
df2fefd
5e3a4bc
 
 
 
 
 
99252ee
5e3a4bc
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
)
import os
from threading import Thread
import spaces
import time
import subprocess

# HTML rendered in the chat area while the conversation is still empty
# (handed to gr.Chatbot as its ``placeholder``).
PLACEHOLDER = """
<div style="padding: 40px; text-align: center; display: flex; flex-direction: column; align-items: center;">
   <img src="https://i.imgur.com/yu0sVwC.png" style="width: 90%; max-width: 650px; height: auto; opacity: 0.8; border-radius: 20px;"> 
   <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Fammi una domanda!</p>
</div>
"""

# Custom stylesheet passed to gr.ChatInterface: message-row layout, bubble
# corner radius, and colour overrides for the dark theme.
# NOTE(review): selectors such as ``.dark.user`` only match an element that
# carries BOTH classes; if ``dark`` is set on an ancestor element, a
# descendant selector (``.dark .user``) would be needed -- confirm against
# the rendered DOM.
css = """
.message-row {
    justify-content: space-evenly !important;
}
.message-bubble-border {
    border-radius: 6px !important;
}
.dark.message-bubble-border {
    border-color: #21293b !important;
}
.dark.user {
    background: #0a1120 !important;
}
.dark.assistant {
    background: transparent !important;
}
"""

# HTML blurb used as the interface description; links to the model card.
DESCRIPTION = """<div>
<p>🇮🇹 Italian LLM <a href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta"><b>Maestrale Chat v0.4 beta</b></a>. Maestrale is a powerful language model for Italian, trained by mii-llm, based on Mistral 7B.</p>
<p>🔎 For more details about Maestrale and how to use it with <code>transformers</code>, visit the <a href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta">model card</a>.</p>
</div>"""

# Load the tokenizer and the bfloat16 weights of the Maestrale checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    "mii-llm/maestrale-chat-v0.4-beta"
)
model = AutoModelForCausalLM.from_pretrained(
    "mii-llm/maestrale-chat-v0.4-beta",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Token ids passed to generate() as ``eos_token_id``: the tokenizer's own EOS
# token plus the "<|im_end|>" marker.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|im_end|>"),
]

# Pick CUDA when it is available, otherwise fall back to the CPU, and report
# which one was chosen.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print(f"Using GPU: {torch.cuda.get_device_name(device)}")
else:
    print("Using CPU")


model = model.to(device)


@spaces.GPU()
def chat(message, history, system, temperature, do_sample, max_tokens):
    """Stream the model's reply to ``message`` as progressively longer text.

    Args:
        message: The new user message.
        history: Previous turns as ``(user, assistant)`` pairs.
        system: System prompt; left out of the conversation when empty.
        temperature: Sampling temperature. Only forwarded to ``generate``
            when sampling is active; a value of 0 selects greedy decoding.
        do_sample: Whether to sample instead of decoding greedily.
        max_tokens: Upper bound on the number of newly generated tokens.

    Yields:
        The reply accumulated so far, once per streamed chunk, and one more
        time after the stream has ended.
    """
    conversation = [{"role": "system", "content": system}] if system else []
    for user_turn, assistant_turn in history:
        conversation.append({"role": "user", "content": user_turn})
        conversation.append({"role": "assistant", "content": assistant_turn})
    conversation.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([prompt], return_tensors="pt").to(device)

    streamer = TextIteratorStreamer(
        tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
    )

    # transformers rejects temperature == 0 when sampling is enabled (it must
    # be strictly positive), and the UI slider allows 0, so treat that
    # combination as a request for greedy decoding.
    sampling = bool(do_sample) and temperature > 0

    generate_kwargs = {
        **model_inputs,
        "streamer": streamer,
        # The value comes from a UI slider; make sure it is an integer count.
        "max_new_tokens": int(max_tokens),
        "do_sample": sampling,
        "eos_token_id": terminators,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if sampling:
        # Only pass the temperature when it is actually used, so greedy runs
        # do not carry an ignored sampling parameter.
        generate_kwargs["temperature"] = temperature

    # generate() runs in a worker thread and feeds the streamer, while this
    # generator consumes the streamer and relays text to the caller.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text

    # Final yield kept from the original so the caller always receives at
    # least one value, even if the stream produced no text.
    yield partial_text


# Conversation panel; shows PLACEHOLDER until the first message arrives.
chatbot = gr.Chatbot(
    height=550,
    placeholder=PLACEHOLDER,
    label="Conversazione",
    show_copy_button=True,
)

# Collapsed accordion that groups the generation parameters.
parameters_accordion = gr.Accordion(
    label="⚙️ Parametri", open=False, render=False
)

# Extra inputs, listed in the order chat() receives them after
# (message, history): system, temperature, do_sample, max_tokens.
parameter_inputs = [
    gr.Textbox(label="System", value="Sei un assistente utile."),
    gr.Slider(
        label="Temperature",
        minimum=0,
        maximum=1,
        step=0.1,
        value=0.7,
        render=False,
    ),
    gr.Checkbox(label="Sampling", value=True),
    gr.Slider(
        label="Max new tokens",
        minimum=128,
        maximum=4096,
        step=1,
        value=768,
        render=False,
    ),
]

# Assemble the chat UI around chat() and the widgets defined above.
demo = gr.ChatInterface(
    fn=chat,
    chatbot=chatbot,
    additional_inputs=parameter_inputs,
    additional_inputs_accordion=parameters_accordion,
    title="Maestrale Chat v0.4 beta",
    description=DESCRIPTION,
    theme=gr.themes.Soft(),
    css=css,
    fill_height=True,
    stop_btn="Stop Generation",
    cache_examples=False,
)

demo.launch()