File size: 6,674 Bytes
4a27403
 
 
 
50a4065
4a27403
 
 
 
f1beb5e
 
4a27403
caca471
 
 
4a27403
 
 
 
 
 
 
d42be36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a27403
caca471
ba177eb
 
 
4a27403
 
 
 
 
 
 
 
 
 
 
d9f8d6b
ba177eb
4a27403
 
 
 
 
 
 
 
 
 
 
 
 
caca471
4a27403
 
 
 
 
 
 
 
 
 
d42be36
4a27403
 
 
 
 
 
 
 
 
 
 
93fd4c4
f1beb5e
93fd4c4
f1beb5e
93fd4c4
f1beb5e
93fd4c4
4a27403
 
 
 
 
d42be36
4a27403
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1affa63
 
4a27403
1affa63
4a27403
 
 
 
 
 
 
ab4b64a
4a27403
 
 
 
 
 
 
 
 
f1beb5e
4a27403
 
 
d9f8d6b
4a27403
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ddb02b4
4a27403
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b19f953
25097e6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
import gradio as gr

import os

from huggingface_hub.file_download import http_get
from llama_cpp import Llama


SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."


def get_message_tokens(model, role, content):
    content = f"{role}\n{content}\n</s>"
    content = content.encode("utf-8")
    return model.tokenize(content, special=True)


def get_system_tokens(model):
    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    return get_message_tokens(model, **system_message)


def load_model(
    directory: str = ".",
    model_name: str = "model-q4_K.gguf",
    model_url: str = "https://huggingface.co/IlyaGusev/saiga_mistral_7b_gguf/resolve/main/model-q4_K.gguf"
):
    final_model_path = os.path.join(directory, model_name)
    
    print("Downloading all files...")
    if not os.path.exists(final_model_path):
        with open(final_model_path, "wb") as f:
            http_get(model_url, f)
    os.chmod(final_model_path, 0o777)
    print("Files downloaded!")
    
    model = Llama(
        model_path=final_model_path,
        n_ctx=2048
    )
    
    print("Model loaded!")
    return model


MODEL = load_model()


def user(message, history):
    new_history = history + [[message, None]]
    return "", new_history


def bot(
    history,
    system_prompt,
    top_p,
    top_k,
    temp
):
    model = MODEL
    tokens = get_system_tokens(model)[:]

    for user_message, bot_message in history[:-1]:
        message_tokens = get_message_tokens(model=model, role="user", content=user_message)
        tokens.extend(message_tokens)
        if bot_message:
            message_tokens = get_message_tokens(model=model, role="bot", content=bot_message)
            tokens.extend(message_tokens)

    last_user_message = history[-1][0]
    message_tokens = get_message_tokens(model=model, role="user", content=last_user_message)
    tokens.extend(message_tokens)

    role_tokens = model.tokenize("bot\n".encode("utf-8"), special=True)
    tokens.extend(role_tokens)
    generator = model.generate(
        tokens,
        top_k=top_k,
        top_p=top_p,
        temp=temp
    )

    partial_text = ""
    for i, token in enumerate(generator):
        if token == model.token_eos():
            break
        partial_text += model.detokenize([token]).decode("utf-8", "ignore")
        history[-1][1] = partial_text
        yield history


with gr.Blocks(
    theme=gr.themes.Soft()
) as demo:
    favicon = '<img src="https://cdn.midjourney.com/b88e5beb-6324-4820-8504-a1a37a9ba36d/0_1.png" width="48px" style="display: inline">'
    gr.Markdown(
        f"""<h1><center>{favicon}Saiga Mistral 7B GGUF Q4_K</center></h1>

        This is a demo of a **Russian**-speaking Mistral-based model. If you are interested in other languages, please check other models, such as [MPT-7B-Chat](https://huggingface.co/spaces/mosaicml/mpt-7b-chat).

        Это демонстрационная версия [квантованной Сайги/Мистраль с 7 миллиардами параметров](https://huggingface.co/IlyaGusev/saiga_mistral_7b_gguf), работающая на CPU.

        Сайга — это разговорная языковая модель, дообученная на корпусах, сгенерированных ChatGPT, таких как [ru_turbo_alpaca](https://huggingface.co/datasets/IlyaGusev/ru_turbo_alpaca), [ru_turbo_saiga](https://huggingface.co/datasets/IlyaGusev/ru_turbo_saiga) и [gpt_roleplay_realm](https://huggingface.co/datasets/IlyaGusev/gpt_roleplay_realm).
        """
    )
    with gr.Row():
        with gr.Column(scale=5):
            system_prompt = gr.Textbox(label="Системный промпт", placeholder="", value=SYSTEM_PROMPT, interactive=False)
            chatbot = gr.Chatbot(label="Диалог", height=400)
        with gr.Column(min_width=80, scale=1):
            with gr.Tab(label="Параметры генерации"):
                top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.9,
                    step=0.05,
                    interactive=True,
                    label="Top-p",
                )
                top_k = gr.Slider(
                    minimum=10,
                    maximum=100,
                    value=30,
                    step=5,
                    interactive=True,
                    label="Top-k",
                )
                temp = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.01,
                    step=0.01,
                    interactive=True,
                    label="Температура"
                )
    with gr.Row():
        with gr.Column():
            msg = gr.Textbox(
                label="Отправить сообщение",
                placeholder="Отправить сообщение",
                show_label=False,
            )
        with gr.Column():
            with gr.Row():
                submit = gr.Button("Отправить")
                stop = gr.Button("Остановить")
                clear = gr.Button("Очистить")
    with gr.Row():
        gr.Markdown(
            """ПРЕДУПРЕЖДЕНИЕ: Модель может генерировать фактически или этически некорректные тексты. Мы не несём за это ответственность."""
        )

    # Pressing Enter
    submit_event = msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    ).success(
        fn=bot,
        inputs=[
            chatbot,
            system_prompt,
            top_p,
            top_k,
            temp
        ],
        outputs=chatbot,
        queue=True,
    )

    # Pressing the button
    submit_click_event = submit.click(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    ).success(
        fn=bot,
        inputs=[
            chatbot,
            system_prompt,
            top_p,
            top_k,
            temp
        ],
        outputs=chatbot,
        queue=True,
    )

    # Stop generation
    stop.click(
        fn=None,
        inputs=None,
        outputs=None,
        cancels=[submit_event, submit_click_event],
        queue=False,
    )

    # Clear history
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue(max_size=128)
demo.launch()