File size: 6,802 Bytes
4a27403
 
 
 
a7c2e6e
4a27403
 
 
94d051b
f1beb5e
 
4a27403
dc414f9
 
 
4a27403
 
 
 
 
 
 
dc414f9
 
94d051b
fd9ff29
dc414f9
 
94d051b
dc414f9
fd9ff29
dc414f9
 
 
 
 
 
 
 
65c32ca
dc414f9
 
 
 
4a27403
 
dc414f9
4a27403
 
 
 
 
 
 
 
 
 
 
 
 
d9f8d6b
dc414f9
4a27403
 
 
 
 
 
 
 
 
 
 
 
 
949cae6
4a27403
 
 
 
 
 
 
 
 
 
dc414f9
4a27403
 
 
 
 
 
 
 
 
b113cd4
4a27403
b113cd4
f1beb5e
dbb146e
f1beb5e
94d051b
f1beb5e
6ded998
4a27403
 
 
 
 
fdb0542
4a27403
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1affa63
 
4a27403
1affa63
4a27403
 
 
 
 
 
 
fdb0542
4a27403
 
 
 
 
 
 
 
 
f1beb5e
4a27403
 
 
d9f8d6b
4a27403
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ddb02b4
4a27403
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94867a8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
import gradio as gr

import os

from huggingface_hub.file_download import http_get
from llama_cpp import Llama


SYSTEM_PROMPT = "Ты — Вихрь, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."


def get_message_tokens(model, role, content):
    """Tokenize a single chat turn.

    The turn is rendered as the role, a newline, the content, a newline and
    the ``</s>`` marker, then UTF-8 encoded before tokenization.

    Args:
        model: Object exposing ``tokenize(data, special=...)``.
        role: Speaker label placed on the first line of the turn.
        content: Message text placed on the second line of the turn.

    Returns:
        The result of ``model.tokenize`` for the encoded turn, called with
        ``special=True``.
    """
    turn = "{}\n{}\n</s>".format(role, content)
    return model.tokenize(turn.encode("utf-8"), special=True)


def get_system_tokens(model):
    """Return the tokens of the system turn built from ``SYSTEM_PROMPT``.

    Args:
        model: Object passed through to ``get_message_tokens``.

    Returns:
        The tokens produced by ``get_message_tokens`` for role ``"system"``.
    """
    return get_message_tokens(model, role="system", content=SYSTEM_PROMPT)


def load_model(
    directory: str = ".",
    model_name: str = "vikhr-7b-instruct_0.2.Q4_K_S.gguf",
    model_url: str = "https://huggingface.co/pirbis/Vikhr-7B-instruct_0.2-GGUF/resolve/main/vikhr-7b-instruct_0.2.Q4_K_S.gguf"
):
    """Fetch the model file if it is not present locally and load it.

    Args:
        directory: Directory in which the model file is stored.
        model_name: File name of the model inside ``directory``.
        model_url: URL the file is downloaded from when it is missing.

    Returns:
        A ``Llama`` instance created from the local file with ``n_ctx=1024``.

    Raises:
        Any exception raised by the download or by ``Llama``; a failed
        download removes its partial file before re-raising.
    """
    final_model_path = os.path.join(directory, model_name)
    print("Downloading all files...")
    print(final_model_path)
    if not os.path.exists(final_model_path):
        # Download into a sibling temporary file and rename it only after the
        # transfer finished. Writing straight to the final path would leave a
        # truncated file behind on an interrupted download, and the existence
        # check above would then accept that broken file on every later start.
        partial_path = final_model_path + ".part"
        try:
            with open(partial_path, "wb") as f:
                http_get(model_url, f)
        except BaseException:
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise
        os.replace(partial_path, final_model_path)
    # Permission bits kept as in the original deployment (rwx for everyone).
    os.chmod(final_model_path, 0o777)
    print("Files downloaded!")

    model = Llama(
        model_path=final_model_path,
        n_ctx=1024
    )

    print("Model loaded!")
    return model


# Load the model once at import time; bot() reads this module-level instance.
MODEL = load_model()


def user(message, history):
    """Add the submitted message to the dialog as a turn without a reply.

    Args:
        message: Text the user submitted.
        history: Current dialog as a list of ``[user_message, bot_message]``
            pairs.

    Returns:
        A tuple of an empty string and a new history list ending with
        ``[message, None]``. The ``history`` argument itself is not modified.
    """
    pending_turn = [message, None]
    return "", history + [pending_turn]


def bot(
    history,
    system_prompt,
    top_p,
    top_k,
    temp
):
    """Stream the model's reply to the last user message in ``history``.

    The prompt is the system turn, every earlier user/bot turn, the last
    user message, and an opened ``bot`` turn for the model to continue.

    Args:
        history: Dialog as a list of ``[user_message, bot_message]`` pairs;
            the last pair holds the message to answer. Its reply slot is
            overwritten in place as text is generated.
        system_prompt: Text of the system turn. When empty or ``None`` the
            module-level ``SYSTEM_PROMPT`` is used via ``get_system_tokens``.
        top_p: Passed to ``model.generate`` as ``top_p``.
        top_k: Passed to ``model.generate`` as ``top_k``.
        temp: Passed to ``model.generate`` as ``temp``.

    Yields:
        ``history`` after each generated token, with the reply so far stored
        in ``history[-1][1]``.
    """
    model = MODEL

    # Honor the prompt supplied by the caller instead of silently ignoring it.
    if system_prompt:
        tokens = list(get_message_tokens(model=model, role="system", content=system_prompt))
    else:
        tokens = list(get_system_tokens(model))

    # Replay the earlier turns; a turn without a reply contributes only the
    # user message.
    for user_message, bot_message in history[:-1]:
        tokens.extend(get_message_tokens(model=model, role="user", content=user_message))
        if bot_message:
            tokens.extend(get_message_tokens(model=model, role="bot", content=bot_message))

    last_user_message = history[-1][0]
    tokens.extend(get_message_tokens(model=model, role="user", content=last_user_message))

    # Open the bot turn so that generation continues with the reply text.
    tokens.extend(model.tokenize("bot\n".encode("utf-8"), special=True))

    # NOTE(review): the prompt is not truncated here; confirm what happens
    # when a long dialog exceeds the context size the model was loaded with.
    generator = model.generate(
        tokens,
        top_k=top_k,
        top_p=top_p,
        temp=temp
    )

    eos_token = model.token_eos()
    # Accumulate raw bytes and decode the whole buffer each step. Decoding
    # every token separately with errors="ignore" drops any character whose
    # UTF-8 bytes are split across tokens; decoding the buffer only hides an
    # incomplete trailing sequence until its remaining bytes arrive.
    reply_bytes = b""
    for token in generator:
        if token == eos_token:
            break
        reply_bytes += model.detokenize([token])
        history[-1][1] = reply_bytes.decode("utf-8", "ignore")
        yield history


# Assemble the chat page: header, dialog with sampling controls, message
# input with action buttons, a disclaimer, and the event wiring.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        f"""<h1><center>Vikhr Instruct2 GGUF Q4_K</center></h1>

        This is a demo of a **Russian**-speaking Mistral-based model. 

        Это демонстрационная версия [квантованной Вихрь-0.2 с 7 миллиардами параметров](https://huggingface.co/pirbis/Vikhr-7B-instruct_0.2-GGUF/), работающая на CPU.

        Bихрь — это семейство переведенных языковых моделей, которая основаных на Mistral, LLama, CPM, T5, имеют расширенные словари и дообучены на русски корпусах, инструктивные версии дообучены на корпусах, сгенерированных ChatGPT, таких как [ru_turbo_alpaca](https://huggingface.co/datasets/IlyaGusev/ru_turbo_alpaca), [ru_turbo_saiga](https://huggingface.co/datasets/IlyaGusev/ru_turbo_saiga) и [gpt_roleplay_realm](https://huggingface.co/datasets/IlyaGusev/gpt_roleplay_realm).
        """
    )

    # Dialog on the left, generation settings on the right.
    with gr.Row():
        with gr.Column(scale=5):
            # Non-interactive textbox showing the system prompt.
            system_prompt = gr.Textbox(
                label="Системный промпт",
                placeholder="",
                value=SYSTEM_PROMPT,
                interactive=False,
            )
            chatbot = gr.Chatbot(label="Диалог")
        with gr.Column(min_width=80, scale=1):
            with gr.Tab(label="Параметры генерации"):
                top_p = gr.Slider(label="Top-p", minimum=0.0, maximum=1.0, value=0.9, step=0.05, interactive=True)
                top_k = gr.Slider(label="Top-k", minimum=10, maximum=100, value=30, step=5, interactive=True)
                temp = gr.Slider(label="Температура", minimum=0.0, maximum=2.0, value=0.01, step=0.01, interactive=True)

    # Message box and action buttons.
    with gr.Row():
        with gr.Column():
            msg = gr.Textbox(label="Отправить сообщение", placeholder="Отправить сообщение", show_label=False)
        with gr.Column():
            with gr.Row():
                submit = gr.Button("Отправить")
                stop = gr.Button("Остановить")
                clear = gr.Button("Очистить")
    with gr.Row():
        gr.Markdown(
            """ПРЕДУПРЕЖДЕНИЕ: Модель может генерировать фактически или этически некорректные тексты. Мы не несём за это ответственность."""
        )

    # Step 1 (not queued): move the typed message into the dialog and empty
    # the message box.
    _add_user_turn = dict(fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False)
    # Step 2 (queued): stream the reply into the dialog; chained with
    # .success(), so it runs only when step 1 completed without error.
    _stream_reply = dict(
        fn=bot,
        inputs=[chatbot, system_prompt, top_p, top_k, temp],
        outputs=chatbot,
        queue=True,
    )

    # Pressing Enter and pressing the button run the same two-step chain.
    submit_event = msg.submit(**_add_user_turn).success(**_stream_reply)
    submit_click_event = submit.click(**_add_user_turn).success(**_stream_reply)

    # Stop generation: cancel whichever chain is currently running.
    stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_event, submit_click_event], queue=False)

    # Clear history
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue(max_size=128)
demo.launch(show_error=True)