rus78 commited on
Commit
2f9a269
1 Parent(s): 4a51585

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +207 -0
app.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ import copy
4
+ import random
5
+ import os
6
+ import requests
7
+ import time
8
+ import sys
9
+
10
+ from huggingface_hub import snapshot_download
11
+ from llama_cpp import Llama
12
+
13
+
14
# Fixed system prompt (Russian): introduces the assistant persona "Saiga".
# This is a runtime string sent to the model — do not translate.
SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
# Hard-coded token ids for the Saiga chat template's role markers.
# NOTE(review): these ids are specific to the saiga_mistral_7b_gguf tokenizer —
# verify against the model vocabulary if the checkpoint is ever changed.
SYSTEM_TOKEN = 1587     # role marker: system
USER_TOKEN = 2188       # role marker: user
BOT_TOKEN = 12435       # role marker: bot
LINEBREAK_TOKEN = 13    # token id of "\n"


# Role name -> role-marker token id (used by get_message_tokens).
ROLE_TOKENS = {
    "user": USER_TOKEN,
    "bot": BOT_TOKEN,
    "system": SYSTEM_TOKEN
}
26
+
27
+
28
def get_message_tokens(model, role, content):
    """Encode one chat message in the Saiga prompt format.

    The text is tokenized, the role-marker token and a linebreak token are
    spliced in right after the first token of the tokenizer output, and the
    model's EOS token is appended.

    Args:
        model: llama_cpp.Llama instance providing tokenize()/token_eos().
        role: one of the keys of ROLE_TOKENS ("user", "bot", "system").
        content: message text (str, UTF-8 encodable).

    Returns:
        list of token ids for the formatted message.
    """
    encoded = model.tokenize(content.encode("utf-8"))
    # Equivalent to insert(1, role) + insert(2, linebreak) on the raw tokens.
    tokens = encoded[:1] + [ROLE_TOKENS[role], LINEBREAK_TOKEN] + encoded[1:]
    tokens.append(model.token_eos())
    return tokens
34
+
35
+
36
def get_system_tokens(model):
    """Return the token sequence encoding the fixed SYSTEM_PROMPT."""
    return get_message_tokens(model, role="system", content=SYSTEM_PROMPT)
39
+
40
+
41
# Download the quantized GGUF checkpoint from the Hugging Face Hub into the
# current working directory; allow_patterns limits the download to this file.
repo_name = "IlyaGusev/saiga_mistral_7b_gguf"
model_name = "model-q4_K.gguf"

snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)

# Load the model for CPU inference with a 2000-token context window.
# NOTE(review): n_parts is a legacy llama.cpp option — confirm the installed
# llama-cpp-python version still accepts it.
model = Llama(
    model_path=model_name,
    n_ctx=2000,
    n_parts=1,
)

# Upper bound on tokens generated per reply; enforced in bot().
max_new_tokens = 1500
53
+
54
def user(message, history):
    """Append the submitted message to the chat history.

    Args:
        message: text the user typed.
        history: list of [user_text, bot_text] pairs (not mutated).

    Returns:
        ("", extended_history) — the empty string clears the input textbox;
        the new entry's bot slot is None until bot() fills it in.
    """
    updated = list(history)
    updated.append([message, None])
    return "", updated
57
+
58
+
59
def bot(
    history,
    system_prompt,  # NOTE(review): unused — the fixed SYSTEM_PROMPT is always applied via get_system_tokens()
    top_p,
    top_k,
    temp
):
    """Stream the model's reply for the last user message in *history*.

    Generator: yields *history* repeatedly with history[-1][1] growing as
    tokens are produced, so the Gradio Chatbot updates incrementally.
    Assumes history is non-empty (user() has just appended an entry).
    """
    # Start the prompt with the system message tokens plus a linebreak.
    tokens = get_system_tokens(model)[:]
    tokens.append(LINEBREAK_TOKEN)

    # Replay all completed turns except the last (still unanswered) one.
    for user_message, bot_message in history[:-1]:
        message_tokens = get_message_tokens(model=model, role="user", content=user_message)
        tokens.extend(message_tokens)
        if bot_message:
            message_tokens = get_message_tokens(model=model, role="bot", content=bot_message)
            tokens.extend(message_tokens)

    # Append the pending user message.
    last_user_message = history[-1][0]
    message_tokens = get_message_tokens(model=model, role="user", content=last_user_message)
    tokens.extend(message_tokens)

    # Open the bot turn: BOS + bot role marker + linebreak, then sample.
    role_tokens = [model.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN]
    tokens.extend(role_tokens)
    generator = model.generate(
        tokens,
        top_k=top_k,
        top_p=top_p,
        temp=temp
    )

    partial_text = ""
    for i, token in enumerate(generator):
        # Stop on EOS or when the reply reaches max_new_tokens.
        if token == model.token_eos() or (max_new_tokens is not None and i >= max_new_tokens):
            break
        # "ignore" drops bytes of multi-byte UTF-8 chars split across tokens.
        partial_text += model.detokenize([token]).decode("utf-8", "ignore")
        history[-1][1] = partial_text
        yield history
96
+
97
+
98
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks(
    theme=gr.themes.Soft()
) as demo:
    # Inline <img> used as a decorative icon inside the page heading.
    favicon = '<img src="https://cdn.midjourney.com/b88e5beb-6324-4820-8504-a1a37a9ba36d/0_1.png" width="48px" style="display: inline">'
    gr.Markdown(
        f"""<h1><center>{favicon}Saiga Mistral 7B GGUF Q4_K</center></h1>
This is a demo of a **Russian**-speaking Mistral-based model. If you are interested in other languages, please check other models, such as [MPT-7B-Chat](https://huggingface.co/spaces/mosaicml/mpt-7b-chat).
Это демонстрационная версия [квантованной Сайги/Мистраль с 7 миллиардами параметров](https://huggingface.co/IlyaGusev/saiga_mistral_7b_gguf), работающая на CPU.
Сайга — это разговорная языковая модель, дообученная на корпусах, сгенерированных ChatGPT, таких как [ru_turbo_alpaca](https://huggingface.co/datasets/IlyaGusev/ru_turbo_alpaca), [ru_turbo_saiga](https://huggingface.co/datasets/IlyaGusev/ru_turbo_saiga) и [gpt_roleplay_realm](https://huggingface.co/datasets/IlyaGusev/gpt_roleplay_realm).
"""
    )
    with gr.Row():
        with gr.Column(scale=5):
            # Shown read-only; bot() ignores this value and always uses SYSTEM_PROMPT.
            system_prompt = gr.Textbox(label="Системный промпт", placeholder="", value=SYSTEM_PROMPT, interactive=False)
            # NOTE(review): .style() exists only in Gradio 3.x (removed in 4.x) —
            # confirm the pinned gradio version.
            chatbot = gr.Chatbot(label="Диалог").style(height=400)
        with gr.Column(min_width=80, scale=1):
            with gr.Tab(label="Параметры генерации"):
                top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.9,
                    step=0.05,
                    interactive=True,
                    label="Top-p",
                )
                top_k = gr.Slider(
                    minimum=10,
                    maximum=100,
                    value=30,
                    step=5,
                    interactive=True,
                    label="Top-k",
                )
                temp = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.01,
                    step=0.01,
                    interactive=True,
                    label="Температура"
                )
    with gr.Row():
        with gr.Column():
            # FIX: label was mojibake ("\ufffd\ufffdтправить сообщение");
            # restored to "Отправить сообщение" to match the placeholder text.
            msg = gr.Textbox(
                label="Отправить сообщение",
                placeholder="Отправить сообщение",
                show_label=False,
            ).style(container=False)
        with gr.Column():
            with gr.Row():
                submit = gr.Button("Отправить")
                stop = gr.Button("Остановить")
                clear = gr.Button("Очистить")
    with gr.Row():
        gr.Markdown(
            """ПРЕДУПРЕЖДЕНИЕ: Модель может генерировать фактически или этически некорректные тексты. Мы не несём за это ответственность."""
        )

    # Pressing Enter: append the user message, then stream the bot reply.
    submit_event = msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    ).success(
        fn=bot,
        inputs=[
            chatbot,
            system_prompt,
            top_p,
            top_k,
            temp
        ],
        outputs=chatbot,
        queue=True,
    )

    # Pressing the button: same pipeline as Enter.
    submit_click_event = submit.click(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    ).success(
        fn=bot,
        inputs=[
            chatbot,
            system_prompt,
            top_p,
            top_k,
            temp
        ],
        outputs=chatbot,
        queue=True,
    )

    # Stop generation: cancel any in-flight streaming events.
    stop.click(
        fn=None,
        inputs=None,
        outputs=None,
        cancels=[submit_event, submit_click_event],
        queue=False,
    )

    # Clear history.
    clear.click(lambda: None, None, chatbot, queue=False)

# NOTE(review): concurrency_count was removed in Gradio 4 — confirm version.
demo.queue(max_size=128, concurrency_count=1)
demo.launch()