IlyaGusev committed on
Commit
68073b0
1 Parent(s): 66183c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -24
app.py CHANGED
@@ -26,15 +26,6 @@ from llama_cpp import Llama
26
 
27
 
28
  SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
29
- SYSTEM_TOKEN = 1788
30
- USER_TOKEN = 1404
31
- BOT_TOKEN = 9225
32
- LINEBREAK_TOKEN = 13
33
- ROLE_TOKENS = {
34
- "user": USER_TOKEN,
35
- "bot": BOT_TOKEN,
36
- "system": SYSTEM_TOKEN
37
- }
38
 
39
  LOADER_MAPPING = {
40
  ".csv": (CSVLoader, {}),
@@ -76,7 +67,6 @@ def load_model(
76
  return model
77
 
78
 
79
- MAX_NEW_TOKENS = 1500
80
  EMBEDDER = SentenceTransformer("sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
81
  MODEL = load_model()
82
 
@@ -94,11 +84,9 @@ def load_single_document(file_path: str) -> Document:
94
 
95
 
96
  def get_message_tokens(model, role, content):
97
- message_tokens = model.tokenize(content.encode("utf-8"))
98
- message_tokens.insert(1, ROLE_TOKENS[role])
99
- message_tokens.insert(2, LINEBREAK_TOKEN)
100
- message_tokens.append(model.token_eos())
101
- return message_tokens
102
 
103
 
104
  def get_system_tokens(model):
@@ -168,28 +156,28 @@ def bot(
168
  top_k,
169
  temp
170
  ):
 
171
  if not history:
172
  return
173
 
174
- tokens = get_system_tokens(MODEL)[:]
175
- tokens.append(LINEBREAK_TOKEN)
176
 
177
  for user_message, bot_message in history[:-1]:
178
- message_tokens = get_message_tokens(model=MODEL, role="user", content=user_message)
179
  tokens.extend(message_tokens)
180
  if bot_message:
181
- message_tokens = get_message_tokens(model=MODEL, role="bot", content=bot_message)
182
  tokens.extend(message_tokens)
183
 
184
  last_user_message = history[-1][0]
185
  if retrieved_docs:
186
  last_user_message = f"Контекст: {retrieved_docs}\n\nИспользуя контекст, ответь на вопрос: {last_user_message}"
187
- message_tokens = get_message_tokens(model=MODEL, role="user", content=last_user_message)
188
  tokens.extend(message_tokens)
189
 
190
- role_tokens = [MODEL.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN]
191
  tokens.extend(role_tokens)
192
- generator = MODEL.generate(
193
  tokens,
194
  top_k=top_k,
195
  top_p=top_p,
@@ -198,9 +186,9 @@ def bot(
198
 
199
  partial_text = ""
200
  for i, token in enumerate(generator):
201
- if token == MODEL.token_eos() or (MAX_NEW_TOKENS is not None and i >= MAX_NEW_TOKENS):
202
  break
203
- partial_text += MODEL.detokenize([token]).decode("utf-8", "ignore")
204
  history[-1][1] = partial_text
205
  yield history
206
 
 
26
 
27
 
28
  SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
 
 
 
 
 
 
 
 
 
29
 
30
  LOADER_MAPPING = {
31
  ".csv": (CSVLoader, {}),
 
67
  return model
68
 
69
 
 
70
  EMBEDDER = SentenceTransformer("sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
71
  MODEL = load_model()
72
 
 
84
 
85
 
86
  def get_message_tokens(model, role, content):
87
+ content = f"{role}\n{content}\n</s>"
88
+ content = content.encode("utf-8")
89
+ return model.tokenize(content, special=True)
 
 
90
 
91
 
92
  def get_system_tokens(model):
 
156
  top_k,
157
  temp
158
  ):
159
+ model = MODEL
160
  if not history:
161
  return
162
 
163
+ tokens = get_system_tokens(model)[:]
 
164
 
165
  for user_message, bot_message in history[:-1]:
166
+ message_tokens = get_message_tokens(model=model, role="user", content=user_message)
167
  tokens.extend(message_tokens)
168
  if bot_message:
169
+ message_tokens = get_message_tokens(model=model, role="bot", content=bot_message)
170
  tokens.extend(message_tokens)
171
 
172
  last_user_message = history[-1][0]
173
  if retrieved_docs:
174
  last_user_message = f"Контекст: {retrieved_docs}\n\nИспользуя контекст, ответь на вопрос: {last_user_message}"
175
+ message_tokens = get_message_tokens(model=model, role="user", content=last_user_message)
176
  tokens.extend(message_tokens)
177
 
178
+ role_tokens = model.tokenize("bot\n".encode("utf-8"), special=True)
179
  tokens.extend(role_tokens)
180
+ generator = model.generate(
181
  tokens,
182
  top_k=top_k,
183
  top_p=top_p,
 
186
 
187
  partial_text = ""
188
  for i, token in enumerate(generator):
189
+ if token == model.token_eos():
190
  break
191
+ partial_text += model.detokenize([token]).decode("utf-8", "ignore")
192
  history[-1][1] = partial_text
193
  yield history
194