IlyaGusev committed
Commit dc414f9
1 Parent(s): b6d8ff1

Update app.py

Files changed (1)
  1. app.py +27 -26
app.py CHANGED
@@ -12,25 +12,12 @@ from llama_cpp import Llama


 SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
-SYSTEM_TOKEN = 1788
-USER_TOKEN = 1404
-BOT_TOKEN = 9225
-LINEBREAK_TOKEN = 13
-
-
-ROLE_TOKENS = {
-    "user": USER_TOKEN,
-    "bot": BOT_TOKEN,
-    "system": SYSTEM_TOKEN
-}


 def get_message_tokens(model, role, content):
-    message_tokens = model.tokenize(content.encode("utf-8"))
-    message_tokens.insert(1, ROLE_TOKENS[role])
-    message_tokens.insert(2, LINEBREAK_TOKEN)
-    message_tokens.append(model.token_eos())
-    return message_tokens
+    content = f"{role}\n{content}\n</s>"
+    content = content.encode("utf-8")
+    return model.tokenize(content, special=True)


 def get_system_tokens(model):
@@ -38,18 +25,31 @@ def get_system_tokens(model):
     return get_message_tokens(model, **system_message)


-repo_name = "IlyaGusev/saiga2_13b_gguf"
-model_name = "model-q4_K.gguf"
-
-snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)
-
-model = Llama(
-    model_path=model_name,
-    n_ctx=2000,
-    n_parts=1,
-)
-
-max_new_tokens = 1500
+def load_model(
+    directory: str = ".",
+    model_name: str = "model-q4_K.gguf",
+    model_url: str = "https://huggingface.co/IlyaGusev/saiga2_13b_gguf/resolve/main/model-q4_K.gguf"
+):
+    final_model_path = os.path.join(directory, model_name)
+
+    print("Downloading all files...")
+    if not os.path.exists(final_model_path):
+        with open(final_model_path, "wb") as f:
+            http_get(model_url, f)
+        os.chmod(final_model_path, 0o777)
+    print("Files downloaded!")
+
+    model = Llama(
+        model_path=final_model_path,
+        n_ctx=2048
+    )
+
+    print("Model loaded!")
+    return model
+
+
+MODEL = load_model()
+

 def user(message, history):
     new_history = history + [[message, None]]
@@ -63,6 +63,7 @@ def bot(
     top_k,
     temp
 ):
+    model = MODEL
     tokens = get_system_tokens(model)[:]
     tokens.append(LINEBREAK_TOKEN)

@@ -88,7 +89,7 @@

     partial_text = ""
     for i, token in enumerate(generator):
-        if token == model.token_eos() or (max_new_tokens is not None and i >= max_new_tokens):
+        if token == model.token_eos():
             break
         partial_text += model.detokenize([token]).decode("utf-8", "ignore")
         history[-1][1] = partial_text
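The core change to get_message_tokens: the hardcoded Saiga token ids (SYSTEM_TOKEN, USER_TOKEN, BOT_TOKEN, ROLE_TOKENS) are gone; the role header and the </s> terminator are now written as plain text, and tokenize(..., special=True) maps "</s>" to the tokenizer's EOS id instead of splitting it into literal characters. A minimal sketch of how the new helper is used end to end, assuming a llama-cpp-python version recent enough to support the special= flag on Llama.tokenize; the model path, messages, and sampling values are illustrative, not taken from the app:

# Sketch only: get_message_tokens matches the commit; the path, messages,
# and sampling values below are illustrative.
from llama_cpp import Llama

model = Llama(model_path="model-q4_K.gguf", n_ctx=2048)


def get_message_tokens(model, role, content):
    # Role header and </s> terminator are plain text; special=True maps
    # "</s>" to the real EOS id instead of tokenizing it character by character.
    content = f"{role}\n{content}\n</s>"
    content = content.encode("utf-8")
    return model.tokenize(content, special=True)


# Build a prompt the way bot() does: the system message, then a user turn.
tokens = get_message_tokens(model, "system", "Ты — Сайга, русскоязычный автоматический ассистент.")
tokens += get_message_tokens(model, "user", "Привет!")

# Stream until EOS; the old max_new_tokens cutoff is removed by this commit.
partial_text = ""
for token in model.generate(tokens, top_k=30, top_p=0.9, temp=0.2):
    if token == model.token_eos():
        break
    partial_text += model.detokenize([token]).decode("utf-8", "ignore")
print(partial_text)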
 
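On the loading side, load_model replaces snapshot_download with a direct http_get of the single GGUF file, bumps n_ctx from 2000 to 2048, and drops the n_parts argument; bot() now takes the model from the module-level MODEL constant. The new function depends on imports that sit above the excerpted hunks; assuming http_get comes from huggingface_hub.file_download (the module that defines it), they would look like:

# Assumed import block (not shown in this diff's context lines).
import os
from huggingface_hub.file_download import http_get
from llama_cpp import Llama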