IlyaGusev committed on
Commit
346d734
1 Parent(s): 946ce87
Files changed (3) hide show
  1. README.md +2 -2
  2. app.py +20 -55
  3. requirements.txt +2 -3
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Saiga Nemo 12B GGUF Q4_K_M
3
  emoji: 🔥
4
  colorFrom: pink
5
  colorTo: yellow
@@ -7,4 +7,4 @@ sdk: gradio
7
  sdk_version: 5.8.0
8
  app_file: app.py
9
  pinned: false
10
- ---
 
1
  ---
2
+ title: Saiga Nemo 12B GPTQ 8-bit
3
  emoji: 🔥
4
  colorFrom: pink
5
  colorTo: yellow
 
7
  sdk_version: 5.8.0
8
  app_file: app.py
9
  pinned: false
10
+ ---
app.py CHANGED
@@ -1,38 +1,13 @@
1
- import gradio as gr
2
-
3
  import os
4
 
5
- from huggingface_hub.file_download import http_get
6
- from llama_cpp import Llama
7
-
8
 
9
  SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
10
-
11
-
12
- def load_model(
13
- directory: str = ".",
14
- model_name: str = "saiga_nemo_12b.Q4_K_M.gguf",
15
- model_url: str = "https://huggingface.co/IlyaGusev/saiga_nemo_12b_gguf/resolve/main/saiga_nemo_12b.Q4_K_M.gguf"
16
- ):
17
- final_model_path = os.path.join(directory, model_name)
18
-
19
- print("Downloading all files...")
20
- if not os.path.exists(final_model_path):
21
- with open(final_model_path, "wb") as f:
22
- http_get(model_url, f)
23
- os.chmod(final_model_path, 0o777)
24
- print("Files downloaded!")
25
-
26
- model = Llama(
27
- model_path=final_model_path,
28
- n_ctx=8192
29
- )
30
-
31
- print("Model loaded!")
32
- return model
33
-
34
-
35
- MODEL = load_model()
36
 
37
 
38
  def user(message, history):
@@ -44,10 +19,8 @@ def bot(
44
  history,
45
  system_prompt,
46
  top_p,
47
- top_k,
48
  temp
49
  ):
50
- model = MODEL
51
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
52
 
53
  for user_message, bot_message in history[:-1]:
@@ -57,19 +30,21 @@ def bot(
57
 
58
  last_user_message = history[-1][0]
59
  messages.append({"role": "user", "content": last_user_message})
60
- partial_text = ""
61
- for part in model.create_chat_completion(
62
- messages,
 
63
  temperature=temp,
64
- top_k=top_k,
65
  top_p=top_p,
66
  stream=True,
67
- ):
68
- delta = part["choices"][0]["delta"]
69
- if "content" in delta:
70
- partial_text += delta["content"]
71
- history[-1][1] = partial_text
72
- yield history
 
 
73
 
74
 
75
  with gr.Blocks(
@@ -77,11 +52,11 @@ with gr.Blocks(
77
  ) as demo:
78
  favicon = '<img src="https://cdn.midjourney.com/b88e5beb-6324-4820-8504-a1a37a9ba36d/0_1.png" width="48px" style="display: inline">'
79
  gr.Markdown(
80
- f"""<h1><center>{favicon}Saiga Nemo 12B GGUF Q4_K_M</center></h1>
81
 
82
  This is a demo of a **Russian**-speaking Mistral Nemo based model.
83
 
84
- Это демонстрационная версия [квантованной Сайги Немо с 12 миллиардами параметров](https://huggingface.co/IlyaGusev/saiga_nemo_12b_gguf), работающая на CPU.
85
  """
86
  )
87
  with gr.Row():
@@ -98,14 +73,6 @@ with gr.Blocks(
98
  interactive=True,
99
  label="Top-p",
100
  )
101
- top_k = gr.Slider(
102
- minimum=10,
103
- maximum=100,
104
- value=30,
105
- step=5,
106
- interactive=True,
107
- label="Top-k",
108
- )
109
  temp = gr.Slider(
110
  minimum=0.0,
111
  maximum=2.0,
@@ -143,7 +110,6 @@ with gr.Blocks(
143
  chatbot,
144
  system_prompt,
145
  top_p,
146
- top_k,
147
  temp
148
  ],
149
  outputs=chatbot,
@@ -162,7 +128,6 @@ with gr.Blocks(
162
  chatbot,
163
  system_prompt,
164
  top_p,
165
- top_k,
166
  temp
167
  ],
168
  outputs=chatbot,
 
 
 
1
  import os
2
 
3
+ import gradio as gr
4
+ from openai import OpenAI
 
5
 
6
  SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
7
+ BASE_URL = os.getenv("BASE_URL")
8
+ API_KEY = os.getenv("API_KEY")
9
+ MODEL_NAME = "IlyaGusev/saiga_nemo_12b_gptq_8bit"
10
+ CLIENT = OpenAI(base_url=BASE_URL, api_key=API_KEY)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
 
13
  def user(message, history):
 
19
  history,
20
  system_prompt,
21
  top_p,
 
22
  temp
23
  ):
 
24
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
25
 
26
  for user_message, bot_message in history[:-1]:
 
30
 
31
  last_user_message = history[-1][0]
32
  messages.append({"role": "user", "content": last_user_message})
33
+
34
+ response = CLIENT.chat.completions.create(
35
+ model=MODEL_NAME,
36
+ messages=messages,
37
  temperature=temp,
 
38
  top_p=top_p,
39
  stream=True,
40
+ )
41
+
42
+ partial_text = ""
43
+ for chunk in response:
44
+ content = chunk.choices[0].delta.content
45
+ partial_text += content
46
+ history[-1][1] = partial_text
47
+ yield history
48
 
49
 
50
  with gr.Blocks(
 
52
  ) as demo:
53
  favicon = '<img src="https://cdn.midjourney.com/b88e5beb-6324-4820-8504-a1a37a9ba36d/0_1.png" width="48px" style="display: inline">'
54
  gr.Markdown(
55
+ f"""<h1><center>{favicon}Saiga Nemo 12B GPTQ 8 bit</center></h1>
56
 
57
  This is a demo of a **Russian**-speaking Mistral Nemo based model.
58
 
59
+ Это демонстрационная версия [квантованной Сайги Немо с 12 миллиардами параметров](https://huggingface.co/IlyaGusev/saiga_nemo_12b).
60
  """
61
  )
62
  with gr.Row():
 
73
  interactive=True,
74
  label="Top-p",
75
  )
 
 
 
 
 
 
 
 
76
  temp = gr.Slider(
77
  minimum=0.0,
78
  maximum=2.0,
 
110
  chatbot,
111
  system_prompt,
112
  top_p,
 
113
  temp
114
  ],
115
  outputs=chatbot,
 
128
  chatbot,
129
  system_prompt,
130
  top_p,
 
131
  temp
132
  ],
133
  outputs=chatbot,
requirements.txt CHANGED
@@ -1,3 +1,2 @@
1
- llama-cpp-python==0.3.2
2
- huggingface-hub==0.19.4
3
- gradio
 
1
+ openai==1.57.0
2
+ gradio