nchen909 committed on
Commit
6805d46
1 Parent(s): 00e079e

Upload folder using huggingface_hub

Files changed (2)
  1. .gradio/certificate.pem +31 -0
  2. app_new.py +153 -124
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
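For reference, the committed .gradio/certificate.pem appears to be the ISRG Root X1 root certificate (the Let's Encrypt root) that Gradio caches locally when share links are used (share=True). A minimal sketch to confirm this locally, assuming the third-party cryptography package is installed:

from cryptography import x509

# Hedged sketch: inspect the certificate added in this commit.
with open(".gradio/certificate.pem", "rb") as f:
    cert = x509.load_pem_x509_certificate(f.read())

# Expected to print something like:
#   CN=ISRG Root X1,O=Internet Security Research Group,C=US
print(cert.subject.rfc4514_string())
print(cert.not_valid_before, "->", cert.not_valid_after)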
app_new.py CHANGED
Before (removed lines marked "-"):
@@ -1,63 +1,97 @@
 import gradio as gr
-
 import os
-
 from huggingface_hub.file_download import http_get
 from llama_cpp import Llama

-
 SYSTEM_PROMPT = "You are Apollo, a multilingual medical model. You communicate with people and assist them."


 def get_message_tokens(model, role, content):
 content = f"{role}\n{content}\n</s>"
 content = content.encode("utf-8")
 return model.tokenize(content, special=True)

-
 def get_system_tokens(model):
 system_message = {"role": "system", "content": SYSTEM_PROMPT}
 return get_message_tokens(model, **system_message)

-
- def load_model(
- directory: str = ".",
- model_name: str = "apollo2-7b-q4_k_m.gguf",
- model_url: str = "https://huggingface.co/nchen909/Apollo2-7B-Q4_K_M-GGUF/resolve/main/apollo2-7b-q4_k_m.gguf"
- ):
 final_model_path = os.path.join(directory, model_name)
-
- print("Downloading all files...")
 if not os.path.exists(final_model_path):
 with open(final_model_path, "wb") as f:
 http_get(model_url, f)
 os.chmod(final_model_path, 0o777)
- print("Files downloaded!")
-
- model = Llama(
- model_path=final_model_path,
- n_ctx=1024
- )
-
- print("Model loaded!")
 return model

-
- MODEL = load_model()
-

 def user(message, history):
 new_history = history + [[message, None]]
 return "", new_history

-
- def bot(
- history,
- system_prompt,
- top_p,
- top_k,
- temp
- ):
 model = MODEL
 tokens = get_system_tokens(model)[:]

@@ -74,132 +108,127 @@ def bot(

 role_tokens = model.tokenize("bot\n".encode("utf-8"), special=True)
 tokens.extend(role_tokens)
- generator = model.generate(
- tokens,
- top_k=top_k,
- top_p=top_p,
- temp=temp
- )

 partial_text = ""
 for i, token in enumerate(generator):
 if token == model.token_eos():
 break
 partial_text += model.detokenize([token]).decode("utf-8", "ignore")
 history[-1][1] = partial_text
 yield history

-
 with gr.Blocks(
- theme=gr.themes.Soft()
 ) as demo:
- favicon = '<img src="https://cdn.midjourney.com/b88e5beb-6324-4820-8504-a1a37a9ba36d/0_1.png" width="48px" style="display: inline">'
 gr.Markdown(
- f"""<h1><center>{favicon}Saiga2 13B GGUF Q4_K</center></h1>

- This is a demo of a **Russian**-speaking LLaMA2-based model. If you are interested in other languages, please check other models, such as [MPT-7B-Chat](https://huggingface.co/spaces/mosaicml/mpt-7b-chat).

- Это демонстрационная версия [квантованной Сайги-2 с 13 миллиардами параметров](https://huggingface.co/IlyaGusev/saiga2_13b_ggml), работающая на CPU.
-
- Сайга-2 — это разговорная языковая модель, которая основана на [LLaMA-2](https://ai.meta.com/llama/) и дообучена на корпусах, сгенерированных ChatGPT, таких как [ru_turbo_alpaca](https://huggingface.co/datasets/IlyaGusev/ru_turbo_alpaca), [ru_turbo_saiga](https://huggingface.co/datasets/IlyaGusev/ru_turbo_saiga) и [gpt_roleplay_realm](https://huggingface.co/datasets/IlyaGusev/gpt_roleplay_realm).
 """
 )
 with gr.Row():
- with gr.Column(scale=5):
- system_prompt = gr.Textbox(label="Системный промпт", placeholder="", value=SYSTEM_PROMPT, interactive=False)
- chatbot = gr.Chatbot(label="Диалог")
- with gr.Column(min_width=80, scale=1):
- with gr.Tab(label="Параметры генерации"):
- top_p = gr.Slider(
- minimum=0.0,
- maximum=1.0,
- value=0.9,
- step=0.05,
- interactive=True,
- label="Top-p",
- )
- top_k = gr.Slider(
- minimum=10,
- maximum=100,
- value=30,
- step=5,
 interactive=True,
- label="Top-k",
 )
- temp = gr.Slider(
- minimum=0.0,
- maximum=2.0,
- value=0.01,
- step=0.01,
 interactive=True,
- label="Температура"
 )
- with gr.Row():
- with gr.Column():
- msg = gr.Textbox(
- label="Отправить сообщение",
- placeholder="Отправить сообщение",
- show_label=False,
 )
- with gr.Column():
- with gr.Row():
- submit = gr.Button("Отправить")
- stop = gr.Button("Остановить")
- clear = gr.Button("Очистить")
- with gr.Row():
- gr.Markdown(
- """ПРЕДУПРЕЖДЕНИЕ: Модель может генерировать фактически или этически некорректные тексты. Мы не несём за это ответственность."""
- )

- # Pressing Enter
- submit_event = msg.submit(
- fn=user,
- inputs=[msg, chatbot],
- outputs=[msg, chatbot],
- queue=False,
- ).success(
- fn=bot,
- inputs=[
- chatbot,
- system_prompt,
- top_p,
- top_k,
- temp
- ],
- outputs=chatbot,
- queue=True,
- )

- # Pressing the button
- submit_click_event = submit.click(
 fn=user,
 inputs=[msg, chatbot],
 outputs=[msg, chatbot],
 queue=False,
 ).success(
 fn=bot,
- inputs=[
- chatbot,
- system_prompt,
- top_p,
- top_k,
- temp
- ],
 outputs=chatbot,
 queue=True,
 )

- # Stop generation
- stop.click(
- fn=None,
- inputs=None,
- outputs=None,
- cancels=[submit_event, submit_click_event],
- queue=False,
- )
-
- # Clear history
- clear.click(lambda: None, None, chatbot, queue=False)
-
 demo.queue(max_size=128)
 demo.launch(show_error=True, share=True)
 
After (added lines marked "+"):
 import gradio as gr
 import os
 from huggingface_hub.file_download import http_get
 from llama_cpp import Llama

 SYSTEM_PROMPT = "You are Apollo, a multilingual medical model. You communicate with people and assist them."

+ # Define the directory dynamically
+ dir = "."

 def get_message_tokens(model, role, content):
 content = f"{role}\n{content}\n</s>"
 content = content.encode("utf-8")
 return model.tokenize(content, special=True)

 def get_system_tokens(model):
 system_message = {"role": "system", "content": SYSTEM_PROMPT}
 return get_message_tokens(model, **system_message)

+ def load_model(directory, model_name, model_url):
 final_model_path = os.path.join(directory, model_name)
+ print(f"Checking model: {model_name}")
 if not os.path.exists(final_model_path):
+ print(f"Downloading model: {model_name}")
 with open(final_model_path, "wb") as f:
 http_get(model_url, f)
 os.chmod(final_model_path, 0o777)
+ print(f"Model {model_name} ready!")
+ model = Llama(model_path=final_model_path, n_ctx=1024)
+ print(f"Model {model_name} loaded successfully!")
 return model

+ MODEL_OPTIONS = {
+ "Apollo 0.5B": {
+ "directory": dir,
+ "model_name": "apollo-0.5b.gguf",
+ "model_url": "https://huggingface.co/path_to_apollo_0.5b_model"
+ },
+ "Apollo 2B": {
+ "directory": dir,
+ "model_name": "apollo-2b.gguf",
+ "model_url": "https://huggingface.co/path_to_apollo_2b_model"
+ },
+ "Apollo 7B": {
+ "directory": dir,
+ "model_name": "Apollo-7B-q8_0.gguf",
+ "model_url": "https://huggingface.co/FreedomIntelligence/Apollo-7B-GGUF/resolve/main/Apollo-7B-q8_0.gguf"
+ },
+ "Apollo2 0.5B": {
+ "directory": dir,
+ "model_name": "Apollo-0.5B-q8_0.gguf",
+ "model_url": "https://huggingface.co/FreedomIntelligence/Apollo-0.5B-GGUF/resolve/main/Apollo-0.5B-q8_0.gguf"
+ },
+ "Apollo2 2B": {
+ "directory": dir,
+ "model_name": "Apollo-2B-q8_0.gguf",
+ "model_url": "https://huggingface.co/FreedomIntelligence/Apollo-2B-GGUF/resolve/main/Apollo-2B-q8_0.gguf"
+ },
+ "Apollo2 7B": {
+ "directory": dir,
+ "model_name": "apollo2-7b-q8_0.gguf",
+ "model_url": "https://huggingface.co/nchen909/Apollo2-7B-Q8_0-GGUF/resolve/main/apollo2-7b-q8_0.gguf"
+ }
+ }
+
+ MODEL = None
+
+ def get_model_key(model_type, model_size):
+ return f"{model_type} {model_size}"
+
+ def initialize_model(model_type="Apollo2", model_size="7B"):
+ global MODEL
+ model_key = get_model_key(model_type, model_size)
+ try:
+ print(f"Initializing model: {model_key}")
+ selected_model = MODEL_OPTIONS[model_key]
+ MODEL = load_model(
+ directory=selected_model["directory"],
+ model_name=selected_model["model_name"],
+ model_url=selected_model["model_url"]
+ )
+ print(f"Model initialized: {model_key}")
+ except Exception as e:
+ print(f"Failed to initialize model {model_key}: {e}")
+ MODEL = None

 def user(message, history):
 new_history = history + [[message, None]]
 return "", new_history

+ def bot(history, top_p, top_k, temp):
+ global MODEL
+ if MODEL is None:
+ raise RuntimeError("Model has not been initialized. Please select a model to load.")
 model = MODEL
 tokens = get_system_tokens(model)[:]


 role_tokens = model.tokenize("bot\n".encode("utf-8"), special=True)
 tokens.extend(role_tokens)
+
+ generator = model.generate(tokens, top_k=top_k, top_p=top_p, temp=temp)

 partial_text = ""
 for i, token in enumerate(generator):
 if token == model.token_eos():
 break
+
 partial_text += model.detokenize([token]).decode("utf-8", "ignore")
 history[-1][1] = partial_text
 yield history

 with gr.Blocks(
+ theme=gr.themes.Monochrome(),
+ analytics_enabled=False,
 ) as demo:
+ favicon = '<img src="https://huggingface.co/FreedomIntelligence/Apollo2-7B/resolve/main/assets/apollo_medium_final.png" width="148px" style="display: inline">'
 gr.Markdown(
+ f"""# {favicon} Apollo GGUF Playground

+ This is a demo of multilingual medical model series **[Apollo](https://huggingface.co/FreedomIntelligence/Apollo-7B-GGUF)**, GGUF version. [Apollo1](https://arxiv.org/abs/2403.03640) covers 6 languages. [Apollo2](https://arxiv.org/abs/2410.10626) covers 50 languages.

 """
 )
 with gr.Row():
+ with gr.Column(scale=3):
+ chatbot = gr.Chatbot(label="Conversation")
+ msg = gr.Textbox(
+ label="Send Message",
+ placeholder="Send Message",
+ show_label=False,
+ elem_id="send-message-box"
+ )
+ with gr.Column(scale=1):
+ # Keep model_type and model_size in the same gr.Row
+ with gr.Row(equal_height=False):
+ model_type = gr.Dropdown(
+ choices=["Apollo", "Apollo2"],
+ value="Apollo2",
+ label="Select Model",
 interactive=True,
+ elem_id="model-type-dropdown",
 )
+ model_size = gr.Dropdown(
+ choices=["0.5B", "2B", "7B"],
+ value="7B",
+ label="Select Size",
 interactive=True,
+ elem_id="model-size-dropdown",
 )
+ #gr.Markdown("### Generation Parameters")
+ top_p = gr.Slider(
+ minimum=0.0,
+ maximum=1.0,
+ value=0.9,
+ step=0.05,
+ interactive=True,
+ label="Top-p",
 )
+ top_k = gr.Slider(
+ minimum=10,
+ maximum=100,
+ value=30,
+ step=5,
+ interactive=True,
+ label="Top-k",
+ )
+ temp = gr.Slider(
+ minimum=0.0,
+ maximum=2.0,
+ value=0.01,
+ step=0.01,
+ interactive=True,
+ label="Temperature"
+ )
+ with gr.Row(equal_height=False):
+ submit = gr.Button("Send", elem_id="send-btn")
+ stop = gr.Button("Stop", elem_id="stop-btn")
+ clear = gr.Button("Clear", elem_id="clear-btn")


+ def update_model(model_type, model_size):
+ initialize_model(model_type, model_size)
+
+ model_type.change(update_model, [model_type, model_size], None)
+ model_size.change(update_model, [model_type, model_size], None)
+
+ msg.submit(
 fn=user,
 inputs=[msg, chatbot],
 outputs=[msg, chatbot],
 queue=False,
 ).success(
 fn=bot,
+ inputs=[chatbot, top_p, top_k, temp],
 outputs=chatbot,
 queue=True,
 )

 demo.queue(max_size=128)
+ demo.css = """
+ footer {display: none !important;}
+ #send-message-box {width: 100%;}
+ #send-btn, #stop-btn, #clear-btn {
+ display: inline-block; /* force inline-block */
+ width: 30%; /* make each button 30% of the parent container's width */
+ margin-right: 2px; /* add spacing between buttons */
+ text-align: center; /* center the button content */
+ }
+
+ .gr-row {
+ display: flex !important; /* force flex layout */
+ flex-direction: row !important; /* arrange items horizontally */
+ justify-content: space-between; /* adjust spacing between components */
+ align-items: center; /* center vertically */
+ flex-wrap: nowrap; /* keep buttons from wrapping */
+ }
+ """
+
+
+ # Initialize the default model at startup
+ initialize_model("Apollo2", "7B")
+
 demo.launch(show_error=True, share=True)
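For context, the prompt template and streaming loop that bot() relies on can be exercised without launching the Gradio UI. The sketch below is not part of the commit; it mirrors the helpers in app_new.py but talks to llama_cpp directly, and it assumes the "Apollo2 7B" GGUF file from MODEL_OPTIONS has already been downloaded to the working directory.

from llama_cpp import Llama

SYSTEM_PROMPT = "You are Apollo, a multilingual medical model. You communicate with people and assist them."

# Assumed local path; mirrors the "Apollo2 7B" entry in MODEL_OPTIONS.
model = Llama(model_path="apollo2-7b-q8_0.gguf", n_ctx=1024)

def message_tokens(role, content):
    # Same template as get_message_tokens(): "<role>\n<content>\n</s>"
    return model.tokenize(f"{role}\n{content}\n</s>".encode("utf-8"), special=True)

# Build the prompt exactly as bot() does: system turn, user turn, then a "bot\n" header.
tokens = message_tokens("system", SYSTEM_PROMPT)
tokens += message_tokens("user", "What is hypertension?")
tokens += model.tokenize("bot\n".encode("utf-8"), special=True)

# Stream tokens with the same sampling defaults the UI exposes.
reply = ""
for token in model.generate(tokens, top_k=30, top_p=0.9, temp=0.01):
    if token == model.token_eos():
        break
    reply += model.detokenize([token]).decode("utf-8", "ignore")
print(reply)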