ffreemt committed
Commit 9f406cd
1 parent: 5e39b2d

Update model_file_name

Files changed (2)
  1. .flake8 +21 -0
  2. app.py +18 -7
.flake8 ADDED
@@ -0,0 +1,21 @@
+[flake8]
+ignore =
+    # E203 whitespace before ':'
+    E203
+    D203,
+    # line too long
+    E501
+per-file-ignores =
+    # imported but unused
+    # __init__.py: F401
+    test_*.py: F401
+exclude =
+    .git,
+    __pycache__,
+    docs/source/conf.py,
+    old,
+    build,
+    dist,
+    .venv
+    pad*.py
+max-complexity = 25
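In effect, this config silences whitespace-before-colon (E203) and line-too-long (E501) warnings repo-wide, and unused imports (F401) in test files. A hypothetical file that now lints clean under it:

    # demo.py -- hypothetical example; passes `flake8` under the new config
    data = list(range(100))
    tail = data[1 :]  # E203 (whitespace before ':') is now ignored
    print(tail, "and this deliberately long trailing comment would normally trip E501, line too long")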
app.py CHANGED
@@ -21,6 +21,7 @@ if MOSAICML_API_KEY is None:
 
 ns = SimpleNamespace(response="")
 
+
 def predict0(prompt, bot):
     # logger.debug(f"{prompt=}, {bot=}, {timeout=}")
     logger.debug(f"{prompt=}, {bot=}")
@@ -47,6 +48,7 @@ def predict0(prompt, bot):
 
     return prompt, bot
 
+
 def predict_api(prompt):
     logger.debug(f"{prompt=}")
     ns.response = ""
@@ -72,6 +74,7 @@ def predict_api(prompt):
 
     return response
 
+
 def download_mpt_quant(destination_folder: str, repo_id: str, model_filename: str):
     local_path = os.path.abspath(destination_folder)
     return hf_hub_download(
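The hunk cuts off inside the hf_hub_download call. For orientation only, a plausible completion; the keyword choices below (filename, local_dir) are assumptions based on huggingface_hub's public API, not the commit's actual code:

    import os
    from huggingface_hub import hf_hub_download

    def download_mpt_quant(destination_folder: str, repo_id: str, model_filename: str):
        # resolve the target folder and fetch one quantized GGML file into it
        local_path = os.path.abspath(destination_folder)
        return hf_hub_download(
            repo_id=repo_id,          # e.g. "TheBloke/mpt-30B-chat-GGML"
            filename=model_filename,  # e.g. "mpt-30b-chat.ggmlv0.q4_1.bin"
            local_dir=local_path,     # assumed destination, e.g. ./models
        )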
@@ -216,7 +219,9 @@ def call_inf_server(prompt):
     # return response[len(clean_prompt) :]  # remove the prompt
     try:
         user_prompt = prompt
-        generator = generate(llm, generation_config, system_prompt, user_prompt.strip())
+        generator = generate(
+            llm, generation_config, system_prompt, user_prompt.strip()
+        )
         print(assistant_prefix, end=" ", flush=True)
         for word in generator:
             print(word, end="", flush=True)
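The surrounding loop prints words as they arrive, so generate() is evidently a streaming generator. A minimal sketch of that shape, assuming a ctransformers-style callable llm; the real generate() in app.py may differ:

    def generate(llm, generation_config, system_prompt, user_prompt):
        # assumed prompt template; generation_config handling is elided here
        prompt = f"{system_prompt}\n{user_prompt}"
        # ctransformers models are callable; stream=True yields text piece by piece
        yield from llm(prompt, stream=True)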
@@ -251,6 +256,9 @@ repo_id = "TheBloke/mpt-30B-chat-GGML"
 _ = """
 mpt-30b-chat.ggmlv0.q4_0.bin q4_0 4 16.85 GB 19.35 GB 4-bit.
 mpt-30b-chat.ggmlv0.q4_1.bin q4_1 4 18.73 GB 21.23 GB 4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.
+mpt-30b-chat.ggmlv0.q5_0.bin q5_0 5 20.60 GB 23.10 GB
+mpt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
+mpt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
 """
 model_filename = "mpt-30b-chat.ggmlv0.q4_1.bin"
 destination_folder = "models"
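For readability: the docstring's columns are, left to right, file name, quant method, bits, file size, and approximate max RAM required (figures apparently copied from TheBloke's model card). The q4_1 file chosen below is an 18.73 GB download needing roughly 21.23 GB of RAM.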
@@ -261,7 +269,7 @@ logger.info("done dl")
 
 config = AutoConfig.from_pretrained("mosaicml/mpt-30b-chat", context_length=8192)
 llm = AutoModelForCausalLM.from_pretrained(
-    os.path.abspath("models/mpt-30b-chat.ggmlv0.q4_1.bin"),
+    os.path.abspath(f"models/{model_filename}"),
     model_type="mpt",
     config=config,
 )
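This is the substance of the commit: the load path is now derived from model_filename instead of being hard-coded, so the downloaded file and the loaded file cannot drift apart. A minimal sketch of the pattern:

    import os

    # single source of truth: swap the quant variant here and nowhere else,
    # e.g. "mpt-30b-chat.ggmlv0.q5_0.bin" for the 5-bit variant
    model_filename = "mpt-30b-chat.ggmlv0.q4_1.bin"
    model_path = os.path.abspath(f"models/{model_filename}")  # used for loading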
@@ -308,13 +316,15 @@ with gr.Blocks(
         """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
     )
     gr.Markdown(
-        """<h4><center>mpt-30b-chat-ggml</center></h4>
+        """<h4><center>mpt-30b-chat-ggml (q4_1)</center></h4>
 
         This demo is of [TheBloke/mpt-30B-chat-GGML](https://huggingface.co/TheBloke/mpt-30B-chat-GGML).
 
+        Refresh the browser and try again when occasional errors occur.
+
         It takes about >40 seconds to get a response. Restarting the space takes about 5 minutes if the space is asleep due to inactivity. If the space crashes for some reason, it will also take about 5 minutes to restart. You need to refresh the browser to reload the new space.
         """,
-        elem_classes="xsmall"
+        elem_classes="xsmall",
     )
     conversation = Chat()
     chatbot = gr.Chatbot().style(height=700)  # 500
@@ -358,14 +368,15 @@ with gr.Blocks(
             ["Suggest four metaphors to describe the benefits of AI"],
             ["Write a pop song about leaving home for the sandy beaches."],
             ["Write a summary demonstrating my ability to tame lions"],
+            ["鲁迅和周树人什么关系 说中文"],
             ["鲁迅和周树人什么关系"],
+            ["鲁迅和周树人什么关系 用英文回答"],
             ["从前有一头牛,这头牛后面有什么?"],
             ["正无穷大加一大于正无穷大吗?"],
             ["正无穷大加正无穷大大于正无穷大吗?"],
             ["-2的平方根等于什么"],
             ["树上有5只鸟,猎人开枪打死了一只。树上还有几只鸟?"],
             ["树上有11只鸟,猎人开枪打死了一只。树上还有几只鸟?提示:需考虑鸟可能受惊吓飞走。"],
-            ["鲁迅和周树人什么关系 用英文回答"],
             ["以红楼梦的行文风格写一张委婉的请假条。不少于320字。"],
             [f"{etext} 翻成中文,列出3个版本"],
             [f"{etext} \n 翻成中文,保留原意,但使用文学性的语言。不要写解释。列出3个版本"],
@@ -377,7 +388,7 @@ with gr.Blocks(
             ["Erkläre die Handlung von Cinderella in einem Satz. Auf Deutsch"],
         ],
         inputs=[msg],
-        examples_per_page=30,
+        examples_per_page=40,
     )
 
     # with gr.Row():
@@ -453,7 +464,7 @@ with gr.Blocks(
         outputs=[msg, chatbot],
         queue=True,
         show_progress="full",
-        api_name="predict"
+        api_name="predict",
     )
     submit.click(
         # fn=conversation.user_turn,
 
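Because the handler now registers api_name="predict", the endpoint can be called programmatically. A hypothetical client call, assuming the Space mikeee/mpt-30b-chat is awake and gradio_client is installed; the exact return shape depends on the bound handler:

    from gradio_client import Client

    client = Client("mikeee/mpt-30b-chat")  # connect to the running Space
    result = client.predict("Hello there", api_name="/predict")
    print(result)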