ffreemt committed on
Commit
ab343b8
1 Parent(s): 21d3b25

Update predict_api

Browse files
Files changed (2) hide show
  1. README.md +2 -2
  2. app.py +37 -0
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: mpt 30b chat
3
  emoji: 🔥
4
  colorFrom: purple
5
  colorTo: red
@@ -8,6 +8,6 @@ sdk_version: 3.35.2
8
  app_file: app.py
9
  pinned: false
10
  ---
11
- NB: Need a CPU Upgrade (32GB RAM) instance to run on a huggingface space
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: mpt-30b-ggml-chat
3
  emoji: 🔥
4
  colorFrom: purple
5
  colorTo: red
 
8
  app_file: app.py
9
  pinned: false
10
  ---
11
+ NB: Needs a CPU Upgrade (32GB RAM) instance to run on a Hugging Face Space; at a minimum, 19GB+ disk and 22GB+ RAM.
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -31,6 +31,7 @@ def predict0(prompt, bot):
31
  print(assistant_prefix, end=" ", flush=True)
32
 
33
  response = ""
 
34
  for word in generator:
35
  print(word, end="", flush=True)
36
  response += word
@@ -46,6 +47,30 @@ def predict0(prompt, bot):
46
 
47
  return prompt, bot
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  def download_mpt_quant(destination_folder: str, repo_id: str, model_filename: str):
51
  local_path = os.path.abspath(destination_folder)
@@ -445,6 +470,18 @@ with gr.Blocks(
445
  # AttributeError: 'Blocks' object has no attribute 'run_forever'
446
  # block.run_forever(lambda: ns.response, None, [buff], every=1)
447
 
 
 
 
 
 
 
 
 
 
 
 
 
448
  # concurrency_count=5, max_size=20
449
  # max_size=36, concurrency_count=14
450
  block.queue(concurrency_count=5, max_size=20).launch(debug=True)
 
31
  print(assistant_prefix, end=" ", flush=True)
32
 
33
  response = ""
34
+ buff.update(value="diggin...")
35
  for word in generator:
36
  print(word, end="", flush=True)
37
  response += word
 
47
 
48
  return prompt, bot
49
 
50
def predict_api(prompt):
    """Generate a completion for *prompt*; exposed as the Gradio API endpoint.

    Streams words from the model, accumulating the partial text into
    ``ns.response`` so other observers (e.g. a polling component) can read it.

    Args:
        prompt: The user prompt string.

    Returns:
        The full generated response, or a ``repr`` of the exception on failure.
    """
    logger.debug(f"{prompt=}")
    ns.response = ""
    response = ""
    try:
        generator = generate(llm, generation_config, system_prompt, prompt.strip())
        print(assistant_prefix, end=" ", flush=True)
        # NOTE(review): the original called buff.update(value=...) here and
        # after the loop. In Gradio 3.x, Component.update() only *returns* an
        # update dict -- it does not mutate the live component outside an event
        # handler -- and `buff` is defined later inside the Blocks context, so
        # the calls were a no-op at best and a NameError at worst. Removed.
        for word in generator:
            print(word, end="", flush=True)
            response += word
            # Publish partial output as it streams in (diff indentation was
            # ambiguous; assumes per-word update was intended -- TODO confirm).
            ns.response = response
        print("")
        logger.debug(f"{response=}")
    except Exception as exc:
        # Broad catch is deliberate: this is the API boundary; report the
        # failure to the caller instead of crashing the app. logger.exception
        # (vs the original logger.error) preserves the traceback.
        logger.exception(exc)
        response = f"{exc=}"
    # bot = {"inputs": [response]}
    # bot = [(prompt, response)]
    return response
74
 
75
  def download_mpt_quant(destination_folder: str, repo_id: str, model_filename: str):
76
  local_path = os.path.abspath(destination_folder)
 
470
  # AttributeError: 'Blocks' object has no attribute 'run_forever'
471
  # block.run_forever(lambda: ns.response, None, [buff], every=1)
472
 
473
+ with gr.Accordion("For Chat/Translation API", open=False, visible=False):
474
+ input_text = gr.Text()
475
+ api_btn = gr.Button("Go", variant="primary")
476
+ out_text = gr.Text()
477
+ api_btn.click(
478
+ predict_api,
479
+ input_text,
480
+ out_text,
481
+ # show_progress="full",
482
+ api_name="api",
483
+ )
484
+
485
  # concurrency_count=5, max_size=20
486
  # max_size=36, concurrency_count=14
487
  block.queue(concurrency_count=5, max_size=20).launch(debug=True)