chansung committed on
Commit
88f55d9
1 Parent(s): 9c71d4a
This view is limited to 50 files because the commit contains too many changes. See raw diff
Files changed (50)
  1. README.md +10 -0
  2. __init__.py +0 -0
  3. app.py +524 -0
  4. args.py +61 -0
  5. channels.txt +10 -0
  6. chats/__init__.py +0 -0
  7. chats/alpaca.py +108 -0
  8. chats/alpaca_gpt4.py +101 -0
  9. chats/alpacoom.py +101 -0
  10. chats/baize.py +113 -0
  11. chats/central.py +156 -0
  12. chats/flan_alpaca.py +101 -0
  13. chats/guanaco.py +120 -0
  14. chats/koalpaca.py +101 -0
  15. chats/mpt.py +118 -0
  16. chats/os_stablelm.py +112 -0
  17. chats/post.py +3 -0
  18. chats/pre.py +97 -0
  19. chats/redpajama.py +101 -0
  20. chats/stablelm.py +112 -0
  21. chats/starchat.py +112 -0
  22. chats/vicuna.py +109 -0
  23. configs/constraints_config.yaml +4 -0
  24. configs/response_configs/baize.yaml +12 -0
  25. configs/response_configs/camel.yaml +11 -0
  26. configs/response_configs/default.yaml +9 -0
  27. configs/response_configs/flan.yaml +9 -0
  28. configs/response_configs/gpt4_alpaca.yaml +9 -0
  29. configs/response_configs/guanaco.yaml +9 -0
  30. configs/response_configs/koalpaca.yaml +11 -0
  31. configs/response_configs/redpajama.yaml +11 -0
  32. configs/response_configs/stablelm.yaml +11 -0
  33. configs/response_configs/stackllama.yaml +10 -0
  34. configs/response_configs/starchat.yaml +12 -0
  35. configs/response_configs/t5_vicuna.yaml +9 -0
  36. configs/summarization_configs/camel.yaml +11 -0
  37. configs/summarization_configs/default.yaml +11 -0
  38. configs/summarization_configs/koalpaca.yaml +11 -0
  39. configs/summarization_configs/redpajama.yaml +11 -0
  40. configs/summarization_configs/stablelm.yaml +11 -0
  41. configs/summarization_configs/t5_vicuna.yaml +9 -0
  42. examples.txt +2 -0
  43. gens/__init__.py +0 -0
  44. gens/batch_gen.py +32 -0
  45. global_vars.py +194 -0
  46. miscs/__init__.py +0 -0
  47. miscs/js.py +50 -0
  48. miscs/strings.py +83 -0
  49. miscs/styles.py +727 -0
  50. model_cards.json +0 -0
README.md ADDED
@@ -0,0 +1,10 @@
1
+ ---
2
+ title: LLM As Serve
3
+ emoji: 🦙🚀
4
+ sdk: gradio
5
+ app_file: app.py
6
+ pinned: true
7
+ license: apache-2.0
8
+ colorFrom: yellow
9
+ colorTo: green
10
+ ---
__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,524 @@
1
+ import time
2
+ import json
3
+ import re
4
+ import os
5
+ from os import listdir
6
+ from os.path import isfile, join
7
+ import gradio as gr
8
+ import args
9
+ import global_vars
10
+ from chats import central
11
+ from transformers import AutoModelForCausalLM
12
+ from miscs.styles import MODEL_SELECTION_CSS
13
+ from miscs.js import GET_LOCAL_STORAGE, UPDATE_LEFT_BTNS_STATE
14
+ from utils import get_chat_interface, get_chat_manager, get_global_context
15
+
16
+ ex_file = open("examples.txt", "r")
17
+ examples = ex_file.read().split("\n")
18
+ ex_btns = []
19
+
20
+ chl_file = open("channels.txt", "r")
21
+ channels = chl_file.read().split("\n")
22
+ channel_btns = []
23
+
24
+ global_vars.initialize_globals()
25
+
26
+ response_configs = [
27
+ f"configs/response_configs/{f}"
28
+ for f in listdir("configs/response_configs")
29
+ if isfile(join("configs/response_configs", f))
30
+ ]
31
+
32
+ summarization_configs = [
33
+ f"configs/summarization_configs/{f}"
34
+ for f in listdir("configs/summarization_configs")
35
+ if isfile(join("configs/summarization_configs", f))
36
+ ]
37
+
38
+ model_info = json.load(open("model_cards.json"))
39
+
40
+ def channel_num(btn_title):
41
+ choice = 0
42
+
43
+ for idx, channel in enumerate(channels):
44
+ if channel == btn_title:
45
+ choice = idx
46
+
47
+ return choice
48
+
49
+
50
+ def set_chatbot(btn, ld, state):
51
+ choice = channel_num(btn)
52
+
53
+ res = [state["ppmanager_type"].from_json(json.dumps(ppm_str)) for ppm_str in ld]
54
+ empty = len(res[choice].pingpongs) == 0
55
+ return (
56
+ res[choice].build_uis(),
57
+ choice,
58
+ gr.update(visible=empty),
59
+ gr.update(interactive=not empty)
60
+ )
61
+
62
+
63
+ def set_example(btn):
64
+ return btn, gr.update(visible=False)
65
+
66
+
67
+ def set_popup_visibility(ld, example_block):
68
+ return example_block
69
+
70
+
71
+ def move_to_second_view(btn):
72
+ info = model_info[btn]
73
+
74
+ guard_vram = 5 * 1024.
75
+ vram_req_full = int(info["vram(full)"]) + guard_vram
76
+ vram_req_8bit = int(info["vram(8bit)"]) + guard_vram
77
+ vram_req_4bit = int(info["vram(4bit)"]) + guard_vram
78
+
79
+ load_mode_list = []
80
+
81
+ return (
82
+ gr.update(visible=False),
83
+ gr.update(visible=True),
84
+ info["thumb"],
85
+ f"## {btn}",
86
+ f"**Parameters**\n: Approx. {info['parameters']}",
87
+ f"**🤗 Hub(base)**\n: {info['hub(base)']}",
88
+ f"**🤗 Hub(LoRA)**\n: {info['hub(ckpt)']}",
89
+ info['desc'],
90
+ f"""**Min VRAM requirements** :
91
+ | half precision | load_in_8bit | load_in_4bit |
92
+ | ------------------------------------- | ---------------------------------- | ---------------------------------- |
93
+ | {round(vram_req_full/1024., 1)}GiB | {round(vram_req_8bit/1024., 1)}GiB | {round(vram_req_4bit/1024., 1)}GiB |
94
+ """,
95
+ info['default_gen_config'],
96
+ info['example1'],
97
+ info['example2'],
98
+ info['example3'],
99
+ info['example4'],
100
+ "",
101
+ )
102
+
103
+
104
+ def move_to_first_view():
105
+ return (
106
+ gr.update(visible=True),
107
+ gr.update(visible=False),
108
+ ""
109
+ )
110
+
111
+
112
+ def get_model_num(
113
+ model_name
114
+ ):
115
+ model_num = 0
116
+ re_tag = re.compile(r'<[^>]+>')
117
+ model_name = re_tag.sub('', model_name).strip()
118
+ print(model_name)
119
+
120
+ for idx, item in enumerate(global_vars.models):
121
+ if item["model_name"] == model_name:
122
+ model_num = idx
123
+ print(idx)
124
+ break
125
+
126
+ return "Download completed!", model_num
127
+
128
+ def move_to_third_view(model_num):
129
+ gen_config = global_vars.models[model_num]["gen_config"]
130
+
131
+ return (
132
+ "Preparation done!",
133
+ gr.update(visible=False),
134
+ gr.update(visible=True),
135
+ gr.update(label=global_vars.models[model_num]["model_type"]),
136
+ {
137
+ "ppmanager_type": global_vars.models[model_num]["chat_manager"],
138
+ "model_type": global_vars.models[model_num]["model_type"],
139
+ },
140
+ get_global_context(global_vars.models[model_num]["model_type"]),
141
+ gen_config.temperature,
142
+ gen_config.top_p,
143
+ gen_config.top_k,
144
+ gen_config.repetition_penalty,
145
+ gen_config.max_new_tokens,
146
+ gen_config.num_beams,
147
+ gen_config.use_cache,
148
+ gen_config.do_sample,
149
+ gen_config.eos_token_id,
150
+ gen_config.pad_token_id,
151
+ )
152
+
153
+
154
+ def toggle_inspector(view_selector):
155
+ if view_selector == "with context inspector":
156
+ return gr.update(visible=True)
157
+ else:
158
+ return gr.update(visible=False)
159
+
160
+
161
+ def reset_chat(idx, ld, state):
162
+ res = [state["ppmanager_type"].from_json(json.dumps(ppm_str)) for ppm_str in ld]
163
+ res[idx].pingpongs = []
164
+
165
+ return (
166
+ "",
167
+ [],
168
+ str(res),
169
+ gr.update(visible=True),
170
+ gr.update(interactive=False),
171
+ )
172
+
173
+ def rollback_last(idx, ld, state):
174
+ res = [state["ppmanager_type"].from_json(json.dumps(ppm_str)) for ppm_str in ld]
175
+ last_user_message = res[idx].pingpongs[-1].ping
176
+ res[idx].pingpongs = res[idx].pingpongs[:-1]
177
+
178
+ return (
179
+ last_user_message,
180
+ res[idx].build_uis(),
181
+ str(res),
182
+ gr.update(interactive=False)
183
+ )
184
+
185
+ with gr.Blocks(css=MODEL_SELECTION_CSS, theme='gradio/soft') as demo:
186
+ with gr.Column() as model_choice_view:
187
+ gr.Markdown("# Choose a Model", elem_classes=["center"])
188
+
189
+ with gr.Row(elem_id="container"):
190
+ with gr.Column():
191
+ gr.Markdown("""This application is built and provided for anyone who wants to try out open-source Large Language Models for free. All the provided models are pre-downloaded and pre-loaded to maximize your experience. This application is hosted on [jarvislabs.ai](https://jarvislabs.ai/) on a 3 x A6000 VM instance. This demo will be hosted until 13/07/2023, but you can run the same application on [jarvislabs.ai](https://jarvislabs.ai/) with arbitrary GPU options of your choice. Also, if you want to run the same application in your own environment, be sure to check out the [project repository](https://github.com/deep-diver/LLM-As-Chatbot) for further information.
192
+
193
+ From this page, choose a model that you would like to try out. By selecting a model, you will see a more detailed description of the model on a separate page. Also note that this page will appear whenever you refresh your browser tab.""")
194
+ with gr.Row(elem_classes=["sub-container"]):
195
+ # with gr.Column(min_width=20):
196
+ # llama_deus_7b = gr.Button("llama-deus-7b", elem_id="llama-deus-7b", elem_classes=["square"])
197
+ # gr.Markdown("LLaMA Deus", elem_classes=["center"])
198
+
199
+ with gr.Column(min_width=20):
200
+ baize_7b = gr.Button("baize-7b", elem_id="baize-7b", elem_classes=["square"])
201
+ gr.Markdown("Baize", elem_classes=["center"])
202
+
203
+ # with gr.Column(min_width=20):
204
+ # koalpaca = gr.Button("koalpaca", elem_id="koalpaca", elem_classes=["square"])
205
+ # gr.Markdown("koalpaca", elem_classes=["center"])
206
+
207
+ # with gr.Column(min_width=20):
208
+ # evolinstruct_vicuna_13b = gr.Button("evolinstruct-vicuna-13b", elem_id="evolinstruct-vicuna-13b", elem_classes=["square"])
209
+ # gr.Markdown("EvolInstruct Vicuna", elem_classes=["center"])
210
+
211
+ with gr.Column(min_width=20):
212
+ guanaco_7b = gr.Button("guanaco-7b", elem_id="guanaco-7b", elem_classes=["square"])
213
+ gr.Markdown("Guanaco", elem_classes=["center"])
214
+
215
+ # with gr.Column(min_width=20):
216
+ # nous_hermes_13b = gr.Button("nous-hermes-13b", elem_id="nous-hermes-13b", elem_classes=["square"])
217
+ # gr.Markdown("Nous Hermes", elem_classes=["center"])
218
+
219
+ progress_view = gr.Textbox(label="Progress")
220
+
221
+ with gr.Column(visible=False) as model_review_view:
222
+ gr.Markdown("# Confirm the chosen model", elem_classes=["center"])
223
+
224
+ with gr.Column(elem_id="container2"):
225
+ gr.Markdown("""The model is pre-downloaded and pre-loaded for your convenience in this demo application, so you don't need to worry about the `VRAM requirements`; they are shown just for reference. Also, a proper `GenerationConfig` is selected and fixed, but you can adjust some of the hyper-parameters once you enter the chatting mode.
226
+
227
+ Before deciding which model to use, you can expand `Example showcases` to see some recorded example pairs of questions and answers. They will help you better understand which model suits you well. Then, click the `Confirm` button to enter the chatting mode. If you click the `Back` button or refresh the browser tab, the model selection page will appear.
228
+ """)
229
+
230
+ with gr.Row():
231
+ model_image = gr.Image(None, interactive=False, show_label=False)
232
+ with gr.Column():
233
+ model_name = gr.Markdown("**Model name**")
234
+ model_desc = gr.Markdown("...")
235
+ model_params = gr.Markdown("Parameters\n: ...")
236
+ model_base = gr.Markdown("🤗 Hub(base)\n: ...")
237
+ model_ckpt = gr.Markdown("🤗 Hub(LoRA)\n: ...")
238
+ model_vram = gr.Markdown(f"""**Minimal VRAM requirement** :
239
+ | half precision | load_in_8bit | load_in_4bit |
240
+ | ------------------------------ | ------------------------- | ------------------------- |
241
+ | {round(7830/1024., 1)}GiB | {round(5224/1024., 1)}GiB | {round(4324/1024., 1)}GiB |
242
+ """)
243
+ model_thumbnail_tiny = gr.Textbox("", visible=False)
244
+
245
+ with gr.Column():
246
+ gen_config_path = gr.Dropdown(
247
+ response_configs,
248
+ value=response_configs[0],
249
+ interactive=False,
250
+ label="Gen Config(response)",
251
+ )
252
+
253
+ with gr.Accordion("Example showcases", open=False):
254
+ with gr.Tab("Ex1"):
255
+ example_showcase1 = gr.Chatbot(
256
+ [("hello", "world"), ("damn", "good")]
257
+ )
258
+ with gr.Tab("Ex2"):
259
+ example_showcase2 = gr.Chatbot(
260
+ [("hello", "world"), ("damn", "good")]
261
+ )
262
+ with gr.Tab("Ex3"):
263
+ example_showcase3 = gr.Chatbot(
264
+ [("hello", "world"), ("damn", "good")]
265
+ )
266
+ with gr.Tab("Ex4"):
267
+ example_showcase4 = gr.Chatbot(
268
+ [("hello", "world"), ("damn", "good")]
269
+ )
270
+
271
+ with gr.Row():
272
+ back_to_model_choose_btn = gr.Button("Back")
273
+ confirm_btn = gr.Button("Confirm")
274
+
275
+ with gr.Column(elem_classes=["progress-view"]):
276
+ txt_view = gr.Textbox(label="Status")
277
+ progress_view2 = gr.Textbox(label="Progress")
278
+
279
+ with gr.Column(visible=False) as chat_view:
280
+ idx = gr.State(0)
281
+ model_num = gr.State(0)
282
+ chat_state = gr.State()
283
+ local_data = gr.JSON({}, visible=False)
284
+
285
+ gr.Markdown("# Chatting", elem_classes=["center"])
286
+ gr.Markdown("""This entire application is built on top of `Gradio`. You can select one of the 10 channels on the left side to start chatting with the model. The model type you chose appears as a label in the top left corner of the chat component as well. Furthermore, you will see which model responded to your question in each turn by its unique icon. This is because you can go back and forth to select different models from time to time and continue your conversation with different models. With the models' icons, you will better understand how the conversation has gone.""")
287
+
288
+ with gr.Row():
289
+ with gr.Column(scale=1, min_width=180):
290
+ gr.Markdown("GradioChat", elem_id="left-top")
291
+
292
+ with gr.Column(elem_id="left-pane"):
293
+ chat_back_btn = gr.Button("Back", elem_id="chat-back-btn")
294
+
295
+ with gr.Accordion("Histories", elem_id="chat-history-accordion"):
296
+ channel_btns.append(gr.Button(channels[0], elem_classes=["custom-btn-highlight"]))
297
+
298
+ for channel in channels[1:]:
299
+ channel_btns.append(gr.Button(channel, elem_classes=["custom-btn"]))
300
+
301
+ with gr.Column(scale=8, elem_id="right-pane"):
302
+ with gr.Column(
303
+ elem_id="initial-popup", visible=False
304
+ ) as example_block:
305
+ with gr.Row(scale=1):
306
+ with gr.Column(elem_id="initial-popup-left-pane"):
307
+ gr.Markdown("GradioChat", elem_id="initial-popup-title")
308
+ gr.Markdown(
309
+ "Making the community's best AI chat models available to everyone."
310
+ )
311
+ with gr.Column(elem_id="initial-popup-right-pane"):
312
+ gr.Markdown(
313
+ "Chat UI is now open sourced on Hugging Face Hub"
314
+ )
315
+ gr.Markdown(
316
+ "check out the [↗ repository](https://huggingface.co/spaces/chansung/test-multi-conv)"
317
+ )
318
+
319
+ with gr.Column(scale=1):
320
+ gr.Markdown("Examples")
321
+ with gr.Row():
322
+ for example in examples:
323
+ ex_btns.append(gr.Button(example, elem_classes=["example-btn"]))
324
+
325
+ with gr.Column(elem_id="aux-btns-popup", visible=True):
326
+ with gr.Row():
327
+ stop = gr.Button("Stop", elem_classes=["aux-btn"], interactive=False)
328
+ regenerate = gr.Button("Rege", interactive=False, elem_classes=["aux-btn"])
329
+ clean = gr.Button("Clean", elem_classes=["aux-btn"])
330
+
331
+ with gr.Accordion("Context Inspector", elem_id="aux-viewer", open=False):
332
+ context_inspector = gr.Textbox(
333
+ "",
334
+ elem_id="aux-viewer-inspector",
335
+ label="",
336
+ lines=30,
337
+ max_lines=50,
338
+ )
339
+
340
+ chatbot = gr.Chatbot(elem_id='chatbot')
341
+ instruction_txtbox = gr.Textbox(
342
+ placeholder="Ask anything", label="",
343
+ elem_id="prompt-txt"
344
+ )
345
+
346
+ with gr.Accordion("Control Panel", open=False) as control_panel:
347
+ with gr.Column():
348
+ with gr.Column():
349
+ gr.Markdown("#### Global context")
350
+ with gr.Accordion("global context will persist during conversation, and it is placed at the top of the prompt", open=False):
351
+ global_context = gr.Textbox(
352
+ "global context",
353
+ lines=5,
354
+ max_lines=10,
355
+ interactive=True,
356
+ elem_id="global-context"
357
+ )
358
+
359
+ gr.Markdown("#### GenConfig for **response** text generation")
360
+ with gr.Row():
361
+ res_temp = gr.Slider(0.0, 2.0, 0, step=0.1, label="temp", interactive=True)
362
+ res_topp = gr.Slider(0.0, 2.0, 0, step=0.1, label="top_p", interactive=True)
363
+ res_topk = gr.Slider(20, 1000, 0, step=1, label="top_k", interactive=True)
364
+ res_rpen = gr.Slider(0.0, 2.0, 0, step=0.1, label="rep_penalty", interactive=True)
365
+ res_mnts = gr.Slider(64, 2048, 0, step=1, label="new_tokens", interactive=True)
366
+ res_beams = gr.Slider(1, 4, 0, step=1, label="beams")
367
+ res_cache = gr.Radio([True, False], value=0, label="cache", interactive=True)
368
+ res_sample = gr.Radio([True, False], value=0, label="sample", interactive=True)
369
+ res_eosid = gr.Number(value=0, visible=False, precision=0)
370
+ res_padid = gr.Number(value=0, visible=False, precision=0)
371
+
372
+ with gr.Column(visible=False):
373
+ gr.Markdown("#### GenConfig for **summary** text generation")
374
+ with gr.Row():
375
+ sum_temp = gr.Slider(0.0, 2.0, 0, step=0.1, label="temp", interactive=True)
376
+ sum_topp = gr.Slider(0.0, 2.0, 0, step=0.1, label="top_p", interactive=True)
377
+ sum_topk = gr.Slider(20, 1000, 0, step=1, label="top_k", interactive=True)
378
+ sum_rpen = gr.Slider(0.0, 2.0, 0, step=0.1, label="rep_penalty", interactive=True)
379
+ sum_mnts = gr.Slider(64, 2048, 0, step=1, label="new_tokens", interactive=True)
380
+ sum_beams = gr.Slider(1, 8, 0, step=1, label="beams", interactive=True)
381
+ sum_cache = gr.Radio([True, False], value=0, label="cache", interactive=True)
382
+ sum_sample = gr.Radio([True, False], value=0, label="sample", interactive=True)
383
+ sum_eosid = gr.Number(value=0, visible=False, precision=0)
384
+ sum_padid = gr.Number(value=0, visible=False, precision=0)
385
+
386
+ with gr.Column():
387
+ gr.Markdown("#### Context managements")
388
+ with gr.Row():
389
+ ctx_num_lconv = gr.Slider(2, 10, 3, step=1, label="number of recent talks to keep", interactive=True)
390
+ ctx_sum_prompt = gr.Textbox(
391
+ "summarize our conversations. what have we discussed about so far?",
392
+ label="design a prompt to summarize the conversations",
393
+ visible=False
394
+ )
395
+
396
+ gr.Markdown("""The control panel at the bottom allows you to adjust three major hyper-parameters. First, you can set the global context of the conversation. An appropriate global context recommended by each model's authors is provided by default, but you can set it as you like. Second, you can adjust some of the hyper-parameters of the `GenerationConfig` to decide how you want the model to generate text; `Temperature`, `Top K`, and `New Max Tokens` are some of the available ones. Third, you can adjust the number of recent talks to keep track of. With a bigger number, the model will see more of the past conversation.
397
+
398
+ Lastly, there is a hidden panel in the top right corner that appears when you hover your mouse over it. When expanded, it shows what the model actually sees, so you can double-check how the entire prompt is constructed and fed into the model at each turn.
399
+ """)
400
+
401
+ btns = [
402
+ baize_7b, guanaco_7b #nous_hermes_13b, evolinstruct_vicuna_13b, guanaco_13b
403
+ # baize_7b, evolinstruct_vicuna_13b, guanaco_13b, nous_hermes_13b
404
+ # llama_deus_7b, koalpaca, evolinstruct_vicuna_13b, baize_7b, guanaco_33b,
405
+ ]
406
+ for btn in btns:
407
+ btn.click(
408
+ move_to_second_view,
409
+ btn,
410
+ [
411
+ model_choice_view, model_review_view,
412
+ model_image, model_name, model_params, model_base, model_ckpt,
413
+ model_desc, model_vram, gen_config_path,
414
+ example_showcase1, example_showcase2, example_showcase3, example_showcase4,
415
+ progress_view
416
+ ]
417
+ )
418
+
419
+ back_to_model_choose_btn.click(
420
+ move_to_first_view,
421
+ None,
422
+ [model_choice_view, model_review_view, progress_view2]
423
+ )
424
+
425
+ confirm_btn.click(
426
+ get_model_num,
427
+ [model_name],
428
+ [progress_view2, model_num]
429
+ ).then(
430
+ move_to_third_view,
431
+ model_num,
432
+ [progress_view2, model_review_view, chat_view, chatbot, chat_state, global_context,
433
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid]
434
+ )
435
+
436
+ for btn in channel_btns:
437
+ btn.click(
438
+ set_chatbot,
439
+ [btn, local_data, chat_state],
440
+ [chatbot, idx, example_block, regenerate]
441
+ ).then(
442
+ None, btn, None,
443
+ _js=UPDATE_LEFT_BTNS_STATE
444
+ )
445
+
446
+ for btn in ex_btns:
447
+ btn.click(
448
+ set_example,
449
+ [btn],
450
+ [instruction_txtbox, example_block]
451
+ )
452
+
453
+ instruction_txtbox.submit(
454
+ lambda: [
455
+ gr.update(visible=False),
456
+ gr.update(interactive=True)
457
+ ],
458
+ None,
459
+ [example_block, regenerate]
460
+ ).then(
461
+ central.chat_stream,
462
+ [idx, local_data, instruction_txtbox, chat_state, model_num,
463
+ global_context, ctx_num_lconv, ctx_sum_prompt,
464
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid],
465
+ [instruction_txtbox, chatbot, context_inspector, local_data],
466
+ ).then(
467
+ None, local_data, None,
468
+ _js="(v)=>{ setStorage('local_data',v) }"
469
+ )
470
+
471
+ regenerate.click(
472
+ rollback_last,
473
+ [idx, local_data, chat_state],
474
+ [instruction_txtbox, chatbot, local_data, regenerate]
475
+ ).then(
476
+ central.chat_stream,
477
+ [idx, local_data, instruction_txtbox, chat_state, model_num,
478
+ global_context, ctx_num_lconv, ctx_sum_prompt,
479
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid],
480
+ [instruction_txtbox, chatbot, context_inspector, local_data],
481
+ ).then(
482
+ lambda: gr.update(interactive=True),
483
+ None,
484
+ regenerate
485
+ ).then(
486
+ None, local_data, None,
487
+ _js="(v)=>{ setStorage('local_data',v) }"
488
+ )
489
+
490
+ # stop.click(
491
+ # None, None, None,
492
+ # cancels=[send_event]
493
+ # )
494
+
495
+ clean.click(
496
+ reset_chat,
497
+ [idx, local_data, chat_state],
498
+ [instruction_txtbox, chatbot, local_data, example_block, regenerate]
499
+ ).then(
500
+ None, local_data, None,
501
+ _js="(v)=>{ setStorage('local_data',v) }"
502
+ )
503
+
504
+ chat_back_btn.click(
505
+ lambda: [gr.update(visible=False), gr.update(visible=True)],
506
+ None,
507
+ [chat_view, model_choice_view]
508
+ )
509
+
510
+ demo.load(
511
+ None,
512
+ inputs=None,
513
+ outputs=[chatbot, local_data],
514
+ _js=GET_LOCAL_STORAGE,
515
+ )
516
+
517
+ demo.queue(
518
+ concurrency_count=5,
519
+ max_size=256,
520
+ ).launch(
521
+ server_port=6006,
522
+ server_name="0.0.0.0",
523
+ debug=True,
524
+ )
args.py ADDED
@@ -0,0 +1,61 @@
1
+ import argparse
2
+
3
+ def parse_args():
4
+ parser = argparse.ArgumentParser(
5
+ description="Gradio Application for LLM as a chatbot service"
6
+ )
7
+ parser.add_argument(
8
+ "--base-url",
9
+ help="Hugging Face Hub URL",
10
+ default="elinas/llama-7b-hf-transformers-4.29",
11
+ type=str,
12
+ )
13
+ parser.add_argument(
14
+ "--ft-ckpt-url",
15
+ help="Hugging Face Hub URL",
16
+ # default="tloen/alpaca-lora-7b",
17
+ default="LLMs/Alpaca-LoRA-7B-elina",
18
+ type=str,
19
+ )
20
+ parser.add_argument(
21
+ "--port",
22
+ help="PORT number where the app is served",
23
+ default=6006,
24
+ type=int,
25
+ )
26
+ parser.add_argument(
27
+ "--share",
28
+ help="Create and share temporary endpoint (useful in Colab env)",
29
+ action='store_true'
30
+ )
31
+ parser.add_argument(
32
+ "--gen-config-path",
33
+ help="path to GenerationConfig file",
34
+ default="configs/response_configs/default.yaml",
35
+ # default="configs/gen_config_koalpaca.yaml",
36
+ # default="configs/gen_config_stablelm.yaml",
37
+ type=str
38
+ )
39
+ parser.add_argument(
40
+ "--gen-config-summarization-path",
41
+ help="path to GenerationConfig file used in context summarization",
42
+ default="configs/summarization_configs/default.yaml",
43
+ type=str
44
+ )
45
+ parser.add_argument(
46
+ "--multi-gpu",
47
+ help="Enable multi-GPU mode. This forces float16 instead of Int8, so check that your system has enough GPU memory",
48
+ action='store_true'
49
+ )
50
+ parser.add_argument(
51
+ "--force-download_ckpt",
52
+ help="Force downloading the ckpt instead of using a cached one",
53
+ action="store_true"
54
+ )
55
+ parser.add_argument(
56
+ "--chat-only-mode",
57
+ help="Only show the chatting window. Otherwise, other components will appear for more sophisticated control",
58
+ action="store_true"
59
+ )
60
+
61
+ return parser.parse_args()
channels.txt ADDED
@@ -0,0 +1,10 @@
1
+ 1st Channel
2
+ 2nd Channel
3
+ 3rd Channel
4
+ 4th Channel
5
+ 5th Channel
6
+ 6th Channel
7
+ 7th Channel
8
+ 8th Channel
9
+ 9th Channel
10
+ 10th Channel
chats/__init__.py ADDED
File without changes
chats/alpaca.py ADDED
@@ -0,0 +1,108 @@
1
+ import copy
2
+ import json
3
+ import global_vars
4
+ from chats import pre, post
5
+ from pingpong import PingPong
6
+ from gens.batch_gen import get_output_batch
7
+
8
+ from pingpong.context import CtxLastWindowStrategy
9
+
10
+ def build_prompts(ppmanager, user_message, global_context, win_size=3):
11
+ dummy_ppm = copy.deepcopy(ppmanager)
12
+
13
+ dummy_ppm.ctx = global_context
14
+ for pingpong in dummy_ppm.pingpongs:
15
+ pong = pingpong.pong
16
+ first_sentence = pong.split("\n")[0]
17
+ if first_sentence != "" and \
18
+ pre.contains_image_markdown(first_sentence):
19
+ pong = ' '.join(pong.split("\n")[1:]).strip()
20
+ pingpong.pong = pong
21
+
22
+ lws = CtxLastWindowStrategy(win_size)
23
+
24
+ prompt = lws(dummy_ppm)
25
+ return prompt
26
+
27
+ def text_stream(ppmanager, streamer, model_thumbnail_tiny, model_type):
28
+ count = 0
29
+
30
+ for new_text in streamer:
31
+ if count == 0:
32
+ ppmanager.append_pong(f"![]({model_thumbnail_tiny})***[{model_type}]***\n")
33
+ count = count + 1
34
+
35
+ ppmanager.append_pong(new_text)
36
+ yield ppmanager, ppmanager.build_uis()
37
+
38
+ yield ppmanager, ppmanager.build_uis()
39
+
40
+ def summarize(
41
+ ppmanager, prompt_to_summarize, win_size,
42
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
43
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
44
+ ):
45
+ ctx = ppmanager.ctx
46
+ last_pong = ppmanager.pingpongs[-1].pong
47
+ ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
48
+ prompt = ppmanager.build_prompts(from_idx=-win_size)
49
+
50
+ _, gen_config_summarization = pre.build_gen_config(
51
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
52
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
53
+ )
54
+ summarize_output = get_output_batch(
55
+ global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
56
+ )[0].split("### Response:")[-1].strip()
57
+ ppmanager.ctx = summarize_output
58
+ ppmanager.pop_pingpong()
59
+ return ppmanager
60
+
61
+ def chat_stream(
62
+ idx, local_data, user_message, state, model_num,
63
+ global_context, ctx_num_lconv, ctx_sum_prompt,
64
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
65
+ ):
66
+ res = [
67
+ state["ppmanager_type"].from_json(json.dumps(ppm))
68
+ for ppm in local_data
69
+ ]
70
+
71
+ ppm = res[idx]
72
+
73
+ # add_ping returns a prompt structured in Alpaca form
74
+ ppm.add_pingpong(
75
+ PingPong(user_message, "")
76
+ )
77
+ prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
78
+
79
+ # prepare text generating streamer & start generating
80
+ gen_kwargs, streamer = pre.build(
81
+ prompt, model_num,
82
+ res_temp, res_topp, res_topk, res_rpen, res_mnts,
83
+ res_beams, res_cache, res_sample, res_eosid, res_padid,
84
+ return_token_type_ids=False
85
+ )
86
+ pre.start_gen(gen_kwargs, model_num)
87
+
88
+ model_thumbnail_tiny = global_vars.models[model_num]["model_thumb_tiny"]
89
+ model_type = global_vars.models[model_num]["model_type"]
90
+ for ppmanager, uis in text_stream(ppm, streamer, model_thumbnail_tiny, model_type):
91
+ yield "", uis, prompt, str(res)
92
+
93
+ ppm = post.strip_pong(ppm)
94
+ yield "", ppm.build_uis(), prompt, str(res)
95
+
96
+ # summarization
97
+ # ppm.add_pingpong(
98
+ # PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
99
+ # )
100
+ # yield "", ppm.build_uis(), prompt, state
101
+ # ppm.pop_pingpong()
102
+
103
+ # ppm = summarize(
104
+ # ppm, ctx_sum_prompt, ctx_num_lconv,
105
+ # sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
106
+ # sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
107
+ # )
108
+ yield "", ppm.build_uis(), prompt, str(res)
chats/alpaca_gpt4.py ADDED
@@ -0,0 +1,101 @@
1
+ import copy
2
+ import json
3
+ import global_vars
4
+ from chats import pre, post
5
+ from pingpong import PingPong
6
+ from gens.batch_gen import get_output_batch
7
+
8
+ from pingpong.context import CtxLastWindowStrategy
9
+
10
+ def build_prompts(ppmanager, user_message, global_context, win_size=3):
11
+ dummy_ppm = copy.deepcopy(ppmanager)
12
+
13
+ dummy_ppm.ctx = global_context
14
+ for pingpong in dummy_ppm.pingpongs:
15
+ pong = pingpong.pong
16
+ first_sentence = pong.split("\n")[0]
17
+ if first_sentence != "" and \
18
+ pre.contains_image_markdown(first_sentence):
19
+ pong = ' '.join(pong.split("\n")[1:]).strip()
20
+ pingpong.pong = pong
21
+
22
+ lws = CtxLastWindowStrategy(win_size)
23
+
24
+ prompt = lws(dummy_ppm)
25
+ return prompt
26
+
27
+ def text_stream(ppmanager, streamer):
28
+ for new_text in streamer:
29
+ ppmanager.append_pong(new_text)
30
+ yield ppmanager, ppmanager.build_uis()
31
+
32
+ yield ppmanager, ppmanager.build_uis()
33
+
34
+ def summarize(
35
+ ppmanager, prompt_to_summarize, win_size,
36
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
37
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
38
+ ):
39
+ ctx = ppmanager.ctx
40
+ last_pong = ppmanager.pingpongs[-1].pong
41
+ ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
42
+ prompt = ppmanager.build_prompts(from_idx=-win_size)
43
+
44
+ _, gen_config_summarization = pre.build_gen_config(
45
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
46
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
47
+ )
48
+ summarize_output = get_output_batch(
49
+ global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
50
+ )[0].split("### Response:")[-1].strip()
51
+ ppmanager.ctx = summarize_output
52
+ ppmanager.pop_pingpong()
53
+ return ppmanager
54
+
55
+ def chat_stream(
56
+ idx, local_data, user_message, state, model_num,
57
+ global_context, ctx_num_lconv, ctx_sum_prompt,
58
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
59
+ ):
60
+ res = [
61
+ state["ppmanager_type"].from_json(json.dumps(ppm))
62
+ for ppm in local_data
63
+ ]
64
+
65
+ ppm = res[idx]
66
+
67
+ # add_ping returns a prompt structured in Alpaca form
68
+ ppm.add_pingpong(
69
+ PingPong(user_message, "")
70
+ )
71
+ prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
72
+
73
+ # prepare text generating streamer & start generating
74
+ gen_kwargs, streamer = pre.build(
75
+ prompt,
76
+ res_temp, res_topp, res_topk, res_rpen, res_mnts,
77
+ res_beams, res_cache, res_sample, res_eosid, res_padid,
78
+ return_token_type_ids=False
79
+ )
80
+ pre.start_gen(gen_kwargs)
81
+
82
+ # handling stream
83
+ for ppmanager, uis in text_stream(ppm, streamer):
84
+ yield "", uis, prompt, str(res)
85
+
86
+ ppm = post.strip_pong(ppm)
87
+ yield "", ppm.build_uis(), prompt, str(res)
88
+
89
+ # summarization
90
+ # ppm.add_pingpong(
91
+ # PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
92
+ # )
93
+ # yield "", ppm.build_uis(), prompt, state
94
+ # ppm.pop_pingpong()
95
+
96
+ # ppm = summarize(
97
+ # ppm, ctx_sum_prompt, ctx_num_lconv,
98
+ # sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
99
+ # sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
100
+ # )
101
+ yield "", ppm.build_uis(), prompt, str(res)
chats/alpacoom.py ADDED
@@ -0,0 +1,101 @@
1
+ import copy
2
+ import json
3
+ import global_vars
4
+ from chats import pre, post
5
+ from pingpong import PingPong
6
+ from gens.batch_gen import get_output_batch
7
+
8
+ from pingpong.context import CtxLastWindowStrategy
9
+
10
+ def build_prompts(ppmanager, user_message, global_context, win_size=3):
11
+ dummy_ppm = copy.deepcopy(ppmanager)
12
+
13
+ dummy_ppm.ctx = global_context
14
+ for pingpong in dummy_ppm.pingpongs:
15
+ pong = pingpong.pong
16
+ first_sentence = pong.split("\n")[0]
17
+ if first_sentence != "" and \
18
+ pre.contains_image_markdown(first_sentence):
19
+ pong = ' '.join(pong.split("\n")[1:]).strip()
20
+ pingpong.pong = pong
21
+
22
+ lws = CtxLastWindowStrategy(win_size)
23
+
24
+ prompt = lws(dummy_ppm)
25
+ return prompt
26
+
27
+ def text_stream(ppmanager, streamer):
28
+ for new_text in streamer:
29
+ ppmanager.append_pong(new_text)
30
+ yield ppmanager, ppmanager.build_uis()
31
+
32
+ yield ppmanager, ppmanager.build_uis()
33
+
34
+ def summarize(
35
+ ppmanager, prompt_to_summarize, win_size,
36
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
37
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
38
+ ):
39
+ ctx = ppmanager.ctx
40
+ last_pong = ppmanager.pingpongs[-1].pong
41
+ ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
42
+ prompt = ppmanager.build_prompts(from_idx=-win_size)
43
+
44
+ _, gen_config_summarization = pre.build_gen_config(
45
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
46
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
47
+ )
48
+ summarize_output = get_output_batch(
49
+ global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
50
+ )[0].split("### Response:")[-1].strip()
51
+ ppmanager.ctx = summarize_output
52
+ ppmanager.pop_pingpong()
53
+ return ppmanager
54
+
55
+ def chat_stream(
56
+ idx, local_data, user_message, state, model_num,
57
+ global_context, ctx_num_lconv, ctx_sum_prompt,
58
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
59
+ ):
60
+ res = [
61
+ state["ppmanager_type"].from_json(json.dumps(ppm))
62
+ for ppm in local_data
63
+ ]
64
+
65
+ ppm = res[idx]
66
+
67
+ # add_ping returns a prompt structured in Alpaca form
68
+ ppm.add_pingpong(
69
+ PingPong(user_message, "")
70
+ )
71
+ prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
72
+
73
+ # prepare text generating streamer & start generating
74
+ gen_kwargs, streamer = pre.build(
75
+ prompt,
76
+ res_temp, res_topp, res_topk, res_rpen, res_mnts,
77
+ res_beams, res_cache, res_sample, res_eosid, res_padid,
78
+ return_token_type_ids=False
79
+ )
80
+ pre.start_gen(gen_kwargs)
81
+
82
+ # handling stream
83
+ for ppmanager, uis in text_stream(ppm, streamer):
84
+ yield "", uis, prompt, str(res)
85
+
86
+ ppm = post.strip_pong(ppm)
87
+ yield "", ppm.build_uis(), prompt, str(res)
88
+
89
+ # summarization
90
+ # ppm.add_pingpong(
91
+ # PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
92
+ # )
93
+ # yield "", ppm.build_uis(), prompt, state
94
+ # ppm.pop_pingpong()
95
+
96
+ # ppm = summarize(
97
+ # ppm, ctx_sum_prompt, ctx_num_lconv,
98
+ # sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
99
+ # sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
100
+ # )
101
+ yield "", ppm.build_uis(), prompt, str(res)
chats/baize.py ADDED
@@ -0,0 +1,113 @@
1
+ import copy
2
+ import json
3
+ import global_vars
4
+ from chats import pre, post
5
+ from pingpong import PingPong
6
+ from gens.batch_gen import get_output_batch
7
+
8
+ from pingpong.context import CtxLastWindowStrategy
9
+
10
+ def build_prompts(ppmanager, user_message, global_context, win_size=3):
11
+ dummy_ppm = copy.deepcopy(ppmanager)
12
+
13
+ dummy_ppm.ctx = global_context
14
+ for pingpong in dummy_ppm.pingpongs:
15
+ pong = pingpong.pong
16
+ first_sentence = pong.split("\n")[0]
17
+ if first_sentence != "" and \
18
+ pre.contains_image_markdown(first_sentence):
19
+ pong = ' '.join(pong.split("\n")[1:]).strip()
20
+ pingpong.pong = pong
21
+
22
+ lws = CtxLastWindowStrategy(win_size)
23
+
24
+ prompt = lws(dummy_ppm)
25
+ return prompt
26
+
27
+ def text_stream(ppmanager, streamer, model_thumbnail_tiny, model_type):
28
+ count = 0
29
+
30
+ for new_text in streamer:
31
+ if "[|Human|]" in new_text or \
32
+ "[|AI|]" in new_text:
33
+ break
34
+
35
+ if count == 0:
36
+ ppmanager.append_pong(f"![]({model_thumbnail_tiny})***[{model_type}]***\n")
37
+ count = count + 1
38
+
39
+ ppmanager.append_pong(new_text)
40
+ yield ppmanager, ppmanager.build_uis()
41
+
42
+ yield ppmanager, ppmanager.build_uis()
43
+
44
+ def summarize(
45
+ ppmanager, prompt_to_summarize, win_size,
46
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
47
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
48
+ ):
49
+ ctx = ppmanager.ctx
50
+ last_pong = ppmanager.pingpongs[-1].pong
51
+ ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
52
+ prompt = ppmanager.build_prompts(from_idx=-win_size)
53
+
54
+ _, gen_config_summarization = pre.build_gen_config(
55
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
56
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
57
+ )
58
+ summarize_output = get_output_batch(
59
+ global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
60
+ )[0].split("### Response:")[-1].strip()
61
+ ppmanager.ctx = summarize_output
62
+ ppmanager.pop_pingpong()
63
+ return ppmanager
64
+
65
+ def chat_stream(
66
+ idx, local_data, user_message, state, model_num,
67
+ global_context, ctx_num_lconv, ctx_sum_prompt,
68
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
69
+ ):
70
+ res = [
71
+ state["ppmanager_type"].from_json(json.dumps(ppm))
72
+ for ppm in local_data
73
+ ]
74
+
75
+ ppm = res[idx]
76
+
77
+ # add_ping returns a prompt structured in Alpaca form
78
+ ppm.add_pingpong(
79
+ PingPong(user_message, "")
80
+ )
81
+ prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
82
+
83
+ # prepare text generating streamer & start generating
84
+ gen_kwargs, streamer = pre.build(
85
+ prompt, model_num,
86
+ res_temp, res_topp, res_topk, res_rpen, res_mnts,
87
+ res_beams, res_cache, res_sample, res_eosid, res_padid,
88
+ return_token_type_ids=False
89
+ )
90
+ pre.start_gen(gen_kwargs, model_num)
91
+
92
+ # handling stream
93
+ model_thumbnail_tiny = global_vars.models[model_num]["model_thumb_tiny"]
94
+ model_type = global_vars.models[model_num]["model_type"]
95
+ for ppmanager, uis in text_stream(ppm, streamer, model_thumbnail_tiny, model_type):
96
+ yield "", uis, prompt, str(res)
97
+
98
+ ppm = post.strip_pong(ppm)
99
+ yield "", ppm.build_uis(), prompt, str(res)
100
+
101
+ # summarization
102
+ # ppm.add_pingpong(
103
+ # PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
104
+ # )
105
+ # yield "", ppm.build_uis(), prompt, state
106
+ # ppm.pop_pingpong()
107
+
108
+ # ppm = summarize(
109
+ # ppm, ctx_sum_prompt, ctx_num_lconv,
110
+ # sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
111
+ # sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
112
+ # )
113
+ yield "", ppm.build_uis(), prompt, str(res)
chats/central.py ADDED
@@ -0,0 +1,156 @@
1
+ from chats import stablelm
2
+ from chats import alpaca
3
+ from chats import koalpaca
4
+ from chats import flan_alpaca
5
+ from chats import os_stablelm
6
+ from chats import vicuna
7
+ from chats import starchat
8
+ from chats import redpajama
9
+ from chats import mpt
10
+ from chats import alpacoom
11
+ from chats import baize
12
+ from chats import guanaco
13
+
14
+ def chat_stream(
15
+ idx, local_data, user_message, state, model_num,
16
+ global_context, ctx_num_lconv, ctx_sum_prompt,
17
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
18
+ ):
19
+ model_type = state["model_type"]
20
+
21
+ if model_type == "stablelm":
22
+ cs = stablelm.chat_stream(
23
+ idx, local_data, user_message, state, model_num,
24
+ global_context, ctx_num_lconv, ctx_sum_prompt,
25
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
26
+ )
27
+
28
+ elif model_type == "baize":
29
+ cs = baize.chat_stream(
30
+ idx, local_data, user_message, state, model_num,
31
+ global_context, ctx_num_lconv, ctx_sum_prompt,
32
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
33
+ )
34
+
35
+ elif model_type == "alpaca":
36
+ cs = alpaca.chat_stream(
37
+ idx, local_data, user_message, state, model_num,
38
+ global_context, ctx_num_lconv, ctx_sum_prompt,
39
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
40
+ )
41
+
42
+ elif model_type == "alpaca-gpt4":
43
+ cs = alpaca.chat_stream(
44
+ idx, local_data, user_message, state, model_num,
45
+ global_context, ctx_num_lconv, ctx_sum_prompt,
46
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
47
+ )
48
+
49
+ elif model_type == "alpacoom":
50
+ cs = alpacoom.chat_stream(
51
+ idx, local_data, user_message, state, model_num,
52
+ global_context, ctx_num_lconv, ctx_sum_prompt,
53
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
54
+ )
55
+
56
+ elif model_type == "llama-deus":
57
+ cs = alpaca.chat_stream(
58
+ idx, local_data, user_message, state, model_num,
59
+ global_context, ctx_num_lconv, ctx_sum_prompt,
60
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
61
+ )
62
+
63
+ elif model_type == "camel":
64
+ cs = alpaca.chat_stream(
65
+ idx, local_data, user_message, state, model_num,
66
+ global_context, ctx_num_lconv, ctx_sum_prompt,
67
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
68
+ )
69
+
70
+ elif model_type == "koalpaca-polyglot":
71
+ cs = koalpaca.chat_stream(
72
+ idx, local_data, user_message, state, model_num,
73
+ global_context, ctx_num_lconv, ctx_sum_prompt,
74
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
75
+ )
76
+
77
+ elif model_type == "flan-alpaca":
78
+ cs = flan_alpaca.chat_stream(
79
+ idx, local_data, user_message, state, model_num,
80
+ global_context, ctx_num_lconv, ctx_sum_prompt,
81
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
82
+ )
83
+
84
+ elif model_type == "os-stablelm":
85
+ cs = os_stablelm.chat_stream(
86
+ idx, local_data, user_message, state, model_num,
87
+ global_context, ctx_num_lconv, ctx_sum_prompt,
88
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
89
+ )
90
+
91
+ elif model_type == "t5-vicuna":
92
+ cs = vicuna.chat_stream(
93
+ idx, local_data, user_message, state, model_num,
94
+ global_context, ctx_num_lconv, ctx_sum_prompt,
95
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
96
+ )
97
+
98
+ elif model_type == "stable-vicuna":
99
+ cs = vicuna.chat_stream(
100
+ idx, local_data, user_message, state, model_num,
101
+ global_context, ctx_num_lconv, ctx_sum_prompt,
102
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
103
+ )
104
+
105
+ elif model_type == "vicuna":
106
+ cs = vicuna.chat_stream(
107
+ idx, local_data, user_message, state, model_num,
108
+ global_context, ctx_num_lconv, ctx_sum_prompt,
109
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
110
+ )
111
+
112
+ elif model_type == "evolinstruct-vicuna":
113
+ cs = vicuna.chat_stream(
114
+ idx, local_data, user_message, state, model_num,
115
+ global_context, ctx_num_lconv, ctx_sum_prompt,
116
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
117
+ )
118
+
119
+ elif model_type == "starchat":
120
+ cs = starchat.chat_stream(
121
+ idx, local_data, user_message, state, model_num,
122
+ global_context, ctx_num_lconv, ctx_sum_prompt,
123
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
124
+ )
125
+
126
+ elif model_type == "mpt":
127
+ cs = mpt.chat_stream(
128
+ idx, local_data, user_message, state, model_num,
129
+ global_context, ctx_num_lconv, ctx_sum_prompt,
130
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
131
+ )
132
+
133
+ elif model_type == "redpajama":
134
+ cs = redpajama.chat_stream(
135
+ idx, local_data, user_message, state, model_num,
136
+ global_context, ctx_num_lconv, ctx_sum_prompt,
137
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
138
+ )
139
+
140
+ elif model_type == "guanaco":
141
+ cs = guanaco.chat_stream(
142
+ idx, local_data, user_message, state, model_num,
143
+ global_context, ctx_num_lconv, ctx_sum_prompt,
144
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
145
+ )
146
+
147
+ elif model_type == "nous-hermes":
148
+ cs = alpaca.chat_stream(
149
+ idx, local_data, user_message, state, model_num,
150
+ global_context, ctx_num_lconv, ctx_sum_prompt,
151
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
152
+ )
153
+
154
+ for idx, x in enumerate(cs):
155
+ yield x
156
+
chats/flan_alpaca.py ADDED
@@ -0,0 +1,101 @@
1
+ import copy
2
+ import json
3
+ import global_vars
4
+ from chats import pre, post
5
+ from pingpong import PingPong
6
+ from gens.batch_gen import get_output_batch
7
+
8
+ from pingpong.context import CtxLastWindowStrategy
9
+
10
+ def build_prompts(ppmanager, user_message, global_context, win_size=3):
11
+ dummy_ppm = copy.deepcopy(ppmanager)
12
+
13
+ dummy_ppm.ctx = global_context
14
+ for pingpong in dummy_ppm.pingpongs:
15
+ pong = pingpong.pong
16
+ first_sentence = pong.split("\n")[0]
17
+ if first_sentence != "" and \
18
+ pre.contains_image_markdown(first_sentence):
19
+ pong = ' '.join(pong.split("\n")[1:]).strip()
20
+ pingpong.pong = pong
21
+
22
+ lws = CtxLastWindowStrategy(win_size)
23
+
24
+ prompt = lws(dummy_ppm)
25
+ return prompt
26
+
27
+ def text_stream(ppmanager, streamer):
28
+ for new_text in streamer:
29
+ ppmanager.append_pong(new_text)
30
+ yield ppmanager, ppmanager.build_uis()
31
+
32
+ yield ppmanager, ppmanager.build_uis()
33
+
34
+ def summarize(
35
+ ppmanager, prompt_to_summarize, win_size,
36
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
37
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
38
+ ):
39
+ ctx = ppmanager.ctx
40
+ last_pong = ppmanager.pingpongs[-1].pong
41
+ ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
42
+ prompt = ppmanager.build_prompts(from_idx=-win_size)
43
+
44
+ _, gen_config_summarization = pre.build_gen_config(
45
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
46
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
47
+ )
48
+ summarize_output = get_output_batch(
49
+ global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
50
+ )[0].split("-----")[-1].strip()
51
+ ppmanager.ctx = summarize_output
52
+ ppmanager.pop_pingpong()
53
+ return ppmanager
54
+
55
+ def chat_stream(
56
+ idx, local_data, user_message, state, model_num,
57
+ global_context, ctx_num_lconv, ctx_sum_prompt,
58
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
59
+ ):
60
+ res = [
61
+ state["ppmanager_type"].from_json(json.dumps(ppm))
62
+ for ppm in local_data
63
+ ]
64
+
65
+ ppm = res[idx]
66
+
67
+ # add_ping returns a prompt structured in Alpaca form
68
+ ppm.add_pingpong(
69
+ PingPong(user_message, "")
70
+ )
71
+ prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
72
+
73
+ # prepare text generating streamer & start generating
74
+ gen_kwargs, streamer = pre.build(
75
+ prompt,
76
+ res_temp, res_topp, res_topk, res_rpen, res_mnts,
77
+ res_beams, res_cache, res_sample, res_eosid, res_padid,
78
+ return_token_type_ids=False
79
+ )
80
+ pre.start_gen(gen_kwargs)
81
+
82
+ # handling stream
83
+ for ppmanager, uis in text_stream(ppm, streamer):
84
+ yield "", uis, prompt, str(res)
85
+
86
+ ppm = post.strip_pong(ppm)
87
+ yield "", ppm.build_uis(), prompt, str(res)
88
+
89
+ # summarization
90
+ # ppm.add_pingpong(
91
+ # PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
92
+ # )
93
+ # yield "", ppm.build_uis(), prompt, state
94
+ # ppm.pop_pingpong()
95
+
96
+ # ppm = summarize(
97
+ # ppm, ctx_sum_prompt, ctx_num_lconv,
98
+ # sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
99
+ # sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
100
+ # )
101
+ yield "", ppm.build_uis(), prompt, str(res)
chats/guanaco.py ADDED
@@ -0,0 +1,120 @@
1
+ import torch
2
+ from transformers import StoppingCriteria, StoppingCriteriaList
3
+
4
+ import copy
5
+ import json
6
+ import global_vars
7
+ from chats import pre, post
8
+ from pingpong import PingPong
9
+ from gens.batch_gen import get_output_batch
10
+
11
+ from pingpong.context import CtxLastWindowStrategy
12
+
13
+ class StopOnTokens(StoppingCriteria):
14
+ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
15
+ stop_token_ids = [0]
16
+
17
+ for stop_id in stop_token_ids:
18
+ if input_ids[0][-1] == stop_id:
19
+ return True
20
+ return False
21
+
22
+ def build_prompts(ppmanager, user_message, global_context, win_size=3):
23
+ dummy_ppm = copy.deepcopy(ppmanager)
24
+
25
+ dummy_ppm.ctx = global_context
26
+ for pingpong in dummy_ppm.pingpongs:
27
+ pong = pingpong.pong
28
+ first_sentence = pong.split("\n")[0]
29
+ if first_sentence != "" and \
30
+ pre.contains_image_markdown(first_sentence):
31
+ pong = ' '.join(pong.split("\n")[1:]).strip()
32
+ pingpong.pong = pong
33
+
34
+ lws = CtxLastWindowStrategy(win_size)
35
+
36
+ prompt = lws(dummy_ppm)
37
+ return prompt
38
+
39
+ def text_stream(ppmanager, streamer, model_thumbnail_tiny, model_type):
40
+ count = 0
41
+
42
+ for new_text in streamer:
43
+ if count == 0:
44
+ ppmanager.append_pong(f"![]({model_thumbnail_tiny})***[{model_type}]***\n")
45
+ count = count + 1
46
+
47
+ ppmanager.append_pong(new_text)
48
+ yield ppmanager, ppmanager.build_uis()
49
+
50
+ yield ppmanager, ppmanager.build_uis()
51
+
52
+ def summarize(
53
+ ppmanager, prompt_to_summarize, win_size,
54
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
55
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
56
+ ):
57
+ ctx = ppmanager.ctx
58
+ last_pong = ppmanager.pingpongs[-1].pong
59
+ ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
60
+ prompt = ppmanager.build_prompts(from_idx=-win_size)
61
+
62
+ _, gen_config_summarization = pre.build_gen_config(
63
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
64
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
65
+ )
66
+ summarize_output = get_output_batch(
67
+ global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
68
+ )[0].split(prompt_to_summarize)[-1].strip()
69
+ ppmanager.ctx = summarize_output
70
+ ppmanager.pop_pingpong()
71
+ return ppmanager
72
+
73
+ def chat_stream(
74
+ idx, local_data, user_message, state, model_num,
75
+ global_context, ctx_num_lconv, ctx_sum_prompt,
76
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
77
+ ):
78
+ res = [
79
+ state["ppmanager_type"].from_json(json.dumps(ppm))
80
+ for ppm in local_data
81
+ ]
82
+
83
+ ppm = res[idx]
84
+
85
+ # add_ping returns a prompt structured in Alpaca form
86
+ ppm.add_pingpong(
87
+ PingPong(user_message, "")
88
+ )
89
+ prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
90
+
91
+ # prepare text generating streamer & start generating
92
+ gen_kwargs, streamer = pre.build(
93
+ prompt, model_num,
94
+ res_temp, res_topp, res_topk, res_rpen, res_mnts,
95
+ res_beams, res_cache, res_sample, res_eosid, res_padid,
96
+ StoppingCriteriaList([StopOnTokens()]), False
97
+ )
98
+ pre.start_gen(gen_kwargs, model_num)
99
+
100
+ model_thumbnail_tiny = global_vars.models[model_num]["model_thumb_tiny"]
101
+ model_type = global_vars.models[model_num]["model_type"]
102
+ for ppmanager, uis in text_stream(ppm, streamer, model_thumbnail_tiny, model_type):
103
+ yield "", uis, prompt, str(res)
104
+
105
+ ppm = post.strip_pong(ppm)
106
+ yield "", ppm.build_uis(), prompt, str(res)
107
+
108
+ # summarization
109
+ # ppm.add_pingpong(
110
+ # PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
111
+ # )
112
+ # yield "", ppm.build_uis(), prompt, state
113
+ # ppm.pop_pingpong()
114
+
115
+ # ppm = summarize(
116
+ # ppm, ctx_sum_prompt, ctx_num_lconv,
117
+ # sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
118
+ # sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
119
+ # )
120
+ yield "", ppm.build_uis(), prompt, str(res)
chats/koalpaca.py ADDED
@@ -0,0 +1,101 @@
1
+ import copy
2
+ import json
3
+ import global_vars
4
+ from chats import pre, post
5
+ from pingpong import PingPong
6
+ from gens.batch_gen import get_output_batch
7
+
8
+ from pingpong.context import CtxLastWindowStrategy
9
+
10
+ def build_prompts(ppmanager, user_message, global_context, win_size=3):
11
+ dummy_ppm = copy.deepcopy(ppmanager)
12
+
13
+ dummy_ppm.ctx = global_context
14
+ for pingpong in dummy_ppm.pingpongs:
15
+ pong = pingpong.pong
16
+ first_sentence = pong.split("\n")[0]
17
+ if first_sentence != "" and \
18
+ pre.contains_image_markdown(first_sentence):
19
+ pong = ' '.join(pong.split("\n")[1:]).strip()
20
+ pingpong.pong = pong
21
+
22
+ lws = CtxLastWindowStrategy(win_size)
23
+
24
+ prompt = lws(dummy_ppm)
25
+ return prompt
26
+
27
+ def text_stream(ppmanager, streamer):
28
+ for new_text in streamer:
29
+ ppmanager.append_pong(new_text)
30
+ yield ppmanager, ppmanager.build_uis()
31
+
32
+ yield ppmanager, ppmanager.build_uis()
33
+
34
+ def summarize(
35
+ ppmanager, prompt_to_summarize, win_size,
36
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
37
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
38
+ ):
39
+ ctx = ppmanager.ctx
40
+ last_pong = ppmanager.pingpongs[-1].pong
41
+ ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
42
+ prompt = ppmanager.build_prompts(from_idx=-win_size)
43
+
44
+ _, gen_config_summarization = pre.build_gen_config(
45
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
46
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
47
+ )
48
+ summarize_output = get_output_batch(
49
+ global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
50
+ )[0].split("### 응답:")[-1].strip()
51
+ ppmanager.ctx = summarize_output
52
+ ppmanager.pop_pingpong()
53
+ return ppmanager
54
+
55
+ def chat_stream(
56
+ idx, local_data, user_message, state, model_num,
57
+ global_context, ctx_num_lconv, ctx_sum_prompt,
58
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
59
+ ):
60
+ res = [
61
+ state["ppmanager_type"].from_json(json.dumps(ppm))
62
+ for ppm in local_data
63
+ ]
64
+
65
+ ppm = res[idx]
66
+
67
+ # add_ping returns a prompt structured in Alpaca form
68
+ ppm.add_pingpong(
69
+ PingPong(user_message, "")
70
+ )
71
+ prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
72
+
73
+ # prepare text generating streamer & start generating
74
+ gen_kwargs, streamer = pre.build(
75
+ prompt, model_num,
76
+ res_temp, res_topp, res_topk, res_rpen, res_mnts,
77
+ res_beams, res_cache, res_sample, res_eosid, res_padid,
78
+ return_token_type_ids=False
79
+ )
80
+ pre.start_gen(gen_kwargs, model_num)
81
+
82
+ # handling stream
83
+ for ppmanager, uis in text_stream(ppm, streamer):
84
+ yield "", uis, prompt, str(res)
85
+
86
+ ppm = post.strip_pong(ppm)
87
+ yield "", ppm.build_uis(), prompt, str(res)
88
+
89
+ # summarization
90
+ # ppm.add_pingpong(
91
+ # PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
92
+ # )
93
+ # yield "", ppm.build_uis(), prompt, state
94
+ # ppm.pop_pingpong()
95
+
96
+ # ppm = summarize(
97
+ # ppm, ctx_sum_prompt, ctx_num_lconv,
98
+ # sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
99
+ # sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
100
+ # )
101
+ yield "", ppm.build_uis(), prompt, str(res)
chats/mpt.py ADDED
@@ -0,0 +1,118 @@
1
+ import torch
2
+ from transformers import StoppingCriteria, StoppingCriteriaList
3
+
4
+ import copy
5
+ import json
6
+ import global_vars
7
+ from chats import pre, post
8
+ from pingpong import PingPong
9
+ from gens.batch_gen import get_output_batch
10
+
11
+ from pingpong.context import CtxLastWindowStrategy
12
+
13
+ class StopOnTokens(StoppingCriteria):
14
+ def __init__(self, tokenizer):
15
+ super().__init__()
16
+
17
+ self.stop_token_ids = tokenizer.convert_tokens_to_ids(
18
+ ["<|im_end|>", "<|endoftext|>"]
19
+ )
20
+
21
+ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
22
+ for stop_id in self.stop_token_ids:
23
+ if input_ids[0][-1] == stop_id:
24
+ return True
25
+ return False
26
+
27
+ def build_prompts(ppmanager, user_message, global_context, win_size=3):
28
+ dummy_ppm = copy.deepcopy(ppmanager)
29
+
30
+ dummy_ppm.ctx = global_context
31
+ for pingpong in dummy_ppm.pingpongs:
32
+ pong = pingpong.pong
33
+ first_sentence = pong.split("\n")[0]
34
+ if first_sentence != "" and \
35
+ pre.contains_image_markdown(first_sentence):
36
+ pong = ' '.join(pong.split("\n")[1:]).strip()
37
+ pingpong.pong = pong
38
+
39
+ lws = CtxLastWindowStrategy(win_size)
40
+
41
+ prompt = lws(dummy_ppm)
42
+ return prompt
43
+
44
+ def text_stream(ppmanager, streamer):
45
+ for new_text in streamer:
46
+ ppmanager.append_pong(new_text)
47
+ yield ppmanager, ppmanager.build_uis()
48
+
49
+ yield ppmanager, ppmanager.build_uis()
50
+
51
+ def summarize(
52
+ ppmanager, prompt_to_summarize, win_size,
53
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
54
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
55
+ ):
56
+ ctx = ppmanager.ctx
57
+ last_pong = ppmanager.pingpongs[-1].pong
58
+ ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
59
+ prompt = ppmanager.build_prompts(from_idx=-win_size)
60
+
61
+ _, gen_config_summarization = pre.build_gen_config(
62
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
63
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
64
+ )
65
+ summarize_output = get_output_batch(
66
+ global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
67
+ )[0].strip()
68
+ ppmanager.ctx = summarize_output
69
+ ppmanager.pop_pingpong()
70
+ return ppmanager
71
+
72
+ def chat_stream(
73
+ idx, local_data, user_message, state, model_num,
74
+ global_context, ctx_num_lconv, ctx_sum_prompt,
75
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
76
+ ):
77
+ res = [
78
+ state["ppmanager_type"].from_json(json.dumps(ppm))
79
+ for ppm in local_data
80
+ ]
81
+
82
+ ppm = res[idx]
83
+
84
+ # add_ping returns a prompt structured in Alpaca form
85
+ ppm.add_pingpong(
86
+ PingPong(user_message, "")
87
+ )
88
+ prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
89
+
90
+ # prepare text generating streamer & start generating
91
+ gen_kwargs, streamer = pre.build(
+ prompt, model_num,
+ res_temp, res_topp, res_topk, res_rpen, res_mnts,
+ res_beams, res_cache, res_sample, res_eosid, res_padid,
+ StoppingCriteriaList([StopOnTokens(global_vars.models[model_num]["tokenizer"])]), False
+ )
+ pre.start_gen(gen_kwargs, model_num)
98
+
99
+ # handling stream
100
+ for ppmanager, uis in text_stream(ppm, streamer):
101
+ yield "", uis, prompt, str(res)
102
+
103
+ ppm = post.strip_pong(ppm)
104
+ yield "", ppm.build_uis(), prompt, str(res)
105
+
106
+ # summarization
107
+ # ppm.add_pingpong(
108
+ # PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
109
+ # )
110
+ # yield "", ppm.build_uis(), prompt, state
111
+ # ppm.pop_pingpong()
112
+
113
+ # ppm = summarize(
114
+ # ppm, ctx_sum_prompt, ctx_num_lconv,
115
+ # sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
116
+ # sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
117
+ # )
118
+ yield "", ppm.build_uis(), prompt, str(res)
chats/os_stablelm.py ADDED
@@ -0,0 +1,112 @@
1
+ import torch
2
+ from transformers import StoppingCriteria, StoppingCriteriaList
3
+
4
+ import copy
5
+ import json
6
+ import global_vars
7
+ from chats import pre, post
8
+ from pingpong import PingPong
9
+ from gens.batch_gen import get_output_batch
10
+
11
+ from pingpong.context import CtxLastWindowStrategy
12
+
13
+ class StopOnTokens(StoppingCriteria):
14
+ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
15
+ stop_ids = [50278, 50279, 50277, 1, 0]
16
+ for stop_id in stop_ids:
17
+ if input_ids[0][-1] == stop_id:
18
+ return True
19
+ return False
20
+
21
+ def build_prompts(ppmanager, user_message, global_context, win_size=3):
22
+ dummy_ppm = copy.deepcopy(ppmanager)
23
+
24
+ dummy_ppm.ctx = global_context
25
+ for pingpong in dummy_ppm.pingpongs:
26
+ pong = pingpong.pong
27
+ first_sentence = pong.split("\n")[0]
28
+ if first_sentence != "" and \
29
+ pre.contains_image_markdown(first_sentence):
30
+ pong = ' '.join(pong.split("\n")[1:]).strip()
31
+ pingpong.pong = pong
32
+
33
+ lws = CtxLastWindowStrategy(win_size)
34
+
35
+ prompt = lws(dummy_ppm)
36
+ return prompt
37
+
38
+ def text_stream(ppmanager, streamer):
39
+ for new_text in streamer:
40
+ ppmanager.append_pong(new_text)
41
+ yield ppmanager, ppmanager.build_uis()
42
+
43
+ yield ppmanager, ppmanager.build_uis()
44
+
45
+ def summarize(
46
+ ppmanager, prompt_to_summarize, win_size,
47
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
48
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
49
+ ):
50
+ ctx = ppmanager.ctx
51
+ last_pong = ppmanager.pingpongs[-1].pong
52
+ ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
53
+ prompt = ppmanager.build_prompts(from_idx=-win_size)
54
+
55
+ _, gen_config_summarization = pre.build_gen_config(
56
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
57
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
58
+ )
59
+ summarize_output = get_output_batch(
60
+ global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
61
+ )[0].split(prompt_to_summarize)[-1].strip()
62
+ ppmanager.ctx = summarize_output
63
+ ppmanager.pop_pingpong()
64
+ return ppmanager
65
+
66
+ def chat_stream(
67
+ idx, local_data, user_message, state, model_num,
68
+ global_context, ctx_num_lconv, ctx_sum_prompt,
69
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
70
+ ):
71
+ res = [
72
+ state["ppmanager_type"].from_json(json.dumps(ppm))
73
+ for ppm in local_data
74
+ ]
75
+
76
+ ppm = res[idx]
77
+
78
+ # add_ping returns a prompt structured in Alpaca form
79
+ ppm.add_pingpong(
80
+ PingPong(user_message, "")
81
+ )
82
+ prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
83
+
84
+ # prepare text generating streamer & start generating
85
+ gen_kwargs, streamer = pre.build(
+ prompt, model_num,
+ res_temp, res_topp, res_topk, res_rpen, res_mnts,
+ res_beams, res_cache, res_sample, res_eosid, res_padid,
+ StoppingCriteriaList([StopOnTokens()]), False
+ )
+ pre.start_gen(gen_kwargs, model_num)
92
+
93
+ # handling stream
94
+ for ppmanager, uis in text_stream(ppm, streamer):
95
+ yield "", uis, prompt, str(res)
96
+
97
+ ppm = post.strip_pong(ppm)
98
+ yield "", ppm.build_uis(), prompt, str(res)
99
+
100
+ # summarization
101
+ # ppm.add_pingpong(
102
+ # PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
103
+ # )
104
+ # yield "", ppm.build_uis(), prompt, state
105
+ # ppm.pop_pingpong()
106
+
107
+ # ppm = summarize(
108
+ # ppm, ctx_sum_prompt, ctx_num_lconv,
109
+ # sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
110
+ # sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
111
+ # )
112
+ yield "", ppm.build_uis(), prompt, str(res)
chats/post.py ADDED
@@ -0,0 +1,3 @@
+def strip_pong(ppmanager):
+    ppmanager.pingpongs[-1].pong = ppmanager.pingpongs[-1].pong.strip()
+    return ppmanager
chats/pre.py ADDED
@@ -0,0 +1,97 @@
+import re
+import copy
+import global_vars
+from threading import Thread
+from transformers import TextIteratorStreamer
+from transformers import GenerationConfig
+
+def contains_image_markdown(string):
+    regex = re.compile(r'!\[(.*?)\]\((.*?)\)')
+    match = regex.search(string)
+    return match
+
+def build_model_inputs(prompt, model_num, return_token_type_ids):
+    model_inputs = global_vars.models[model_num]["tokenizer"](
+        [prompt],
+        return_tensors="pt",
+        return_token_type_ids=return_token_type_ids
+    ).to("cuda")
+    return model_inputs
+
+def build_streamer(
+    model_num,
+    timeout=20.,
+    skip_prompt=True,
+    skip_special_tokens=True
+):
+    streamer = TextIteratorStreamer(
+        global_vars.models[model_num]["tokenizer"],
+        timeout=timeout,
+        skip_prompt=skip_prompt,
+        skip_special_tokens=skip_special_tokens
+    )
+    return streamer
+
+
+def build_gen_config(
+    temperature, top_p, top_k, repetition_penalty, max_new_tokens,
+    num_beams, use_cache, do_sample, eos_token_id, pad_token_id
+):
+    gen_config_raw = {
+        "temperature": temperature,
+        "top_p": top_p,
+        "top_k": top_k,
+        "repetition_penalty": repetition_penalty,
+        "max_new_tokens": max_new_tokens,
+        "num_beams": num_beams,
+        "use_cache": use_cache,
+        "do_sample": do_sample,
+        "eos_token_id": eos_token_id,
+        "pad_token_id": pad_token_id
+    }
+
+    return gen_config_raw, GenerationConfig(**gen_config_raw)
+
+def build_gen_kwargs(
+    gen_config,
+    model_inputs,
+    streamer,
+    stopping_criteria
+):
+    gen_kwargs = dict(
+        model_inputs,
+        streamer=streamer,
+        stopping_criteria=stopping_criteria
+    )
+    gen_kwargs.update(gen_config)
+    return gen_kwargs
+
+def start_gen(gen_kwargs, model_num):
+    t = Thread(
+        target=global_vars.models[model_num]["model"].generate,
+        kwargs=gen_kwargs
+    )
+    t.start()
+
+def build(
+    prompt, model_num,
+    temperature, top_p, top_k, repetition_penalty, max_new_tokens,
+    num_beams, use_cache, do_sample, eos_token_id, pad_token_id,
+    stopping_criteria=None, return_token_type_ids=True
+):
+    gen_config_raw, _ = build_gen_config(
+        temperature, top_p, top_k, repetition_penalty, max_new_tokens,
+        num_beams, use_cache, do_sample, eos_token_id, pad_token_id
+    )
+
+    model_inputs = build_model_inputs(
+        prompt, model_num, return_token_type_ids=return_token_type_ids
+    )
+    streamer = build_streamer(model_num)
+    gen_kwargs = build_gen_kwargs(
+        gen_config_raw,
+        model_inputs,
+        streamer,
+        stopping_criteria
+    )
+    return gen_kwargs, streamer
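A minimal usage sketch for the helpers above (assuming a model and tokenizer have already been loaded into global_vars.models; the prompt text and model index 0 are placeholders):

    from chats import pre

    # build generation kwargs plus a TextIteratorStreamer for model 0
    gen_kwargs, streamer = pre.build(
        "### Instruction:\nTell me about alpacas\n\n### Response:", 0,
        temperature=0.95, top_p=0.9, top_k=50, repetition_penalty=1.2,
        max_new_tokens=256, num_beams=1, use_cache=True, do_sample=True,
        eos_token_id=0, pad_token_id=0,
    )
    pre.start_gen(gen_kwargs, 0)   # generation runs on a background thread
    for new_text in streamer:      # consume tokens as they stream in
        print(new_text, end="")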
chats/redpajama.py ADDED
@@ -0,0 +1,101 @@
1
+ import copy
2
+ import json
3
+ import global_vars
4
+ from chats import pre, post
5
+ from pingpong import PingPong
6
+ from gens.batch_gen import get_output_batch
7
+
8
+ from pingpong.context import CtxLastWindowStrategy
9
+
10
+ def build_prompts(ppmanager, user_message, global_context, win_size=3):
11
+ dummy_ppm = copy.deepcopy(ppmanager)
12
+
13
+ dummy_ppm.ctx = global_context
14
+ for pingpong in dummy_ppm.pingpongs:
15
+ pong = pingpong.pong
16
+ first_sentence = pong.split("\n")[0]
17
+ if first_sentence != "" and \
18
+ pre.contains_image_markdown(first_sentence):
19
+ pong = ' '.join(pong.split("\n")[1:]).strip()
20
+ pingpong.pong = pong
21
+
22
+ lws = CtxLastWindowStrategy(win_size)
23
+
24
+ prompt = lws(dummy_ppm)
25
+ return prompt
26
+
27
+ def text_stream(ppmanager, streamer):
28
+ for new_text in streamer:
29
+ ppmanager.append_pong(new_text)
30
+ yield ppmanager, ppmanager.build_uis()
31
+
32
+ yield ppmanager, ppmanager.build_uis()
33
+
34
+ def summarize(
35
+ ppmanager, prompt_to_summarize, win_size,
36
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
37
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
38
+ ):
39
+ ctx = ppmanager.ctx
40
+ last_pong = ppmanager.pingpongs[-1].pong
41
+ ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
42
+ prompt = ppmanager.build_prompts(from_idx=-win_size)
43
+
44
+ _, gen_config_summarization = pre.build_gen_config(
45
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
46
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
47
+ )
48
+ summarize_output = get_output_batch(
49
+ global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
50
+ )[0].strip()
51
+ ppmanager.ctx = summarize_output
52
+ ppmanager.pop_pingpong()
53
+ return ppmanager
54
+
55
+ def chat_stream(
56
+ idx, local_data, user_message, state, model_num,
57
+ global_context, ctx_num_lconv, ctx_sum_prompt,
58
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
59
+ ):
60
+ res = [
61
+ state["ppmanager_type"].from_json(json.dumps(ppm))
62
+ for ppm in local_data
63
+ ]
64
+
65
+ ppm = res[idx]
66
+
67
+ # add_ping returns a prompt structured in Alpaca form
68
+ ppm.add_pingpong(
69
+ PingPong(user_message, "")
70
+ )
71
+ prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
72
+
73
+ # prepare text generating streamer & start generating
74
+ gen_kwargs, streamer = pre.build(
+ prompt, model_num,
+ res_temp, res_topp, res_topk, res_rpen, res_mnts,
+ res_beams, res_cache, res_sample, res_eosid, res_padid,
+ return_token_type_ids=False
+ )
+ pre.start_gen(gen_kwargs, model_num)
81
+
82
+ # handling stream
83
+ for ppmanager, uis in text_stream(ppm, streamer):
84
+ yield "", uis, prompt, str(res)
85
+
86
+ ppm = post.strip_pong(ppm)
87
+ yield "", ppm.build_uis(), prompt, str(res)
88
+
89
+ # summarization
90
+ # ppm.add_pingpong(
91
+ # PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
92
+ # )
93
+ # yield "", ppm.build_uis(), prompt, state
94
+ # ppm.pop_pingpong()
95
+
96
+ # ppm = summarize(
97
+ # ppm, ctx_sum_prompt, ctx_num_lconv,
98
+ # sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
99
+ # sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
100
+ # )
101
+ yield "", ppm.build_uis(), prompt, str(res)
chats/stablelm.py ADDED
@@ -0,0 +1,112 @@
1
+ import torch
2
+ from transformers import StoppingCriteria, StoppingCriteriaList
3
+
4
+ import copy
5
+ import json
6
+ import global_vars
7
+ from chats import pre, post
8
+ from pingpong import PingPong
9
+ from gens.batch_gen import get_output_batch
10
+
11
+ from pingpong.context import CtxLastWindowStrategy
12
+
13
+ class StopOnTokens(StoppingCriteria):
14
+ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
15
+ stop_ids = [50278, 50279, 50277, 1, 0]
16
+ for stop_id in stop_ids:
17
+ if input_ids[0][-1] == stop_id:
18
+ return True
19
+ return False
20
+
21
+ def build_prompts(ppmanager, user_message, global_context, win_size=3):
22
+ dummy_ppm = copy.deepcopy(ppmanager)
23
+
24
+ dummy_ppm.ctx = global_context
25
+ for pingpong in dummy_ppm.pingpongs:
26
+ pong = pingpong.pong
27
+ first_sentence = pong.split("\n")[0]
28
+ if first_sentence != "" and \
29
+ pre.contains_image_markdown(first_sentence):
30
+ pong = ' '.join(pong.split("\n")[1:]).strip()
31
+ pingpong.pong = pong
32
+
33
+ lws = CtxLastWindowStrategy(win_size)
34
+
35
+ prompt = lws(dummy_ppm)
36
+ return prompt
37
+
38
+ def text_stream(ppmanager, streamer):
39
+ for new_text in streamer:
40
+ ppmanager.append_pong(new_text)
41
+ yield ppmanager, ppmanager.build_uis()
42
+
43
+ yield ppmanager, ppmanager.build_uis()
44
+
45
+ def summarize(
46
+ ppmanager, prompt_to_summarize, win_size,
47
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
48
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
49
+ ):
50
+ ctx = ppmanager.ctx
51
+ last_pong = ppmanager.pingpongs[-1].pong
52
+ ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
53
+ prompt = ppmanager.build_prompts(from_idx=-win_size)
54
+
55
+ _, gen_config_summarization = pre.build_gen_config(
56
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
57
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
58
+ )
59
+ summarize_output = get_output_batch(
60
+ global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
61
+ )[0].split(prompt_to_summarize)[-1].strip()
62
+ ppmanager.ctx = summarize_output
63
+ ppmanager.pop_pingpong()
64
+ return ppmanager
65
+
66
+ def chat_stream(
67
+ idx, local_data, user_message, state, model_num,
68
+ global_context, ctx_num_lconv, ctx_sum_prompt,
69
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
70
+ ):
71
+ res = [
72
+ state["ppmanager_type"].from_json(json.dumps(ppm))
73
+ for ppm in local_data
74
+ ]
75
+
76
+ ppm = res[idx]
77
+
78
+ # add_ping returns a prompt structured in Alpaca form
79
+ ppm.add_pingpong(
80
+ PingPong(user_message, "")
81
+ )
82
+ prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
83
+
84
+ # prepare text generating streamer & start generating
85
+ gen_kwargs, streamer = pre.build(
+ prompt, model_num,
+ res_temp, res_topp, res_topk, res_rpen, res_mnts,
+ res_beams, res_cache, res_sample, res_eosid, res_padid,
+ StoppingCriteriaList([StopOnTokens()]), False
+ )
+ pre.start_gen(gen_kwargs, model_num)
92
+
93
+ # handling stream
94
+ for ppmanager, uis in text_stream(ppm, streamer):
95
+ yield "", uis, prompt, str(res)
96
+
97
+ ppm = post.strip_pong(ppm)
98
+ yield "", ppm.build_uis(), prompt, str(res)
99
+
100
+ # summarization
101
+ # ppm.add_pingpong(
102
+ # PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
103
+ # )
104
+ # yield "", ppm.build_uis(), prompt, state
105
+ # ppm.pop_pingpong()
106
+
107
+ # ppm = summarize(
108
+ # ppm, ctx_sum_prompt, ctx_num_lconv,
109
+ # sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
110
+ # sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
111
+ # )
112
+ yield "", ppm.build_uis(), prompt, str(res)
chats/starchat.py ADDED
@@ -0,0 +1,112 @@
1
+ import torch
2
+ from transformers import StoppingCriteria, StoppingCriteriaList
3
+
4
+ import copy
5
+ import json
6
+ import global_vars
7
+ from chats import pre, post
8
+ from pingpong import PingPong
9
+ from gens.batch_gen import get_output_batch
10
+
11
+ from pingpong.context import CtxLastWindowStrategy
12
+
13
+ class StopOnTokens(StoppingCriteria):
14
+ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
15
+ stop_ids = [49155, 1, 0]
16
+ for stop_id in stop_ids:
17
+ if input_ids[0][-1] == stop_id:
18
+ return True
19
+ return False
20
+
21
+ def build_prompts(ppmanager, user_message, global_context, win_size=3):
22
+ dummy_ppm = copy.deepcopy(ppmanager)
23
+
24
+ dummy_ppm.ctx = global_context
25
+ for pingpong in dummy_ppm.pingpongs:
26
+ pong = pingpong.pong
27
+ first_sentence = pong.split("\n")[0]
28
+ if first_sentence != "" and \
29
+ pre.contains_image_markdown(first_sentence):
30
+ pong = ' '.join(pong.split("\n")[1:]).strip()
31
+ pingpong.pong = pong
32
+
33
+ lws = CtxLastWindowStrategy(win_size)
34
+
35
+ prompt = lws(dummy_ppm)
36
+ return prompt
37
+
38
+ def text_stream(ppmanager, streamer):
39
+ for new_text in streamer:
40
+ ppmanager.append_pong(new_text)
41
+ yield ppmanager, ppmanager.build_uis()
42
+
43
+ yield ppmanager, ppmanager.build_uis()
44
+
45
+ def summarize(
46
+ ppmanager, prompt_to_summarize, win_size,
47
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
48
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
49
+ ):
50
+ ctx = ppmanager.ctx
51
+ last_pong = ppmanager.pingpongs[-1].pong
52
+ ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
53
+ prompt = ppmanager.build_prompts(from_idx=-win_size)
54
+
55
+ _, gen_config_summarization = pre.build_gen_config(
56
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
57
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
58
+ )
59
+ summarize_output = get_output_batch(
60
+ global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
61
+ )[0].strip()
62
+ ppmanager.ctx = summarize_output
63
+ ppmanager.pop_pingpong()
64
+ return ppmanager
65
+
66
+ def chat_stream(
67
+ idx, local_data, user_message, state, model_num,
68
+ global_context, ctx_num_lconv, ctx_sum_prompt,
69
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
70
+ ):
71
+ res = [
72
+ state["ppmanager_type"].from_json(json.dumps(ppm))
73
+ for ppm in local_data
74
+ ]
75
+
76
+ ppm = res[idx]
77
+
78
+ # add_ping returns a prompt structured in Alpaca form
79
+ ppm.add_pingpong(
80
+ PingPong(user_message, "")
81
+ )
82
+ prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
83
+
84
+ # prepare text generating streamer & start generating
85
+ gen_kwargs, streamer = pre.build(
+ prompt, model_num,
+ res_temp, res_topp, res_topk, res_rpen, res_mnts,
+ res_beams, res_cache, res_sample, res_eosid, res_padid,
+ StoppingCriteriaList([StopOnTokens()]), False
+ )
+ pre.start_gen(gen_kwargs, model_num)
92
+
93
+ # handling stream
94
+ for ppmanager, uis in text_stream(ppm, streamer):
95
+ yield "", uis, prompt, str(res)
96
+
97
+ ppm = post.strip_pong(ppm)
98
+ yield "", ppm.build_uis(), prompt, str(res)
99
+
100
+ # summarization
101
+ # ppm.add_pingpong(
102
+ # PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
103
+ # )
104
+ # yield "", ppm.build_uis(), prompt, state
105
+ # ppm.pop_pingpong()
106
+
107
+ # ppm = summarize(
108
+ # ppm, ctx_sum_prompt, ctx_num_lconv,
109
+ # sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
110
+ # sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
111
+ # )
112
+ yield "", ppm.build_uis(), prompt, str(res)
chats/vicuna.py ADDED
@@ -0,0 +1,109 @@
1
+ import copy
2
+ import json
3
+ import global_vars
4
+ from chats import pre, post
5
+ from pingpong import PingPong
6
+ from gens.batch_gen import get_output_batch
7
+
8
+ from pingpong.context import CtxLastWindowStrategy
9
+
10
+ def build_prompts(ppmanager, user_message, global_context, win_size=3):
11
+ dummy_ppm = copy.deepcopy(ppmanager)
12
+
13
+ dummy_ppm.ctx = global_context
14
+ for pingpong in dummy_ppm.pingpongs:
15
+ pong = pingpong.pong
16
+ first_sentence = pong.split("\n")[0]
17
+ if first_sentence != "" and \
18
+ pre.contains_image_markdown(first_sentence):
19
+ pong = ' '.join(pong.split("\n")[1:]).strip()
20
+ pingpong.pong = pong
21
+
22
+ lws = CtxLastWindowStrategy(win_size)
23
+
24
+ prompt = lws(dummy_ppm)
25
+ return prompt
26
+
27
+ def text_stream(ppmanager, streamer, model_thumbnail_tiny, model_type):
28
+ count = 0
29
+
30
+ for new_text in streamer:
31
+ if count == 0:
32
+ ppmanager.append_pong(f"![]({model_thumbnail_tiny})***[{model_type}]***\n")
33
+ count = count + 1
34
+
35
+ ppmanager.append_pong(new_text)
36
+ yield ppmanager, ppmanager.build_uis()
37
+
38
+ yield ppmanager, ppmanager.build_uis()
39
+
40
+ def summarize(
41
+ ppmanager, prompt_to_summarize, win_size,
42
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
43
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
44
+ ):
45
+ ctx = ppmanager.ctx
46
+ last_pong = ppmanager.pingpongs[-1].pong
47
+ ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
48
+ prompt = ppmanager.build_prompts(from_idx=-win_size)
49
+
50
+ _, gen_config_summarization = pre.build_gen_config(
51
+ temperature, top_p, top_k, repetition_penalty, max_new_tokens,
52
+ num_beams, use_cache, do_sample, eos_token_id, pad_token_id
53
+ )
54
+ summarize_output = get_output_batch(
55
+ global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
56
+ )[0].strip()
57
+ ppmanager.ctx = summarize_output
58
+ ppmanager.pop_pingpong()
59
+ return ppmanager
60
+
61
+ def chat_stream(
62
+ idx, local_data, user_message, state, model_num,
63
+ global_context, ctx_num_lconv, ctx_sum_prompt,
64
+ res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
65
+ ):
66
+ res = [
67
+ state["ppmanager_type"].from_json(json.dumps(ppm))
68
+ for ppm in local_data
69
+ ]
70
+
71
+ ppm = res[idx]
72
+
73
+ # add_ping returns a prompt structured in Alpaca form
74
+ ppm.add_pingpong(
75
+ PingPong(user_message, "")
76
+ )
77
+ prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
78
+
79
+ # prepare text generating streamer & start generating
80
+ gen_kwargs, streamer = pre.build(
81
+ prompt, model_num,
82
+ res_temp, res_topp, res_topk, res_rpen, res_mnts,
83
+ res_beams, res_cache, res_sample, res_eosid, res_padid,
84
+ return_token_type_ids=False
85
+ )
86
+ pre.start_gen(gen_kwargs, model_num)
87
+
88
+ # handling stream
89
+ model_thumbnail_tiny = global_vars.models[model_num]["model_thumb_tiny"]
90
+ model_type = global_vars.models[model_num]["model_type"]
91
+ for ppmanager, uis in text_stream(ppm, streamer, model_thumbnail_tiny, model_type):
92
+ yield "", uis, prompt, str(res)
93
+
94
+ ppm = post.strip_pong(ppm)
95
+ yield "", ppm.build_uis(), prompt, str(res)
96
+
97
+ # summarization
98
+ # ppm.add_pingpong(
99
+ # PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
100
+ # )
101
+ # yield "", ppm.build_uis(), prompt, state
102
+ # ppm.pop_pingpong()
103
+
104
+ # ppm = summarize(
105
+ # ppm, ctx_sum_prompt, ctx_num_lconv,
106
+ # sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
107
+ # sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
108
+ # )
109
+ yield "", ppm.build_uis(), prompt, str(res)
configs/constraints_config.yaml ADDED
@@ -0,0 +1,4 @@
+constraints:
+  max_context: 1000
+  max_prompt: 300
+  max_conv_len: 1500
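These limits are read back with yaml.safe_load (see get_constraints_config in global_vars.py); a small sketch of loading the raw values, assuming the file path stays the same:

    import yaml

    with open("configs/constraints_config.yaml", "rb") as f:
        constraints = yaml.safe_load(f.read())["constraints"]
    print(constraints["max_context"], constraints["max_prompt"], constraints["max_conv_len"])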
configs/response_configs/baize.yaml ADDED
@@ -0,0 +1,12 @@
1
+ generation_config:
2
+ temperature: 0.95
3
+ top_p: 0.9
4
+ top_k: 50
5
+ num_beams: 1
6
+ use_cache: True
7
+ repetition_penalty: 1.2
8
+ max_new_tokens: 1024
9
+ do_sample: True
10
+ bos_token_id: 0
11
+ eos_token_id: 1
12
+ pad_token_id: 0
configs/response_configs/camel.yaml ADDED
@@ -0,0 +1,11 @@
1
+ generation_config:
2
+ temperature: 0.95
3
+ top_p: 0.9
4
+ top_k: 50
5
+ num_beams: 1
6
+ use_cache: True
7
+ repetition_penalty: 1.2
8
+ max_new_tokens: 1024
9
+ do_sample: True
10
+ pad_token_id: 50257
11
+ eos_token_id: 50256
configs/response_configs/default.yaml ADDED
@@ -0,0 +1,9 @@
1
+ generation_config:
2
+ temperature: 0.95
3
+ top_p: 0.9
4
+ top_k: 50
5
+ num_beams: 1
6
+ use_cache: True
7
+ repetition_penalty: 1.2
8
+ max_new_tokens: 1024
9
+ do_sample: True
configs/response_configs/flan.yaml ADDED
@@ -0,0 +1,9 @@
1
+ generation_config:
2
+ temperature: 0.8
3
+ top_p: 0.95
4
+ top_k: 50
5
+ num_beams: 1
6
+ use_cache: False
7
+ repetition_penalty: 1.2
8
+ max_new_tokens: 256
9
+ do_sample: True
configs/response_configs/gpt4_alpaca.yaml ADDED
@@ -0,0 +1,9 @@
1
+ generation_config:
2
+ temperature: 0.95
3
+ top_p: 0.9
4
+ top_k: 50
5
+ num_beams: 1
6
+ use_cache: True
7
+ repetition_penalty: 1.2
8
+ max_new_tokens: 512
9
+ do_sample: True
configs/response_configs/guanaco.yaml ADDED
@@ -0,0 +1,9 @@
1
+ generation_config:
2
+ temperature: 0.95
3
+ top_p: 0.9
4
+ top_k: 50
5
+ num_beams: 1
6
+ use_cache: True
7
+ repetition_penalty: 1.2
8
+ max_new_tokens: 1024
9
+ do_sample: True
configs/response_configs/koalpaca.yaml ADDED
@@ -0,0 +1,11 @@
1
+ generation_config:
2
+ temperature: 0.95
3
+ top_p: 0.9
4
+ top_k: 50
5
+ num_beams: 1
6
+ use_cache: True
7
+ repetition_penalty: 1.2
8
+ max_new_tokens: 1024
9
+ do_sample: True
10
+ eos_token_id: 2
11
+ pad_token_id: 2
configs/response_configs/redpajama.yaml ADDED
@@ -0,0 +1,11 @@
1
+ generation_config:
2
+ temperature: 1.0
3
+ top_p: 0.9
4
+ top_k: 1000
5
+ num_beams: 1
6
+ use_cache: True
7
+ repetition_penalty: 1.2
8
+ max_new_tokens: 512
9
+ do_sample: True
10
+ eos_token_id: 0
11
+ pad_token_id: 1
configs/response_configs/stablelm.yaml ADDED
@@ -0,0 +1,11 @@
1
+ generation_config:
2
+ temperature: 1.0
3
+ top_p: 0.9
4
+ top_k: 1000
5
+ num_beams: 1
6
+ use_cache: True
7
+ repetition_penalty: 1.2
8
+ max_new_tokens: 512
9
+ do_sample: True
10
+ eos_token_id: 0
11
+ pad_token_id: 1
configs/response_configs/stackllama.yaml ADDED
@@ -0,0 +1,10 @@
1
+ generation_config:
2
+ temperature: 0.9
3
+ top_p: 0.95
4
+ # top_k: 50
5
+ num_beams: 1
6
+ use_cache: True
7
+ repetition_penalty: 1.2
8
+ max_new_tokens: 256
9
+ do_sample: True
10
+ early_stopping: True
configs/response_configs/starchat.yaml ADDED
@@ -0,0 +1,12 @@
1
+ generation_config:
2
+ temperature: 0.5
3
+ top_p: 0.95
4
+ top_k: 50
5
+ num_beams: 1
6
+ use_cache: True
7
+ repetition_penalty: 1.2
8
+ max_new_tokens: 1024
9
+ do_sample: True
10
+ eos_token_id: 0
11
+ bos_token_id: 0
12
+ pad_token_id: 0
configs/response_configs/t5_vicuna.yaml ADDED
@@ -0,0 +1,9 @@
1
+ generation_config:
2
+ temperature: 0.95
3
+ top_p: 0.9
4
+ top_k: 50
5
+ num_beams: 1
6
+ use_cache: True
7
+ repetition_penalty: 1.2
8
+ max_new_tokens: 2048
9
+ do_sample: True
configs/summarization_configs/camel.yaml ADDED
@@ -0,0 +1,11 @@
1
+ generation_config:
2
+ temperature: 1
3
+ top_p: 0.9
4
+ top_k: 50
5
+ num_beams: 1
6
+ use_cache: True
7
+ max_new_tokens: 1024
8
+ do_sample: True
9
+ repetition_penalty: 1.5
10
+ pad_token_id: 50257
11
+ eos_token_id: 50256
configs/summarization_configs/default.yaml ADDED
@@ -0,0 +1,11 @@
1
+ generation_config:
2
+ temperature: 1
3
+ top_p: 0.9
4
+ top_k: 50
5
+ num_beams: 1
6
+ use_cache: True
7
+ max_new_tokens: 1024
8
+ do_sample: True
9
+ repetition_penalty: 1.5
10
+
11
+
configs/summarization_configs/koalpaca.yaml ADDED
@@ -0,0 +1,11 @@
1
+ generation_config:
2
+ temperature: 1
3
+ top_p: 0.9
4
+ top_k: 50
5
+ num_beams: 1
6
+ use_cache: True
7
+ do_sample: True
8
+ repetition_penalty: 1.2
9
+ max_new_tokens: 512
10
+ eos_token_id: 2
11
+ pad_token_id: 2
configs/summarization_configs/redpajama.yaml ADDED
@@ -0,0 +1,11 @@
1
+ generation_config:
2
+ temperature: 1.0
3
+ top_p: 0.9
4
+ top_k: 1000
5
+ num_beams: 1
6
+ use_cache: True
7
+ repetition_penalty: 1.2
8
+ max_new_tokens: 512
9
+ do_sample: True
10
+ eos_token_id: 0
11
+ pad_token_id: 1
configs/summarization_configs/stablelm.yaml ADDED
@@ -0,0 +1,11 @@
1
+ generation_config:
2
+ temperature: 1
3
+ top_p: 0.9
4
+ top_k: 1000
5
+ num_beams: 1
6
+ use_cache: True
7
+ do_sample: True
8
+ repetition_penalty: 1.2
9
+ max_new_tokens: 512
10
+ eos_token_id: 0
11
+ pad_token_id: 1
configs/summarization_configs/t5_vicuna.yaml ADDED
@@ -0,0 +1,9 @@
1
+ generation_config:
2
+ temperature: 0.95
3
+ top_p: 0.9
4
+ top_k: 50
5
+ num_beams: 1
6
+ use_cache: True
7
+ repetition_penalty: 1.2
8
+ max_new_tokens: 2048
9
+ do_sample: True
examples.txt ADDED
@@ -0,0 +1,2 @@
+Tell me about GPT
+Write a Python program to print Fibonacci numbers
gens/__init__.py ADDED
File without changes
gens/batch_gen.py ADDED
@@ -0,0 +1,32 @@
+import torch
+
+def get_output_batch(
+    model, tokenizer, prompts, generation_config, device='cuda'
+):
+    if len(prompts) == 1:
+        encoding = tokenizer(prompts, return_tensors="pt")
+        input_ids = encoding["input_ids"].to(device)
+        generated_id = model.generate(
+            input_ids=input_ids,
+            generation_config=generation_config,
+        )
+
+        decoded = tokenizer.batch_decode(
+            generated_id, skip_prompt=True, skip_special_tokens=True
+        )
+        del input_ids, generated_id
+        torch.cuda.empty_cache()
+        return decoded
+    else:
+        encodings = tokenizer(prompts, padding=True, return_tensors="pt").to(device)
+        generated_ids = model.generate(
+            **encodings,
+            generation_config=generation_config,
+        )
+
+        decoded = tokenizer.batch_decode(
+            generated_ids, skip_prompt=True, skip_special_tokens=True
+        )
+        del encodings, generated_ids
+        torch.cuda.empty_cache()
+        return decoded
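A rough usage sketch for get_output_batch, assuming a causal LM and its tokenizer are already available on GPU; the checkpoint name below is only a placeholder and the generation settings mirror the YAML files under configs/:

    from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
    from gens.batch_gen import get_output_batch

    tokenizer = AutoTokenizer.from_pretrained("gpt2")               # placeholder checkpoint
    model = AutoModelForCausalLM.from_pretrained("gpt2").to("cuda")
    gen_config = GenerationConfig(max_new_tokens=64, do_sample=True, top_p=0.9)

    outputs = get_output_batch(model, tokenizer, ["Tell me about alpacas"], gen_config)
    print(outputs[0])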
global_vars.py ADDED
@@ -0,0 +1,194 @@
1
+ import gc
2
+ import yaml
3
+ import json
4
+ import torch
5
+ from transformers import GenerationConfig
6
+ from models import alpaca, stablelm, koalpaca, flan_alpaca, mpt
7
+ from models import camel, t5_vicuna, vicuna, starchat, redpajama, bloom
8
+ from models import baize, guanaco, falcon, kullm, replit, airoboros
9
+ from models import samantha_vicuna
10
+
11
+ from utils import get_chat_interface, get_chat_manager
12
+
13
+ model_infos = json.load(open("model_cards.json"))
14
+
15
+ def get_model_type(model_info):
16
+ base_url = model_info["hub(base)"]
17
+ ft_ckpt_url = model_info["hub(ckpt)"]
18
+
19
+ model_type_tmp = "alpaca"
20
+ if "llms/wizardlm" in base_url.lower():
21
+ model_type_tmp = "wizardlm"
22
+ elif "chronos" in base_url.lower():
23
+ model_type_tmp = "chronos"
24
+ elif "lazarus" in base_url.lower():
25
+ model_type_tmp = "lazarus"
26
+ elif "samantha" in base_url.lower():
27
+ model_type_tmp = "samantha-vicuna"
28
+ elif "airoboros" in base_url.lower():
29
+ model_type_tmp = "airoboros"
30
+ elif "replit" in base_url.lower():
31
+ model_type_tmp = "replit-instruct"
32
+ elif "kullm" in base_url.lower():
33
+ model_type_tmp = "kullm-polyglot"
34
+ elif "nous-hermes" in base_url.lower():
35
+ model_type_tmp = "nous-hermes"
36
+ elif "guanaco" in base_url.lower():
37
+ model_type_tmp = "guanaco"
38
+ elif "wizardlm-uncensored-falcon" in base_url.lower():
39
+ model_type_tmp = "wizard-falcon"
40
+ elif "falcon" in base_url.lower():
41
+ model_type_tmp = "falcon"
42
+ elif "baize" in base_url.lower():
43
+ model_type_tmp = "baize"
44
+ elif "stable-vicuna" in base_url.lower():
45
+ model_type_tmp = "stable-vicuna"
46
+ elif "vicuna" in base_url.lower():
47
+ model_type_tmp = "vicuna"
48
+ elif "mpt" in base_url.lower():
49
+ model_type_tmp = "mpt"
50
+ elif "redpajama-incite-7b-instruct" in base_url.lower():
51
+ model_type_tmp = "redpajama-instruct"
52
+ elif "redpajama" in base_url.lower():
53
+ model_type_tmp = "redpajama"
54
+ elif "starchat" in base_url.lower():
55
+ model_type_tmp = "starchat"
56
+ elif "camel" in base_url.lower():
57
+ model_type_tmp = "camel"
58
+ elif "flan-alpaca" in base_url.lower():
59
+ model_type_tmp = "flan-alpaca"
60
+ elif "openassistant/stablelm" in base_url.lower():
61
+ model_type_tmp = "os-stablelm"
62
+ elif "stablelm" in base_url.lower():
63
+ model_type_tmp = "stablelm"
64
+ elif "fastchat-t5" in base_url.lower():
65
+ model_type_tmp = "t5-vicuna"
66
+ elif "koalpaca-polyglot" in base_url.lower():
67
+ model_type_tmp = "koalpaca-polyglot"
68
+ elif "alpacagpt4" in ft_ckpt_url.lower():
69
+ model_type_tmp = "alpaca-gpt4"
70
+ elif "alpaca" in ft_ckpt_url.lower():
71
+ model_type_tmp = "alpaca"
72
+ elif "llama-deus" in ft_ckpt_url.lower():
73
+ model_type_tmp = "llama-deus"
74
+ elif "vicuna-lora-evolinstruct" in ft_ckpt_url.lower():
75
+ model_type_tmp = "evolinstruct-vicuna"
76
+ elif "alpacoom" in ft_ckpt_url.lower():
77
+ model_type_tmp = "alpacoom"
78
+ elif "guanaco" in ft_ckpt_url.lower():
79
+ model_type_tmp = "guanaco"
80
+ else:
81
+ print("unsupported model type")
82
+
83
+ return model_type_tmp
84
+
85
+ def initialize_globals():
86
+ global models, tokenizers
87
+
88
+ models = []
89
+ model_names = [
90
+ "baize-7b",
91
+ # "evolinstruct-vicuna-13b",
92
+ "guanaco-7b",
93
+ # "nous-hermes-13b"
94
+ ]
95
+ for model_name in model_names:
96
+ model_info = model_infos[model_name]
97
+ model_thumbnail_tiny = model_info["thumb-tiny"]
98
+ model_type = get_model_type(model_info)
99
+ print(model_type)
100
+ load_model = get_load_model(model_type)
101
+
102
+ model, tokenizer = load_model(
103
+ base=model_info["hub(base)"],
104
+ finetuned=model_info["hub(ckpt)"],
105
+ mode_cpu=False,
106
+ mode_mps=False,
107
+ mode_full_gpu=True,
108
+ mode_8bit=False,
109
+ mode_4bit=False,
110
+ force_download_ckpt=False
111
+ )
112
+
113
+ gen_config, gen_config_raw = get_generation_config(
114
+ model_info["default_gen_config"]
115
+ )
116
+
117
+ models.append(
118
+ {
119
+ "model_name": model_name,
120
+ "model_thumb_tiny": model_thumbnail_tiny,
121
+ "model_type": model_type,
122
+ "model": model,
123
+ "tokenizer": tokenizer,
124
+ "gen_config": gen_config,
125
+ "gen_config_raw": gen_config_raw,
126
+ "chat_interface": get_chat_interface(model_type),
127
+ "chat_manager": get_chat_manager(model_type),
128
+ }
129
+ )
130
+
131
+ def get_load_model(model_type):
132
+ if model_type == "alpaca" or \
133
+ model_type == "alpaca-gpt4" or \
134
+ model_type == "llama-deus" or \
135
+ model_type == "nous-hermes" or \
136
+ model_type == "lazarus" or \
137
+ model_type == "chronos" or \
138
+ model_type == "wizardlm":
139
+ return alpaca.load_model
140
+ elif model_type == "stablelm" or model_type == "os-stablelm":
141
+ return stablelm.load_model
142
+ elif model_type == "koalpaca-polyglot":
143
+ return koalpaca.load_model
144
+ elif model_type == "kullm-polyglot":
145
+ return kullm.load_model
146
+ elif model_type == "flan-alpaca":
147
+ return flan_alpaca.load_model
148
+ elif model_type == "camel":
149
+ return camel.load_model
150
+ elif model_type == "t5-vicuna":
151
+ return t5_vicuna.load_model
152
+ elif model_type == "stable-vicuna":
153
+ return vicuna.load_model
154
+ elif model_type == "starchat":
155
+ return starchat.load_model
156
+ elif model_type == "mpt":
157
+ return mpt.load_model
158
+ elif model_type == "redpajama" or \
159
+ model_type == "redpajama-instruct":
160
+ return redpajama.load_model
161
+ elif model_type == "vicuna":
162
+ return vicuna.load_model
163
+ elif model_type == "evolinstruct-vicuna":
164
+ return alpaca.load_model
165
+ elif model_type == "alpacoom":
166
+ return bloom.load_model
167
+ elif model_type == "baize":
168
+ return baize.load_model
169
+ elif model_type == "guanaco":
170
+ return guanaco.load_model
171
+ elif model_type == "falcon" or model_type == "wizard-falcon":
172
+ return falcon.load_model
173
+ elif model_type == "replit-instruct":
174
+ return replit.load_model
175
+ elif model_type == "airoboros":
176
+ return airoboros.load_model
177
+ elif model_type == "samantha-vicuna":
178
+ return samantha_vicuna.load_model
179
+ else:
180
+ return None
181
+
182
+ def get_generation_config(path):
183
+ with open(path, 'rb') as f:
184
+ generation_config = yaml.safe_load(f.read())
185
+
186
+ generation_config = generation_config["generation_config"]
187
+
188
+ return GenerationConfig(**generation_config), generation_config
189
+
190
+ def get_constraints_config(path):
191
+ with open(path, 'rb') as f:
192
+ constraints_config = yaml.safe_load(f.read())
193
+
194
+ return ConstraintsConfig(**constraints_config), constraints_config["constraints"]
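A small sketch of how these helpers are meant to be driven, assuming model_cards.json and the configs/ directory are present; get_generation_config can be used on its own, while initialize_globals (which needs GPUs and the model weights) is what the app is expected to call at startup:

    import global_vars

    gen_config, gen_config_raw = global_vars.get_generation_config(
        "configs/response_configs/default.yaml"
    )
    print(gen_config_raw["max_new_tokens"])   # 1024 in the default config

    # loads every model listed in initialize_globals
    global_vars.initialize_globals()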
miscs/__init__.py ADDED
File without changes
miscs/js.py ADDED
@@ -0,0 +1,50 @@
1
+ GET_LOCAL_STORAGE = """
2
+ function() {
3
+ globalThis.setStorage = (key, value)=>{
4
+ localStorage.setItem(key, JSON.stringify(value));
5
+ }
6
+ globalThis.getStorage = (key, value)=>{
7
+ return JSON.parse(localStorage.getItem(key));
8
+ }
9
+
10
+ var local_data = getStorage('local_data');
11
+ var history = [];
12
+
13
+ if(local_data) {
14
+ local_data[0].pingpongs.forEach(element =>{
15
+ history.push([element.ping, element.pong]);
16
+ });
17
+ }
18
+ else {
19
+ local_data = [];
20
+ for (let step = 0; step < 10; step++) {
21
+ local_data.push({'ctx': '', 'pingpongs':[]});
22
+ }
23
+ setStorage('local_data', local_data);
24
+ }
25
+
26
+ if(history.length == 0) {
27
+ document.querySelector("#initial-popup").classList.remove('hide');
28
+ }
29
+
30
+ return [history, local_data];
31
+ }
32
+ """
33
+
34
+ UPDATE_LEFT_BTNS_STATE = """
35
+ (v)=>{
36
+ document.querySelector('.custom-btn-highlight').classList.add('custom-btn');
37
+ document.querySelector('.custom-btn-highlight').classList.remove('custom-btn-highlight');
38
+
39
+ const elements = document.querySelectorAll(".custom-btn");
40
+
41
+ for(var i=0; i < elements.length; i++) {
42
+ const element = elements[i];
43
+ if(element.textContent == v) {
44
+ console.log(v);
45
+ element.classList.add('custom-btn-highlight');
46
+ element.classList.remove('custom-btn');
47
+ break;
48
+ }
49
+ }
50
+ }"""
miscs/strings.py ADDED
@@ -0,0 +1,83 @@
1
+ TITLE = "Alpaca-LoRA Playground"
2
+
3
+ ABSTRACT = """
4
+ Thanks to [tloen](https://github.com/tloen/alpaca-lora), this application runs Alpaca-LoRA, which is an instruction fine-tuned version of [LLaMA](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/). This demo currently runs the 30B version on a 3*A6000 instance at [Jarvislabs.ai](https://jarvislabs.ai/).
5
+
6
+ NOTE: overly long inputs (context, instruction) are not allowed. Please keep the context < 500 and the instruction < 150
7
+ """
8
+
9
+ BOTTOM_LINE = """
10
+ This demo application runs the open source project [Alpaca-LoRA-Serve](https://github.com/deep-diver/Alpaca-LoRA-Serve). By default, it runs in streaming mode, but you can also run it in dynamic batch generation mode. Please visit the repo for more information, and contribute if you can.
11
+
12
+ Alpaca-LoRA is built on the same concept as the Stanford Alpaca project, but it lets us train and run inference on smaller GPUs such as an RTX 4090 for the 7B version. Also, we can build very small checkpoints on top of the base models thanks to the [🤗 transformers](https://huggingface.co/docs/transformers/index), [🤗 peft](https://github.com/huggingface/peft), and [bitsandbytes](https://github.com/TimDettmers/bitsandbytes/tree/main) libraries.
13
+
14
+ We are thankful to [Jarvislabs.ai](https://jarvislabs.ai/), who generously provided free GPU instances.
15
+ """
16
+
17
+ DEFAULT_EXAMPLES = {
18
+ "Typical Questions": [
19
+ {
20
+ "title": "List all Canadian provinces in alphabetical order.",
21
+ "examples": [
22
+ ["1", "List all Canadian provinces in alphabetical order."],
23
+ ["2", "Which ones are on the east side?"],
24
+ ["3", "What foods are famous in each province on the east side?"],
25
+ ["4", "What about sightseeing? or landmarks? list one per province"],
26
+ ],
27
+ },
28
+ {
29
+ "title": "Tell me about Alpacas.",
30
+ "examples": [
31
+ ["1", "Tell me about alpacas in two sentences"],
32
+ ["2", "What other animals are living in the same area?"],
33
+ ["3", "Are they the same species?"],
34
+ ["4", "Write a Python program to return those species"],
35
+ ],
36
+ },
37
+ {
38
+ "title": "Tell me about the king of France in 2019.",
39
+ "examples": [
40
+ ["1", "Tell me about the king of France in 2019."],
41
+ ["2", "What about before him?"],
42
+ ]
43
+ },
44
+ {
45
+ "title": "Write a Python program that prints the first 10 Fibonacci numbers.",
46
+ "examples": [
47
+ ["1", "Write a Python program that prints the first 10 Fibonacci numbers."],
48
+ ["2", "Could you explain how the code works?"],
49
+ ["3", "What is recursion?"],
50
+ ]
51
+ }
52
+ ],
53
+ "Identity": [
54
+ {
55
+ "title": "Conversation with the planet Pluto",
56
+ "examples": [
57
+ ["1", "Conversation with the planet Pluto", "I'am so curious about you"],
58
+ ["2", "Conversation with the planet Pluto", "Tell me what I would see if I visited"],
59
+ ["3", "Conversation with the planet Pluto", "It sounds beautiful"],
60
+ ["4", "Conversation with the planet Pluto", "I'll keep that in mind. Hey I was wondering have you ever had any visitor?"],
61
+ ["5", "Conversation with the planet Pluto", "That must have been exciting"],
62
+ ["6", "Conversation with the planet Pluto", "That's so great. What else do you wish people knew about you?"],
63
+ ["7", "Conversation with the planet Pluto", "Thanks for talking with me"],
64
+ ]
65
+ },
66
+ {
67
+ "title": "Conversation with a paper airplane",
68
+ "examples": [
69
+ ["1", "Conversation with a paper airplane", "What's it like being thrown through the air"],
70
+ ["2", "Conversation with a paper airplane", "What's the worst place you've ever landed"],
71
+ ["3", "Conversation with a paper airplane", "Have you ever stucked?"],
72
+ ["4", "Conversation with a paper airplane", "What's the secret to a really good paper airplane?"],
73
+ ["5", "Conversation with a paper airplane", "What's the farthest you've ever flown?"],
74
+ ["6", "Conversation with a paper airplane", "Good to talk to you!"]
75
+ ]
76
+ }
77
+ ]
78
+ }
79
+
80
+ SPECIAL_STRS = {
81
+ "continue": "continue.",
82
+ "summarize": "what have we discussed so far? describe in the user's view and include important entities. also be brief as much as possible."
83
+ }
miscs/styles.py ADDED
@@ -0,0 +1,727 @@
1
+ PARENT_BLOCK_CSS = """
2
+ #col-container {
3
+ width: 95%;
4
+ height: 100%;
5
+ margin-left: auto;
6
+ margin-right: auto;
7
+ }
8
+
9
+ #chatbot {
10
+ height: 800px;
11
+ overflow: auto;
12
+ }
13
+
14
+ #chatbot > .wrap {
15
+ max-height: 780px;
16
+ }
17
+ """
18
+
19
+ MODEL_SELECTION_CSS = """
20
+
21
+ .message {
22
+ margin: 0px !important;
23
+ }
24
+
25
+ .load-mode-selector:nth-child(3) {
26
+ margin: auto !important;
27
+ text-align: center !important;
28
+ width: fit-content !important;
29
+ }
30
+
31
+ code {
32
+ white-space: break-spaces !important;
33
+ }
34
+
35
+ .progress-view {
36
+ background: transparent !important;
37
+ border-radius: 25px !important;
38
+ }
39
+
40
+ #landing-container {
41
+ width: 85%;
42
+ margin: auto;
43
+ }
44
+
45
+ .landing-btn {
46
+ font-size: 2.3vw !important;
47
+ margin-top: 25px !important;
48
+ border-radius: 25px !important;
49
+ height: 120px !important;
50
+
51
+ @media screen and (max-width: 1000px) {
52
+ font-size: 20px !important;
53
+ }
54
+ }
55
+
56
+ #landing-bottom {
57
+ margin-top: 20px !important;
58
+ }
59
+
60
+ .custom-btn {
61
+ border: none !important;
62
+ background: none !important;
63
+ box-shadow: none !important;
64
+ display: block !important;
65
+ text-align: left !important;
66
+ }
67
+ .custom-btn:hover {
68
+ background: rgb(243 244 246) !important;
69
+ }
70
+
71
+ .custom-btn-highlight {
72
+ border: none !important;
73
+ background: rgb(243 244 246) !important;
74
+ box-shadow: none !important;
75
+ display: block !important;
76
+ text-align: left !important;
77
+
78
+ @media (prefers-color-scheme: dark) {
79
+ background-color: rgba(17,24,39,255) !important;
80
+ }
81
+ }
82
+
83
+ #prompt-txt > label > span {
84
+ display: none !important;
85
+ }
86
+ #prompt-txt > label > textarea {
87
+ border: transparent;
88
+ border-radius: 20px;
89
+ }
90
+ #chatbot {
91
+ height: 800px;
92
+ overflow: auto;
93
+ box-shadow: none !important;
94
+ border: none !important;
95
+ }
96
+ #chatbot > .wrap {
97
+ max-height: 780px;
98
+ }
99
+ #chatbot + div {
100
+ border-radius: 35px !important;
101
+ width: 80% !important;
102
+ margin: auto !important;
103
+ }
104
+
105
+ #left-pane {
106
+ background-color: #f9fafb;
107
+ border-radius: 15px;
108
+ padding: 10px;
109
+
110
+ @media (prefers-color-scheme: dark) {
111
+ background-color: rgba(31,41,55,255) !important;
112
+ }
113
+ }
114
+
115
+ #left-top {
116
+ padding-left: 10px;
117
+ padding-right: 10px;
118
+ text-align: center;
119
+ font-weight: bold;
120
+ font-size: large;
121
+ }
122
+
123
+ #chat-history-accordion {
124
+ background: transparent;
125
+ border: 0.8px !important;
126
+ }
127
+
128
+ #right-pane {
129
+ margin-left: 20px;
130
+ background: white;
131
+ border-radius: 20px;
132
+
133
+ @media (prefers-color-scheme: dark) {
134
+ background-color: rgba(31,41,55,255) !important;
135
+ }
136
+
137
+ @media screen and (max-width: 1000px) {
138
+ margin: 0px !important;
139
+ }
140
+ }
141
+
142
+ #initial-popup {
143
+ z-index: 100;
144
+ position: absolute;
145
+ width: 50%;
146
+ top: 50%;
147
+ height: 50%;
148
+ left: 50%;
149
+ transform: translate(-50%, -50%);
150
+ border-radius: 35px;
151
+ padding: 15px;
152
+ }
153
+
154
+ #initial-popup-title {
155
+ text-align: center;
156
+ font-size: 18px;
157
+ font-weight: bold;
158
+ }
159
+
160
+ #initial-popup-left-pane {
161
+ min-width: 150px !important;
162
+ }
163
+
164
+ #initial-popup-right-pane {
165
+ text-align: right;
166
+ }
167
+
168
+ .example-btn {
169
+ padding-top: 20px !important;
170
+ padding-bottom: 20px !important;
171
+ padding-left: 5px !important;
172
+ padding-right: 5px !important;
173
+ background: linear-gradient(to bottom right, #f7faff, #ffffff) !important;
174
+ box-shadow: none !important;
175
+ border-radius: 20px !important;
176
+
177
+ @media (prefers-color-scheme: dark) {
178
+ background: rgba(70,79,86,255) !important;
179
+ }
180
+ }
181
+
182
+ .example-btn:hover {
183
+ box-shadow: 0.3px 0.3px 0.3px gray !important;
184
+
185
+ @media (prefers-color-scheme: dark) {
186
+ background: rgba(34,37,42,255) !important;
187
+ }
188
+ }
189
+
190
+ .example-btn:active {
191
+ @media (prefers-color-scheme: dark) {
192
+ background: rgba(70,79,86,255) !important;
193
+ }
194
+ }
195
+
196
+ #example-title {
+ margin-bottom: 15px;
+ }
+
+ #aux-btns-popup {
+ z-index: 200;
+ position: absolute !important;
+ bottom: 75px !important;
+ right: 40px !important;
+ }
+
+ #aux-btns-popup > div {
+ flex-wrap: nowrap;
+ width: fit-content;
+ margin: auto;
+ }
+
+ .aux-btn {
+ height: 30px !important;
+ flex-wrap: initial !important;
+ flex: none !important;
+ min-width: min(100px,100%) !important;
+ font-weight: unset !important;
+ font-size: 10pt !important;
+
+ background: linear-gradient(to bottom right, #f7faff, #ffffff) !important;
+ box-shadow: none !important;
+ border-radius: 20px !important;
+
+ opacity: 0.5;
+ border-width: 0.5px;
+ border-color: grey;
+
+ @media (prefers-color-scheme: dark) {
+ opacity: 0.2 !important;
+ color: black !important;
+ }
+ }
+
+ .aux-btn:hover {
+ opacity: 1.0;
+ box-shadow: 0.3px 0.3px 0.3px gray !important;
+
+ @media (prefers-color-scheme: dark) {
+ opacity: 1.0 !important;
+ box-shadow: 0.3px 0.3px 0.3px gray !important;
+ }
+ }
+
+ #aux-viewer {
+ position: absolute !important;
+ border-style: solid !important;
+ overflow: visible !important;
+ border: none !important;
+ box-shadow: none !important;
+ z-index: 1000 !important;
+ opacity: 0.0 !important;
+ width: 75% !important;
+ right: 1px !important;
+ transition: all 0.5s;
+ }
+
+ #aux-viewer:hover {
+ opacity: 1.0 !important;
+ box-shadow: 0px 0.5px 0px 0px gray !important;
+ }
+
+ #aux-viewer > .label-wrap {
+ justify-content: end;
+ }
+
+ #aux-viewer > .label-wrap > span {
+ margin-right: 10px;
+ }
+
+ #aux-viewer-inspector {
+ padding: 0px;
+ }
+
+ #aux-viewer-inspector > label > span {
+ display: none !important;
+ }
+
+ #aux-viewer-inspector > label > textarea {
+ box-shadow: none;
+ border-color: transparent;
+ }
+
+ #global-context > label > span {
+ display: none !important;
+ }
+
+ #chat-back-btn {
+ background: transparent !important;
+ }
+
+ #chat-back-btn:hover {
+ @media (prefers-color-scheme: dark) {
+ background: rgb(75,85,99) !important;
+ }
+ }
+
+ #chat-back-btn:active {
+ @media (prefers-color-scheme: dark) {
+ background: transparent !important;
+ }
+ }
+
+ #col-container {
+ max-width: 70%;
+ height: 100%;
+ margin-left: auto;
+ margin-right: auto;
+ }
+
+
+ #container {
+ max-width: 70%;
+ margin: auto;
+
+ @media screen and (max-width: 1000px) {
+ max-width: 90% !important;
+ }
+ }
+
+ #container2 {
+ max-width: 60%;
+ margin: auto;
+ }
+
+ #container3 {
+ max-width: 60%;
+ margin: auto;
+ }
+
+ .square {
+ height: 100px;
+
+ @media (prefers-color-scheme: dark) {
+ background-color: rgba(70,79,86,255) !important;
+ }
+ }
+
+ .square:hover {
+ @media (prefers-color-scheme: dark) {
+ background-color: rgba(34,37,42,255) !important;
+ }
+ }
+
+ .square:active {
+ @media (prefers-color-scheme: dark) {
+ background-color: rgba(70,79,86,255) !important;
+ }
+ }
+
+ .placeholders {
+ min-width: max-content !important;
+ }
+
+ .placeholders > button {
+ border-color: transparent !important;
+ background-color: transparent !important;
+ box-shadow: none !important;
+ cursor: default !important;
+ }
+
+ .center {
+ text-align: center;
+ overflow: hidden;
+ }
+
+ #30b-placeholder1, #30b-placeholder2, #30b-placeholder3, #30b-placeholder4 {
+ background: red;
+ box-shadow: none;
+ pointer-events: none;
+ width: 100px;
+ height: 100px;
+ background: transparent !important;
+ border-color: transparent !important;
+ box-shadow: none !important;
+ cursor: default !important;
+ }
+
+ #20b-placeholder1, #20b-placeholder2, #20b-placeholder3, #20b-placeholder4 {
+ background: red;
+ box-shadow: none;
+ pointer-events: none;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ background: transparent !important;
+ border-color: transparent !important;
+ box-shadow: none !important;
+ cursor: default !important;
+ }
+
+ #10b-placeholder1, #10b-placeholder2, #10b-placeholder3, #10b-placeholder4 {
+ background: red;
+ box-shadow: none;
+ pointer-events: none;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ background: transparent !important;
+ border-color: transparent !important;
+ box-shadow: none !important;
+ cursor: default !important;
+ }
+
+ #camel-5b, #camel-20b {
+ background: url(https://i.ibb.co/qD5HN9T/camel-removebg-preview.png);
+ background-repeat: no-repeat;
+ background-size: 100px 100px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #alpaca-lora-7b, #alpaca-lora-13b {
+ background: url(https://i.ibb.co/z89FTz2/alpaca-lora.png);
+ background-repeat: no-repeat;
+ background-size: 100px 100px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #stablelm-7b {
+ background: url(https://i.ibb.co/d2pd5wk/stable-LM-cropped.png);
+ background-repeat: no-repeat;
+ background-size: 100px 100px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #stackllama-7b {
+ background: url(https://i.ibb.co/Q9vLcYm/tuxpi-com-1682256296-removebg-preview.png);
+ background-repeat: no-repeat;
+ background-size: 100px 100px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #flan-3b, #flan-11b {
+ background: url(https://i.ibb.co/yBTk5bv/flan.png);
+ background-repeat: no-repeat;
+ background-size: 100px 100px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #koalpaca {
+ background: url(https://i.ibb.co/hF9NL7r/koalpaca.png);
+ background-repeat: no-repeat;
+ background-size: 100px 100px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #kullm {
+ background: url(https://i.ibb.co/6ZFqk4J/kullm.png);
+ background-repeat: no-repeat;
+ background-size: 100px 100px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #flan-3b {
+ background: url(https://i.ibb.co/yBTk5bv/flan.png);
+ background-repeat: no-repeat;
+ background-size: 100px 100px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #os-stablelm-7b {
+ background: url(https://i.ibb.co/WszrtVV/stablelm-oasst1.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #t5-vicuna-3b {
+ background: url(https://i.ibb.co/4W7n78b/chansung-vector-logo-of-collective-intelligence-of-cute-llamas-3ef46884-72e6-44da-b88a-e831e5fee747.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #gpt4-alpaca-7b, #gpt4-alpaca-13b {
+ background: url(https://i.ibb.co/qDz3HCG/chansung-vector-logo-of-alpaca-made-out-of-machines-Side-shot-39b27595-8202-48a6-97d1-266a745b2a29-r.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #stable-vicuna-13b {
+ background: url(https://i.ibb.co/b6Vv6Jh/sv.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ }
+
+ #starchat-15b, #starchat-beta-15b {
+ background: url(https://i.ibb.co/QjPP0Vv/starcoder.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #redpajama-7b, #redpajama-instruct-7b {
+ background: url(https://i.ibb.co/NNB6qPj/redpajama.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #mpt-7b {
+ background: url(https://i.ibb.co/DwN44Z9/mpt.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #vicuna-7b, #vicuna-13b {
+ background: url(https://i.ibb.co/vqPDrPQ/vicuna.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #llama-deus-7b {
+ background: url(https://i.ibb.co/4mH9LRQ/llama-deus.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #evolinstruct-vicuna-7b, #evolinstruct-vicuna-13b {
+ background: url(https://i.ibb.co/xHDRjLS/evol-vicuna.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #alpacoom-7b {
+ background: url(https://huggingface.co/mrm8488/Alpacoom/resolve/main/alpacoom_logo__1___1___1_-removebg-preview.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #baize-7b, #baize-13b {
+ background: url(https://i.ibb.co/j5VpHb0/baize.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #guanaco-7b, #guanaco-13b, #guanaco-33b, #guanaco-65b {
+ background: url(https://i.ibb.co/DWWsZn7/guanaco.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #falcon-7b, #falcon-40b {
+ background: url(https://i.ibb.co/86yNWwG/falcon.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #wizard-falcon-7b, #wizard-falcon-40b {
+ background: url(https://i.ibb.co/415s0D4/wizard-falcon.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #nous-hermes-13b {
+ background: url(https://i.ibb.co/sm8VgtL/nous-hermes.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #airoboros-7b, #airoboros-13b {
+ background: url(https://i.ibb.co/NLchBkB/airoboros.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #samantha-7b, #samantha-13b, #samantha-33b {
+ background: url(https://i.ibb.co/72t5pyP/samantha.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #lazarus-30b {
+ background: url(https://i.ibb.co/Zm2Bdzt/lazarus.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #chronos-13b, #chronos-33b {
+ background: url(https://i.ibb.co/sQZ3L8j/chronos.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #wizardlm-13b, #wizardlm-30b {
+ background: url(https://i.ibb.co/SRXWKz9/WizardLM.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #replit-3b {
+ background: url(https://i.ibb.co/BrKCKYq/replit.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #byom {
+ background: url(https://i.ibb.co/YhM4B2X/byom.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ #chosen-model {
+ background: url(https://i.ibb.co/dLmNh2v/chosen.png);
+ background-repeat: no-repeat;
+ background-size: 100px 95px;
+ color: transparent;
+ width: 100px;
+ height: 100px;
+ margin: auto;
+ }
+
+ .sub-container > div {
+ min-width: max-content !important;
+ }
+ """
model_cards.json ADDED
The diff for this file is too large to render. See raw diff