update

This view is limited to 50 files because it contains too many changes. See raw diff.
- README.md +10 -0
- __init__.py +0 -0
- app.py +524 -0
- args.py +61 -0
- channels.txt +10 -0
- chats/__init__.py +0 -0
- chats/alpaca.py +108 -0
- chats/alpaca_gpt4.py +101 -0
- chats/alpacoom.py +101 -0
- chats/baize.py +113 -0
- chats/central.py +156 -0
- chats/flan_alpaca.py +101 -0
- chats/guanaco.py +120 -0
- chats/koalpaca.py +101 -0
- chats/mpt.py +118 -0
- chats/os_stablelm.py +112 -0
- chats/post.py +3 -0
- chats/pre.py +97 -0
- chats/redpajama.py +101 -0
- chats/stablelm.py +112 -0
- chats/starchat.py +112 -0
- chats/vicuna.py +109 -0
- configs/constraints_config.yaml +4 -0
- configs/response_configs/baize.yaml +12 -0
- configs/response_configs/camel.yaml +11 -0
- configs/response_configs/default.yaml +9 -0
- configs/response_configs/flan.yaml +9 -0
- configs/response_configs/gpt4_alpaca.yaml +9 -0
- configs/response_configs/guanaco.yaml +9 -0
- configs/response_configs/koalpaca.yaml +11 -0
- configs/response_configs/redpajama.yaml +11 -0
- configs/response_configs/stablelm.yaml +11 -0
- configs/response_configs/stackllama.yaml +10 -0
- configs/response_configs/starchat.yaml +12 -0
- configs/response_configs/t5_vicuna.yaml +9 -0
- configs/summarization_configs/camel.yaml +11 -0
- configs/summarization_configs/default.yaml +11 -0
- configs/summarization_configs/koalpaca.yaml +11 -0
- configs/summarization_configs/redpajama.yaml +11 -0
- configs/summarization_configs/stablelm.yaml +11 -0
- configs/summarization_configs/t5_vicuna.yaml +9 -0
- examples.txt +2 -0
- gens/__init__.py +0 -0
- gens/batch_gen.py +32 -0
- global_vars.py +194 -0
- miscs/__init__.py +0 -0
- miscs/js.py +50 -0
- miscs/strings.py +83 -0
- miscs/styles.py +727 -0
- model_cards.json +0 -0
README.md
ADDED
@@ -0,0 +1,10 @@
---
title: LLM As Serve
emoji: 🦙🚀
sdk: gradio
app_file: app.py
pinned: true
license: apache-2.0
colorFrom: yellow
colorTo: green
---
__init__.py
ADDED
File without changes
app.py
ADDED
@@ -0,0 +1,524 @@
import time
import json
import re
import os
from os import listdir
from os.path import isfile, join
import gradio as gr
import args
import global_vars
from chats import central
from transformers import AutoModelForCausalLM
from miscs.styles import MODEL_SELECTION_CSS
from miscs.js import GET_LOCAL_STORAGE, UPDATE_LEFT_BTNS_STATE
from utils import get_chat_interface, get_chat_manager, get_global_context

ex_file = open("examples.txt", "r")
examples = ex_file.read().split("\n")
ex_btns = []

chl_file = open("channels.txt", "r")
channels = chl_file.read().split("\n")
channel_btns = []

global_vars.initialize_globals()

response_configs = [
    f"configs/response_configs/{f}"
    for f in listdir("configs/response_configs")
    if isfile(join("configs/response_configs", f))
]

summarization_configs = [
    f"configs/summarization_configs/{f}"
    for f in listdir("configs/summarization_configs")
    if isfile(join("configs/summarization_configs", f))
]

model_info = json.load(open("model_cards.json"))

def channel_num(btn_title):
    choice = 0

    for idx, channel in enumerate(channels):
        if channel == btn_title:
            choice = idx

    return choice


def set_chatbot(btn, ld, state):
    choice = channel_num(btn)

    res = [state["ppmanager_type"].from_json(json.dumps(ppm_str)) for ppm_str in ld]
    empty = len(res[choice].pingpongs) == 0
    return (
        res[choice].build_uis(),
        choice,
        gr.update(visible=empty),
        gr.update(interactive=not empty)
    )


def set_example(btn):
    return btn, gr.update(visible=False)


def set_popup_visibility(ld, example_block):
    return example_block


def move_to_second_view(btn):
    info = model_info[btn]

    guard_vram = 5 * 1024.
    vram_req_full = int(info["vram(full)"]) + guard_vram
    vram_req_8bit = int(info["vram(8bit)"]) + guard_vram
    vram_req_4bit = int(info["vram(4bit)"]) + guard_vram

    load_mode_list = []

    return (
        gr.update(visible=False),
        gr.update(visible=True),
        info["thumb"],
        f"## {btn}",
        f"**Parameters**\n: Approx. {info['parameters']}",
        f"**🤗 Hub(base)**\n: {info['hub(base)']}",
        f"**🤗 Hub(LoRA)**\n: {info['hub(ckpt)']}",
        info['desc'],
        f"""**Min VRAM requirements** :
| half precision | load_in_8bit | load_in_4bit |
| ------------------------------------- | ---------------------------------- | ---------------------------------- |
| {round(vram_req_full/1024., 1)}GiB | {round(vram_req_8bit/1024., 1)}GiB | {round(vram_req_4bit/1024., 1)}GiB |
""",
        info['default_gen_config'],
        info['example1'],
        info['example2'],
        info['example3'],
        info['example4'],
        "",
    )


def move_to_first_view():
    return (
        gr.update(visible=True),
        gr.update(visible=False),
        ""
    )


def get_model_num(
    model_name
):
    model_num = 0
    re_tag = re.compile(r'<[^>]+>')
    model_name = re_tag.sub('', model_name).strip()
    print(model_name)

    for idx, item in enumerate(global_vars.models):
        if item["model_name"] == model_name:
            model_num = idx
            print(idx)
            break

    return "Download completed!", model_num

def move_to_third_view(model_num):
    gen_config = global_vars.models[model_num]["gen_config"]

    return (
        "Preparation done!",
        gr.update(visible=False),
        gr.update(visible=True),
        gr.update(label=global_vars.models[model_num]["model_type"]),
        {
            "ppmanager_type": global_vars.models[model_num]["chat_manager"],
            "model_type": global_vars.models[model_num]["model_type"],
        },
        get_global_context(global_vars.models[model_num]["model_type"]),
        gen_config.temperature,
        gen_config.top_p,
        gen_config.top_k,
        gen_config.repetition_penalty,
        gen_config.max_new_tokens,
        gen_config.num_beams,
        gen_config.use_cache,
        gen_config.do_sample,
        gen_config.eos_token_id,
        gen_config.pad_token_id,
    )


def toggle_inspector(view_selector):
    if view_selector == "with context inspector":
        return gr.update(visible=True)
    else:
        return gr.update(visible=False)


def reset_chat(idx, ld, state):
    res = [state["ppmanager_type"].from_json(json.dumps(ppm_str)) for ppm_str in ld]
    res[idx].pingpongs = []

    return (
        "",
        [],
        str(res),
        gr.update(visible=True),
        gr.update(interactive=False),
    )

def rollback_last(idx, ld, state):
    res = [state["ppmanager_type"].from_json(json.dumps(ppm_str)) for ppm_str in ld]
    last_user_message = res[idx].pingpongs[-1].ping
    res[idx].pingpongs = res[idx].pingpongs[:-1]

    return (
        last_user_message,
        res[idx].build_uis(),
        str(res),
        gr.update(interactive=False)
    )

with gr.Blocks(css=MODEL_SELECTION_CSS, theme='gradio/soft') as demo:
    with gr.Column() as model_choice_view:
        gr.Markdown("# Choose a Model", elem_classes=["center"])

        with gr.Row(elem_id="container"):
            with gr.Column():
                gr.Markdown("""This application is built and provided for anyone who wants to try out open source Large Language Models for free. All the provided models are pre-downloaded and pre-loaded to maximize your experience. This application is hosted on [jarvislabs.ai](https://jarvislabs.ai/) with 3 x A6000 VM instance. This demo will be hosted until 13/07/2023, but you can run the same application on [jarvislabs.ai](https://jarvislabs.ai/) with arbitrary GPU options of your choice. Also, if you can run the same application on your own environment, be sure to check out the [project repository](https://github.com/deep-diver/LLM-As-Chatbot) for any further information.

From this page, choose a model that you would like to try out. By selecting a model, you will see more detailed description of the model in a separate page. Also note that this page will appear whenever you refresh your browser tab. """)
                with gr.Row(elem_classes=["sub-container"]):
                    # with gr.Column(min_width=20):
                    #     llama_deus_7b = gr.Button("llama-deus-7b", elem_id="llama-deus-7b", elem_classes=["square"])
                    #     gr.Markdown("LLaMA Deus", elem_classes=["center"])

                    with gr.Column(min_width=20):
                        baize_7b = gr.Button("baize-7b", elem_id="baize-7b", elem_classes=["square"])
                        gr.Markdown("Baize", elem_classes=["center"])

                    # with gr.Column(min_width=20):
                    #     koalpaca = gr.Button("koalpaca", elem_id="koalpaca", elem_classes=["square"])
                    #     gr.Markdown("koalpaca", elem_classes=["center"])

                    # with gr.Column(min_width=20):
                    #     evolinstruct_vicuna_13b = gr.Button("evolinstruct-vicuna-13b", elem_id="evolinstruct-vicuna-13b", elem_classes=["square"])
                    #     gr.Markdown("EvolInstruct Vicuna", elem_classes=["center"])

                    with gr.Column(min_width=20):
                        guanaco_7b = gr.Button("guanaco-7b", elem_id="guanaco-7b", elem_classes=["square"])
                        gr.Markdown("Guanaco", elem_classes=["center"])

                    # with gr.Column(min_width=20):
                    #     nous_hermes_13b = gr.Button("nous-hermes-13b", elem_id="nous-hermes-13b", elem_classes=["square"])
                    #     gr.Markdown("Nous Hermes", elem_classes=["center"])

        progress_view = gr.Textbox(label="Progress")

    with gr.Column(visible=False) as model_review_view:
        gr.Markdown("# Confirm the chosen model", elem_classes=["center"])

        with gr.Column(elem_id="container2"):
            gr.Markdown("""The model is pre-downloaded and pre-loaded for your convenience in this demo application, so you don't need to worry about the `VRAM requirements`. It is there just as a reference. Also, proper `GenerationConfig` is selected and fixed, but you can adjust some of the hyper-parameters once you enter the chatting mode.

Before deciding which model to use, you can expand `Example showcases` to see some of the recorded example pairs of question and answer. It will help you understanding better which model suits you well. Then, click `Confirm` button to enter the chatting mode. If you click `Back` button or refresh the browser tab, the model selection page will appear.
""")

            with gr.Row():
                model_image = gr.Image(None, interactive=False, show_label=False)
                with gr.Column():
                    model_name = gr.Markdown("**Model name**")
                    model_desc = gr.Markdown("...")
                    model_params = gr.Markdown("Parameters\n: ...")
                    model_base = gr.Markdown("🤗 Hub(base)\n: ...")
                    model_ckpt = gr.Markdown("🤗 Hub(LoRA)\n: ...")
                    model_vram = gr.Markdown(f"""**Minimal VRAM requirement** :
| half precision | load_in_8bit | load_in_4bit |
| ------------------------------ | ------------------------- | ------------------------- |
| {round(7830/1024., 1)}GiB | {round(5224/1024., 1)}GiB | {round(4324/1024., 1)}GiB |
""")
                    model_thumbnail_tiny = gr.Textbox("", visible=False)

            with gr.Column():
                gen_config_path = gr.Dropdown(
                    response_configs,
                    value=response_configs[0],
                    interactive=False,
                    label="Gen Config(response)",
                )

            with gr.Accordion("Example showcases", open=False):
                with gr.Tab("Ex1"):
                    example_showcase1 = gr.Chatbot(
                        [("hello", "world"), ("damn", "good")]
                    )
                with gr.Tab("Ex2"):
                    example_showcase2 = gr.Chatbot(
                        [("hello", "world"), ("damn", "good")]
                    )
                with gr.Tab("Ex3"):
                    example_showcase3 = gr.Chatbot(
                        [("hello", "world"), ("damn", "good")]
                    )
                with gr.Tab("Ex4"):
                    example_showcase4 = gr.Chatbot(
                        [("hello", "world"), ("damn", "good")]
                    )

            with gr.Row():
                back_to_model_choose_btn = gr.Button("Back")
                confirm_btn = gr.Button("Confirm")

            with gr.Column(elem_classes=["progress-view"]):
                txt_view = gr.Textbox(label="Status")
                progress_view2 = gr.Textbox(label="Progress")

    with gr.Column(visible=False) as chat_view:
        idx = gr.State(0)
        model_num = gr.State(0)
        chat_state = gr.State()
        local_data = gr.JSON({}, visible=False)

        gr.Markdown("# Chatting", elem_classes=["center"])
        gr.Markdown("""This entire application is built on top of `Gradio`. You can select one of the 10 channels on the left side to start chatting with the model. The model type you chose appear as a label on the top left corner of the chat component as well. Furthermore, you will see which model has responded to your question in each turn with their unique icons. This is because you can go back and forth to select different models from time to time, and you can continue your conversation with different models. With models' icons, you will understand how the conversation has gone better.""")

        with gr.Row():
            with gr.Column(scale=1, min_width=180):
                gr.Markdown("GradioChat", elem_id="left-top")

                with gr.Column(elem_id="left-pane"):
                    chat_back_btn = gr.Button("Back", elem_id="chat-back-btn")

                    with gr.Accordion("Histories", elem_id="chat-history-accordion"):
                        channel_btns.append(gr.Button(channels[0], elem_classes=["custom-btn-highlight"]))

                        for channel in channels[1:]:
                            channel_btns.append(gr.Button(channel, elem_classes=["custom-btn"]))

            with gr.Column(scale=8, elem_id="right-pane"):
                with gr.Column(
                    elem_id="initial-popup", visible=False
                ) as example_block:
                    with gr.Row(scale=1):
                        with gr.Column(elem_id="initial-popup-left-pane"):
                            gr.Markdown("GradioChat", elem_id="initial-popup-title")
                            gr.Markdown(
                                "Making the community's best AI chat models available to everyone."
                            )
                        with gr.Column(elem_id="initial-popup-right-pane"):
                            gr.Markdown(
                                "Chat UI is now open sourced on Hugging Face Hub"
                            )
                            gr.Markdown(
                                "check out the [↗ repository](https://huggingface.co/spaces/chansung/test-multi-conv)"
                            )

                    with gr.Column(scale=1):
                        gr.Markdown("Examples")
                        with gr.Row():
                            for example in examples:
                                ex_btns.append(gr.Button(example, elem_classes=["example-btn"]))

                with gr.Column(elem_id="aux-btns-popup", visible=True):
                    with gr.Row():
                        stop = gr.Button("Stop", elem_classes=["aux-btn"], interactive=False)
                        regenerate = gr.Button("Rege", interactive=False, elem_classes=["aux-btn"])
                        clean = gr.Button("Clean", elem_classes=["aux-btn"])

                with gr.Accordion("Context Inspector", elem_id="aux-viewer", open=False):
                    context_inspector = gr.Textbox(
                        "",
                        elem_id="aux-viewer-inspector",
                        label="",
                        lines=30,
                        max_lines=50,
                    )

                chatbot = gr.Chatbot(elem_id='chatbot')
                instruction_txtbox = gr.Textbox(
                    placeholder="Ask anything", label="",
                    elem_id="prompt-txt"
                )

        with gr.Accordion("Control Panel", open=False) as control_panel:
            with gr.Column():
                with gr.Column():
                    gr.Markdown("#### Global context")
                    with gr.Accordion("global context will persist during conversation, and it is placed at the top of the prompt", open=False):
                        global_context = gr.Textbox(
                            "global context",
                            lines=5,
                            max_lines=10,
                            interactive=True,
                            elem_id="global-context"
                        )

                    gr.Markdown("#### GenConfig for **response** text generation")
                    with gr.Row():
                        res_temp = gr.Slider(0.0, 2.0, 0, step=0.1, label="temp", interactive=True)
                        res_topp = gr.Slider(0.0, 2.0, 0, step=0.1, label="top_p", interactive=True)
                        res_topk = gr.Slider(20, 1000, 0, step=1, label="top_k", interactive=True)
                        res_rpen = gr.Slider(0.0, 2.0, 0, step=0.1, label="rep_penalty", interactive=True)
                        res_mnts = gr.Slider(64, 2048, 0, step=1, label="new_tokens", interactive=True)
                        res_beams = gr.Slider(1, 4, 0, step=1, label="beams")
                        res_cache = gr.Radio([True, False], value=0, label="cache", interactive=True)
                        res_sample = gr.Radio([True, False], value=0, label="sample", interactive=True)
                        res_eosid = gr.Number(value=0, visible=False, precision=0)
                        res_padid = gr.Number(value=0, visible=False, precision=0)

                with gr.Column(visible=False):
                    gr.Markdown("#### GenConfig for **summary** text generation")
                    with gr.Row():
                        sum_temp = gr.Slider(0.0, 2.0, 0, step=0.1, label="temp", interactive=True)
                        sum_topp = gr.Slider(0.0, 2.0, 0, step=0.1, label="top_p", interactive=True)
                        sum_topk = gr.Slider(20, 1000, 0, step=1, label="top_k", interactive=True)
                        sum_rpen = gr.Slider(0.0, 2.0, 0, step=0.1, label="rep_penalty", interactive=True)
                        sum_mnts = gr.Slider(64, 2048, 0, step=1, label="new_tokens", interactive=True)
                        sum_beams = gr.Slider(1, 8, 0, step=1, label="beams", interactive=True)
                        sum_cache = gr.Radio([True, False], value=0, label="cache", interactive=True)
                        sum_sample = gr.Radio([True, False], value=0, label="sample", interactive=True)
                        sum_eosid = gr.Number(value=0, visible=False, precision=0)
                        sum_padid = gr.Number(value=0, visible=False, precision=0)

                with gr.Column():
                    gr.Markdown("#### Context managements")
                    with gr.Row():
                        ctx_num_lconv = gr.Slider(2, 10, 3, step=1, label="number of recent talks to keep", interactive=True)
                        ctx_sum_prompt = gr.Textbox(
                            "summarize our conversations. what have we discussed about so far?",
                            label="design a prompt to summarize the conversations",
                            visible=False
                        )

        gr.Markdown("""The control panel on the bottom side allows you to adjust three major hyper-parameters. First, you can set the global context of the conversation. Appropriate global context that is recommended by each model's authors is provided by default, but you can set it as you like. Second, you can adjust some of the hyper-parameters of the `GenerationConfig` to decide how you want the model to generate text. `Temperature`, `Top K`, and `New Max Tokens` are some of the available ones. Third, you can adjust the number of recent talks to keep track of. With bigger number, the model will see more of the past conversations.

Lastly, there is a hidden panel on the top right corner, and it will appear when you hover your mouse around it. When expanding the panel, it shows what the model actually sees. That is you can double check how the entire prompt is constructed and fed into the model at each conversation.
""")

    btns = [
        baize_7b, guanaco_7b #nous_hermes_13b, evolinstruct_vicuna_13b, guanaco_13b
        # baize_7b, evolinstruct_vicuna_13b, guanaco_13b, nous_hermes_13b
        # llama_deus_7b, koalpaca, evolinstruct_vicuna_13b, baize_7b, guanaco_33b,
    ]
    for btn in btns:
        btn.click(
            move_to_second_view,
            btn,
            [
                model_choice_view, model_review_view,
                model_image, model_name, model_params, model_base, model_ckpt,
                model_desc, model_vram, gen_config_path,
                example_showcase1, example_showcase2, example_showcase3, example_showcase4,
                progress_view
            ]
        )

    back_to_model_choose_btn.click(
        move_to_first_view,
        None,
        [model_choice_view, model_review_view, progress_view2]
    )

    confirm_btn.click(
        get_model_num,
        [model_name],
        [progress_view2, model_num]
    ).then(
        move_to_third_view,
        model_num,
        [progress_view2, model_review_view, chat_view, chatbot, chat_state, global_context,
         res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid]
    )

    for btn in channel_btns:
        btn.click(
            set_chatbot,
            [btn, local_data, chat_state],
            [chatbot, idx, example_block, regenerate]
        ).then(
            None, btn, None,
            _js=UPDATE_LEFT_BTNS_STATE
        )

    for btn in ex_btns:
        btn.click(
            set_example,
            [btn],
            [instruction_txtbox, example_block]
        )

    instruction_txtbox.submit(
        lambda: [
            gr.update(visible=False),
            gr.update(interactive=True)
        ],
        None,
        [example_block, regenerate]
    ).then(
        central.chat_stream,
        [idx, local_data, instruction_txtbox, chat_state, model_num,
         global_context, ctx_num_lconv, ctx_sum_prompt,
         res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid],
        [instruction_txtbox, chatbot, context_inspector, local_data],
    ).then(
        None, local_data, None,
        _js="(v)=>{ setStorage('local_data',v) }"
    )

    regenerate.click(
        rollback_last,
        [idx, local_data, chat_state],
        [instruction_txtbox, chatbot, local_data, regenerate]
    ).then(
        central.chat_stream,
        [idx, local_data, instruction_txtbox, chat_state, model_num,
         global_context, ctx_num_lconv, ctx_sum_prompt,
         res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid],
        [instruction_txtbox, chatbot, context_inspector, local_data],
    ).then(
        lambda: gr.update(interactive=True),
        None,
        regenerate
    ).then(
        None, local_data, None,
        _js="(v)=>{ setStorage('local_data',v) }"
    )

    # stop.click(
    #     None, None, None,
    #     cancels=[send_event]
    # )

    clean.click(
        reset_chat,
        [idx, local_data, chat_state],
        [instruction_txtbox, chatbot, local_data, example_block, regenerate]
    ).then(
        None, local_data, None,
        _js="(v)=>{ setStorage('local_data',v) }"
    )

    chat_back_btn.click(
        lambda: [gr.update(visible=False), gr.update(visible=True)],
        None,
        [chat_view, model_choice_view]
    )

    demo.load(
        None,
        inputs=None,
        outputs=[chatbot, local_data],
        _js=GET_LOCAL_STORAGE,
    )

demo.queue(
    concurrency_count=5,
    max_size=256,
).launch(
    server_port=6006,
    server_name="0.0.0.0",
    debug=True,
)
args.py
ADDED
@@ -0,0 +1,61 @@
import argparse

def parse_args():
    parser = argparse.ArgumentParser(
        description="Gradio Application for LLM as a chatbot service"
    )
    parser.add_argument(
        "--base-url",
        help="Hugging Face Hub URL",
        default="elinas/llama-7b-hf-transformers-4.29",
        type=str,
    )
    parser.add_argument(
        "--ft-ckpt-url",
        help="Hugging Face Hub URL",
        # default="tloen/alpaca-lora-7b",
        default="LLMs/Alpaca-LoRA-7B-elina",
        type=str,
    )
    parser.add_argument(
        "--port",
        help="PORT number where the app is served",
        default=6006,
        type=int,
    )
    parser.add_argument(
        "--share",
        help="Create and share temporary endpoint (useful in Colab env)",
        action='store_true'
    )
    parser.add_argument(
        "--gen-config-path",
        help="path to GenerationConfig file",
        default="configs/response_configs/default.yaml",
        # default="configs/gen_config_koalpaca.yaml",
        # default="configs/gen_config_stablelm.yaml",
        type=str
    )
    parser.add_argument(
        "--gen-config-summarization-path",
        help="path to GenerationConfig file used in context summarization",
        default="configs/summarization_configs/default.yaml",
        type=str
    )
    parser.add_argument(
        "--multi-gpu",
        help="Enable multi gpu mode. This will force not to use Int8 but float16, so you need to check if your system has enough GPU memory",
        action='store_true'
    )
    parser.add_argument(
        "--force-download_ckpt",
        help="Force to download ckpt instead of using cached one",
        action="store_true"
    )
    parser.add_argument(
        "--chat-only-mode",
        help="Only show chatting window. Otherwise, other components will be appeared for more sophisticated control",
        action="store_true"
    )

    return parser.parse_args()
channels.txt
ADDED
@@ -0,0 +1,10 @@
1st Channel
2nd Channel
3rd Channel
4th Channel
5th Channel
6th Channel
7th Channel
8th Channel
9th Channel
10th Channel
chats/__init__.py
ADDED
File without changes
chats/alpaca.py
ADDED
@@ -0,0 +1,108 @@
import copy
import json
import global_vars
from chats import pre, post
from pingpong import PingPong
from gens.batch_gen import get_output_batch

from pingpong.context import CtxLastWindowStrategy

def build_prompts(ppmanager, user_message, global_context, win_size=3):
    dummy_ppm = copy.deepcopy(ppmanager)

    dummy_ppm.ctx = global_context
    for pingpong in dummy_ppm.pingpongs:
        pong = pingpong.pong
        first_sentence = pong.split("\n")[0]
        if first_sentence != "" and \
            pre.contains_image_markdown(first_sentence):
            pong = ' '.join(pong.split("\n")[1:]).strip()
            pingpong.pong = pong

    lws = CtxLastWindowStrategy(win_size)

    prompt = lws(dummy_ppm)
    return prompt

def text_stream(ppmanager, streamer, model_thumbnail_tiny, model_type):
    count = 0

    for new_text in streamer:
        if count == 0:
            ppmanager.append_pong(f"![]({model_thumbnail_tiny})***[{model_type}]***\n")
            count = count + 1

        ppmanager.append_pong(new_text)
        yield ppmanager, ppmanager.build_uis()

    yield ppmanager, ppmanager.build_uis()

def summarize(
    ppmanager, prompt_to_summarize, win_size,
    temperature, top_p, top_k, repetition_penalty, max_new_tokens,
    num_beams, use_cache, do_sample, eos_token_id, pad_token_id
):
    ctx = ppmanager.ctx
    last_pong = ppmanager.pingpongs[-1].pong
    ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
    prompt = ppmanager.build_prompts(from_idx=-win_size)

    _, gen_config_summarization = pre.build_gen_config(
        temperature, top_p, top_k, repetition_penalty, max_new_tokens,
        num_beams, use_cache, do_sample, eos_token_id, pad_token_id
    )
    summarize_output = get_output_batch(
        global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
    )[0].split("### Response:")[-1].strip()
    ppmanager.ctx = summarize_output
    ppmanager.pop_pingpong()
    return ppmanager

def chat_stream(
    idx, local_data, user_message, state, model_num,
    global_context, ctx_num_lconv, ctx_sum_prompt,
    res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
):
    res = [
        state["ppmanager_type"].from_json(json.dumps(ppm))
        for ppm in local_data
    ]

    ppm = res[idx]

    # add_ping returns a prompt structured in Alpaca form
    ppm.add_pingpong(
        PingPong(user_message, "")
    )
    prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)

    # prepare text generating streamer & start generating
    gen_kwargs, streamer = pre.build(
        prompt, model_num,
        res_temp, res_topp, res_topk, res_rpen, res_mnts,
        res_beams, res_cache, res_sample, res_eosid, res_padid,
        return_token_type_ids=False
    )
    pre.start_gen(gen_kwargs, model_num)

    model_thumbnail_tiny = global_vars.models[model_num]["model_thumb_tiny"]
    model_type = global_vars.models[model_num]["model_type"]
    for ppmanager, uis in text_stream(ppm, streamer, model_thumbnail_tiny, model_type):
        yield "", uis, prompt, str(res)

    ppm = post.strip_pong(ppm)
    yield "", ppm.build_uis(), prompt, str(res)

    # summarization
    # ppm.add_pingpong(
    #     PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
    # )
    # yield "", ppm.build_uis(), prompt, state
    # ppm.pop_pingpong()

    # ppm = summarize(
    #     ppm, ctx_sum_prompt, ctx_num_lconv,
    #     sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
    #     sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
    # )
    yield "", ppm.build_uis(), prompt, str(res)
chats/alpaca_gpt4.py
ADDED
@@ -0,0 +1,101 @@
import copy
import json
import global_vars
from chats import pre, post
from pingpong import PingPong
from gens.batch_gen import get_output_batch

from pingpong.context import CtxLastWindowStrategy

def build_prompts(ppmanager, user_message, global_context, win_size=3):
    dummy_ppm = copy.deepcopy(ppmanager)

    dummy_ppm.ctx = global_context
    for pingpong in dummy_ppm.pingpongs:
        pong = pingpong.pong
        first_sentence = pong.split("\n")[0]
        if first_sentence != "" and \
            pre.contains_image_markdown(first_sentence):
            pong = ' '.join(pong.split("\n")[1:]).strip()
            pingpong.pong = pong

    lws = CtxLastWindowStrategy(win_size)

    prompt = lws(dummy_ppm)
    return prompt

def text_stream(ppmanager, streamer):
    for new_text in streamer:
        ppmanager.append_pong(new_text)
        yield ppmanager, ppmanager.build_uis()

    yield ppmanager, ppmanager.build_uis()

def summarize(
    ppmanager, prompt_to_summarize, win_size,
    temperature, top_p, top_k, repetition_penalty, max_new_tokens,
    num_beams, use_cache, do_sample, eos_token_id, pad_token_id
):
    ctx = ppmanager.ctx
    last_pong = ppmanager.pingpongs[-1].pong
    ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
    prompt = ppmanager.build_prompts(from_idx=-win_size)

    _, gen_config_summarization = pre.build_gen_config(
        temperature, top_p, top_k, repetition_penalty, max_new_tokens,
        num_beams, use_cache, do_sample, eos_token_id, pad_token_id
    )
    summarize_output = get_output_batch(
        global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
    )[0].split("### Response:")[-1].strip()
    ppmanager.ctx = summarize_output
    ppmanager.pop_pingpong()
    return ppmanager

def chat_stream(
    idx, local_data, user_message, state, model_num,
    global_context, ctx_num_lconv, ctx_sum_prompt,
    res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
):
    res = [
        state["ppmanager_type"].from_json(json.dumps(ppm))
        for ppm in local_data
    ]

    ppm = res[idx]

    # add_ping returns a prompt structured in Alpaca form
    ppm.add_pingpong(
        PingPong(user_message, "")
    )
    prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)

    # prepare text generating streamer & start generating
    gen_kwargs, streamer = pre.build(
        prompt,
        res_temp, res_topp, res_topk, res_rpen, res_mnts,
        res_beams, res_cache, res_sample, res_eosid, res_padid,
        return_token_type_ids=False
    )
    pre.start_gen(gen_kwargs)

    # handling stream
    for ppmanager, uis in text_stream(ppm, streamer):
        yield "", uis, prompt, str(res)

    ppm = post.strip_pong(ppm)
    yield "", ppm.build_uis(), prompt, str(res)

    # summarization
    # ppm.add_pingpong(
    #     PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
    # )
    # yield "", ppm.build_uis(), prompt, state
    # ppm.pop_pingpong()

    # ppm = summarize(
    #     ppm, ctx_sum_prompt, ctx_num_lconv,
    #     sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
    #     sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
    # )
    yield "", ppm.build_uis(), prompt, str(res)
chats/alpacoom.py
ADDED
@@ -0,0 +1,101 @@
import copy
import json
import global_vars
from chats import pre, post
from pingpong import PingPong
from gens.batch_gen import get_output_batch

from pingpong.context import CtxLastWindowStrategy

def build_prompts(ppmanager, user_message, global_context, win_size=3):
    dummy_ppm = copy.deepcopy(ppmanager)

    dummy_ppm.ctx = global_context
    for pingpong in dummy_ppm.pingpongs:
        pong = pingpong.pong
        first_sentence = pong.split("\n")[0]
        if first_sentence != "" and \
            pre.contains_image_markdown(first_sentence):
            pong = ' '.join(pong.split("\n")[1:]).strip()
            pingpong.pong = pong

    lws = CtxLastWindowStrategy(win_size)

    prompt = lws(dummy_ppm)
    return prompt

def text_stream(ppmanager, streamer):
    for new_text in streamer:
        ppmanager.append_pong(new_text)
        yield ppmanager, ppmanager.build_uis()

    yield ppmanager, ppmanager.build_uis()

def summarize(
    ppmanager, prompt_to_summarize, win_size,
    temperature, top_p, top_k, repetition_penalty, max_new_tokens,
    num_beams, use_cache, do_sample, eos_token_id, pad_token_id
):
    ctx = ppmanager.ctx
    last_pong = ppmanager.pingpongs[-1].pong
    ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
    prompt = ppmanager.build_prompts(from_idx=-win_size)

    _, gen_config_summarization = pre.build_gen_config(
        temperature, top_p, top_k, repetition_penalty, max_new_tokens,
        num_beams, use_cache, do_sample, eos_token_id, pad_token_id
    )
    summarize_output = get_output_batch(
        global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
    )[0].split("### Response:")[-1].strip()
    ppmanager.ctx = summarize_output
    ppmanager.pop_pingpong()
    return ppmanager

def chat_stream(
    idx, local_data, user_message, state, model_num,
    global_context, ctx_num_lconv, ctx_sum_prompt,
    res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
):
    res = [
        state["ppmanager_type"].from_json(json.dumps(ppm))
        for ppm in local_data
    ]

    ppm = res[idx]

    # add_ping returns a prompt structured in Alpaca form
    ppm.add_pingpong(
        PingPong(user_message, "")
    )
    prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)

    # prepare text generating streamer & start generating
    gen_kwargs, streamer = pre.build(
        prompt,
        res_temp, res_topp, res_topk, res_rpen, res_mnts,
        res_beams, res_cache, res_sample, res_eosid, res_padid,
        return_token_type_ids=False
    )
    pre.start_gen(gen_kwargs)

    # handling stream
    for ppmanager, uis in text_stream(ppm, streamer):
        yield "", uis, prompt, str(res)

    ppm = post.strip_pong(ppm)
    yield "", ppm.build_uis(), prompt, str(res)

    # summarization
    # ppm.add_pingpong(
    #     PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
    # )
    # yield "", ppm.build_uis(), prompt, state
    # ppm.pop_pingpong()

    # ppm = summarize(
    #     ppm, ctx_sum_prompt, ctx_num_lconv,
    #     sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
    #     sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
    # )
    yield "", ppm.build_uis(), prompt, str(res)
chats/baize.py
ADDED
@@ -0,0 +1,113 @@
import copy
import json
import global_vars
from chats import pre, post
from pingpong import PingPong
from gens.batch_gen import get_output_batch

from pingpong.context import CtxLastWindowStrategy

def build_prompts(ppmanager, user_message, global_context, win_size=3):
    dummy_ppm = copy.deepcopy(ppmanager)

    dummy_ppm.ctx = global_context
    for pingpong in dummy_ppm.pingpongs:
        pong = pingpong.pong
        first_sentence = pong.split("\n")[0]
        if first_sentence != "" and \
            pre.contains_image_markdown(first_sentence):
            pong = ' '.join(pong.split("\n")[1:]).strip()
            pingpong.pong = pong

    lws = CtxLastWindowStrategy(win_size)

    prompt = lws(dummy_ppm)
    return prompt

def text_stream(ppmanager, streamer, model_thumbnail_tiny, model_type):
    count = 0

    for new_text in streamer:
        if "[|Human|]" in new_text or \
            "[|AI|]" in new_text:
            break

        if count == 0:
            ppmanager.append_pong(f"![]({model_thumbnail_tiny})***[{model_type}]***\n")
            count = count + 1

        ppmanager.append_pong(new_text)
        yield ppmanager, ppmanager.build_uis()

    yield ppmanager, ppmanager.build_uis()

def summarize(
    ppmanager, prompt_to_summarize, win_size,
    temperature, top_p, top_k, repetition_penalty, max_new_tokens,
    num_beams, use_cache, do_sample, eos_token_id, pad_token_id
):
    ctx = ppmanager.ctx
    last_pong = ppmanager.pingpongs[-1].pong
    ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
    prompt = ppmanager.build_prompts(from_idx=-win_size)

    _, gen_config_summarization = pre.build_gen_config(
        temperature, top_p, top_k, repetition_penalty, max_new_tokens,
        num_beams, use_cache, do_sample, eos_token_id, pad_token_id
    )
    summarize_output = get_output_batch(
        global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
    )[0].split("### Response:")[-1].strip()
    ppmanager.ctx = summarize_output
    ppmanager.pop_pingpong()
    return ppmanager

def chat_stream(
    idx, local_data, user_message, state, model_num,
    global_context, ctx_num_lconv, ctx_sum_prompt,
    res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
):
    res = [
        state["ppmanager_type"].from_json(json.dumps(ppm))
        for ppm in local_data
    ]

    ppm = res[idx]

    # add_ping returns a prompt structured in Alpaca form
    ppm.add_pingpong(
        PingPong(user_message, "")
    )
    prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)

    # prepare text generating streamer & start generating
    gen_kwargs, streamer = pre.build(
        prompt, model_num,
        res_temp, res_topp, res_topk, res_rpen, res_mnts,
        res_beams, res_cache, res_sample, res_eosid, res_padid,
        return_token_type_ids=False
    )
    pre.start_gen(gen_kwargs, model_num)

    # handling stream
    model_thumbnail_tiny = global_vars.models[model_num]["model_thumb_tiny"]
    model_type = global_vars.models[model_num]["model_type"]
    for ppmanager, uis in text_stream(ppm, streamer, model_thumbnail_tiny, model_type):
        yield "", uis, prompt, str(res)

    ppm = post.strip_pong(ppm)
    yield "", ppm.build_uis(), prompt, str(res)

    # summarization
    # ppm.add_pingpong(
    #     PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
    # )
    # yield "", ppm.build_uis(), prompt, state
    # ppm.pop_pingpong()

    # ppm = summarize(
    #     ppm, ctx_sum_prompt, ctx_num_lconv,
    #     sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
    #     sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
    # )
    yield "", ppm.build_uis(), prompt, str(res)
chats/central.py
ADDED
@@ -0,0 +1,156 @@
from chats import stablelm
from chats import alpaca
from chats import koalpaca
from chats import flan_alpaca
from chats import os_stablelm
from chats import vicuna
from chats import starchat
from chats import redpajama
from chats import mpt
from chats import alpacoom
from chats import baize
from chats import guanaco

def chat_stream(
    idx, local_data, user_message, state, model_num,
    global_context, ctx_num_lconv, ctx_sum_prompt,
    res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
):
    model_type = state["model_type"]

    if model_type == "stablelm":
        cs = stablelm.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "baize":
        cs = baize.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "alpaca":
        cs = alpaca.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "alpaca-gpt4":
        cs = alpaca.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "alpacoom":
        cs = alpacoom.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "llama-deus":
        cs = alpaca.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "camel":
        cs = alpaca.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "koalpaca-polyglot":
        cs = koalpaca.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "flan-alpaca":
        cs = flan_alpaca.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "os-stablelm":
        cs = os_stablelm.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "t5-vicuna":
        cs = vicuna.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "stable-vicuna":
        cs = vicuna.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "vicuna":
        cs = vicuna.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "evolinstruct-vicuna":
        cs = vicuna.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "starchat":
        cs = starchat.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "mpt":
        cs = mpt.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "redpajama":
        cs = redpajama.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "guanaco":
        cs = guanaco.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    elif model_type == "nous-hermes":
        cs = alpaca.chat_stream(
            idx, local_data, user_message, state, model_num,
            global_context, ctx_num_lconv, ctx_sum_prompt,
            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
        )

    for idx, x in enumerate(cs):
        yield x
chats/flan_alpaca.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import copy
|
2 |
+
import json
|
3 |
+
import global_vars
|
4 |
+
from chats import pre, post
|
5 |
+
from pingpong import PingPong
|
6 |
+
from gens.batch_gen import get_output_batch
|
7 |
+
|
8 |
+
from pingpong.context import CtxLastWindowStrategy
|
9 |
+
|
10 |
+
def build_prompts(ppmanager, user_message, global_context, win_size=3):
|
11 |
+
dummy_ppm = copy.deepcopy(ppmanager)
|
12 |
+
|
13 |
+
dummy_ppm.ctx = global_context
|
14 |
+
for pingpong in dummy_ppm.pingpongs:
|
15 |
+
pong = pingpong.pong
|
16 |
+
first_sentence = pong.split("\n")[0]
|
17 |
+
if first_sentence != "" and \
|
18 |
+
pre.contains_image_markdown(first_sentence):
|
19 |
+
pong = ' '.join(pong.split("\n")[1:]).strip()
|
20 |
+
pingpong.pong = pong
|
21 |
+
|
22 |
+
lws = CtxLastWindowStrategy(win_size)
|
23 |
+
|
24 |
+
prompt = lws(dummy_ppm)
|
25 |
+
return prompt
|
26 |
+
|
27 |
+
def text_stream(ppmanager, streamer):
|
28 |
+
for new_text in streamer:
|
29 |
+
ppmanager.append_pong(new_text)
|
30 |
+
yield ppmanager, ppmanager.build_uis()
|
31 |
+
|
32 |
+
yield ppmanager, ppmanager.build_uis()
|
33 |
+
|
34 |
+
def summarize(
|
35 |
+
ppmanager, prompt_to_summarize, win_size,
|
36 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
37 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
38 |
+
):
|
39 |
+
ctx = ppmanager.ctx
|
40 |
+
last_pong = ppmanager.pingpongs[-1].pong
|
41 |
+
ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
|
42 |
+
prompt = ppmanager.build_prompts(from_idx=-win_size)
|
43 |
+
|
44 |
+
_, gen_config_summarization = pre.build_gen_config(
|
45 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
46 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
47 |
+
)
|
48 |
+
summarize_output = get_output_batch(
|
49 |
+
global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
|
50 |
+
)[0].split("-----")[-1].strip()
|
51 |
+
ppmanager.ctx = summarize_output
|
52 |
+
ppmanager.pop_pingpong()
|
53 |
+
return ppmanager
|
54 |
+
|
55 |
+
def chat_stream(
|
56 |
+
idx, local_data, user_message, state, model_num,
|
57 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
58 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
59 |
+
):
|
60 |
+
res = [
|
61 |
+
state["ppmanager_type"].from_json(json.dumps(ppm))
|
62 |
+
for ppm in local_data
|
63 |
+
]
|
64 |
+
|
65 |
+
ppm = res[idx]
|
66 |
+
|
67 |
+
# add_ping returns a prompt structured in Alpaca form
|
68 |
+
ppm.add_pingpong(
|
69 |
+
PingPong(user_message, "")
|
70 |
+
)
|
71 |
+
prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
|
72 |
+
|
73 |
+
# prepare text generating streamer & start generating
|
74 |
+
gen_kwargs, streamer = pre.build(
|
75 |
+
prompt,
|
76 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts,
|
77 |
+
res_beams, res_cache, res_sample, res_eosid, res_padid,
|
78 |
+
return_token_type_ids=False
|
79 |
+
)
|
80 |
+
pre.start_gen(gen_kwargs)
|
81 |
+
|
82 |
+
# handling stream
|
83 |
+
for ppmanager, uis in text_stream(ppm, streamer):
|
84 |
+
yield "", uis, prompt, str(res)
|
85 |
+
|
86 |
+
ppm = post.strip_pong(ppm)
|
87 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
88 |
+
|
89 |
+
# summarization
|
90 |
+
# ppm.add_pingpong(
|
91 |
+
# PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
|
92 |
+
# )
|
93 |
+
# yield "", ppm.build_uis(), prompt, state
|
94 |
+
# ppm.pop_pingpong()
|
95 |
+
|
96 |
+
# ppm = summarize(
|
97 |
+
# ppm, ctx_sum_prompt, ctx_num_lconv,
|
98 |
+
# sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
|
99 |
+
# sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
|
100 |
+
# )
|
101 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
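build_prompts in each handler strips a leading image-markdown line (the model-thumbnail header some handlers prepend to a response) before re-building the prompt; a self-contained sketch of that cleanup using plain strings and the same regex as chats.pre.contains_image_markdown (the URL is made up):

import re

def contains_image_markdown(string):
    return re.compile(r'!\[(.*?)\]\((.*?)\)').search(string)

pong = "![](https://example.com/thumb.png)***[guanaco]***\nActual answer text"
first_sentence = pong.split("\n")[0]
if first_sentence != "" and contains_image_markdown(first_sentence):
    # drop the thumbnail line, keep only the real response
    pong = ' '.join(pong.split("\n")[1:]).strip()
print(pong)  # -> Actual answer text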
chats/guanaco.py
ADDED
@@ -0,0 +1,120 @@
1 |
+
import torch
|
2 |
+
from transformers import StoppingCriteria, StoppingCriteriaList
|
3 |
+
|
4 |
+
import copy
|
5 |
+
import json
|
6 |
+
import global_vars
|
7 |
+
from chats import pre, post
|
8 |
+
from pingpong import PingPong
|
9 |
+
from gens.batch_gen import get_output_batch
|
10 |
+
|
11 |
+
from pingpong.context import CtxLastWindowStrategy
|
12 |
+
|
13 |
+
class StopOnTokens(StoppingCriteria):
|
14 |
+
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
|
15 |
+
stop_token_ids = [0]
|
16 |
+
|
17 |
+
for stop_id in stop_token_ids:
|
18 |
+
if input_ids[0][-1] == stop_id:
|
19 |
+
return True
|
20 |
+
return False
|
21 |
+
|
22 |
+
def build_prompts(ppmanager, user_message, global_context, win_size=3):
|
23 |
+
dummy_ppm = copy.deepcopy(ppmanager)
|
24 |
+
|
25 |
+
dummy_ppm.ctx = global_context
|
26 |
+
for pingpong in dummy_ppm.pingpongs:
|
27 |
+
pong = pingpong.pong
|
28 |
+
first_sentence = pong.split("\n")[0]
|
29 |
+
if first_sentence != "" and \
|
30 |
+
pre.contains_image_markdown(first_sentence):
|
31 |
+
pong = ' '.join(pong.split("\n")[1:]).strip()
|
32 |
+
pingpong.pong = pong
|
33 |
+
|
34 |
+
lws = CtxLastWindowStrategy(win_size)
|
35 |
+
|
36 |
+
prompt = lws(dummy_ppm)
|
37 |
+
return prompt
|
38 |
+
|
39 |
+
def text_stream(ppmanager, streamer, model_thumbnail_tiny, model_type):
|
40 |
+
count = 0
|
41 |
+
|
42 |
+
for new_text in streamer:
|
43 |
+
if count == 0:
|
44 |
+
ppmanager.append_pong(f"![]({model_thumbnail_tiny})***[{model_type}]***\n")
|
45 |
+
count = count + 1
|
46 |
+
|
47 |
+
ppmanager.append_pong(new_text)
|
48 |
+
yield ppmanager, ppmanager.build_uis()
|
49 |
+
|
50 |
+
yield ppmanager, ppmanager.build_uis()
|
51 |
+
|
52 |
+
def summarize(
|
53 |
+
ppmanager, prompt_to_summarize, win_size,
|
54 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
55 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
56 |
+
):
|
57 |
+
ctx = ppmanager.ctx
|
58 |
+
last_pong = ppmanager.pingpongs[-1].pong
|
59 |
+
ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
|
60 |
+
prompt = ppmanager.build_prompts(from_idx=-win_size)
|
61 |
+
|
62 |
+
_, gen_config_summarization = pre.build_gen_config(
|
63 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
64 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
65 |
+
)
|
66 |
+
summarize_output = get_output_batch(
|
67 |
+
global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
|
68 |
+
)[0].split(prompt_to_summarize)[-1].strip()
|
69 |
+
ppmanager.ctx = summarize_output
|
70 |
+
ppmanager.pop_pingpong()
|
71 |
+
return ppmanager
|
72 |
+
|
73 |
+
def chat_stream(
|
74 |
+
idx, local_data, user_message, state, model_num,
|
75 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
76 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
77 |
+
):
|
78 |
+
res = [
|
79 |
+
state["ppmanager_type"].from_json(json.dumps(ppm))
|
80 |
+
for ppm in local_data
|
81 |
+
]
|
82 |
+
|
83 |
+
ppm = res[idx]
|
84 |
+
|
85 |
+
# add_ping returns a prompt structured in Alpaca form
|
86 |
+
ppm.add_pingpong(
|
87 |
+
PingPong(user_message, "")
|
88 |
+
)
|
89 |
+
prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
|
90 |
+
|
91 |
+
# prepare text generating streamer & start generating
|
92 |
+
gen_kwargs, streamer = pre.build(
|
93 |
+
prompt, model_num,
|
94 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts,
|
95 |
+
res_beams, res_cache, res_sample, res_eosid, res_padid,
|
96 |
+
StoppingCriteriaList([StopOnTokens()]), False
|
97 |
+
)
|
98 |
+
pre.start_gen(gen_kwargs, model_num)
|
99 |
+
|
100 |
+
model_thumbnail_tiny = global_vars.models[model_num]["model_thumb_tiny"]
|
101 |
+
model_type = global_vars.models[model_num]["model_type"]
|
102 |
+
for ppmanager, uis in text_stream(ppm, streamer, model_thumbnail_tiny, model_type):
|
103 |
+
yield "", uis, prompt, str(res)
|
104 |
+
|
105 |
+
ppm = post.strip_pong(ppm)
|
106 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
107 |
+
|
108 |
+
# summarization
|
109 |
+
# ppm.add_pingpong(
|
110 |
+
# PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
|
111 |
+
# )
|
112 |
+
# yield "", ppm.build_uis(), prompt, state
|
113 |
+
# ppm.pop_pingpong()
|
114 |
+
|
115 |
+
# ppm = summarize(
|
116 |
+
# ppm, ctx_sum_prompt, ctx_num_lconv,
|
117 |
+
# sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
|
118 |
+
# sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
|
119 |
+
# )
|
120 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
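guanaco's StopOnTokens halts generation as soon as the last sampled token id equals one of its stop ids (only 0 here); an illustrative check with made-up token ids:

import torch

criterion = StopOnTokens()
running = torch.tensor([[523, 91, 1280]])   # last id != 0, hypothetical values
stopped = torch.tensor([[523, 91, 0]])      # last id == 0
print(criterion(running, scores=None))      # False -> generation continues
print(criterion(stopped, scores=None))      # True  -> generation stops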
chats/koalpaca.py
ADDED
@@ -0,0 +1,101 @@
1 |
+
import copy
|
2 |
+
import json
|
3 |
+
import global_vars
|
4 |
+
from chats import pre, post
|
5 |
+
from pingpong import PingPong
|
6 |
+
from gens.batch_gen import get_output_batch
|
7 |
+
|
8 |
+
from pingpong.context import CtxLastWindowStrategy
|
9 |
+
|
10 |
+
def build_prompts(ppmanager, user_message, global_context, win_size=3):
|
11 |
+
dummy_ppm = copy.deepcopy(ppmanager)
|
12 |
+
|
13 |
+
dummy_ppm.ctx = global_context
|
14 |
+
for pingpong in dummy_ppm.pingpongs:
|
15 |
+
pong = pingpong.pong
|
16 |
+
first_sentence = pong.split("\n")[0]
|
17 |
+
if first_sentence != "" and \
|
18 |
+
pre.contains_image_markdown(first_sentence):
|
19 |
+
pong = ' '.join(pong.split("\n")[1:]).strip()
|
20 |
+
pingpong.pong = pong
|
21 |
+
|
22 |
+
lws = CtxLastWindowStrategy(win_size)
|
23 |
+
|
24 |
+
prompt = lws(dummy_ppm)
|
25 |
+
return prompt
|
26 |
+
|
27 |
+
def text_stream(ppmanager, streamer):
|
28 |
+
for new_text in streamer:
|
29 |
+
ppmanager.append_pong(new_text)
|
30 |
+
yield ppmanager, ppmanager.build_uis()
|
31 |
+
|
32 |
+
yield ppmanager, ppmanager.build_uis()
|
33 |
+
|
34 |
+
def summarize(
|
35 |
+
ppmanager, prompt_to_summarize, win_size,
|
36 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
37 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
38 |
+
):
|
39 |
+
ctx = ppmanager.ctx
|
40 |
+
last_pong = ppmanager.pingpongs[-1].pong
|
41 |
+
ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
|
42 |
+
prompt = ppmanager.build_prompts(from_idx=-win_size)
|
43 |
+
|
44 |
+
_, gen_config_summarization = pre.build_gen_config(
|
45 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
46 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
47 |
+
)
|
48 |
+
summarize_output = get_output_batch(
|
49 |
+
global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
|
50 |
+
)[0].split("### 응답:")[-1].strip()
|
51 |
+
ppmanager.ctx = summarize_output
|
52 |
+
ppmanager.pop_pingpong()
|
53 |
+
return ppmanager
|
54 |
+
|
55 |
+
def chat_stream(
|
56 |
+
idx, local_data, user_message, state, model_num,
|
57 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
58 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
59 |
+
):
|
60 |
+
res = [
|
61 |
+
state["ppmanager_type"].from_json(json.dumps(ppm))
|
62 |
+
for ppm in local_data
|
63 |
+
]
|
64 |
+
|
65 |
+
ppm = res[idx]
|
66 |
+
|
67 |
+
# add_ping returns a prompt structured in Alpaca form
|
68 |
+
ppm.add_pingpong(
|
69 |
+
PingPong(user_message, "")
|
70 |
+
)
|
71 |
+
prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
|
72 |
+
|
73 |
+
# prepare text generating streamer & start generating
|
74 |
+
gen_kwargs, streamer = pre.build(
|
75 |
+
prompt, model_num,
|
76 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts,
|
77 |
+
res_beams, res_cache, res_sample, res_eosid, res_padid,
|
78 |
+
return_token_type_ids=False
|
79 |
+
)
|
80 |
+
pre.start_gen(gen_kwargs, model_num)
|
81 |
+
|
82 |
+
# handling stream
|
83 |
+
for ppmanager, uis in text_stream(ppm, streamer):
|
84 |
+
yield "", uis, prompt, str(res)
|
85 |
+
|
86 |
+
ppm = post.strip_pong(ppm)
|
87 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
88 |
+
|
89 |
+
# summarization
|
90 |
+
# ppm.add_pingpong(
|
91 |
+
# PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
|
92 |
+
# )
|
93 |
+
# yield "", ppm.build_uis(), prompt, state
|
94 |
+
# ppm.pop_pingpong()
|
95 |
+
|
96 |
+
# ppm = summarize(
|
97 |
+
# ppm, ctx_sum_prompt, ctx_num_lconv,
|
98 |
+
# sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
|
99 |
+
# sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
|
100 |
+
# )
|
101 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
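koalpaca's summarize takes everything after the last "### 응답:" ("### Response:") marker of the generated text as the new context; a sketch with a made-up completion string:

generated = "### 명령어: summarize the chat so far\n### 응답: The user asked about GPT."
summary = generated.split("### 응답:")[-1].strip()
print(summary)  # -> The user asked about GPT.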
chats/mpt.py
ADDED
@@ -0,0 +1,118 @@
1 |
+
import torch
|
2 |
+
from transformers import StoppingCriteria, StoppingCriteriaList
|
3 |
+
|
4 |
+
import copy
|
5 |
+
import json
|
6 |
+
import global_vars
|
7 |
+
from chats import pre, post
|
8 |
+
from pingpong import PingPong
|
9 |
+
from gens.batch_gen import get_output_batch
|
10 |
+
|
11 |
+
from pingpong.context import CtxLastWindowStrategy
|
12 |
+
|
13 |
+
class StopOnTokens(StoppingCriteria):
|
14 |
+
def __init__(self, tokenizer):
|
15 |
+
super().__init__()
|
16 |
+
|
17 |
+
self.stop_token_ids = tokenizer.convert_tokens_to_ids(
|
18 |
+
["<|im_end|>", "<|endoftext|>"]
|
19 |
+
)
|
20 |
+
|
21 |
+
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
|
22 |
+
for stop_id in self.stop_token_ids:
|
23 |
+
if input_ids[0][-1] == stop_id:
|
24 |
+
return True
|
25 |
+
return False
|
26 |
+
|
27 |
+
def build_prompts(ppmanager, user_message, global_context, win_size=3):
|
28 |
+
dummy_ppm = copy.deepcopy(ppmanager)
|
29 |
+
|
30 |
+
dummy_ppm.ctx = global_context
|
31 |
+
for pingpong in dummy_ppm.pingpongs:
|
32 |
+
pong = pingpong.pong
|
33 |
+
first_sentence = pong.split("\n")[0]
|
34 |
+
if first_sentence != "" and \
|
35 |
+
pre.contains_image_markdown(first_sentence):
|
36 |
+
pong = ' '.join(pong.split("\n")[1:]).strip()
|
37 |
+
pingpong.pong = pong
|
38 |
+
|
39 |
+
lws = CtxLastWindowStrategy(win_size)
|
40 |
+
|
41 |
+
prompt = lws(dummy_ppm)
|
42 |
+
return prompt
|
43 |
+
|
44 |
+
def text_stream(ppmanager, streamer):
|
45 |
+
for new_text in streamer:
|
46 |
+
ppmanager.append_pong(new_text)
|
47 |
+
yield ppmanager, ppmanager.build_uis()
|
48 |
+
|
49 |
+
yield ppmanager, ppmanager.build_uis()
|
50 |
+
|
51 |
+
def summarize(
|
52 |
+
ppmanager, prompt_to_summarize, win_size,
|
53 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
54 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
55 |
+
):
|
56 |
+
ctx = ppmanager.ctx
|
57 |
+
last_pong = ppmanager.pingpongs[-1].pong
|
58 |
+
ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
|
59 |
+
prompt = ppmanager.build_prompts(from_idx=-win_size)
|
60 |
+
|
61 |
+
_, gen_config_summarization = pre.build_gen_config(
|
62 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
63 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
64 |
+
)
|
65 |
+
summarize_output = get_output_batch(
|
66 |
+
global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
|
67 |
+
)[0].strip()
|
68 |
+
ppmanager.ctx = summarize_output
|
69 |
+
ppmanager.pop_pingpong()
|
70 |
+
return ppmanager
|
71 |
+
|
72 |
+
def chat_stream(
|
73 |
+
idx, local_data, user_message, state, model_num,
|
74 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
75 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
76 |
+
):
|
77 |
+
res = [
|
78 |
+
state["ppmanager_type"].from_json(json.dumps(ppm))
|
79 |
+
for ppm in local_data
|
80 |
+
]
|
81 |
+
|
82 |
+
ppm = res[idx]
|
83 |
+
|
84 |
+
# add_ping returns a prompt structured in Alpaca form
|
85 |
+
ppm.add_pingpong(
|
86 |
+
PingPong(user_message, "")
|
87 |
+
)
|
88 |
+
prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
|
89 |
+
|
90 |
+
# prepare text generating streamer & start generating
|
91 |
+
gen_kwargs, streamer = pre.build(
|
92 |
+
prompt,
|
93 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts,
|
94 |
+
res_beams, res_cache, res_sample, res_eosid, res_padid,
|
95 |
+
StoppingCriteriaList([StopOnTokens(global_vars.tokenizer)]), False
|
96 |
+
)
|
97 |
+
pre.start_gen(gen_kwargs)
|
98 |
+
|
99 |
+
# handling stream
|
100 |
+
for ppmanager, uis in text_stream(ppm, streamer):
|
101 |
+
yield "", uis, prompt, str(res)
|
102 |
+
|
103 |
+
ppm = post.strip_pong(ppm)
|
104 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
105 |
+
|
106 |
+
# summarization
|
107 |
+
# ppm.add_pingpong(
|
108 |
+
# PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
|
109 |
+
# )
|
110 |
+
# yield "", ppm.build_uis(), prompt, state
|
111 |
+
# ppm.pop_pingpong()
|
112 |
+
|
113 |
+
# ppm = summarize(
|
114 |
+
# ppm, ctx_sum_prompt, ctx_num_lconv,
|
115 |
+
# sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
|
116 |
+
# sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
|
117 |
+
# )
|
118 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
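Unlike the other handlers, mpt's StopOnTokens looks its stop ids up from the tokenizer instead of hard-coding them; a sketch (the checkpoint name is an assumption, and the printed ids depend on that tokenizer's vocabulary):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("mosaicml/mpt-7b-chat")  # assumed MPT chat checkpoint
criteria = StopOnTokens(tok)
print(criteria.stop_token_ids)  # ids assigned to "<|im_end|>" and "<|endoftext|>"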
chats/os_stablelm.py
ADDED
@@ -0,0 +1,112 @@
1 |
+
import torch
|
2 |
+
from transformers import StoppingCriteria, StoppingCriteriaList
|
3 |
+
|
4 |
+
import copy
|
5 |
+
import json
|
6 |
+
import global_vars
|
7 |
+
from chats import pre, post
|
8 |
+
from pingpong import PingPong
|
9 |
+
from gens.batch_gen import get_output_batch
|
10 |
+
|
11 |
+
from pingpong.context import CtxLastWindowStrategy
|
12 |
+
|
13 |
+
class StopOnTokens(StoppingCriteria):
|
14 |
+
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
|
15 |
+
stop_ids = [50278, 50279, 50277, 1, 0]
|
16 |
+
for stop_id in stop_ids:
|
17 |
+
if input_ids[0][-1] == stop_id:
|
18 |
+
return True
|
19 |
+
return False
|
20 |
+
|
21 |
+
def build_prompts(ppmanager, user_message, global_context, win_size=3):
|
22 |
+
dummy_ppm = copy.deepcopy(ppmanager)
|
23 |
+
|
24 |
+
dummy_ppm.ctx = global_context
|
25 |
+
for pingpong in dummy_ppm.pingpongs:
|
26 |
+
pong = pingpong.pong
|
27 |
+
first_sentence = pong.split("\n")[0]
|
28 |
+
if first_sentence != "" and \
|
29 |
+
pre.contains_image_markdown(first_sentence):
|
30 |
+
pong = ' '.join(pong.split("\n")[1:]).strip()
|
31 |
+
pingpong.pong = pong
|
32 |
+
|
33 |
+
lws = CtxLastWindowStrategy(win_size)
|
34 |
+
|
35 |
+
prompt = lws(dummy_ppm)
|
36 |
+
return prompt
|
37 |
+
|
38 |
+
def text_stream(ppmanager, streamer):
|
39 |
+
for new_text in streamer:
|
40 |
+
ppmanager.append_pong(new_text)
|
41 |
+
yield ppmanager, ppmanager.build_uis()
|
42 |
+
|
43 |
+
yield ppmanager, ppmanager.build_uis()
|
44 |
+
|
45 |
+
def summarize(
|
46 |
+
ppmanager, prompt_to_summarize, win_size,
|
47 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
48 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
49 |
+
):
|
50 |
+
ctx = ppmanager.ctx
|
51 |
+
last_pong = ppmanager.pingpongs[-1].pong
|
52 |
+
ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
|
53 |
+
prompt = ppmanager.build_prompts(from_idx=-win_size)
|
54 |
+
|
55 |
+
_, gen_config_summarization = pre.build_gen_config(
|
56 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
57 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
58 |
+
)
|
59 |
+
summarize_output = get_output_batch(
|
60 |
+
global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
|
61 |
+
)[0].split(prompt_to_summarize)[-1].strip()
|
62 |
+
ppmanager.ctx = summarize_output
|
63 |
+
ppmanager.pop_pingpong()
|
64 |
+
return ppmanager
|
65 |
+
|
66 |
+
def chat_stream(
|
67 |
+
idx, local_data, user_message, state, model_num,
|
68 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
69 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
70 |
+
):
|
71 |
+
res = [
|
72 |
+
state["ppmanager_type"].from_json(json.dumps(ppm))
|
73 |
+
for ppm in local_data
|
74 |
+
]
|
75 |
+
|
76 |
+
ppm = res[idx]
|
77 |
+
|
78 |
+
# add_ping returns a prompt structured in Alpaca form
|
79 |
+
ppm.add_pingpong(
|
80 |
+
PingPong(user_message, "")
|
81 |
+
)
|
82 |
+
prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
|
83 |
+
|
84 |
+
# prepare text generating streamer & start generating
|
85 |
+
gen_kwargs, streamer = pre.build(
|
86 |
+
prompt,
|
87 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts,
|
88 |
+
res_beams, res_cache, res_sample, res_eosid, res_padid,
|
89 |
+
StoppingCriteriaList([StopOnTokens()]), False
|
90 |
+
)
|
91 |
+
pre.start_gen(gen_kwargs)
|
92 |
+
|
93 |
+
# handling stream
|
94 |
+
for ppmanager, uis in text_stream(ppm, streamer):
|
95 |
+
yield "", uis, prompt, str(res)
|
96 |
+
|
97 |
+
ppm = post.strip_pong(ppm)
|
98 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
99 |
+
|
100 |
+
# summarization
|
101 |
+
# ppm.add_pingpong(
|
102 |
+
# PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
|
103 |
+
# )
|
104 |
+
# yield "", ppm.build_uis(), prompt, state
|
105 |
+
# ppm.pop_pingpong()
|
106 |
+
|
107 |
+
# ppm = summarize(
|
108 |
+
# ppm, ctx_sum_prompt, ctx_num_lconv,
|
109 |
+
# sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
|
110 |
+
# sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
|
111 |
+
# )
|
112 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
chats/post.py
ADDED
@@ -0,0 +1,3 @@
def strip_pong(ppmanager):
    ppmanager.pingpongs[-1].pong = ppmanager.pingpongs[-1].pong.strip()
    return ppmanager
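strip_pong only trims whitespace from the most recent response once streaming has finished; a tiny sketch with stand-in classes in place of a real PingPongManager:

class _Turn:                      # illustration-only stand-ins
    def __init__(self, pong): self.pong = pong

class _Manager:
    def __init__(self): self.pingpongs = [_Turn("  final answer \n")]

ppm = strip_pong(_Manager())
print(repr(ppm.pingpongs[-1].pong))  # 'final answer'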
chats/pre.py
ADDED
@@ -0,0 +1,97 @@
import re
import copy
import global_vars
from threading import Thread
from transformers import TextIteratorStreamer
from transformers import GenerationConfig

def contains_image_markdown(string):
    regex = re.compile(r'!\[(.*?)\]\((.*?)\)')
    match = regex.search(string)
    return match

def build_model_inputs(prompt, model_num, return_token_type_ids):
    model_inputs = global_vars.models[model_num]["tokenizer"](
        [prompt],
        return_tensors="pt",
        return_token_type_ids=return_token_type_ids
    ).to("cuda")
    return model_inputs

def build_streamer(
    model_num,
    timeout=20.,
    skip_prompt=True,
    skip_special_tokens=True
):
    streamer = TextIteratorStreamer(
        global_vars.models[model_num]["tokenizer"],
        timeout=timeout,
        skip_prompt=skip_prompt,
        skip_special_tokens=skip_special_tokens
    )
    return streamer


def build_gen_config(
    temperature, top_p, top_k, repetition_penalty, max_new_tokens,
    num_beams, use_cache, do_sample, eos_token_id, pad_token_id
):
    gen_config_raw = {
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "repetition_penalty": repetition_penalty,
        "max_new_tokens": max_new_tokens,
        "num_beams": num_beams,
        "use_cache": use_cache,
        "do_sample": do_sample,
        "eos_token_id": eos_token_id,
        "pad_token_id": pad_token_id
    }

    return gen_config_raw, GenerationConfig(**gen_config_raw)

def build_gen_kwargs(
    gen_config,
    model_inputs,
    streamer,
    stopping_criteria
):
    gen_kwargs = dict(
        model_inputs,
        streamer=streamer,
        stopping_criteria=stopping_criteria
    )
    gen_kwargs.update(gen_config)
    return gen_kwargs

def start_gen(gen_kwargs, model_num):
    t = Thread(
        target=global_vars.models[model_num]["model"].generate,
        kwargs=gen_kwargs
    )
    t.start()

def build(
    prompt, model_num,
    temperature, top_p, top_k, repetition_penalty, max_new_tokens,
    num_beams, use_cache, do_sample, eos_token_id, pad_token_id,
    stopping_criteria=None, return_token_type_ids=True
):
    gen_config_raw, _ = build_gen_config(
        temperature, top_p, top_k, repetition_penalty, max_new_tokens,
        num_beams, use_cache, do_sample, eos_token_id, pad_token_id
    )

    model_inputs = build_model_inputs(
        prompt, model_num, return_token_type_ids=return_token_type_ids
    )
    streamer = build_streamer(model_num)
    gen_kwargs = build_gen_kwargs(
        gen_config_raw,
        model_inputs,
        streamer,
        stopping_criteria
    )
    return gen_kwargs, streamer
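Taken together, a chat handler uses these helpers roughly as follows (a sketch: model index 0 must already be loaded into global_vars.models, and the prompt and sampling values are placeholders):

gen_kwargs, streamer = build(
    "### Instruction:\nsay hi\n\n### Response:", 0,
    0.95, 0.9, 50, 1.2, 128,     # temperature, top_p, top_k, repetition_penalty, max_new_tokens
    1, True, True, None, None,   # num_beams, use_cache, do_sample, eos_token_id, pad_token_id
)
start_gen(gen_kwargs, 0)         # model.generate runs on a background thread
for piece in streamer:           # TextIteratorStreamer yields decoded chunks as they arrive
    print(piece, end="", flush=True)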
chats/redpajama.py
ADDED
@@ -0,0 +1,101 @@
1 |
+
import copy
|
2 |
+
import json
|
3 |
+
import global_vars
|
4 |
+
from chats import pre, post
|
5 |
+
from pingpong import PingPong
|
6 |
+
from gens.batch_gen import get_output_batch
|
7 |
+
|
8 |
+
from pingpong.context import CtxLastWindowStrategy
|
9 |
+
|
10 |
+
def build_prompts(ppmanager, user_message, global_context, win_size=3):
|
11 |
+
dummy_ppm = copy.deepcopy(ppmanager)
|
12 |
+
|
13 |
+
dummy_ppm.ctx = global_context
|
14 |
+
for pingpong in dummy_ppm.pingpongs:
|
15 |
+
pong = pingpong.pong
|
16 |
+
first_sentence = pong.split("\n")[0]
|
17 |
+
if first_sentence != "" and \
|
18 |
+
pre.contains_image_markdown(first_sentence):
|
19 |
+
pong = ' '.join(pong.split("\n")[1:]).strip()
|
20 |
+
pingpong.pong = pong
|
21 |
+
|
22 |
+
lws = CtxLastWindowStrategy(win_size)
|
23 |
+
|
24 |
+
prompt = lws(dummy_ppm)
|
25 |
+
return prompt
|
26 |
+
|
27 |
+
def text_stream(ppmanager, streamer):
|
28 |
+
for new_text in streamer:
|
29 |
+
ppmanager.append_pong(new_text)
|
30 |
+
yield ppmanager, ppmanager.build_uis()
|
31 |
+
|
32 |
+
yield ppmanager, ppmanager.build_uis()
|
33 |
+
|
34 |
+
def summarize(
|
35 |
+
ppmanager, prompt_to_summarize, win_size,
|
36 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
37 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
38 |
+
):
|
39 |
+
ctx = ppmanager.ctx
|
40 |
+
last_pong = ppmanager.pingpongs[-1].pong
|
41 |
+
ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
|
42 |
+
prompt = ppmanager.build_prompts(from_idx=-win_size)
|
43 |
+
|
44 |
+
_, gen_config_summarization = pre.build_gen_config(
|
45 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
46 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
47 |
+
)
|
48 |
+
summarize_output = get_output_batch(
|
49 |
+
global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
|
50 |
+
)[0].strip()
|
51 |
+
ppmanager.ctx = summarize_output
|
52 |
+
ppmanager.pop_pingpong()
|
53 |
+
return ppmanager
|
54 |
+
|
55 |
+
def chat_stream(
|
56 |
+
idx, local_data, user_message, state, model_num,
|
57 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
58 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
59 |
+
):
|
60 |
+
res = [
|
61 |
+
state["ppmanager_type"].from_json(json.dumps(ppm))
|
62 |
+
for ppm in local_data
|
63 |
+
]
|
64 |
+
|
65 |
+
ppm = res[idx]
|
66 |
+
|
67 |
+
# add_ping returns a prompt structured in Alpaca form
|
68 |
+
ppm.add_pingpong(
|
69 |
+
PingPong(user_message, "")
|
70 |
+
)
|
71 |
+
prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
|
72 |
+
|
73 |
+
# prepare text generating streamer & start generating
|
74 |
+
gen_kwargs, streamer = pre.build(
|
75 |
+
prompt,
|
76 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts,
|
77 |
+
res_beams, res_cache, res_sample, res_eosid, res_padid,
|
78 |
+
return_token_type_ids=False
|
79 |
+
)
|
80 |
+
pre.start_gen(gen_kwargs)
|
81 |
+
|
82 |
+
# handling stream
|
83 |
+
for ppmanager, uis in text_stream(ppm, streamer):
|
84 |
+
yield "", uis, prompt, str(res)
|
85 |
+
|
86 |
+
ppm = post.strip_pong(ppm)
|
87 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
88 |
+
|
89 |
+
# summarization
|
90 |
+
# ppm.add_pingpong(
|
91 |
+
# PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
|
92 |
+
# )
|
93 |
+
# yield "", ppm.build_uis(), prompt, state
|
94 |
+
# ppm.pop_pingpong()
|
95 |
+
|
96 |
+
# ppm = summarize(
|
97 |
+
# ppm, ctx_sum_prompt, ctx_num_lconv,
|
98 |
+
# sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
|
99 |
+
# sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
|
100 |
+
# )
|
101 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
chats/stablelm.py
ADDED
@@ -0,0 +1,112 @@
1 |
+
import torch
|
2 |
+
from transformers import StoppingCriteria, StoppingCriteriaList
|
3 |
+
|
4 |
+
import copy
|
5 |
+
import json
|
6 |
+
import global_vars
|
7 |
+
from chats import pre, post
|
8 |
+
from pingpong import PingPong
|
9 |
+
from gens.batch_gen import get_output_batch
|
10 |
+
|
11 |
+
from pingpong.context import CtxLastWindowStrategy
|
12 |
+
|
13 |
+
class StopOnTokens(StoppingCriteria):
|
14 |
+
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
|
15 |
+
stop_ids = [50278, 50279, 50277, 1, 0]
|
16 |
+
for stop_id in stop_ids:
|
17 |
+
if input_ids[0][-1] == stop_id:
|
18 |
+
return True
|
19 |
+
return False
|
20 |
+
|
21 |
+
def build_prompts(ppmanager, user_message, global_context, win_size=3):
|
22 |
+
dummy_ppm = copy.deepcopy(ppmanager)
|
23 |
+
|
24 |
+
dummy_ppm.ctx = global_context
|
25 |
+
for pingpong in dummy_ppm.pingpongs:
|
26 |
+
pong = pingpong.pong
|
27 |
+
first_sentence = pong.split("\n")[0]
|
28 |
+
if first_sentence != "" and \
|
29 |
+
pre.contains_image_markdown(first_sentence):
|
30 |
+
pong = ' '.join(pong.split("\n")[1:]).strip()
|
31 |
+
pingpong.pong = pong
|
32 |
+
|
33 |
+
lws = CtxLastWindowStrategy(win_size)
|
34 |
+
|
35 |
+
prompt = lws(dummy_ppm)
|
36 |
+
return prompt
|
37 |
+
|
38 |
+
def text_stream(ppmanager, streamer):
|
39 |
+
for new_text in streamer:
|
40 |
+
ppmanager.append_pong(new_text)
|
41 |
+
yield ppmanager, ppmanager.build_uis()
|
42 |
+
|
43 |
+
yield ppmanager, ppmanager.build_uis()
|
44 |
+
|
45 |
+
def summarize(
|
46 |
+
ppmanager, prompt_to_summarize, win_size,
|
47 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
48 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
49 |
+
):
|
50 |
+
ctx = ppmanager.ctx
|
51 |
+
last_pong = ppmanager.pingpongs[-1].pong
|
52 |
+
ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
|
53 |
+
prompt = ppmanager.build_prompts(from_idx=-win_size)
|
54 |
+
|
55 |
+
_, gen_config_summarization = pre.build_gen_config(
|
56 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
57 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
58 |
+
)
|
59 |
+
summarize_output = get_output_batch(
|
60 |
+
global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
|
61 |
+
)[0].split(prompt_to_summarize)[-1].strip()
|
62 |
+
ppmanager.ctx = summarize_output
|
63 |
+
ppmanager.pop_pingpong()
|
64 |
+
return ppmanager
|
65 |
+
|
66 |
+
def chat_stream(
|
67 |
+
idx, local_data, user_message, state, model_num,
|
68 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
69 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
70 |
+
):
|
71 |
+
res = [
|
72 |
+
state["ppmanager_type"].from_json(json.dumps(ppm))
|
73 |
+
for ppm in local_data
|
74 |
+
]
|
75 |
+
|
76 |
+
ppm = res[idx]
|
77 |
+
|
78 |
+
# add_ping returns a prompt structured in Alpaca form
|
79 |
+
ppm.add_pingpong(
|
80 |
+
PingPong(user_message, "")
|
81 |
+
)
|
82 |
+
prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
|
83 |
+
|
84 |
+
# prepare text generating streamer & start generating
|
85 |
+
gen_kwargs, streamer = pre.build(
|
86 |
+
prompt,
|
87 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts,
|
88 |
+
res_beams, res_cache, res_sample, res_eosid, res_padid,
|
89 |
+
StoppingCriteriaList([StopOnTokens()]), False
|
90 |
+
)
|
91 |
+
pre.start_gen(gen_kwargs)
|
92 |
+
|
93 |
+
# handling stream
|
94 |
+
for ppmanager, uis in text_stream(ppm, streamer):
|
95 |
+
yield "", uis, prompt, str(res)
|
96 |
+
|
97 |
+
ppm = post.strip_pong(ppm)
|
98 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
99 |
+
|
100 |
+
# summarization
|
101 |
+
# ppm.add_pingpong(
|
102 |
+
# PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
|
103 |
+
# )
|
104 |
+
# yield "", ppm.build_uis(), prompt, state
|
105 |
+
# ppm.pop_pingpong()
|
106 |
+
|
107 |
+
# ppm = summarize(
|
108 |
+
# ppm, ctx_sum_prompt, ctx_num_lconv,
|
109 |
+
# sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
|
110 |
+
# sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
|
111 |
+
# )
|
112 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
chats/starchat.py
ADDED
@@ -0,0 +1,112 @@
1 |
+
import torch
|
2 |
+
from transformers import StoppingCriteria, StoppingCriteriaList
|
3 |
+
|
4 |
+
import copy
|
5 |
+
import json
|
6 |
+
import global_vars
|
7 |
+
from chats import pre, post
|
8 |
+
from pingpong import PingPong
|
9 |
+
from gens.batch_gen import get_output_batch
|
10 |
+
|
11 |
+
from pingpong.context import CtxLastWindowStrategy
|
12 |
+
|
13 |
+
class StopOnTokens(StoppingCriteria):
|
14 |
+
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
|
15 |
+
stop_ids = [49155, 1, 0]
|
16 |
+
for stop_id in stop_ids:
|
17 |
+
if input_ids[0][-1] == stop_id:
|
18 |
+
return True
|
19 |
+
return False
|
20 |
+
|
21 |
+
def build_prompts(ppmanager, user_message, global_context, win_size=3):
|
22 |
+
dummy_ppm = copy.deepcopy(ppmanager)
|
23 |
+
|
24 |
+
dummy_ppm.ctx = global_context
|
25 |
+
for pingpong in dummy_ppm.pingpongs:
|
26 |
+
pong = pingpong.pong
|
27 |
+
first_sentence = pong.split("\n")[0]
|
28 |
+
if first_sentence != "" and \
|
29 |
+
pre.contains_image_markdown(first_sentence):
|
30 |
+
pong = ' '.join(pong.split("\n")[1:]).strip()
|
31 |
+
pingpong.pong = pong
|
32 |
+
|
33 |
+
lws = CtxLastWindowStrategy(win_size)
|
34 |
+
|
35 |
+
prompt = lws(dummy_ppm)
|
36 |
+
return prompt
|
37 |
+
|
38 |
+
def text_stream(ppmanager, streamer):
|
39 |
+
for new_text in streamer:
|
40 |
+
ppmanager.append_pong(new_text)
|
41 |
+
yield ppmanager, ppmanager.build_uis()
|
42 |
+
|
43 |
+
yield ppmanager, ppmanager.build_uis()
|
44 |
+
|
45 |
+
def summarize(
|
46 |
+
ppmanager, prompt_to_summarize, win_size,
|
47 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
48 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
49 |
+
):
|
50 |
+
ctx = ppmanager.ctx
|
51 |
+
last_pong = ppmanager.pingpongs[-1].pong
|
52 |
+
ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
|
53 |
+
prompt = ppmanager.build_prompts(from_idx=-win_size)
|
54 |
+
|
55 |
+
_, gen_config_summarization = pre.build_gen_config(
|
56 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
57 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
58 |
+
)
|
59 |
+
summarize_output = get_output_batch(
|
60 |
+
global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
|
61 |
+
)[0].strip()
|
62 |
+
ppmanager.ctx = summarize_output
|
63 |
+
ppmanager.pop_pingpong()
|
64 |
+
return ppmanager
|
65 |
+
|
66 |
+
def chat_stream(
|
67 |
+
idx, local_data, user_message, state, model_num,
|
68 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
69 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
70 |
+
):
|
71 |
+
res = [
|
72 |
+
state["ppmanager_type"].from_json(json.dumps(ppm))
|
73 |
+
for ppm in local_data
|
74 |
+
]
|
75 |
+
|
76 |
+
ppm = res[idx]
|
77 |
+
|
78 |
+
# add_ping returns a prompt structured in Alpaca form
|
79 |
+
ppm.add_pingpong(
|
80 |
+
PingPong(user_message, "")
|
81 |
+
)
|
82 |
+
prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
|
83 |
+
|
84 |
+
# prepare text generating streamer & start generating
|
85 |
+
gen_kwargs, streamer = pre.build(
|
86 |
+
prompt,
|
87 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts,
|
88 |
+
res_beams, res_cache, res_sample, res_eosid, res_padid,
|
89 |
+
StoppingCriteriaList([StopOnTokens()]), False
|
90 |
+
)
|
91 |
+
pre.start_gen(gen_kwargs)
|
92 |
+
|
93 |
+
# handling stream
|
94 |
+
for ppmanager, uis in text_stream(ppm, streamer):
|
95 |
+
yield "", uis, prompt, str(res)
|
96 |
+
|
97 |
+
ppm = post.strip_pong(ppm)
|
98 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
99 |
+
|
100 |
+
# summarization
|
101 |
+
# ppm.add_pingpong(
|
102 |
+
# PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
|
103 |
+
# )
|
104 |
+
# yield "", ppm.build_uis(), prompt, state
|
105 |
+
# ppm.pop_pingpong()
|
106 |
+
|
107 |
+
# ppm = summarize(
|
108 |
+
# ppm, ctx_sum_prompt, ctx_num_lconv,
|
109 |
+
# sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
|
110 |
+
# sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
|
111 |
+
# )
|
112 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
chats/vicuna.py
ADDED
@@ -0,0 +1,109 @@
1 |
+
import copy
|
2 |
+
import json
|
3 |
+
import global_vars
|
4 |
+
from chats import pre, post
|
5 |
+
from pingpong import PingPong
|
6 |
+
from gens.batch_gen import get_output_batch
|
7 |
+
|
8 |
+
from pingpong.context import CtxLastWindowStrategy
|
9 |
+
|
10 |
+
def build_prompts(ppmanager, user_message, global_context, win_size=3):
|
11 |
+
dummy_ppm = copy.deepcopy(ppmanager)
|
12 |
+
|
13 |
+
dummy_ppm.ctx = global_context
|
14 |
+
for pingpong in dummy_ppm.pingpongs:
|
15 |
+
pong = pingpong.pong
|
16 |
+
first_sentence = pong.split("\n")[0]
|
17 |
+
if first_sentence != "" and \
|
18 |
+
pre.contains_image_markdown(first_sentence):
|
19 |
+
pong = ' '.join(pong.split("\n")[1:]).strip()
|
20 |
+
pingpong.pong = pong
|
21 |
+
|
22 |
+
lws = CtxLastWindowStrategy(win_size)
|
23 |
+
|
24 |
+
prompt = lws(dummy_ppm)
|
25 |
+
return prompt
|
26 |
+
|
27 |
+
def text_stream(ppmanager, streamer, model_thumbnail_tiny, model_type):
|
28 |
+
count = 0
|
29 |
+
|
30 |
+
for new_text in streamer:
|
31 |
+
if count == 0:
|
32 |
+
ppmanager.append_pong(f"![]({model_thumbnail_tiny})***[{model_type}]***\n")
|
33 |
+
count = count + 1
|
34 |
+
|
35 |
+
ppmanager.append_pong(new_text)
|
36 |
+
yield ppmanager, ppmanager.build_uis()
|
37 |
+
|
38 |
+
yield ppmanager, ppmanager.build_uis()
|
39 |
+
|
40 |
+
def summarize(
|
41 |
+
ppmanager, prompt_to_summarize, win_size,
|
42 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
43 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
44 |
+
):
|
45 |
+
ctx = ppmanager.ctx
|
46 |
+
last_pong = ppmanager.pingpongs[-1].pong
|
47 |
+
ppmanager.add_pingpong(PingPong(prompt_to_summarize, ""))
|
48 |
+
prompt = ppmanager.build_prompts(from_idx=-win_size)
|
49 |
+
|
50 |
+
_, gen_config_summarization = pre.build_gen_config(
|
51 |
+
temperature, top_p, top_k, repetition_penalty, max_new_tokens,
|
52 |
+
num_beams, use_cache, do_sample, eos_token_id, pad_token_id
|
53 |
+
)
|
54 |
+
summarize_output = get_output_batch(
|
55 |
+
global_vars.model, global_vars.tokenizer, [prompt], gen_config_summarization
|
56 |
+
)[0].strip()
|
57 |
+
ppmanager.ctx = summarize_output
|
58 |
+
ppmanager.pop_pingpong()
|
59 |
+
return ppmanager
|
60 |
+
|
61 |
+
def chat_stream(
|
62 |
+
idx, local_data, user_message, state, model_num,
|
63 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
64 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
65 |
+
):
|
66 |
+
res = [
|
67 |
+
state["ppmanager_type"].from_json(json.dumps(ppm))
|
68 |
+
for ppm in local_data
|
69 |
+
]
|
70 |
+
|
71 |
+
ppm = res[idx]
|
72 |
+
|
73 |
+
# add_ping returns a prompt structured in Alpaca form
|
74 |
+
ppm.add_pingpong(
|
75 |
+
PingPong(user_message, "")
|
76 |
+
)
|
77 |
+
prompt = build_prompts(ppm, user_message, global_context, ctx_num_lconv)
|
78 |
+
|
79 |
+
# prepare text generating streamer & start generating
|
80 |
+
gen_kwargs, streamer = pre.build(
|
81 |
+
prompt, model_num,
|
82 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts,
|
83 |
+
res_beams, res_cache, res_sample, res_eosid, res_padid,
|
84 |
+
return_token_type_ids=False
|
85 |
+
)
|
86 |
+
pre.start_gen(gen_kwargs, model_num)
|
87 |
+
|
88 |
+
# handling stream
|
89 |
+
model_thumbnail_tiny = global_vars.models[model_num]["model_thumb_tiny"]
|
90 |
+
model_type = global_vars.models[model_num]["model_type"]
|
91 |
+
for ppmanager, uis in text_stream(ppm, streamer, model_thumbnail_tiny, model_type):
|
92 |
+
yield "", uis, prompt, str(res)
|
93 |
+
|
94 |
+
ppm = post.strip_pong(ppm)
|
95 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
96 |
+
|
97 |
+
# summarization
|
98 |
+
# ppm.add_pingpong(
|
99 |
+
# PingPong(None, "![](https://i.postimg.cc/ZKNKDPBd/Vanilla-1s-209px.gif)")
|
100 |
+
# )
|
101 |
+
# yield "", ppm.build_uis(), prompt, state
|
102 |
+
# ppm.pop_pingpong()
|
103 |
+
|
104 |
+
# ppm = summarize(
|
105 |
+
# ppm, ctx_sum_prompt, ctx_num_lconv,
|
106 |
+
# sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts,
|
107 |
+
# sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid
|
108 |
+
# )
|
109 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
configs/constraints_config.yaml
ADDED
@@ -0,0 +1,4 @@
constraints:
  max_context: 1000
  max_prompt: 300
  max_conv_len: 1500
configs/response_configs/baize.yaml
ADDED
@@ -0,0 +1,12 @@
1 |
+
generation_config:
|
2 |
+
temperature: 0.95
|
3 |
+
top_p: 0.9
|
4 |
+
top_k: 50
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
repetition_penalty: 1.2
|
8 |
+
max_new_tokens: 1024
|
9 |
+
do_sample: True
|
10 |
+
bos_token_id: 0
|
11 |
+
eos_token_id: 1
|
12 |
+
pad_token_id: 0
|
configs/response_configs/camel.yaml
ADDED
@@ -0,0 +1,11 @@
1 |
+
generation_config:
|
2 |
+
temperature: 0.95
|
3 |
+
top_p: 0.9
|
4 |
+
top_k: 50
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
repetition_penalty: 1.2
|
8 |
+
max_new_tokens: 1024
|
9 |
+
do_sample: True
|
10 |
+
pad_token_id: 50257
|
11 |
+
eos_token_id: 50256
|
configs/response_configs/default.yaml
ADDED
@@ -0,0 +1,9 @@
generation_config:
  temperature: 0.95
  top_p: 0.9
  top_k: 50
  num_beams: 1
  use_cache: True
  repetition_penalty: 1.2
  max_new_tokens: 1024
  do_sample: True
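Each response config is turned into a transformers GenerationConfig at load time; a sketch that mirrors get_generation_config in global_vars.py further below:

import yaml
from transformers import GenerationConfig

with open("configs/response_configs/default.yaml", "rb") as f:
    raw = yaml.safe_load(f.read())["generation_config"]

gen_config = GenerationConfig(**raw)
print(gen_config.temperature, gen_config.max_new_tokens)  # 0.95 1024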
configs/response_configs/flan.yaml
ADDED
@@ -0,0 +1,9 @@
1 |
+
generation_config:
|
2 |
+
temperature: 0.8
|
3 |
+
top_p: 0.95
|
4 |
+
top_k: 50
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: False
|
7 |
+
repetition_penalty: 1.2
|
8 |
+
max_new_tokens: 256
|
9 |
+
do_sample: True
|
configs/response_configs/gpt4_alpaca.yaml
ADDED
@@ -0,0 +1,9 @@
1 |
+
generation_config:
|
2 |
+
temperature: 0.95
|
3 |
+
top_p: 0.9
|
4 |
+
top_k: 50
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
repetition_penalty: 1.2
|
8 |
+
max_new_tokens: 512
|
9 |
+
do_sample: True
|
configs/response_configs/guanaco.yaml
ADDED
@@ -0,0 +1,9 @@
1 |
+
generation_config:
|
2 |
+
temperature: 0.95
|
3 |
+
top_p: 0.9
|
4 |
+
top_k: 50
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
repetition_penalty: 1.2
|
8 |
+
max_new_tokens: 1024
|
9 |
+
do_sample: True
|
configs/response_configs/koalpaca.yaml
ADDED
@@ -0,0 +1,11 @@
1 |
+
generation_config:
|
2 |
+
temperature: 0.95
|
3 |
+
top_p: 0.9
|
4 |
+
top_k: 50
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
repetition_penalty: 1.2
|
8 |
+
max_new_tokens: 1024
|
9 |
+
do_sample: True
|
10 |
+
eos_token_id: 2
|
11 |
+
pad_token_id: 2
|
configs/response_configs/redpajama.yaml
ADDED
@@ -0,0 +1,11 @@
1 |
+
generation_config:
|
2 |
+
temperature: 1.0
|
3 |
+
top_p: 0.9
|
4 |
+
top_k: 1000
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
repetition_penalty: 1.2
|
8 |
+
max_new_tokens: 512
|
9 |
+
do_sample: True
|
10 |
+
eos_token_id: 0
|
11 |
+
pad_token_id: 1
|
configs/response_configs/stablelm.yaml
ADDED
@@ -0,0 +1,11 @@
1 |
+
generation_config:
|
2 |
+
temperature: 1.0
|
3 |
+
top_p: 0.9
|
4 |
+
top_k: 1000
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
repetition_penalty: 1.2
|
8 |
+
max_new_tokens: 512
|
9 |
+
do_sample: True
|
10 |
+
eos_token_id: 0
|
11 |
+
pad_token_id: 1
|
configs/response_configs/stackllama.yaml
ADDED
@@ -0,0 +1,10 @@
1 |
+
generation_config:
|
2 |
+
temperature: 0.9
|
3 |
+
top_p: 0.95
|
4 |
+
# top_k: 50
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
repetition_penalty: 1.2
|
8 |
+
max_new_tokens: 256
|
9 |
+
do_sample: True
|
10 |
+
early_stopping: True
|
configs/response_configs/starchat.yaml
ADDED
@@ -0,0 +1,12 @@
1 |
+
generation_config:
|
2 |
+
temperature: 0.5
|
3 |
+
top_p: 0.95
|
4 |
+
top_k: 50
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
repetition_penalty: 1.2
|
8 |
+
max_new_tokens: 1024
|
9 |
+
do_sample: True
|
10 |
+
eos_token_id: 0
|
11 |
+
bos_token_id: 0
|
12 |
+
pad_token_id: 0
|
configs/response_configs/t5_vicuna.yaml
ADDED
@@ -0,0 +1,9 @@
1 |
+
generation_config:
|
2 |
+
temperature: 0.95
|
3 |
+
top_p: 0.9
|
4 |
+
top_k: 50
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
repetition_penalty: 1.2
|
8 |
+
max_new_tokens: 2048
|
9 |
+
do_sample: True
|
configs/summarization_configs/camel.yaml
ADDED
@@ -0,0 +1,11 @@
1 |
+
generation_config:
|
2 |
+
temperature: 1
|
3 |
+
top_p: 0.9
|
4 |
+
top_k: 50
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
max_new_tokens: 1024
|
8 |
+
do_sample: True
|
9 |
+
repetition_penalty: 1.5
|
10 |
+
pad_token_id: 50257
|
11 |
+
eos_token_id: 50256
|
configs/summarization_configs/default.yaml
ADDED
@@ -0,0 +1,11 @@
1 |
+
generation_config:
|
2 |
+
temperature: 1
|
3 |
+
top_p: 0.9
|
4 |
+
top_k: 50
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
max_new_tokens: 1024
|
8 |
+
do_sample: True
|
9 |
+
repetition_penalty: 1.5
|
10 |
+
|
11 |
+
|
configs/summarization_configs/koalpaca.yaml
ADDED
@@ -0,0 +1,11 @@
1 |
+
generation_config:
|
2 |
+
temperature: 1
|
3 |
+
top_p: 0.9
|
4 |
+
top_k: 50
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
do_sample: True
|
8 |
+
repetition_penalty: 1.2
|
9 |
+
max_new_tokens: 512
|
10 |
+
eos_token_id: 2
|
11 |
+
pad_token_id: 2
|
configs/summarization_configs/redpajama.yaml
ADDED
@@ -0,0 +1,11 @@
1 |
+
generation_config:
|
2 |
+
temperature: 1.0
|
3 |
+
top_p: 0.9
|
4 |
+
top_k: 1000
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
repetition_penalty: 1.2
|
8 |
+
max_new_tokens: 512
|
9 |
+
do_sample: True
|
10 |
+
eos_token_id: 0
|
11 |
+
pad_token_id: 1
|
configs/summarization_configs/stablelm.yaml
ADDED
@@ -0,0 +1,11 @@
1 |
+
generation_config:
|
2 |
+
temperature: 1
|
3 |
+
top_p: 0.9
|
4 |
+
top_k: 1000
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
do_sample: True
|
8 |
+
repetition_penalty: 1.2
|
9 |
+
max_new_tokens: 512
|
10 |
+
eos_token_id: 0
|
11 |
+
pad_token_id: 1
|
configs/summarization_configs/t5_vicuna.yaml
ADDED
@@ -0,0 +1,9 @@
1 |
+
generation_config:
|
2 |
+
temperature: 0.95
|
3 |
+
top_p: 0.9
|
4 |
+
top_k: 50
|
5 |
+
num_beams: 1
|
6 |
+
use_cache: True
|
7 |
+
repetition_penalty: 1.2
|
8 |
+
max_new_tokens: 2048
|
9 |
+
do_sample: True
|
examples.txt
ADDED
@@ -0,0 +1,2 @@
Tell me about GPT
Write a Python program to print Fibonacci numbers
gens/__init__.py
ADDED
File without changes
|
gens/batch_gen.py
ADDED
@@ -0,0 +1,32 @@
import torch

def get_output_batch(
    model, tokenizer, prompts, generation_config, device='cuda'
):
    if len(prompts) == 1:
        encoding = tokenizer(prompts, return_tensors="pt")
        input_ids = encoding["input_ids"].to(device)
        generated_id = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
        )

        decoded = tokenizer.batch_decode(
            generated_id, skip_prompt=True, skip_special_tokens=True
        )
        del input_ids, generated_id
        torch.cuda.empty_cache()
        return decoded
    else:
        encodings = tokenizer(prompts, padding=True, return_tensors="pt").to(device)
        generated_ids = model.generate(
            **encodings,
            generation_config=generation_config,
        )

        decoded = tokenizer.batch_decode(
            generated_ids, skip_prompt=True, skip_special_tokens=True
        )
        del encodings, generated_ids
        torch.cuda.empty_cache()
        return decoded
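get_output_batch is the non-streaming path the (currently commented-out) summarization step relies on; a usage sketch with placeholder model/tokenizer objects that are assumed to be loaded already:

outputs = get_output_batch(
    model, tokenizer,
    ["Summarize the conversation so far."],
    gen_config,        # a transformers GenerationConfig
)
print(outputs[0])      # decoded text for the single prompt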
global_vars.py
ADDED
@@ -0,0 +1,194 @@
1 |
+
import gc
|
2 |
+
import yaml
|
3 |
+
import json
|
4 |
+
import torch
|
5 |
+
from transformers import GenerationConfig
|
6 |
+
from models import alpaca, stablelm, koalpaca, flan_alpaca, mpt
|
7 |
+
from models import camel, t5_vicuna, vicuna, starchat, redpajama, bloom
|
8 |
+
from models import baize, guanaco, falcon, kullm, replit, airoboros
|
9 |
+
from models import samantha_vicuna
|
10 |
+
|
11 |
+
from utils import get_chat_interface, get_chat_manager

model_infos = json.load(open("model_cards.json"))

def get_model_type(model_info):
    base_url = model_info["hub(base)"]
    ft_ckpt_url = model_info["hub(ckpt)"]

    model_type_tmp = "alpaca"
    if "llms/wizardlm" in base_url.lower():
        model_type_tmp = "wizardlm"
    elif "chronos" in base_url.lower():
        model_type_tmp = "chronos"
    elif "lazarus" in base_url.lower():
        model_type_tmp = "lazarus"
    elif "samantha" in base_url.lower():
        model_type_tmp = "samantha-vicuna"
    elif "airoboros" in base_url.lower():
        model_type_tmp = "airoboros"
    elif "replit" in base_url.lower():
        model_type_tmp = "replit-instruct"
    elif "kullm" in base_url.lower():
        model_type_tmp = "kullm-polyglot"
    elif "nous-hermes" in base_url.lower():
        model_type_tmp = "nous-hermes"
    elif "guanaco" in base_url.lower():
        model_type_tmp = "guanaco"
    elif "wizardlm-uncensored-falcon" in base_url.lower():
        model_type_tmp = "wizard-falcon"
    elif "falcon" in base_url.lower():
        model_type_tmp = "falcon"
    elif "baize" in base_url.lower():
        model_type_tmp = "baize"
    elif "stable-vicuna" in base_url.lower():
        model_type_tmp = "stable-vicuna"
    elif "vicuna" in base_url.lower():
        model_type_tmp = "vicuna"
    elif "mpt" in base_url.lower():
        model_type_tmp = "mpt"
    elif "redpajama-incite-7b-instruct" in base_url.lower():
        model_type_tmp = "redpajama-instruct"
    elif "redpajama" in base_url.lower():
        model_type_tmp = "redpajama"
    elif "starchat" in base_url.lower():
        model_type_tmp = "starchat"
    elif "camel" in base_url.lower():
        model_type_tmp = "camel"
    elif "flan-alpaca" in base_url.lower():
        model_type_tmp = "flan-alpaca"
    elif "openassistant/stablelm" in base_url.lower():
        model_type_tmp = "os-stablelm"
    elif "stablelm" in base_url.lower():
        model_type_tmp = "stablelm"
    elif "fastchat-t5" in base_url.lower():
        model_type_tmp = "t5-vicuna"
    elif "koalpaca-polyglot" in base_url.lower():
        model_type_tmp = "koalpaca-polyglot"
    elif "alpacagpt4" in ft_ckpt_url.lower():
        model_type_tmp = "alpaca-gpt4"
    elif "alpaca" in ft_ckpt_url.lower():
        model_type_tmp = "alpaca"
    elif "llama-deus" in ft_ckpt_url.lower():
        model_type_tmp = "llama-deus"
    elif "vicuna-lora-evolinstruct" in ft_ckpt_url.lower():
        model_type_tmp = "evolinstruct-vicuna"
    elif "alpacoom" in ft_ckpt_url.lower():
        model_type_tmp = "alpacoom"
    elif "guanaco" in ft_ckpt_url.lower():
        model_type_tmp = "guanaco"
    else:
        print("unsupported model type")

    return model_type_tmp

def initialize_globals():
    global models, tokenizers

    models = []
    model_names = [
        "baize-7b",
        # "evolinstruct-vicuna-13b",
        "guanaco-7b",
        # "nous-hermes-13b"
    ]
    for model_name in model_names:
        model_info = model_infos[model_name]
        model_thumbnail_tiny = model_info["thumb-tiny"]
        model_type = get_model_type(model_info)
        print(model_type)
        load_model = get_load_model(model_type)

        model, tokenizer = load_model(
            base=model_info["hub(base)"],
            finetuned=model_info["hub(ckpt)"],
            mode_cpu=False,
            mode_mps=False,
            mode_full_gpu=True,
            mode_8bit=False,
            mode_4bit=False,
            force_download_ckpt=False
        )

        gen_config, gen_config_raw = get_generation_config(
            model_info["default_gen_config"]
        )

        models.append(
            {
                "model_name": model_name,
                "model_thumb_tiny": model_thumbnail_tiny,
                "model_type": model_type,
                "model": model,
                "tokenizer": tokenizer,
                "gen_config": gen_config,
                "gen_config_raw": gen_config_raw,
                "chat_interface": get_chat_interface(model_type),
                "chat_manager": get_chat_manager(model_type),
            }
        )

def get_load_model(model_type):
    if model_type == "alpaca" or \
        model_type == "alpaca-gpt4" or \
        model_type == "llama-deus" or \
        model_type == "nous-hermes" or \
        model_type == "lazarus" or \
        model_type == "chronos" or \
        model_type == "wizardlm":
        return alpaca.load_model
    elif model_type == "stablelm" or model_type == "os-stablelm":
        return stablelm.load_model
    elif model_type == "koalpaca-polyglot":
        return koalpaca.load_model
    elif model_type == "kullm-polyglot":
        return kullm.load_model
    elif model_type == "flan-alpaca":
        return flan_alpaca.load_model
    elif model_type == "camel":
        return camel.load_model
    elif model_type == "t5-vicuna":
        return t5_vicuna.load_model
    elif model_type == "stable-vicuna":
        return vicuna.load_model
    elif model_type == "starchat":
        return starchat.load_model
    elif model_type == "mpt":
        return mpt.load_model
    elif model_type == "redpajama" or \
        model_type == "redpajama-instruct":
        return redpajama.load_model
    elif model_type == "vicuna":
        return vicuna.load_model
    elif model_type == "evolinstruct-vicuna":
        return alpaca.load_model
    elif model_type == "alpacoom":
        return bloom.load_model
    elif model_type == "baize":
        return baize.load_model
    elif model_type == "guanaco":
        return guanaco.load_model
    elif model_type == "falcon" or model_type == "wizard-falcon":
        return falcon.load_model
    elif model_type == "replit-instruct":
        return replit.load_model
    elif model_type == "airoboros":
        return airoboros.load_model
    elif model_type == "samantha-vicuna":
        return samantha_vicuna.load_model
    else:
        return None

def get_generation_config(path):
    with open(path, 'rb') as f:
        generation_config = yaml.safe_load(f.read())

    generation_config = generation_config["generation_config"]

    return GenerationConfig(**generation_config), generation_config

def get_constraints_config(path):
    with open(path, 'rb') as f:
        constraints_config = yaml.safe_load(f.read())

    return ConstraintsConfig(**constraints_config), constraints_config["constraints"]
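For orientation, a minimal sketch of how these helpers chain together. The hub ids and thumbnail path below are hypothetical placeholders, not values taken from model_cards.json; only the key names mirror what the code above actually reads.

    # Hypothetical model card entry; keys mirror what get_model_type/initialize_globals read.
    example_card = {
        "hub(base)": "some-org/guanaco-7b-base",        # assumed value for illustration
        "hub(ckpt)": "",                                 # no separate fine-tuned checkpoint
        "thumb-tiny": "thumbs/guanaco.png",              # assumed thumbnail path
        "default_gen_config": "configs/response_configs/guanaco.yaml",
    }

    model_type = get_model_type(example_card)            # -> "guanaco", matched on the base hub id
    load_model_fn = get_load_model(model_type)            # -> guanaco.load_model
    gen_config, gen_config_raw = get_generation_config(example_card["default_gen_config"])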
miscs/__init__.py
ADDED
File without changes
miscs/js.py
ADDED
@@ -0,0 +1,50 @@
GET_LOCAL_STORAGE = """
function() {
    globalThis.setStorage = (key, value)=>{
        localStorage.setItem(key, JSON.stringify(value));
    }
    globalThis.getStorage = (key, value)=>{
        return JSON.parse(localStorage.getItem(key));
    }

    var local_data = getStorage('local_data');
    var history = [];

    if(local_data) {
        local_data[0].pingpongs.forEach(element =>{
            history.push([element.ping, element.pong]);
        });
    }
    else {
        local_data = [];
        for (let step = 0; step < 10; step++) {
            local_data.push({'ctx': '', 'pingpongs':[]});
        }
        setStorage('local_data', local_data);
    }

    if(history.length == 0) {
        document.querySelector("#initial-popup").classList.remove('hide');
    }

    return [history, local_data];
}
"""

UPDATE_LEFT_BTNS_STATE = """
(v)=>{
    document.querySelector('.custom-btn-highlight').classList.add('custom-btn');
    document.querySelector('.custom-btn-highlight').classList.remove('custom-btn-highlight');

    const elements = document.querySelectorAll(".custom-btn");

    for(var i=0; i < elements.length; i++) {
        const element = elements[i];
        if(element.textContent == v) {
            console.log(v);
            element.classList.add('custom-btn-highlight');
            element.classList.remove('custom-btn');
            break;
        }
    }
}"""
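These constants are plain JavaScript strings meant to run in the browser when attached to Gradio events: GET_LOCAL_STORAGE restores chat history from window.localStorage on page load, and UPDATE_LEFT_BTNS_STATE moves the highlight between the left-pane chat buttons. A minimal sketch of the assumed wiring follows; the actual hook-up lives in app.py, and the `_js` keyword is the Gradio 3.x way of attaching client-side callbacks.

    # Assumed wiring, not the app's verbatim code.
    import gradio as gr
    from miscs.js import GET_LOCAL_STORAGE

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(elem_id="chatbot")
        local_data = gr.JSON(visible=False)

        # On page load, run GET_LOCAL_STORAGE client-side to seed the chatbot
        # history and the local_data state from window.localStorage.
        demo.load(None, inputs=None, outputs=[chatbot, local_data], _js=GET_LOCAL_STORAGE)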
miscs/strings.py
ADDED
@@ -0,0 +1,83 @@
TITLE = "Alpaca-LoRA Playground"

ABSTRACT = """
Thanks to [tloen](https://github.com/tloen/alpaca-lora), this application runs Alpaca-LoRA, an instruction fine-tuned version of [LLaMA](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/). This demo currently runs the 30B version on a 3*A6000 instance at [Jarvislabs.ai](https://jarvislabs.ai/).

NOTE: overly long inputs (context, instruction) are not allowed. Please keep context < 500 and instruction < 150
"""

BOTTOM_LINE = """
This demo application runs the open source project, [Alpaca-LoRA-Serve](https://github.com/deep-diver/Alpaca-LoRA-Serve). By default it runs in streaming mode, but you can also run it in dynamic batch generation mode. Please visit the repo for more information, and contribute if you can.

Alpaca-LoRA is built on the same concept as the Stanford Alpaca project, but it lets us train and run inference on smaller GPUs such as an RTX 4090 for the 7B version. Also, we can build very small checkpoints on top of base models thanks to the [🤗 transformers](https://huggingface.co/docs/transformers/index), [🤗 peft](https://github.com/huggingface/peft), and [bitsandbytes](https://github.com/TimDettmers/bitsandbytes/tree/main) libraries.

We are thankful to [Jarvislabs.ai](https://jarvislabs.ai/), who generously provided free GPU instances.
"""

DEFAULT_EXAMPLES = {
    "Typical Questions": [
        {
            "title": "List all Canadian provinces in alphabetical order.",
            "examples": [
                ["1", "List all Canadian provinces in alphabetical order."],
                ["2", "Which ones are on the east side?"],
                ["3", "What foods are famous in each province on the east side?"],
                ["4", "What about sightseeing? or landmarks? list one per province"],
            ],
        },
        {
            "title": "Tell me about Alpacas.",
            "examples": [
                ["1", "Tell me about alpacas in two sentences"],
                ["2", "What other animals are living in the same area?"],
                ["3", "Are they the same species?"],
                ["4", "Write a Python program to return those species"],
            ],
        },
        {
            "title": "Tell me about the king of France in 2019.",
            "examples": [
                ["1", "Tell me about the king of France in 2019."],
                ["2", "What about before him?"],
            ]
        },
        {
            "title": "Write a Python program that prints the first 10 Fibonacci numbers.",
            "examples": [
                ["1", "Write a Python program that prints the first 10 Fibonacci numbers."],
                ["2", "Could you explain how the code works?"],
                ["3", "What is recursion?"],
            ]
        }
    ],
    "Identity": [
        {
            "title": "Conversation with the planet Pluto",
            "examples": [
                ["1", "Conversation with the planet Pluto", "I'm so curious about you"],
                ["2", "Conversation with the planet Pluto", "Tell me what I would see if I visited"],
                ["3", "Conversation with the planet Pluto", "It sounds beautiful"],
                ["4", "Conversation with the planet Pluto", "I'll keep that in mind. Hey I was wondering have you ever had any visitors?"],
                ["5", "Conversation with the planet Pluto", "That must have been exciting"],
                ["6", "Conversation with the planet Pluto", "That's so great. What else do you wish people knew about you?"],
                ["7", "Conversation with the planet Pluto", "Thanks for talking with me"],
            ]
        },
        {
            "title": "Conversation with a paper airplane",
            "examples": [
                ["1", "Conversation with a paper airplane", "What's it like being thrown through the air"],
                ["2", "Conversation with a paper airplane", "What's the worst place you've ever landed"],
                ["3", "Conversation with a paper airplane", "Have you ever gotten stuck?"],
                ["4", "Conversation with a paper airplane", "What's the secret to a really good paper airplane?"],
                ["5", "Conversation with a paper airplane", "What's the farthest you've ever flown?"],
                ["6", "Conversation with a paper airplane", "Good to talk to you!"]
            ]
        }
    ]
}

SPECIAL_STRS = {
    "continue": "continue.",
    "summarize": "what have we discussed so far? describe in the user's view and include important entities. also be brief as much as possible."
}
miscs/styles.py
ADDED
@@ -0,0 +1,727 @@
PARENT_BLOCK_CSS = """
#col-container {
    width: 95%;
    height: 100%;
    margin-left: auto;
    margin-right: auto;
}

#chatbot {
    height: 800px;
    overflow: auto;
}

#chatbot > .wrap {
    max-height: 780px;
}
"""

MODEL_SELECTION_CSS = """

.message {
    margin: 0px !important;
}

.load-mode-selector:nth-child(3) {
    margin: auto !important;
    text-align: center !important;
    width: fit-content !important;
}

code {
    white-space: break-spaces !important;
}

.progress-view {
    background: transparent !important;
    border-radius: 25px !important;
}

#landing-container {
    width: 85%;
    margin: auto;
}

.landing-btn {
    font-size: 2.3vw !important;
    margin-top: 25px !important;
    border-radius: 25px !important;
    height: 120px !important;

    @media screen and (max-width: 1000px) {
        font-size: 20px !important;
    }
}

#landing-bottom {
    margin-top: 20px !important;
}

.custom-btn {
    border: none !important;
    background: none !important;
    box-shadow: none !important;
    display: block !important;
    text-align: left !important;
}
.custom-btn:hover {
    background: rgb(243 244 246) !important;
}

.custom-btn-highlight {
    border: none !important;
    background: rgb(243 244 246) !important;
    box-shadow: none !important;
    display: block !important;
    text-align: left !important;

    @media (prefers-color-scheme: dark) {
        background-color: rgba(17,24,39,255) !important;
    }
}

#prompt-txt > label > span {
    display: none !important;
}
#prompt-txt > label > textarea {
    border: transparent;
    border-radius: 20px;
}
#chatbot {
    height: 800px;
    overflow: auto;
    box-shadow: none !important;
    border: none !important;
}
#chatbot > .wrap {
    max-height: 780px;
}
#chatbot + div {
    border-radius: 35px !important;
    width: 80% !important;
    margin: auto !important;
}

#left-pane {
    background-color: #f9fafb;
    border-radius: 15px;
    padding: 10px;

    @media (prefers-color-scheme: dark) {
        background-color: rgba(31,41,55,255) !important;
    }
}

#left-top {
    padding-left: 10px;
    padding-right: 10px;
    text-align: center;
    font-weight: bold;
    font-size: large;
}

#chat-history-accordion {
    background: transparent;
    border: 0.8px !important;
}

#right-pane {
    margin-left: 20px;
    background: white;
    border-radius: 20px;

    @media (prefers-color-scheme: dark) {
        background-color: rgba(31,41,55,255) !important;
    }

    @media screen and (max-width: 1000px) {
        margin: 0px !important;
    }
}

#initial-popup {
    z-index: 100;
    position: absolute;
    width: 50%;
    top: 50%;
    height: 50%;
    left: 50%;
    transform: translate(-50%, -50%);
    border-radius: 35px;
    padding: 15px;
}

#initial-popup-title {
    text-align: center;
    font-size: 18px;
    font-weight: bold;
}

#initial-popup-left-pane {
    min-width: 150px !important;
}

#initial-popup-right-pane {
    text-align: right;
}

.example-btn {
    padding-top: 20px !important;
    padding-bottom: 20px !important;
    padding-left: 5px !important;
    padding-right: 5px !important;
    background: linear-gradient(to bottom right, #f7faff, #ffffff) !important;
    box-shadow: none !important;
    border-radius: 20px !important;

    @media (prefers-color-scheme: dark) {
        background: rgba(70,79,86,255) !important;
    }
}

.example-btn:hover {
    box-shadow: 0.3px 0.3px 0.3px gray !important;

    @media (prefers-color-scheme: dark) {
        background: rgba(34,37,42,255) !important;
    }
}

.example-btn:active {
    @media (prefers-color-scheme: dark) {
        background: rgba(70,79,86,255) !important;
    }
}

#example-title {
    margin-bottom: 15px;
}

#aux-btns-popup {
    z-index: 200;
    position: absolute !important;
    bottom: 75px !important;
    right: 40px !important;
}

#aux-btns-popup > div {
    flex-wrap: nowrap;
    width: fit-content;
    margin: auto;
}

.aux-btn {
    height: 30px !important;
    flex-wrap: initial !important;
    flex: none !important;
    min-width: min(100px,100%) !important;
    font-weight: unset !important;
    font-size: 10pt !important;

    background: linear-gradient(to bottom right, #f7faff, #ffffff) !important;
    box-shadow: none !important;
    border-radius: 20px !important;

    opacity: 0.5;
    border-width: 0.5px;
    border-color: grey;

    @media (prefers-color-scheme: dark) {
        opacity: 0.2 !important;
        color: black !important;
    }
}

.aux-btn:hover {
    opacity: 1.0;
    box-shadow: 0.3px 0.3px 0.3px gray !important;

    @media (prefers-color-scheme: dark) {
        opacity: 1.0 !important;
        box-shadow: 0.3px 0.3px 0.3px gray !important;
    }
}

#aux-viewer {
    position: absolute !important;
    border-style: solid !important;
    overflow: visible !important;
    border: none !important;
    box-shadow: none !important;
    z-index: 1000 !important;
    opacity: 0.0 !important;
    width: 75% !important;
    right: 1px !important;
    transition: all 0.5s;
}

#aux-viewer:hover {
    opacity: 1.0 !important;
    box-shadow: 0px 0.5px 0px 0px gray !important;
}

#aux-viewer > .label-wrap {
    justify-content: end;
}

#aux-viewer > .label-wrap > span {
    margin-right: 10px;
}

#aux-viewer-inspector {
    padding: 0px;
}

#aux-viewer-inspector > label > span {
    display: none !important;
}

#aux-viewer-inspector > label > textarea {
    box-shadow: none;
    border-color: transparent;
}

#global-context > label > span {
    display: none !important;
}

#chat-back-btn {
    background: transparent !important;
}

#chat-back-btn:hover {
    @media (prefers-color-scheme: dark) {
        background: rgb(75,85,99) !important;
    }
}

#chat-back-btn:active {
    @media (prefers-color-scheme: dark) {
        background: transparent !important;
    }
}

#col-container {
    max-width: 70%;
    height: 100%;
    margin-left: auto;
    margin-right: auto;
}

#container {
    max-width: 70%;
    margin: auto;

    @media screen and (max-width: 1000px) {
        max-width: 90% !important;
    }
}

#container2 {
    max-width: 60%;
    margin: auto;
}

#container3 {
    max-width: 60%;
    margin: auto;
}

.square {
    height: 100px;

    @media (prefers-color-scheme: dark) {
        background-color: rgba(70,79,86,255) !important;
    }
}

.square:hover {
    @media (prefers-color-scheme: dark) {
        background-color: rgba(34,37,42,255) !important;
    }
}

.square:active {
    @media (prefers-color-scheme: dark) {
        background-color: rgba(70,79,86,255) !important;
    }
}

.placeholders {
    min-width: max-content !important;
}

.placeholders > button {
    border-color: transparent !important;
    background-color: transparent !important;
    box-shadow: none !important;
    cursor: default !important;
}

.center {
    text-align: center;
    overflow: hidden;
}

#30b-placeholder1, #30b-placeholder2, #30b-placeholder3, #30b-placeholder4 {
    background: red;
    box-shadow: none;
    pointer-events: none;
    width: 100px;
    height: 100px;
    background: transparent !important;
    border-color: transparent !important;
    box-shadow: none !important;
    cursor: default !important;
}

#20b-placeholder1, #20b-placeholder2, #20b-placeholder3, #20b-placeholder4 {
    background: red;
    box-shadow: none;
    pointer-events: none;
    width: 100px;
    height: 100px;
    margin: auto;
    background: transparent !important;
    border-color: transparent !important;
    box-shadow: none !important;
    cursor: default !important;
}

#10b-placeholder1, #10b-placeholder3, #10b-placeholder3, #10b-placeholder4 {
    background: red;
    box-shadow: none;
    pointer-events: none;
    width: 100px;
    height: 100px;
    margin: auto;
    background: transparent !important;
    border-color: transparent !important;
    box-shadow: none !important;
    cursor: default !important;
}

#camel-5b, #camel-20b {
    background: url(https://i.ibb.co/qD5HN9T/camel-removebg-preview.png);
    background-repeat: no-repeat;
    background-size: 100px 100px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#alpaca-lora-7b, #alpaca-lora-13b {
    background: url(https://i.ibb.co/z89FTz2/alpaca-lora.png);
    background-repeat: no-repeat;
    background-size: 100px 100px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#stablelm-7b {
    background: url(https://i.ibb.co/d2pd5wk/stable-LM-cropped.png);
    background-repeat: no-repeat;
    background-size: 100px 100px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#stackllama-7b {
    background: url(https://i.ibb.co/Q9vLcYm/tuxpi-com-1682256296-removebg-preview.png);
    background-repeat: no-repeat;
    background-size: 100px 100px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#flan-3b, #flan-11b {
    background: url(https://i.ibb.co/yBTk5bv/flan.png);
    background-repeat: no-repeat;
    background-size: 100px 100px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#koalpaca {
    background: url(https://i.ibb.co/hF9NL7r/koalpaca.png);
    background-repeat: no-repeat;
    background-size: 100px 100px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#kullm {
    background: url(https://i.ibb.co/6ZFqk4J/kullm.png);
    background-repeat: no-repeat;
    background-size: 100px 100px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#flan-3b {
    background: url(https://i.ibb.co/yBTk5bv/flan.png);
    background-repeat: no-repeat;
    background-size: 100px 100px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#os-stablelm-7b {
    background: url(https://i.ibb.co/WszrtVV/stablelm-oasst1.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#t5-vicuna-3b {
    background: url(https://i.ibb.co/4W7n78b/chansung-vector-logo-of-collective-intelligence-of-cute-llamas-3ef46884-72e6-44da-b88a-e831e5fee747.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#gpt4-alpaca-7b, #gpt4-alpaca-13b {
    background: url(https://i.ibb.co/qDz3HCG/chansung-vector-logo-of-alpaca-made-out-of-machines-Side-shot-39b27595-8202-48a6-97d1-266a745b2a29-r.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#stable-vicuna-13b {
    background: url(https://i.ibb.co/b6Vv6Jh/sv.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
}

#starchat-15b, #starchat-beta-15b {
    background: url(https://i.ibb.co/QjPP0Vv/starcoder.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#redpajama-7b, #redpajama-instruct-7b {
    background: url(https://i.ibb.co/NNB6qPj/redpajama.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#mpt-7b {
    background: url(https://i.ibb.co/DwN44Z9/mpt.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#vicuna-7b, #vicuna-13b {
    background: url(https://i.ibb.co/vqPDrPQ/vicuna.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#llama-deus-7b {
    background: url(https://i.ibb.co/4mH9LRQ/llama-deus.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#evolinstruct-vicuna-7b, #evolinstruct-vicuna-13b {
    background: url(https://i.ibb.co/xHDRjLS/evol-vicuna.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#alpacoom-7b {
    background: url(https://huggingface.co/mrm8488/Alpacoom/resolve/main/alpacoom_logo__1___1___1_-removebg-preview.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#baize-7b, #baize-13b {
    background: url(https://i.ibb.co/j5VpHb0/baize.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#guanaco-7b, #guanaco-13b, #guanaco-33b, #guanaco-65b {
    background: url(https://i.ibb.co/DWWsZn7/guanaco.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#falcon-7b, #falcon-40b {
    background: url(https://i.ibb.co/86yNWwG/falcon.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#wizard-falcon-7b, #wizard-falcon-40b {
    background: url(https://i.ibb.co/415s0D4/wizard-falcon.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#nous-hermes-13b {
    background: url(https://i.ibb.co/sm8VgtL/nous-hermes.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#airoboros-7b, #airoboros-13b {
    background: url(https://i.ibb.co/NLchBkB/airoboros.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#samantha-7b, #samantha-13b, #samantha-33b {
    background: url(https://i.ibb.co/72t5pyP/samantha.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#lazarus-30b {
    background: url(https://i.ibb.co/Zm2Bdzt/lazarus.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#chronos-13b, #chronos-33b {
    background: url(https://i.ibb.co/sQZ3L8j/chronos.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#wizardlm-13b, #wizardlm-30b {
    background: url(https://i.ibb.co/SRXWKz9/WizardLM.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#replit-3b {
    background: url(https://i.ibb.co/BrKCKYq/replit.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#byom {
    background: url(https://i.ibb.co/YhM4B2X/byom.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

#chosen-model {
    background: url(https://i.ibb.co/dLmNh2v/chosen.png);
    background-repeat: no-repeat;
    background-size: 100px 95px;
    color: transparent;
    width: 100px;
    height: 100px;
    margin: auto;
}

.sub-container > div {
    min-width: max-content !important;
}
"""
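Both constants are raw CSS strings; in a Gradio app they would typically be handed to the Blocks constructor so that the ids above (#chatbot, #left-pane, the per-model thumbnail ids, and so on) can target components created with matching elem_id values. A minimal, assumed example of that hand-off (the real construction is in app.py):

    import gradio as gr
    from miscs.styles import MODEL_SELECTION_CSS

    # css= injects the stylesheet into the rendered page.
    with gr.Blocks(css=MODEL_SELECTION_CSS) as demo:
        chatbot = gr.Chatbot(elem_id="chatbot")   # styled by the #chatbot rules above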
model_cards.json
ADDED
The diff for this file is too large to render.
See raw diff
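Although the file itself is not rendered here, global_vars.py reads the keys "hub(base)", "hub(ckpt)", "thumb-tiny", and "default_gen_config" from each entry, so a card presumably looks roughly like the sketch below. The values are deliberately left as placeholders rather than guessed from the raw file.

    # Inferred shape of a single model card entry (keys taken from what the loader reads):
    {
        "guanaco-7b": {
            "hub(base)": "...",              # base model repo on the Hugging Face Hub
            "hub(ckpt)": "...",              # optional fine-tuned / adapter checkpoint repo
            "thumb-tiny": "...",             # small thumbnail image
            "default_gen_config": "configs/response_configs/guanaco.yaml",
        }
    }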