ffreemt commited on
Commit
b4008c3
1 Parent(s): 3334b25

Update dl

Browse files
Files changed (2) hide show
  1. app.py +252 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Refer to https://github.com/abacaj/mpt-30B-inference/blob/main/download_model.py."""
2
+ # pylint: disable=invalid-name, missing-function-docstring, missing-class-docstring, redefined-outer-name, broad-except
3
+ import os
4
+ import time
5
+
6
+ import gradio as gr
7
+
8
+ # from mcli import predict
9
+ from huggingface_hub import hf_hub_download
10
+ from loguru import logger
11
+
12
+ URL = os.environ.get("URL")
13
+ _ = """
14
+ if URL is None:
15
+ raise ValueError("URL environment variable must be set")
16
+ if os.environ.get("MOSAICML_API_KEY") is None:
17
+ raise ValueError("git environment variable must be set")
18
+ # """
19
+
20
def predict(x, y, z=None, timeout=None):
    """Placeholder for the real MosaicML inference client (``mcli.predict``).

    Both call sites in this file invoke ``predict(URL, payload, timeout=70)``,
    so the stub must accept two positionals plus a ``timeout`` keyword; the
    previous ``(x, y, z)`` signature raised TypeError on that call.  ``z`` is
    kept (with a default) for backward compatibility with positional callers.

    Returns a dict shaped like the real server response so the downstream
    ``response["outputs"][0]`` lookup does not crash while stubbed.
    """
    return {"outputs": [""]}
22
+
23
+
24
def download_mpt_quant(destination_folder: str, repo_id: str, model_filename: str):
    """Download one quantized model file from the Hugging Face Hub.

    Fetches *model_filename* from *repo_id* into *destination_folder*
    (resolved to an absolute path) and returns the local file path
    reported by ``hf_hub_download``.
    """
    # local_dir_use_symlinks=True keeps the HF cache as the real storage
    # and places a symlink under the destination folder.
    download_kwargs = dict(
        repo_id=repo_id,
        filename=model_filename,
        local_dir=os.path.abspath(destination_folder),
        local_dir_use_symlinks=True,
    )
    return hf_hub_download(**download_kwargs)
32
+
33
+
34
class Chat:
    """Formats ChatML prompts and bridges Gradio events to the model.

    NOTE(review): the system prompt is never stored on the instance —
    ``set_system_prompt`` only returns its argument (the assignment is
    commented out).  The Gradio wiring below keeps the prompt in a Textbox
    and passes it into ``bot_turn`` explicitly, so this appears intentional.
    """

    default_system_prompt = "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers."
    # ChatML-style wrapper for the system message.
    system_format = "<|im_start|>system\n{}<|im_end|>\n"

    def __init__(
        self, system: str = None, user: str = None, assistant: str = None
    ) -> None:
        """Set up per-role message templates; defaults are ChatML markers."""
        if system is not None:
            self.set_system_prompt(system)
        else:
            self.reset_system_prompt()
        self.user = user if user else "<|im_start|>user\n{}<|im_end|>\n"
        self.assistant = (
            assistant if assistant else "<|im_start|>assistant\n{}<|im_end|>\n"
        )
        # Text emitted before the model's reply, i.e. everything in the
        # assistant template up to the "{}" placeholder.
        self.response_prefix = self.assistant.split("{}", maxsplit=1)[0]

    def set_system_prompt(self, system_prompt):
        # Deliberately does not mutate state (assignment left commented out);
        # returns the prompt so Gradio can echo it back into the Textbox.
        # self.system = self.system_format.format(system_prompt)
        return system_prompt

    def reset_system_prompt(self):
        # Returns (not stores) the default prompt text for the UI.
        return self.set_system_prompt(self.default_system_prompt)

    def history_as_formatted_str(self, system, history) -> str:
        """Render system prompt plus chat history as one ChatML string.

        ``history`` is a list of ``[user, assistant]`` pairs; the final
        pair's user message is appended without its (not yet generated)
        assistant reply, followed by the assistant response prefix.
        """
        system = self.system_format.format(system)
        text = system + "".join(
            [
                "\n".join(
                    [
                        self.user.format(item[0]),
                        self.assistant.format(item[1]),
                    ]
                )
                for item in history[:-1]
            ]
        )
        text += self.user.format(history[-1][0])
        text += self.response_prefix
        # stopgap solution to too long sequences
        if len(text) > 4500:
            # delete from the middle between <|im_start|> and <|im_end|>
            # find the middle ones, then expand out
            # NOTE(review): 139 presumably skips past the formatted default
            # system block so it is never deleted — confirm against
            # len(system_format.format(default_system_prompt)).
            start = text.find("<|im_start|>", 139)
            end = text.find("<|im_end|>", 139)
            while end < len(text) and len(text) > 4500:
                end = text.find("<|im_end|>", end + 1)
                text = text[:start] + text[end + 1 :]
            if len(text) > 4500:
                # the nice way didn't work, just truncate
                # deleting the beginning
                text = text[-4500:]

        return text

    def clear_history(self, history):
        # Gradio callback: discard the chat history.
        return []

    def turn(self, user_input: str):
        # NOTE(review): ``user_turn`` requires a ``history`` argument and
        # ``bot_turn`` requires two, so this raises TypeError if called;
        # it is not referenced by the Gradio wiring below.
        self.user_turn(user_input)
        return self.bot_turn()

    def user_turn(self, user_input: str, history):
        # Append the user's message with an empty assistant slot to fill in.
        history.append([user_input, ""])
        return user_input, history

    def bot_turn(self, system, history):
        # Format the whole conversation, query the inference server, and
        # fill in the last assistant slot; returns "" to clear the Textbox.
        conversation = self.history_as_formatted_str(system, history)
        assistant_response = call_inf_server(conversation)
        history[-1][-1] = assistant_response
        print(system)
        print(history)
        return "", history
107
+
108
+
109
def call_inf_server(prompt):
    """Send *prompt* to the inference server and return only the completion.

    The server echoes the prompt (minus the ChatML control tokens) at the
    start of its output, so that echoed prefix is stripped before returning.
    On any failure the request is retried once after a one-second pause; a
    failure on the retry propagates to the caller.
    """

    def _query():
        # One round-trip to the server; output_len caps generated tokens.
        response = predict(
            URL,
            {"inputs": [prompt], "temperature": 0.2, "top_p": 0.9, "output_len": 512},
            timeout=70,
        )
        return response["outputs"][0]

    # The server does not echo the ChatML control tokens, so strip them from
    # the prompt before measuring how much echoed text to drop.
    spl_tokens = ["<|im_start|>", "<|im_end|>"]
    clean_prompt = prompt.replace(spl_tokens[0], "").replace(spl_tokens[1], "")

    try:
        output = _query()
    except Exception as exc:
        # Assume a transient server-side error: wait briefly and try once
        # more; this second attempt is intentionally not wrapped.
        logger.error(exc)
        time.sleep(1)
        output = _query()

    # Fix: the original retry path sliced with len(prompt) (control tokens
    # included), over-trimming the completion; both paths now agree.
    return output[len(clean_prompt):]  # remove the echoed prompt
137
+
138
+
139
# Fetch the quantized GGML weights at import time so the app can serve them.
logger.info("start dl")
_ = """full url: https://huggingface.co/TheBloke/mpt-30B-chat-GGML/blob/main/mpt-30b-chat.ggmlv0.q4_1.bin"""

repo_id = "TheBloke/mpt-30B-chat-GGML"
model_filename = "mpt-30b-chat.ggmlv0.q4_1.bin"
destination_folder = "models"
download_mpt_quant(destination_folder, repo_id, model_filename)

logger.info("done dl")
148
+
149
# --- Gradio UI -------------------------------------------------------------
# Layout: header markdown, chatbot pane, message box + Submit/Stop/Clear,
# an advanced accordion for the system prompt, and two disclaimer rows.
with gr.Blocks(
    theme=gr.themes.Soft(),
    css=".disclaimer {font-variant-caps: all-small-caps;}",
) as demo:
    # NOTE(review): "mpt-30b-ch a t" in the URL below looks like scrape
    # garbling of "mpt-30b-chat" — confirm against the upstream source.
    gr.Markdown(
        """<h1><center>MosaicML MPT-30B-Chat</center></h1>

This demo is of [MPT-30B-Chat](https://huggingface.co/mosaicml/mpt-30b-ch a t). It is based on [MPT-30B](https://huggingface.co/mosaicml/mpt-30b) fine-tuned on approximately 300,000 turns of high-quality conversations, and is powered by [MosaicML Inference](https://www.mosaicml.com/inference).

If you're interested in [training](https://www.mosaicml.com/training) and [deploying](https://www.mosaicml.com/inference) your own MPT or LLMs, [sign up](https://forms.mosaicml.com/demo?utm_source=huggingface&utm_medium=referral&utm_campaign=mpt-30b) for MosaicML platform.

"""
    )
    conversation = Chat()
    # .style(...) is the pre-Gradio-4 styling API.
    chatbot = gr.Chatbot().style(height=500)
    with gr.Row():
        with gr.Column():
            msg = gr.Textbox(
                label="Chat Message Box",
                placeholder="Chat Message Box",
                show_label=False,
            ).style(container=False)
        with gr.Column():
            with gr.Row():
                submit = gr.Button("Submit")
                stop = gr.Button("Stop")
                clear = gr.Button("Clear")
    with gr.Row():
        with gr.Accordion("Advanced Options:", open=False):
            with gr.Row():
                with gr.Column(scale=2):
                    system = gr.Textbox(
                        label="System Prompt",
                        value=Chat.default_system_prompt,
                        show_label=False,
                    ).style(container=False)
                with gr.Column():
                    with gr.Row():
                        change = gr.Button("Change System Prompt")
                        reset = gr.Button("Reset System Prompt")
    with gr.Row():
        gr.Markdown(
            "Disclaimer: MPT-30B can produce factually incorrect output, and should not be relied on to produce "
            "factually accurate information. MPT-30B was trained on various public datasets; while great efforts "
            "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
            "biased, or otherwise offensive outputs.",
            elem_classes=["disclaimer"],
        )
    with gr.Row():
        gr.Markdown(
            "[Privacy policy](https://gist.github.com/samhavens/c29c68cdcd420a9aa0202d0839876dac)",
            elem_classes=["disclaimer"],
        )

    # Enter in the textbox: record the user turn (queue=False so the box
    # clears immediately), then generate the assistant reply (queued).
    submit_event = msg.submit(
        fn=conversation.user_turn,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    ).then(
        fn=conversation.bot_turn,
        inputs=[system, chatbot],
        outputs=[msg, chatbot],
        queue=True,
    )
    # Submit button: same two-step pipeline as pressing Enter.
    submit_click_event = submit.click(
        fn=conversation.user_turn,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    ).then(
        fn=conversation.bot_turn,
        inputs=[system, chatbot],
        outputs=[msg, chatbot],
        queue=True,
    )
    # Stop cancels any in-flight submit pipeline.
    stop.click(
        fn=None,
        inputs=None,
        outputs=None,
        cancels=[submit_event, submit_click_event],
        queue=False,
    )
    # Clear wipes the chatbot display, then the stored history.
    clear.click(lambda: None, None, chatbot, queue=False).then(
        fn=conversation.clear_history,
        inputs=[chatbot],
        outputs=[chatbot],
        queue=False,
    )
    # System-prompt controls: set_system_prompt/reset_system_prompt only
    # return text for the Textbox; no instance state is changed (see Chat).
    change.click(
        fn=conversation.set_system_prompt,
        inputs=[system],
        outputs=[system],
        queue=False,
    )
    reset.click(
        fn=conversation.reset_system_prompt,
        inputs=[],
        outputs=[system],
        queue=False,
    )


demo.queue(max_size=36, concurrency_count=14).launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ ctransformers==0.2.10
2
+ transformers==4.30.2
3
+ huggingface_hub
4
+ loguru