jZoNg committed
Commit 7f9f96d
1 Parent(s): c64444f

Add application files
Files changed (5)
  1. README.md +1 -1
  2. app.py +271 -0
  3. model.py +79 -0
  4. requirements.txt +9 -0
  5. style.css +16 -0
README.md CHANGED
@@ -7,7 +7,7 @@ sdk: gradio
 sdk_version: 3.42.0
 app_file: app.py
 pinned: false
-license: apache-2.0
+license: other
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,271 @@
from typing import Iterator

import gradio as gr
import torch

from model import get_input_token_length, run

# DEFAULT_SYSTEM_PROMPT = """\
# You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\
# """

DEFAULT_SYSTEM_PROMPT = """
"""
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = 4000

DESCRIPTION = """
# Baichuan2-13B-Chat
Baichuan 2 is the new generation of open-source large language models launched by Baichuan Intelligent Technology. It was trained on a high-quality corpus with 2.6 trillion tokens.
"""

LICENSE = """
"""

if not torch.cuda.is_available():
    DESCRIPTION += '\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>'


def clear_and_save_textbox(message: str) -> tuple[str, str]:
    # Empty the textbox and stash the submitted message in saved_input.
    return '', message


def display_input(message: str,
                  history: list[tuple[str, str]]) -> list[tuple[str, str]]:
    # Show the user message immediately, with an empty bot response pending.
    history.append((message, ''))
    return history


def delete_prev_fn(
        history: list[tuple[str, str]]) -> tuple[list[tuple[str, str]], str]:
    # Pop the most recent turn; used by both Retry and Undo.
    try:
        message, _ = history.pop()
    except IndexError:
        message = ''
    return history, message or ''


def generate(
    message: str,
    history_with_input: list[tuple[str, str]],
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int,
) -> Iterator[list[tuple[str, str]]]:
    if max_new_tokens > MAX_MAX_NEW_TOKENS:
        raise ValueError(f'max_new_tokens must not exceed {MAX_MAX_NEW_TOKENS}')

    history = history_with_input[:-1]
    generator = run(message, history, system_prompt, max_new_tokens, temperature, top_p, top_k)
    try:
        first_response = next(generator)
        yield history + [(message, first_response)]
    except StopIteration:
        yield history + [(message, '')]
    for response in generator:
        yield history + [(message, response)]


def process_example(message: str) -> tuple[str, list[tuple[str, str]]]:
    generator = generate(message, [], DEFAULT_SYSTEM_PROMPT, 1024, 1, 0.95, 5)
    x: list[tuple[str, str]] = []
    for x in generator:  # exhaust the stream; keep the final history
        pass
    return '', x


def check_input_token_length(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> None:
    input_token_length = get_input_token_length(message, chat_history, system_prompt)
    if input_token_length > MAX_INPUT_TOKEN_LENGTH:
        raise gr.Error(f'The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Clear your chat history and try again.')


with gr.Blocks(css='style.css') as demo:
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(value='Duplicate Space for private use',
                       elem_id='duplicate-button')

    with gr.Group():
        chatbot = gr.Chatbot(label='Chatbot')
        with gr.Row():
            textbox = gr.Textbox(
                container=False,
                show_label=False,
                placeholder='Type a message...',
                scale=10,
            )
            submit_button = gr.Button('Submit',
                                      variant='primary',
                                      scale=1,
                                      min_width=0)
    with gr.Row():
        retry_button = gr.Button('🔄 Retry', variant='secondary')
        undo_button = gr.Button('↩️ Undo', variant='secondary')
        clear_button = gr.Button('🗑️ Clear', variant='secondary')

    saved_input = gr.State()

    with gr.Accordion(label='Advanced options', open=False):
        system_prompt = gr.Textbox(label='System prompt',
                                   value=DEFAULT_SYSTEM_PROMPT,
                                   lines=6)
        max_new_tokens = gr.Slider(
            label='Max new tokens',
            minimum=1,
            maximum=MAX_MAX_NEW_TOKENS,
            step=1,
            value=DEFAULT_MAX_NEW_TOKENS,
        )
        temperature = gr.Slider(
            label='Temperature',
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=1.0,
        )
        top_p = gr.Slider(
            label='Top-p (nucleus sampling)',
            minimum=0.05,
            maximum=1.0,
            step=0.05,
            value=0.95,
        )
        top_k = gr.Slider(
            label='Top-k',
            minimum=1,
            maximum=1000,
            step=1,
            value=50,
        )

    gr.Examples(
        examples=[
            'Hello there! How are you doing?',
            'Can you explain briefly to me what is the Python programming language?',
            'Explain the plot of Cinderella in a sentence.',
            'How many hours does it take a man to eat a Helicopter?',
            "Write a 100-word article on 'Benefits of Open-Source in AI research'",
        ],
        inputs=textbox,
        outputs=[textbox, chatbot],
        fn=process_example,
        cache_examples=True,
    )

    gr.Markdown(LICENSE)

    # Textbox submit chain: save the message, echo it into the chat,
    # validate the token budget, then stream the model response.
    textbox.submit(
        fn=clear_and_save_textbox,
        inputs=textbox,
        outputs=[textbox, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).then(
        fn=check_input_token_length,
        inputs=[saved_input, chatbot, system_prompt],
        api_name=False,
        queue=False,
    ).success(
        fn=generate,
        inputs=[
            saved_input,
            chatbot,
            system_prompt,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
        ],
        outputs=chatbot,
        api_name=False,
    )

    # The Submit button runs the same chain as textbox.submit.
    button_event_preprocess = submit_button.click(
        fn=clear_and_save_textbox,
        inputs=textbox,
        outputs=[textbox, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).then(
        fn=check_input_token_length,
        inputs=[saved_input, chatbot, system_prompt],
        api_name=False,
        queue=False,
    ).success(
        fn=generate,
        inputs=[
            saved_input,
            chatbot,
            system_prompt,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
        ],
        outputs=chatbot,
        api_name=False,
    )

    # Retry: drop the last turn, re-echo the message, and regenerate.
    retry_button.click(
        fn=delete_prev_fn,
        inputs=chatbot,
        outputs=[chatbot, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).then(
        fn=generate,
        inputs=[
            saved_input,
            chatbot,
            system_prompt,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
        ],
        outputs=chatbot,
        api_name=False,
    )

    # Undo: drop the last turn and put the message back in the textbox.
    undo_button.click(
        fn=delete_prev_fn,
        inputs=chatbot,
        outputs=[chatbot, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=lambda x: x,
        inputs=[saved_input],
        outputs=textbox,
        api_name=False,
        queue=False,
    )

    clear_button.click(
        fn=lambda: ([], ''),
        outputs=[chatbot, saved_input],
        queue=False,
        api_name=False,
    )

demo.queue(max_size=20).launch()
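The three history helpers are pure functions, so the Retry/Undo wiring can be sanity-checked without launching the UI. A minimal sketch, assuming the helpers above are in scope (importing app.py itself would launch the demo and load the model):

# Sanity check of the pure history helpers defined in app.py.
cleared, saved = clear_and_save_textbox('hello')
assert cleared == '' and saved == 'hello'   # textbox emptied, message stashed

history = display_input(saved, [])          # user turn shown, bot turn pending
assert history == [('hello', '')]

history, last = delete_prev_fn(history)     # Undo/Retry pop the last turn
assert history == [] and last == 'hello'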
model.py ADDED
@@ -0,0 +1,79 @@
from threading import Thread
from typing import Iterator

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from transformers.generation.utils import GenerationConfig

model_id = 'baichuan-inc/Baichuan2-13B-Chat'

if torch.cuda.is_available():
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map='auto',
        torch_dtype=torch.bfloat16,
        trust_remote_code=True
    )
    model.generation_config = GenerationConfig.from_pretrained(model_id)
else:
    model = None  # The demo does not run on CPU (see app.py).
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    use_fast=False,
    trust_remote_code=True
)


def get_prompt(message: str, chat_history: list[tuple[str, str]],
               system_prompt: str) -> str:
    # Note: this prompt format follows the Llama-2 chat template.
    texts = [f'<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n']
    # The first user input is _not_ stripped
    do_strip = False
    for user_input, response in chat_history:
        user_input = user_input.strip() if do_strip else user_input
        do_strip = True
        texts.append(f'{user_input} [/INST] {response.strip()} </s><s>[INST] ')
    message = message.strip() if do_strip else message
    texts.append(f'{message} [/INST]')
    return ''.join(texts)


def get_input_token_length(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> int:
    prompt = get_prompt(message, chat_history, system_prompt)
    input_ids = tokenizer([prompt], return_tensors='np', add_special_tokens=False)['input_ids']
    return input_ids.shape[-1]


def run(message: str,
        chat_history: list[tuple[str, str]],
        system_prompt: str,
        max_new_tokens: int = 2048,
        temperature: float = 0.3,
        top_p: float = 0.85,
        top_k: int = 5) -> Iterator[str]:
    prompt = get_prompt(message, chat_history, system_prompt)
    inputs = tokenizer([prompt], return_tensors='pt', add_special_tokens=False).to('cuda')

    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=10.,
        skip_prompt=True,
        skip_special_tokens=True
    )

    generate_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        num_beams=1,
    )
    # Run generation on a background thread so decoded tokens can be
    # streamed from the main thread as they arrive.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield ''.join(outputs)
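Since `run` is an ordinary Python generator, it can also be exercised outside Gradio. A minimal sketch, assuming a CUDA GPU and that the Baichuan2-13B-Chat weights can be downloaded; each yielded value is the cumulative response so far:

from model import run

final = ''
for partial in run('Briefly, what is nucleus sampling?',
                   chat_history=[],
                   system_prompt=''):
    final = partial  # keep only the latest (cumulative) text
print(final)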
requirements.txt ADDED
@@ -0,0 +1,9 @@
accelerate
bitsandbytes
gradio==3.42.0
sentencepiece
transformers_stream_generator
cpm_kernels
torch==2.0.1
transformers==4.31.0
protobuf==3.20.3
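As a convenience (not part of the Space itself), the pins above can be verified at runtime; a quick check, assuming the three pinned packages are installed:

import gradio, torch, transformers

# Versions pinned in requirements.txt above.
assert gradio.__version__ == '3.42.0'
assert torch.__version__.startswith('2.0.1')    # may carry a +cu suffix
assert transformers.__version__ == '4.31.0'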
style.css ADDED
@@ -0,0 +1,16 @@
h1 {
  text-align: center;
}

#duplicate-button {
  margin: auto;
  color: white;
  background: #1565c0;
  border-radius: 100vh;
}

#component-0 {
  max-width: 900px;
  margin: auto;
  padding-top: 1.5rem;
}