lianglv committed on
Commit
c9b27b0
1 Parent(s): 2dec85e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +194 -72
app.py CHANGED
@@ -1,3 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import argparse
2
  from collections import defaultdict
3
  import datetime
@@ -6,26 +23,100 @@ import os
6
  import time
7
  import uuid
8
 
9
- os.system("pip install gradio==3.28.0")
10
 
11
  import gradio as gr
12
  import requests
13
 
14
- from fastchat.conversation import (
15
- Conversation,
16
- compute_skip_echo_len,
17
- SeparatorStyle,
 
18
  )
19
  from fastchat.constants import LOGDIR
20
  from fastchat.utils import (
21
  build_logger,
22
- server_error_msg,
23
  violates_moderation,
24
- moderation_msg,
25
  )
26
- from fastchat.serve.gradio_patch import Chatbot as grChatbot
27
- from fastchat.serve.gradio_css import code_highlight_css
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  logger = build_logger("gradio_web_server", "gradio_web_server.log")
31
 
@@ -38,17 +129,26 @@ disable_btn = gr.Button.update(interactive=False)
38
  controller_url = None
39
  enable_moderation = False
40
 
41
- conv_template_bf16 = Conversation(
42
- system="A chat between a curious human and an artificial intelligence assistant. "
43
- "The assistant gives helpful, detailed, and polite answers to the human's questions.",
44
- roles=("Human", "Assistant"),
45
- messages=(),
46
- offset=0,
47
- sep_style=SeparatorStyle.SINGLE,
48
- sep="\n",
49
- sep2="<|endoftext|>",
50
- )
51
 
 
 
 
 
 
 
 
 
 
52
  # conv_template_bf16 = Conversation(
53
  # system="",
54
  # roles=("", ""),
@@ -59,6 +159,22 @@ conv_template_bf16 = Conversation(
59
  # sep2="<|endoftext|>",
60
  # )
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  def set_global_vars(controller_url_, enable_moderation_):
63
  global controller_url, enable_moderation
64
  controller_url = controller_url_
@@ -72,9 +188,7 @@ def get_conv_log_filename():
72
 
73
 
74
  def get_model_list(controller_url):
75
- ret = requests.post(controller_url + "/refresh_all_workers")
76
- assert ret.status_code == 200
77
- ret = requests.post(controller_url + "/list_models")
78
  models = ret.json()["models"]
79
  logger.info(f"Models: {models}")
80
  return models
@@ -161,7 +275,7 @@ def add_text(state, text, request: gr.Request):
161
  logger.info(f"add_text. ip: {request.client.host}. len: {len(text)}")
162
 
163
  if state is None:
164
- state = conv_template_bf16.copy()
165
 
166
  if len(text) <= 0:
167
  state.skip_next = True
@@ -175,7 +289,7 @@ def add_text(state, text, request: gr.Request):
175
  no_change_btn,
176
  ) * 5
177
 
178
- text = text[:1536] # Hard cut-off
179
  state.append_message(state.roles[0], text)
180
  state.append_message(state.roles[1], None)
181
  state.skip_next = False
@@ -208,47 +322,32 @@ def http_bot(state, model_selector, temperature, max_new_tokens, topk, request:
208
 
209
  if len(state.messages) == state.offset + 2:
210
  # First round of conversation
211
- new_state = conv_template_bf16.copy()
212
- new_state.conv_id = uuid.uuid4().hex
213
- new_state.model_name = state.model_name or model_selector
 
 
214
  new_state.append_message(new_state.roles[0], state.messages[-2][1])
215
  new_state.append_message(new_state.roles[1], None)
216
  state = new_state
217
 
218
- # Query worker address
219
- ret = requests.post(
220
- controller_url + "/get_worker_address", json={"model": model_name}
221
- )
222
- worker_addr = ret.json()["address"]
223
- logger.info(f"model_name: {model_name}, worker_addr: {worker_addr}")
224
-
225
- # No available worker
226
- if worker_addr == "":
227
- state.messages[-1][-1] = server_error_msg
228
- yield (
229
- state,
230
- state.to_gradio_chatbot(),
231
- disable_btn,
232
- disable_btn,
233
- disable_btn,
234
- enable_btn,
235
- enable_btn,
236
- )
237
- return
238
-
239
  # Construct prompt
240
  prompt = state.get_prompt()
 
241
  skip_echo_len = compute_skip_echo_len(model_name, state, prompt) - 1
242
 
243
  # Make requests
244
  pload = {
245
- "model": model_name,
246
  "prompt": prompt,
 
247
  "temperature": temperature,
 
 
 
248
  "max_new_tokens": max_new_tokens,
249
- "topk": topk,
250
- "stop": "<|endoftext|>"
251
  }
 
252
  logger.info(f"==== request ====\n{pload}")
253
 
254
  start_time = time.time()
@@ -259,17 +358,21 @@ def http_bot(state, model_selector, temperature, max_new_tokens, topk, request:
259
  try:
260
  # Stream output
261
  response = requests.post(
262
- controller_url + "/worker_generate_stream",
263
  headers=headers,
264
  json=pload,
265
  stream=True,
266
  timeout=20,
267
  )
 
268
  for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
269
  if chunk:
 
 
270
  data = json.loads(chunk.decode())
 
271
  if data["error_code"] == 0:
272
- output = data["text"][skip_echo_len:].strip()
273
  output = post_process_code(output)
274
  state.messages[-1][-1] = output + "▌"
275
  yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 5
@@ -302,7 +405,8 @@ def http_bot(state, model_selector, temperature, max_new_tokens, topk, request:
302
  # elapsed_time = "\n{}s".format(round(finish_tstamp, 4))
303
  # elapsed_time = "<p class='time-style'>{}s </p>".format(round(finish_tstamp, 4))
304
 
305
- state.messages[-1][-1] = state.messages[-1][-1][:-1] + elapsed_time
 
306
  yield (state, state.to_gradio_chatbot()) + (enable_btn,) * 5
307
 
308
  logger.info(f"{output}")
@@ -393,6 +497,19 @@ gradio-app {
393
  background-size: contain;
394
  }
395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
  #chatbot .wrap {
397
  margin-top: 30px !important;
398
  }
@@ -406,11 +523,11 @@ gradio-app {
406
 
407
  .user, .bot {
408
  width: 80% !important;
409
-
410
  }
411
 
412
  .bot {
413
- white-space: pre-wrap !important;
414
  line-height: 1.3 !important;
415
  display: flex;
416
  flex-direction: column;
@@ -426,7 +543,7 @@ gradio-app {
426
  #btn-list-style {
427
  background: #eee0;
428
  border: 1px solid #0053f4;
429
- }
430
 
431
  .title {
432
  font-size: 1.5rem;
@@ -467,9 +584,7 @@ footer {
467
 
468
  .img-logo-right-style {
469
  width: 3.5rem;
470
- float: right;
471
- margin-top: -1rem;
472
- margin-left: 1rem;
473
  }
474
 
475
  .neural-studio-img-style {
@@ -487,25 +602,31 @@ footer {
487
 
488
 
489
  def build_single_model_ui(models):
490
-
491
  notice_markdown = """
492
  <div class="title">
493
  <div style="
494
  color: #fff;
495
  ">Large Language Model <p style="
496
  font-size: 0.8rem;
497
- ">4th Gen Intel® Xeon® with Intel® AMX</p></div>
498
-
499
  </div>
500
  """
501
-
502
- learn_more_markdown = """<div class="footer">
503
- <p>Powered by <a href="https://github.com/intel/intel-extension-for-transformers" style="text-decoration: underline;" target="_blank">Intel Extension for Transformers</a>
504
- <img src='https://i.postimg.cc/Pfv4vV6R/Microsoft-Teams-image-23.png' class='img-logo-right-style'/></p>
 
 
 
 
 
 
505
  </div>
506
  <div class="acknowledgments">
507
  <p></p></div>
508
-
509
  """
510
 
511
  state = gr.State()
@@ -519,7 +640,7 @@ def build_single_model_ui(models):
519
  show_label=False,
520
  ).style(container=False)
521
 
522
- chatbot = grChatbot(elem_id="chatbot", visible=False).style(height=550)
523
  with gr.Row(elem_id="text-box-style"):
524
  with gr.Column(scale=20):
525
  textbox = gr.Textbox(
@@ -534,7 +655,7 @@ def build_single_model_ui(models):
534
  temperature = gr.Slider(
535
  minimum=0.0,
536
  maximum=1.0,
537
- value=0.95,
538
  step=0.1,
539
  interactive=True,
540
  label="Temperature",
@@ -654,8 +775,9 @@ def build_demo(models):
654
 
655
  if __name__ == "__main__":
656
 
657
- controller_url = "http://3.223.220.249:80"
658
  host = "0.0.0.0"
 
659
  concurrency_count = 10
660
  model_list_mode = "once"
661
  share = False
@@ -668,5 +790,5 @@ if __name__ == "__main__":
668
  demo.queue(
669
  concurrency_count=concurrency_count, status_update_rate=10, api_open=False
670
  ).launch(
671
- server_name=host, share=share, max_threads=200
672
  )
 
1
+ # !/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ #
4
+ # Copyright (c) 2023 Intel Corporation
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
  import argparse
19
  from collections import defaultdict
20
  import datetime
 
23
  import time
24
  import uuid
25
 
26
+ os.system("pip install gradio==3.34.0")
27
 
28
  import gradio as gr
29
  import requests
30
 
31
+ import sys
32
+ sys.path.insert(0, './')
33
+ from conversation import (
34
+ get_conv_template,
35
+ compute_skip_echo_len
36
  )
37
  from fastchat.constants import LOGDIR
38
  from fastchat.utils import (
39
  build_logger,
 
40
  violates_moderation,
 
41
  )
 
 
42
 
43
+ code_highlight_css = """
44
+ #chatbot .hll { background-color: #ffffcc }
45
+ #chatbot .c { color: #408080; font-style: italic }
46
+ #chatbot .err { border: 1px solid #FF0000 }
47
+ #chatbot .k { color: #008000; font-weight: bold }
48
+ #chatbot .o { color: #666666 }
49
+ #chatbot .ch { color: #408080; font-style: italic }
50
+ #chatbot .cm { color: #408080; font-style: italic }
51
+ #chatbot .cp { color: #BC7A00 }
52
+ #chatbot .cpf { color: #408080; font-style: italic }
53
+ #chatbot .c1 { color: #408080; font-style: italic }
54
+ #chatbot .cs { color: #408080; font-style: italic }
55
+ #chatbot .gd { color: #A00000 }
56
+ #chatbot .ge { font-style: italic }
57
+ #chatbot .gr { color: #FF0000 }
58
+ #chatbot .gh { color: #000080; font-weight: bold }
59
+ #chatbot .gi { color: #00A000 }
60
+ #chatbot .go { color: #888888 }
61
+ #chatbot .gp { color: #000080; font-weight: bold }
62
+ #chatbot .gs { font-weight: bold }
63
+ #chatbot .gu { color: #800080; font-weight: bold }
64
+ #chatbot .gt { color: #0044DD }
65
+ #chatbot .kc { color: #008000; font-weight: bold }
66
+ #chatbot .kd { color: #008000; font-weight: bold }
67
+ #chatbot .kn { color: #008000; font-weight: bold }
68
+ #chatbot .kp { color: #008000 }
69
+ #chatbot .kr { color: #008000; font-weight: bold }
70
+ #chatbot .kt { color: #B00040 }
71
+ #chatbot .m { color: #666666 }
72
+ #chatbot .s { color: #BA2121 }
73
+ #chatbot .na { color: #7D9029 }
74
+ #chatbot .nb { color: #008000 }
75
+ #chatbot .nc { color: #0000FF; font-weight: bold }
76
+ #chatbot .no { color: #880000 }
77
+ #chatbot .nd { color: #AA22FF }
78
+ #chatbot .ni { color: #999999; font-weight: bold }
79
+ #chatbot .ne { color: #D2413A; font-weight: bold }
80
+ #chatbot .nf { color: #0000FF }
81
+ #chatbot .nl { color: #A0A000 }
82
+ #chatbot .nn { color: #0000FF; font-weight: bold }
83
+ #chatbot .nt { color: #008000; font-weight: bold }
84
+ #chatbot .nv { color: #19177C }
85
+ #chatbot .ow { color: #AA22FF; font-weight: bold }
86
+ #chatbot .w { color: #bbbbbb }
87
+ #chatbot .mb { color: #666666 }
88
+ #chatbot .mf { color: #666666 }
89
+ #chatbot .mh { color: #666666 }
90
+ #chatbot .mi { color: #666666 }
91
+ #chatbot .mo { color: #666666 }
92
+ #chatbot .sa { color: #BA2121 }
93
+ #chatbot .sb { color: #BA2121 }
94
+ #chatbot .sc { color: #BA2121 }
95
+ #chatbot .dl { color: #BA2121 }
96
+ #chatbot .sd { color: #BA2121; font-style: italic }
97
+ #chatbot .s2 { color: #BA2121 }
98
+ #chatbot .se { color: #BB6622; font-weight: bold }
99
+ #chatbot .sh { color: #BA2121 }
100
+ #chatbot .si { color: #BB6688; font-weight: bold }
101
+ #chatbot .sx { color: #008000 }
102
+ #chatbot .sr { color: #BB6688 }
103
+ #chatbot .s1 { color: #BA2121 }
104
+ #chatbot .ss { color: #19177C }
105
+ #chatbot .bp { color: #008000 }
106
+ #chatbot .fm { color: #0000FF }
107
+ #chatbot .vc { color: #19177C }
108
+ #chatbot .vg { color: #19177C }
109
+ #chatbot .vi { color: #19177C }
110
+ #chatbot .vm { color: #19177C }
111
+ #chatbot .il { color: #666666 }
112
+ """
113
+
114
+ server_error_msg = (
115
+ "**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**"
116
+ )
117
+ moderation_msg = (
118
+ "YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES. PLEASE TRY AGAIN."
119
+ )
120
 
121
  logger = build_logger("gradio_web_server", "gradio_web_server.log")
122
 
 
129
  controller_url = None
130
  enable_moderation = False
131
 
132
+ # conv_template_bf16 = Conversation(
133
+ # system="A chat between a curious human and an artificial intelligence assistant. "
134
+ # "The assistant gives helpful, detailed, and polite answers to the human's questions.",
135
+ # roles=("Human", "Assistant"),
136
+ # messages=(),
137
+ # offset=0,
138
+ # sep_style=SeparatorStyle.SINGLE,
139
+ # sep="\n",
140
+ # sep2="<|endoftext|>",
141
+ # )
142
 
143
+ # conv_template_bf16 = Conversation(
144
+ # system="",
145
+ # roles=("### Human", "### Assistant"),
146
+ # messages=(),
147
+ # offset=0,
148
+ # sep_style=SeparatorStyle.SINGLE,
149
+ # sep="\n",
150
+ # sep2="</s>",
151
+ # )
152
  # conv_template_bf16 = Conversation(
153
  # system="",
154
  # roles=("", ""),
 
159
  # sep2="<|endoftext|>",
160
  # )
161
 
162
+ # start_message = """<|im_start|>system
163
+ # - You are a helpful assistant chatbot trained by Intel.
164
+ # - You answer questions.
165
+ # - You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
166
+ # - You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>"""
167
+
168
+ # conv_template_bf16 = Conversation(
169
+ # system=start_message,
170
+ # roles=("<|im_start|>user", "<|im_start|>assistant"),
171
+ # messages=(),
172
+ # offset=0,
173
+ # sep_style=SeparatorStyle.TWO,
174
+ # sep="\n",
175
+ # sep2="<|im_end|>",
176
+ # )
177
+
178
  def set_global_vars(controller_url_, enable_moderation_):
179
  global controller_url, enable_moderation
180
  controller_url = controller_url_
 
188
 
189
 
190
  def get_model_list(controller_url):
191
+ ret = requests.post(controller_url + "/v1/models")
 
 
192
  models = ret.json()["models"]
193
  logger.info(f"Models: {models}")
194
  return models
 
275
  logger.info(f"add_text. ip: {request.client.host}. len: {len(text)}")
276
 
277
  if state is None:
278
+ state = get_conv_template("neural-chat-7b-v2")
279
 
280
  if len(text) <= 0:
281
  state.skip_next = True
 
289
  no_change_btn,
290
  ) * 5
291
 
292
+ text = text[:2560] # Hard cut-off
293
  state.append_message(state.roles[0], text)
294
  state.append_message(state.roles[1], None)
295
  state.skip_next = False
 
322
 
323
  if len(state.messages) == state.offset + 2:
324
  # First round of conversation
325
+ if "Llama-2-7b-chat-hf" in model_name:
326
+ model_name = "llama-2"
327
+ new_state = get_conv_template(model_name.split('/')[-1])
328
+ #new_state.conv_id = uuid.uuid4().hex
329
+ #new_state.model_name = state.model_name or model_selector
330
  new_state.append_message(new_state.roles[0], state.messages[-2][1])
331
  new_state.append_message(new_state.roles[1], None)
332
  state = new_state
333
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  # Construct prompt
335
  prompt = state.get_prompt()
336
+ # print("prompt==============", prompt)
337
  skip_echo_len = compute_skip_echo_len(model_name, state, prompt) - 1
338
 
339
  # Make requests
340
  pload = {
 
341
  "prompt": prompt,
342
+ "device": "cpu",
343
  "temperature": temperature,
344
+ "top_p": 0.95,
345
+ "top_k": topk,
346
+ "repetition_penalty": 1.0,
347
  "max_new_tokens": max_new_tokens,
348
+ "stream": True,
 
349
  }
350
+
351
  logger.info(f"==== request ====\n{pload}")
352
 
353
  start_time = time.time()
 
358
  try:
359
  # Stream output
360
  response = requests.post(
361
+ controller_url + "/v1/chat/completions",
362
  headers=headers,
363
  json=pload,
364
  stream=True,
365
  timeout=20,
366
  )
367
+ output = ""
368
  for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
369
  if chunk:
370
+ if chunk.strip() == b'data: [DONE]':
371
+ break
372
  data = json.loads(chunk.decode())
373
+ # print("data======", data, skip_echo_len)
374
  if data["error_code"] == 0:
375
+ output += data["text"].strip() + " "
376
  output = post_process_code(output)
377
  state.messages[-1][-1] = output + "▌"
378
  yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 5
 
405
  # elapsed_time = "\n{}s".format(round(finish_tstamp, 4))
406
  # elapsed_time = "<p class='time-style'>{}s </p>".format(round(finish_tstamp, 4))
407
 
408
+ # state.messages[-1][-1] = state.messages[-1][-1][:-1] + elapsed_time
409
+ state.messages[-1][-1] = state.messages[-1][-1][:-1]
410
  yield (state, state.to_gradio_chatbot()) + (enable_btn,) * 5
411
 
412
  logger.info(f"{output}")
 
497
  background-size: contain;
498
  }
499
 
500
+ #chatbot::after {
501
+ content: "";
502
+ position: absolute;
503
+ top: 0;
504
+ right: 60px;
505
+ width: 60px;
506
+ height: 60px;
507
+ background-image: url(https://i.postimg.cc/QCBQ45b4/Microsoft-Teams-image-44.png);
508
+ background-repeat: no-repeat;
509
+ background-position: center center;
510
+ background-size: contain;
511
+ }
512
+
513
  #chatbot .wrap {
514
  margin-top: 30px !important;
515
  }
 
523
 
524
  .user, .bot {
525
  width: 80% !important;
526
+
527
  }
528
 
529
  .bot {
530
+ white-space: pre-wrap !important;
531
  line-height: 1.3 !important;
532
  display: flex;
533
  flex-direction: column;
 
543
  #btn-list-style {
544
  background: #eee0;
545
  border: 1px solid #0053f4;
546
+ }
547
 
548
  .title {
549
  font-size: 1.5rem;
 
584
 
585
  .img-logo-right-style {
586
  width: 3.5rem;
587
+ display: inline-block !important;
 
 
588
  }
589
 
590
  .neural-studio-img-style {
 
602
 
603
 
604
  def build_single_model_ui(models):
605
+
606
  notice_markdown = """
607
  <div class="title">
608
  <div style="
609
  color: #fff;
610
  ">Large Language Model <p style="
611
  font-size: 0.8rem;
612
+ ">Future Gen Intel® Xeon® (codenamed Granite Rapids) with Intel® AMX</p></div>
613
+
614
  </div>
615
  """
616
+ # <div class="footer">
617
+ # <p>Powered by <a href="https://github.com/intel/intel-extension-for-transformers" style="text-decoration: underline;" target="_blank">Intel Extension for Transformers</a> and <a href="https://github.com/intel/intel-extension-for-pytorch" style="text-decoration: underline;" target="_blank">Intel Extension for PyTorch</a>
618
+ # <img src='https://i.postimg.cc/Pfv4vV6R/Microsoft-Teams-image-23.png' class='img-logo-right-style'/></p>
619
+ # </div>
620
+ # <div class="acknowledgments">
621
+ # <p></p></div>
622
+
623
+ learn_more_markdown = """<div class="footer">
624
+ <p>Powered by <a href="https://github.com/intel/intel-extension-for-transformers" style="text-decoration: underline;" target="_blank">Intel Extension for Transformers</a> and <a href="https://github.com/intel/intel-extension-for-pytorch" style="text-decoration: underline;" target="_blank">Intel Extension for PyTorch</a>
625
+ </p>
626
  </div>
627
  <div class="acknowledgments">
628
  <p></p></div>
629
+
630
  """
631
 
632
  state = gr.State()
 
640
  show_label=False,
641
  ).style(container=False)
642
 
643
+ chatbot = gr.Chatbot(elem_id="chatbot", visible=False).style(height=550)
644
  with gr.Row(elem_id="text-box-style"):
645
  with gr.Column(scale=20):
646
  textbox = gr.Textbox(
 
655
  temperature = gr.Slider(
656
  minimum=0.0,
657
  maximum=1.0,
658
+ value=0.001,
659
  step=0.1,
660
  interactive=True,
661
  label="Temperature",
 
775
 
776
  if __name__ == "__main__":
777
 
778
+ controller_url = "http://198.175.88.97:8000"
779
  host = "0.0.0.0"
780
+
781
  concurrency_count = 10
782
  model_list_mode = "once"
783
  share = False
 
790
  demo.queue(
791
  concurrency_count=concurrency_count, status_update_rate=10, api_open=False
792
  ).launch(
793
+ server_name=host, server_port=80, share=share, max_threads=200
794
  )