vlff李飞飞 committed on
Commit
fc211c5
·
1 Parent(s): 0024d65

update oai

Browse files
browser_qwen/src/popup.html CHANGED
@@ -112,9 +112,9 @@
112
  <!-- <iframe src=$popup_url style="height: 550px"></iframe>-->
113
  <div id="iframe_area" style="height: 570px"></div>
114
 
115
- <h3>Customize Address:</h3>
116
- <input type="text" id="addr" name="addr" class="input-text">
117
- <button id="set_addr" class="upload_btn">Change</button>
118
 
119
  <script src="popup.js"></script>
120
  </body>
 
112
  <!-- <iframe src=$popup_url style="height: 550px"></iframe>-->
113
  <div id="iframe_area" style="height: 570px"></div>
114
 
115
+ <h3 style="display: none;">Customize Address:</h3>
116
+ <input type="text" id="addr" name="addr" class="input-text" style="display: none;">
117
+ <button id="set_addr" class="upload_btn" style="display: none;">Change</button>
118
 
119
  <script src="popup.js"></script>
120
  </body>
qwen_agent/llm/qwen_oai.py CHANGED
@@ -291,8 +291,8 @@ def text_complete_last_message(history, stop_words_ids, gen_kwargs):
291
  _stop_words_ids.append(s)
292
  stop_words_ids = _stop_words_ids
293
 
294
- input_ids = torch.tensor([tokenizer.encode(prompt)]).to(model.device)
295
- output = model.generate(input_ids, stop_words_ids=stop_words_ids, **gen_kwargs).tolist()[0]
296
  output = tokenizer.decode(output, errors="ignore")
297
  assert output.startswith(prompt)
298
  output = output[len(prompt):]
@@ -302,7 +302,7 @@ def text_complete_last_message(history, stop_words_ids, gen_kwargs):
302
 
303
 
304
  def create_chat_completion(request: ChatCompletionRequest):
305
- global model, tokenizer
306
 
307
  gen_kwargs = {}
308
  if request.temperature is not None:
@@ -333,7 +333,7 @@ def create_chat_completion(request: ChatCompletionRequest):
333
  if query is _TEXT_COMPLETION_CMD:
334
  response = text_complete_last_message(history, stop_words_ids=stop_words_ids, gen_kwargs=gen_kwargs)
335
  else:
336
- response, _ = model.chat(
337
  tokenizer,
338
  query,
339
  history=history,
@@ -367,7 +367,7 @@ def _dump_json(data: BaseModel, *args, **kwargs) -> str:
367
  def predict(
368
  query: str, history: List[List[str]], model_id: str, stop_words: List[str], gen_kwargs: Dict,
369
  ):
370
- global model, tokenizer
371
  choice_data = ChatCompletionResponseStreamChoice(
372
  index=0, delta=DeltaMessage(role="assistant"), finish_reason=None
373
  )
@@ -381,11 +381,8 @@ def predict(
381
  stop_words_ids = [tokenizer.encode(s) for s in stop_words] if stop_words else None
382
  if stop_words:
383
  # TODO: It's a little bit tricky to trim stop words in the stream mode.
384
- raise Exception(
385
- status_code=400,
386
- detail="Invalid request: custom stop words are not yet supported for stream mode.",
387
- )
388
- response_generator = model.chat_stream(
389
  tokenizer, query, history=history, stop_words_ids=stop_words_ids, **gen_kwargs
390
  )
391
  for new_response in response_generator:
@@ -420,7 +417,7 @@ def predict(
420
  class QwenChatAsOAI(BaseChatModel):
421
 
422
  def __init__(self, model: str, api_key: str, model_server: str):
423
- self.checkpoint_path = copy.copy(model)
424
  super().__init__()
425
  tokenizer = AutoTokenizer.from_pretrained(
426
  self.checkpoint_path,
@@ -429,26 +426,26 @@ class QwenChatAsOAI(BaseChatModel):
429
  )
430
  device_map = "cpu"
431
  # device_map = "auto"
432
- model = AutoModelForCausalLM.from_pretrained(
433
  self.checkpoint_path,
434
  device_map=device_map,
435
  trust_remote_code=True,
436
  resume_download=True,
437
  ).eval()
438
 
439
- model.generation_config = GenerationConfig.from_pretrained(
440
  self.checkpoint_path,
441
  trust_remote_code=True,
442
  resume_download=True,
443
  )
444
- self.model = model
445
 
446
  def _chat_stream(
447
  self,
448
  messages: List[Dict],
449
  stop: Optional[List[str]] = None,
450
  ) -> Iterator[str]:
451
- _request = ChatCompletionRequest(model=self.checkpoint_path,
452
  messages=messages,
453
  stop=stop,
454
  stream=True)
@@ -463,7 +460,7 @@ class QwenChatAsOAI(BaseChatModel):
463
  messages: List[Dict],
464
  stop: Optional[List[str]] = None,
465
  ) -> str:
466
- _request = ChatCompletionRequest(model=self.checkpoint_path,
467
  messages=messages,
468
  stop=stop,
469
  stream=False)
@@ -475,12 +472,12 @@ class QwenChatAsOAI(BaseChatModel):
475
  messages: List[Dict],
476
  functions: Optional[List[Dict]] = None) -> Dict:
477
  if functions:
478
- _request = ChatCompletionRequest(model=self.checkpoint_path,
479
  messages=messages,
480
  functions=functions)
481
  response = create_chat_completion(_request)
482
  else:
483
- _request = ChatCompletionRequest(model=self.checkpoint_path,
484
  messages=messages)
485
  response = create_chat_completion(_request)
486
  # TODO: error handling
 
291
  _stop_words_ids.append(s)
292
  stop_words_ids = _stop_words_ids
293
 
294
+ input_ids = torch.tensor([tokenizer.encode(prompt)]).to(qmodel.device)
295
+ output = qmodel.generate(input_ids, stop_words_ids=stop_words_ids, **gen_kwargs).tolist()[0]
296
  output = tokenizer.decode(output, errors="ignore")
297
  assert output.startswith(prompt)
298
  output = output[len(prompt):]
 
302
 
303
 
304
  def create_chat_completion(request: ChatCompletionRequest):
305
+ global qmodel, tokenizer
306
 
307
  gen_kwargs = {}
308
  if request.temperature is not None:
 
333
  if query is _TEXT_COMPLETION_CMD:
334
  response = text_complete_last_message(history, stop_words_ids=stop_words_ids, gen_kwargs=gen_kwargs)
335
  else:
336
+ response, _ = qmodel.chat(
337
  tokenizer,
338
  query,
339
  history=history,
 
367
  def predict(
368
  query: str, history: List[List[str]], model_id: str, stop_words: List[str], gen_kwargs: Dict,
369
  ):
370
+ global qmodel, tokenizer
371
  choice_data = ChatCompletionResponseStreamChoice(
372
  index=0, delta=DeltaMessage(role="assistant"), finish_reason=None
373
  )
 
381
  stop_words_ids = [tokenizer.encode(s) for s in stop_words] if stop_words else None
382
  if stop_words:
383
  # TODO: It's a little bit tricky to trim stop words in the stream mode.
384
+ raise Exception("Invalid request: custom stop words are not yet supported for stream mode.",)
385
+ response_generator = qmodel.chat_stream(
 
 
 
386
  tokenizer, query, history=history, stop_words_ids=stop_words_ids, **gen_kwargs
387
  )
388
  for new_response in response_generator:
 
417
  class QwenChatAsOAI(BaseChatModel):
418
 
419
  def __init__(self, model: str, api_key: str, model_server: str):
420
+ self.model = model
421
  super().__init__()
422
  tokenizer = AutoTokenizer.from_pretrained(
423
  self.checkpoint_path,
 
426
  )
427
  device_map = "cpu"
428
  # device_map = "auto"
429
+ qmodel = AutoModelForCausalLM.from_pretrained(
430
  self.checkpoint_path,
431
  device_map=device_map,
432
  trust_remote_code=True,
433
  resume_download=True,
434
  ).eval()
435
 
436
+ qmodel.generation_config = GenerationConfig.from_pretrained(
437
  self.checkpoint_path,
438
  trust_remote_code=True,
439
  resume_download=True,
440
  )
441
+
442
 
443
  def _chat_stream(
444
  self,
445
  messages: List[Dict],
446
  stop: Optional[List[str]] = None,
447
  ) -> Iterator[str]:
448
+ _request = ChatCompletionRequest(model=self.model,
449
  messages=messages,
450
  stop=stop,
451
  stream=True)
 
460
  messages: List[Dict],
461
  stop: Optional[List[str]] = None,
462
  ) -> str:
463
+ _request = ChatCompletionRequest(model=self.model,
464
  messages=messages,
465
  stop=stop,
466
  stream=False)
 
472
  messages: List[Dict],
473
  functions: Optional[List[Dict]] = None) -> Dict:
474
  if functions:
475
+ _request = ChatCompletionRequest(model=self.model,
476
  messages=messages,
477
  functions=functions)
478
  response = create_chat_completion(_request)
479
  else:
480
+ _request = ChatCompletionRequest(model=self.model,
481
  messages=messages)
482
  response = create_chat_completion(_request)
483
  # TODO: error handling