Spaces:

llmbb
/

LLMBB-Agent

Running

vlff李飞飞 committed on Dec 29, 2023

Commit

fe2448f

•

1 Parent(s): 87ba825

update oai

Files changed (1) hide show

qwen_agent/llm/qwen_oai.py CHANGED Viewed

@@ -374,7 +374,8 @@ def predict(
     chunk = ChatCompletionResponse(
         model=model_id, choices=[choice_data], object="chat.completion.chunk"
     )
-    yield "{}".format(_dump_json(chunk, exclude_unset=True))
     current_length = 0
     stop_words_ids = [tokenizer.encode(s) for s in stop_words] if stop_words else None
@@ -400,7 +401,8 @@ def predict(
         chunk = ChatCompletionResponse(
             model=model_id, choices=[choice_data], object="chat.completion.chunk"
         )
-        yield "{}".format(_dump_json(chunk, exclude_unset=True))
     choice_data = ChatCompletionResponseStreamChoice(
         index=0, delta=DeltaMessage(), finish_reason="stop"
@@ -408,8 +410,9 @@ def predict(
     chunk = ChatCompletionResponse(
         model=model_id, choices=[choice_data], object="chat.completion.chunk"
     )
-    yield "{}".format(_dump_json(chunk, exclude_unset=True))
-    yield "[DONE]"
     _gc()
@@ -452,7 +455,6 @@ class QwenChatAsOAI(BaseChatModel):
         response = create_chat_completion(_request)
         # TODO: error handling
         for chunk in response:
-            chunk = json.loads(chunk)
             if hasattr(chunk.choices[0].delta, 'content'):
                 yield chunk.choices[0].delta.content

     chunk = ChatCompletionResponse(
         model=model_id, choices=[choice_data], object="chat.completion.chunk"
     )
+    # yield "{}".format(_dump_json(chunk, exclude_unset=True))
+    yield chunk
     current_length = 0
     stop_words_ids = [tokenizer.encode(s) for s in stop_words] if stop_words else None
         chunk = ChatCompletionResponse(
             model=model_id, choices=[choice_data], object="chat.completion.chunk"
         )
+        # yield "{}".format(_dump_json(chunk, exclude_unset=True))
+        yield chunk
     choice_data = ChatCompletionResponseStreamChoice(
         index=0, delta=DeltaMessage(), finish_reason="stop"
     chunk = ChatCompletionResponse(
         model=model_id, choices=[choice_data], object="chat.completion.chunk"
     )
+    # yield "{}".format(_dump_json(chunk, exclude_unset=True))
+    yield chunk
+    # yield "[DONE]"
     _gc()
         response = create_chat_completion(_request)
         # TODO: error handling
         for chunk in response:
             if hasattr(chunk.choices[0].delta, 'content'):
                 yield chunk.choices[0].delta.content