Update modeling_chatglm.py
Support the stream_chat method for tool invocation.
modeling_chatglm.py  +13 -5
@@ -1074,11 +1074,19 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
             outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1]
             response = tokenizer.decode(outputs)
             if response and response[-1] != "�":
-                new_response, new_history = self.process_response(response, history)
-                if return_past_key_values:
-                    yield new_response, new_history, past_key_values
-                else:
-                    yield new_response, new_history
+                if response.startswith("\n") or history[0]["role"] != "system" or "tools" not in history[0]:
+                    new_response, new_history = self.process_response(response, history)
+                    if return_past_key_values:
+                        yield new_response, new_history, past_key_values
+                    else:
+                        yield new_response, new_history
+
+        if response and "new_response" not in locals().keys():
+            new_response, new_history = self.process_response(response, history)
+            if return_past_key_values:
+                yield new_response, new_history, past_key_values
+            else:
+                yield new_response, new_history
 
     @torch.inference_mode()
     def stream_generate(
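In effect, stream_chat now holds back intermediate chunks when the first history entry is a system message that registers tools and the decoded text does not start with "\n" (i.e. it opens with a metadata line naming a tool), so partial tool-call text is never streamed out; the new block after the loop then yields the fully parsed response exactly once if nothing was yielded inside the loop. A minimal usage sketch follows, assuming this file belongs to a ChatGLM3-style checkpoint such as THUDM/chatglm3-6b; the model id, tool schema, and query are illustrative and not part of this commit:

# Minimal usage sketch (assumption: ChatGLM3-style checkpoint with this
# modeling file loaded via trust_remote_code; tool schema is illustrative).
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True).half().cuda().eval()

tools = [{
    "name": "get_weather",
    "description": "Query the current weather for a city",
    "parameters": {"type": "object",
                   "properties": {"city": {"type": "string"}},
                   "required": ["city"]},
}]
# A leading system message carrying a "tools" key is exactly what the new
# `history[0]["role"] != "system" or "tools" not in history[0]` check detects.
history = [{"role": "system", "content": "You can call tools.", "tools": tools}]

for response, history in model.stream_chat(tokenizer, "What is the weather in Beijing?",
                                           history=history):
    # While a tool call is being generated, intermediate chunks are suppressed;
    # the final yield (from the loop or the post-loop fallback) carries the
    # parsed invocation, e.g. {"name": "get_weather", "parameters": {...}}.
    print(response)

For plain chat turns (no tools in history, or a response that starts with "\n"), streaming behaves as before, yielding a partial response on every decode step.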