hyx21 committed
Commit 7112c67 (parent: 8535080)

Upload modeling_minicpm.py

Files changed (1): modeling_minicpm.py (+28, -1)
modeling_minicpm.py CHANGED
@@ -20,7 +20,7 @@
 """ PyTorch MiniCPM model."""
 import math
 import warnings
-from typing import List, Optional, Tuple, Union
+from typing import List, Optional, Tuple, Union, Dict
 
 import torch
 import torch.nn.functional as F
@@ -49,6 +49,7 @@ from transformers.utils import (
 )
 from transformers.utils.import_utils import is_torch_fx_available
 from .configuration_minicpm import MiniCPMConfig
+import re
 
 
 if is_flash_attn_2_available():
@@ -1302,6 +1303,32 @@ class MiniCPMForCausalLM(MiniCPMPreTrainedModel):
                 tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past),
             )
         return reordered_past
+
+    @torch.inference_mode()
+    def chat(self, tokenizer, query: str, history: List[Dict] = None, role: str = "user",
+             max_length: int = 4096, num_beams=1, do_sample=True, top_p=0.8, temperature=0.3, logits_processor=None,
+             **kwargs):
+        if history is None:
+            history = []
+        if logits_processor:
+            gen_kwargs = {"max_length": max_length, "num_beams": num_beams, "do_sample": do_sample, "top_p": top_p,
+                          "temperature": temperature, "logits_processor": logits_processor, **kwargs}
+        else:
+            gen_kwargs = {"max_length": max_length, "num_beams": num_beams, "do_sample": do_sample, "top_p": top_p,
+                          "temperature": temperature, "logits_processor": logits_processor, **kwargs}
+
+        history.append({"role": role, "content": query})
+        history_str = tokenizer.apply_chat_template(history, tokenize=False, add_generation_prompt=False)
+        inputs = tokenizer(history_str, return_tensors='pt').to(self.device)
+        outputs = self.generate(**inputs, **gen_kwargs)
+        outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1]
+        response = tokenizer.decode(outputs)
+        pattern = re.compile(r".*?(?=<AI>|<用户>)", re.DOTALL)
+        matches = pattern.findall(response)
+        if len(matches) > 0:
+            response = matches[0]
+        history.append({"role": "assistant", "content": response})
+        return response, history
 
 
 @add_start_docstrings(
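
For context, a minimal sketch of how the `chat` helper added in this commit might be called once the modeling file is loaded with trust_remote_code=True. The checkpoint id, dtype, device placement, and prompts below are illustrative assumptions, not part of this commit.

    # Hedged usage sketch for the new `chat` method (assumptions marked below).
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    path = "openbmb/MiniCPM-2B-sft-bf16"  # assumption: any MiniCPM checkpoint shipping this modeling file
    tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        path, torch_dtype=torch.bfloat16, device_map="cuda", trust_remote_code=True
    )

    # First turn: `history` starts empty; the method returns the reply plus the
    # updated history (user query and assistant response appended).
    response, history = model.chat(tokenizer, "What is the tallest mountain in Shandong?")
    print(response)

    # Follow-up turn: pass the returned history back in to keep conversational context.
    response, history = model.chat(tokenizer, "How does it compare to Mount Huang?", history=history)
    print(response)

As written in the diff, `chat` formats the accumulated history with the tokenizer's chat template, calls `generate`, and truncates the decoded reply at the first `<AI>` or `<用户>` marker; both branches of the `if logits_processor:` guard build the same `gen_kwargs`, so a `None` value is simply passed through to `generate`.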