Update README.md
README.md CHANGED
````diff
@@ -29,14 +29,18 @@ MiniCPM3-4B has a 32k context window. Equipped with LLMxMapReduce, MiniCPM3-4B c
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
+
 path = "openbmb/MiniCPM3-4B-GPTQ-Int4"
 device = "cuda"
+
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16, device_map=device, trust_remote_code=True)
+
 messages = [
     {"role": "user", "content": "推荐5个北京的景点。"},
 ]
 model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
+
 model_outputs = model.generate(
     model_inputs,
     max_new_tokens=1024,
````
````diff
@@ -44,9 +48,11 @@ model_outputs = model.generate(
     temperature=0.7,
     repetition_penalty=1.02
 )
+
 output_token_ids = [
     model_outputs[i][len(model_inputs[i]):] for i in range(len(model_inputs))
 ]
+
 responses = tokenizer.batch_decode(output_token_ids, skip_special_tokens=True)[0]
 print(responses)
 ```
````
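A note on the slicing in the Transformers recipe above: `generate` returns the prompt tokens followed by the completion, which is why the example strips `len(model_inputs[i])` prompt tokens from each batch item before decoding. For interactive use, the library's built-in `TextStreamer` achieves the same effect while printing tokens as they arrive. A minimal sketch, not part of this commit, reusing the checkpoint and sampling settings from the example:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch

path = "openbmb/MiniCPM3-4B-GPTQ-Int4"
device = "cuda"

tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    path, torch_dtype=torch.bfloat16, device_map=device, trust_remote_code=True
)

messages = [{"role": "user", "content": "推荐5个北京的景点。"}]  # "Recommend 5 sights in Beijing."
model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)

# skip_prompt=True drops the echoed prompt -- the streaming analogue of the
# manual model_outputs[i][len(model_inputs[i]):] slice in the README recipe.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
model.generate(
    model_inputs,
    max_new_tokens=1024,
    do_sample=True,  # needed for temperature to take effect with the stock generate()
    temperature=0.7,
    repetition_penalty=1.02,
    streamer=streamer,
)
```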
````diff
@@ -55,18 +61,23 @@ print(responses)
 ```python
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
+
 model_name = "openbmb/MiniCPM3-4B-GPTQ-Int4"
 prompt = [{"role": "user", "content": "推荐5个北京的景点。"}]
+
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 input_text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
+
 llm = LLM(
     model=model_name,
     trust_remote_code=True,
     tensor_parallel_size=1,
-    quantization='gptq'
+    quantization='gptq'
 )
 sampling_params = SamplingParams(top_p=0.7, temperature=0.7, max_tokens=1024, repetition_penalty=1.02)
+
 outputs = llm.generate(prompts=input_text, sampling_params=sampling_params)
+
 print(outputs[0].outputs[0].text)
 ```
 
````
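The vLLM recipe above uses the offline `LLM` API. The same GPTQ checkpoint can also be exposed through vLLM's OpenAI-compatible server (started with `vllm serve openbmb/MiniCPM3-4B-GPTQ-Int4 --trust-remote-code --quantization gptq`) and queried with the standard `openai` client. A minimal sketch, not part of this commit; the port and placeholder `api_key` are illustrative:

```python
# Assumes a vLLM OpenAI-compatible server is already running locally, e.g.:
#   vllm serve openbmb/MiniCPM3-4B-GPTQ-Int4 --trust-remote-code --quantization gptq
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="openbmb/MiniCPM3-4B-GPTQ-Int4",
    messages=[{"role": "user", "content": "推荐5个北京的景点。"}],  # "Recommend 5 sights in Beijing."
    max_tokens=1024,
    temperature=0.7,
    top_p=0.7,
    extra_body={"repetition_penalty": 1.02},  # vLLM-specific sampling extension
)
print(response.choices[0].message.content)
```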
```diff
@@ -226,7 +237,7 @@ print(outputs[0].outputs[0].text)
     <td>63.2 </td>
   </tr>
   <tr>
-    <td>LiveCodeBench</td>
+    <td>LiveCodeBench v3</td>
     <td>22.2</td>
     <td>20.2</td>
     <td>19.2</td>
```
```diff
@@ -239,7 +250,7 @@ print(outputs[0].outputs[0].text)
     <td colspan="15" align="left"><strong>Function Call</strong></td>
   </tr>
   <tr>
-    <td>BFCL</td>
+    <td>BFCL v2</td>
     <td>71.6</td>
     <td>70.1</td>
     <td>19.2</td>
```
```diff
@@ -263,6 +274,7 @@ print(outputs[0].outputs[0].text)
   </tr>
 </table>
 
+
 ## Statement
 * As a language model, MiniCPM3-4B generates content by learning from a vast amount of text.
 * However, it does not possess the ability to comprehend or express personal opinions or value judgments.
```
````diff
@@ -283,4 +295,4 @@ print(outputs[0].outputs[0].text)
   journal={arXiv preprint arXiv:2404.06395},
   year={2024}
 }
-```
+```
````