Update README.md
Browse files
README.md
CHANGED
@@ -17,32 +17,36 @@ It also replaces the 354 token `\u0000` with an emoji so that it can be converte
|
|
17 |
## How to use
|
18 |
|
19 |
- Load
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
|
26 |
- Apply chatml template
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
48 |
```
|
|
|
17 |
## How to use
|
18 |
|
19 |
- Load
|
20 |
+
```python
|
21 |
+
from transformers import AutoTokenizer
|
22 |
+
|
23 |
+
llama_tokenizer = AutoTokenizer.from_pretrained("RangiLyu/InternLM2-tokenizer-llama")
|
24 |
+
```
|
25 |
|
26 |
- Apply chatml template
|
27 |
+
```python
|
28 |
+
chat = [{"role": "user", "content": "Hello! What's your name?"},
|
29 |
+
{"role": "assistant", "content": "My name is InternLM2!"},
|
30 |
+
{"role": "user", "content": "Nice to meet you InternLM2!"},]
|
31 |
+
|
32 |
+
chat_ids = llama_tokenizer.apply_chat_template(chat)
|
33 |
+
print("ids: ", chat_ids)
|
34 |
+
print("tokens: ", llama_tokenizer.convert_ids_to_tokens(chat_ids))
|
35 |
+
|
36 |
+
# convert the chat history to a string for generation
|
37 |
+
chat_str = llama_tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
|
38 |
+
print("chat string: ", chat_str)
|
39 |
+
```
|
40 |
+
|
41 |
+
```
|
42 |
+
ids: [1, 92543, 1008, 364, 9843, 346, 3716, 725, 829, 963, 345, 92542, 364, 92543, 525, 11353, 364, 5211, 963, 505, 4576, 11146, 314, 346, 92542, 364, 92543, 1008, 364, 44501, 442, 3531, 629, 4576, 11146, 314, 346, 92542, 364]
|
43 |
+
tokens: ['<s>', '<|im_start|>', 'user', '\n', 'Hello', '!', '▁What', "'s", '▁your', '▁name', '?', '<|im_end|>', '\n', '<|im_start|>', 'ass', 'istant', '\n', 'My', '▁name', '▁is', '▁Intern', 'LM', '2', '!', '<|im_end|>', '\n', '<|im_start|>', 'user', '\n', 'Nice', '▁to', '▁meet', '▁you', '▁Intern', 'LM', '2', '!', '<|im_end|>', '\n']
|
44 |
+
chat string: <s><|im_start|>user
|
45 |
+
Hello! What's your name?<|im_end|>
|
46 |
+
<|im_start|>assistant
|
47 |
+
My name is InternLM2!<|im_end|>
|
48 |
+
<|im_start|>user
|
49 |
+
Nice to meet you InternLM2!<|im_end|>
|
50 |
+
<|im_start|>assistant
|
51 |
+
|
52 |
```
|