Use FastTokenizer
Browse files
- README.md +1 -3
- tokenizer.json +0 -0
- tokenizer_config.json +2 -1
README.md
CHANGED
@@ -11,14 +11,12 @@ This repository provides Japanese language models trained by [SB Intuitions](htt
|
|
11 |
|
12 |
## How to use
|
13 |
|
14 |
-
Please set **use_fast=False** to use our tokenizer properly.
|
15 |
-
|
16 |
```python
|
17 |
import torch
|
18 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, set_seed
|
19 |
|
20 |
model = AutoModelForCausalLM.from_pretrained("sbintuitions/sarashina1-65b", torch_dtype=torch.float16, device_map="auto")
|
21 |
-
tokenizer = AutoTokenizer.from_pretrained("sbintuitions/sarashina1-65b", use_fast=False)
|
22 |
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
23 |
set_seed(123)
|
24 |
|
|
|
11 |
|
12 |
## How to use
|
13 |
|
|
|
|
|
14 |
```python
|
15 |
import torch
|
16 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, set_seed
|
17 |
|
18 |
model = AutoModelForCausalLM.from_pretrained("sbintuitions/sarashina1-65b", torch_dtype=torch.float16, device_map="auto")
|
19 |
+
tokenizer = AutoTokenizer.from_pretrained("sbintuitions/sarashina1-65b")
|
20 |
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
21 |
set_seed(123)
|
22 |
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
CHANGED
@@ -9,7 +9,8 @@
|
|
9 |
"mask_token": "<mask>",
|
10 |
"cls_token": "<cls>",
|
11 |
"sep_token": "<sep>",
|
|
|
12 |
"sp_model_kwargs": {},
|
13 |
"special_tokens_map_file": null,
|
14 |
-
"tokenizer_class": "
|
15 |
}
|
|
|
9 |
"mask_token": "<mask>",
|
10 |
"cls_token": "<cls>",
|
11 |
"sep_token": "<sep>",
|
12 |
+
"padding_side": "left",
|
13 |
"sp_model_kwargs": {},
|
14 |
"special_tokens_map_file": null,
|
15 |
+
"tokenizer_class": "PreTrainedTokenizerFast"
|
16 |
}
|