ekazuki committed on
Commit
70648a5
1 Parent(s): 7d3c49d

Upload tokenizer

Browse files
sentencepiece.bpe.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:988bc5a00281c6d210a5d34bd143d0363741a432fefe741bf71e61b1869d4314
3
- size 810912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f98f266fdc548c94216aaadc13ffaaafacf0c8793303e2195322d954549ea261
3
+ size 808767
special_tokens_map.json CHANGED
@@ -4,27 +4,9 @@
4
  "</s>NOTUSED",
5
  "<unk>NOTUSED"
6
  ],
7
- "bos_token": {
8
- "content": "<s>",
9
- "lstrip": false,
10
- "normalized": false,
11
- "rstrip": false,
12
- "single_word": false
13
- },
14
- "cls_token": {
15
- "content": "<s>",
16
- "lstrip": false,
17
- "normalized": false,
18
- "rstrip": false,
19
- "single_word": false
20
- },
21
- "eos_token": {
22
- "content": "</s>",
23
- "lstrip": false,
24
- "normalized": false,
25
- "rstrip": false,
26
- "single_word": false
27
- },
28
  "mask_token": {
29
  "content": "<mask>",
30
  "lstrip": true,
@@ -32,25 +14,7 @@
32
  "rstrip": false,
33
  "single_word": false
34
  },
35
- "pad_token": {
36
- "content": "<pad>",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false
41
- },
42
- "sep_token": {
43
- "content": "</s>",
44
- "lstrip": false,
45
- "normalized": false,
46
- "rstrip": false,
47
- "single_word": false
48
- },
49
- "unk_token": {
50
- "content": "<unk>",
51
- "lstrip": false,
52
- "normalized": false,
53
- "rstrip": false,
54
- "single_word": false
55
- }
56
  }
 
4
  "</s>NOTUSED",
5
  "<unk>NOTUSED"
6
  ],
7
+ "bos_token": "<s>",
8
+ "cls_token": "<s>",
9
+ "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  "mask_token": {
11
  "content": "<mask>",
12
  "lstrip": true,
 
14
  "rstrip": false,
15
  "single_word": false
16
  },
17
+ "pad_token": "<pad>",
18
+ "sep_token": "</s>",
19
+ "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -75,7 +75,7 @@
75
  "cls_token": "<s>",
76
  "eos_token": "</s>",
77
  "mask_token": "<mask>",
78
- "model_max_length": 512,
79
  "pad_token": "<pad>",
80
  "sep_token": "</s>",
81
  "sp_model_kwargs": {},
 
75
  "cls_token": "<s>",
76
  "eos_token": "</s>",
77
  "mask_token": "<mask>",
78
+ "model_max_length": 1000000000000000019884624838656,
79
  "pad_token": "<pad>",
80
  "sep_token": "</s>",
81
  "sp_model_kwargs": {},