kubota committed on
Commit
bfa39ce
1 Parent(s): 2b1a434

Upload tokenizer_config.json with huggingface_hub

Files changed (1)
  1. tokenizer_config.json +79 -0
tokenizer_config.json ADDED
@@ -0,0 +1,79 @@
+ {
+   "additional_special_tokens": [
+     {
+       "__type": "AddedToken",
+       "content": "<ent>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "__type": "AddedToken",
+       "content": "<ent2>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "__type": "AddedToken",
+       "content": "<ent>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "__type": "AddedToken",
+       "content": "<ent2>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false
+     }
+   ],
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "entity_mask2_token": "[MASK2]",
+   "entity_mask_token": "[MASK]",
+   "entity_pad_token": "[PAD]",
+   "entity_token_1": {
+     "__type": "AddedToken",
+     "content": "<ent>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "entity_token_2": {
+     "__type": "AddedToken",
+     "content": "<ent2>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "entity_unk_token": "[UNK]",
+   "eos_token": "</s>",
+   "mask_token": {
+     "__type": "AddedToken",
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "max_entity_length": 32,
+   "max_mention_length": 30,
+   "model_max_length": 512,
+   "name_or_path": "studio-ousia/luke-japanese-large",
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "sp_model_kwargs": {},
+   "special_tokens_map_file": "models/luke-japanese-large/hf_xlm_roberta/special_tokens_map.json",
+   "task": null,
+   "tokenizer_class": "MLukeTokenizer",
+   "tokenizer_file": "models/luke-japanese-large/hf_luke_japanese_epoch20/tokenizer.json",
+   "unk_token": "<unk>"
+ }
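
For context, this file configures Hugging Face's MLukeTokenizer: <ent> and <ent2> are the markers LUKE wraps around mention spans when a span task is set (here "task" is null), and entity inputs are capped by max_mention_length 30 and max_entity_length 32. Below is a minimal loading sketch, assuming transformers and sentencepiece are installed; the repo id is taken from the config's "name_or_path" and stands in for wherever this file was actually uploaded.

from transformers import MLukeTokenizer

# Assumption: loading the upstream checkpoint named in "name_or_path";
# substitute the id of the repo this commit was pushed to, or a local path.
tokenizer = MLukeTokenizer.from_pretrained("studio-ousia/luke-japanese-large")

text = "東京は日本の首都です。"  # "Tokyo is the capital of Japan."
# entity_spans are character-level (start, end) offsets into `text`.
inputs = tokenizer(text, entity_spans=[(0, 2)], return_tensors="pt")
print(inputs["input_ids"].shape)   # word-token ids
print(inputs["entity_ids"].shape)  # one entity slot per span, up to max_entity_length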