msharma95 Raj-Sanjay-Shah committed on
Commit
4a0b2f6
1 Parent(s): 4b75810

Update tokenizer_config.json (#1)

Browse files

- Update tokenizer_config.json (a2613c4ba555397d478c10d97d957e994bd30066)


Co-authored-by: Raj Sanjay Shah <Raj-Sanjay-Shah@users.noreply.huggingface.co>

Files changed (1) hide show
  1. tokenizer_config.json +113 -1
tokenizer_config.json CHANGED
@@ -1 +1,113 @@
1
- {"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": true, "added_tokens_decoder": {"0": {"content": "<mask>", "lstrip": true, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "1": {"content": "<pad>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "2": {"content": "<unk>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "3": {"content": "<s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "4": {"content": "</s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}}, "clean_up_tokenization_spaces": true, "max_length": 128, "model_max_length": 1000000000000000019884624838656, "pad_to_multiple_of": null, "pad_token_type_id": 0, "padding_side": "right", "stride": 0, "trim_offsets": true, "truncation_side": "right", "truncation_strategy": "longest_first", "special_tokens_map_file": 
"cache_dir/8ea647fe2507dd9424cae28e30a169caf024d03e72369935c0ce9cf791bd2e6d.50c9a6a3342271e7e900bb03520d7f844b78e2b2ef8352a0239b688c7d12bdc6", "name_or_path": "Raj-Sanjay-Shah/baby_berta_duplicate", "tokenizer_class": "RobertaTokenizer"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "errors": "replace",
3
+ "bos_token": {
4
+ "content": "<s>",
5
+ "single_word": false,
6
+ "lstrip": false,
7
+ "rstrip": false,
8
+ "normalized": true,
9
+ "__type": "AddedToken"
10
+ },
11
+ "eos_token": {
12
+ "content": "</s>",
13
+ "single_word": false,
14
+ "lstrip": false,
15
+ "rstrip": false,
16
+ "normalized": true,
17
+ "__type": "AddedToken"
18
+ },
19
+ "unk_token": {
20
+ "content": "<unk>",
21
+ "single_word": false,
22
+ "lstrip": false,
23
+ "rstrip": false,
24
+ "normalized": true,
25
+ "__type": "AddedToken"
26
+ },
27
+ "sep_token": {
28
+ "content": "</s>",
29
+ "single_word": false,
30
+ "lstrip": false,
31
+ "rstrip": false,
32
+ "normalized": true,
33
+ "__type": "AddedToken"
34
+ },
35
+ "cls_token": {
36
+ "content": "<s>",
37
+ "single_word": false,
38
+ "lstrip": false,
39
+ "rstrip": false,
40
+ "normalized": true,
41
+ "__type": "AddedToken"
42
+ },
43
+ "pad_token": {
44
+ "content": "<pad>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": true,
49
+ "__type": "AddedToken"
50
+ },
51
+ "mask_token": {
52
+ "content": "<mask>",
53
+ "single_word": false,
54
+ "lstrip": true,
55
+ "rstrip": false,
56
+ "normalized": true,
57
+ "__type": "AddedToken"
58
+ },
59
+ "add_prefix_space": true,
60
+ "added_tokens_decoder": {
61
+ "0": {
62
+ "content": "<mask>",
63
+ "lstrip": true,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "1": {
70
+ "content": "<pad>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "2": {
78
+ "content": "<unk>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "3": {
86
+ "content": "<s>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "4": {
94
+ "content": "</s>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ }
101
+ },
102
+ "clean_up_tokenization_spaces": true,
103
+ "max_length": 128,
104
+ "model_max_length": 1000000000000000019884624838656,
105
+ "pad_to_multiple_of": null,
106
+ "pad_token_type_id": 0,
107
+ "padding_side": "right",
108
+ "stride": 0,
109
+ "trim_offsets": true,
110
+ "truncation_side": "right",
111
+ "truncation_strategy": "longest_first",
112
+ "tokenizer_class": "RobertaTokenizer"
113
+ }