Erda committed on
Commit
e4dcf41
1 Parent(s): 9bd9d95

Upload tokenizer

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. special_tokens_map.json +21 -3
  3. tokenizer_config.json +7 -0
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  license: apache-2.0
3
- base_model: google-t5/t5-base
4
  tags:
5
  - generated_from_trainer
 
6
  model-index:
7
  - name: results
8
  results: []
 
1
  ---
2
  license: apache-2.0
 
3
  tags:
4
  - generated_from_trainer
5
+ base_model: google-t5/t5-base
6
  model-index:
7
  - name: results
8
  results: []
special_tokens_map.json CHANGED
@@ -101,7 +101,25 @@
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
- "eos_token": "</s>",
105
- "pad_token": "<pad>",
106
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  }
 
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
  }
tokenizer_config.json CHANGED
@@ -930,8 +930,15 @@
930
  "clean_up_tokenization_spaces": true,
931
  "eos_token": "</s>",
932
  "extra_ids": 100,
 
933
  "model_max_length": 1000000000000000019884624838656,
 
934
  "pad_token": "<pad>",
 
 
 
935
  "tokenizer_class": "T5Tokenizer",
 
 
936
  "unk_token": "<unk>"
937
  }
 
930
  "clean_up_tokenization_spaces": true,
931
  "eos_token": "</s>",
932
  "extra_ids": 100,
933
+ "max_length": 128,
934
  "model_max_length": 1000000000000000019884624838656,
935
+ "pad_to_multiple_of": null,
936
  "pad_token": "<pad>",
937
+ "pad_token_type_id": 0,
938
+ "padding_side": "right",
939
+ "stride": 0,
940
  "tokenizer_class": "T5Tokenizer",
941
+ "truncation_side": "right",
942
+ "truncation_strategy": "longest_first",
943
  "unk_token": "<unk>"
944
  }