gozu888 committed on
Commit
96ad7f9
1 Parent(s): de9b079

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +2 -8
  2. tokenizer_config.json +1 -2
tokenizer.json CHANGED
@@ -1,14 +1,7 @@
1
  {
2
  "version": "1.0",
3
  "truncation": null,
4
- "padding": {
5
- "strategy": "BatchLongest",
6
- "direction": "Right",
7
- "pad_to_multiple_of": null,
8
- "pad_id": 0,
9
- "pad_type_id": 0,
10
- "pad_token": "<pad>"
11
- },
12
  "added_tokens": [
13
  {
14
  "id": 0,
@@ -557,6 +550,7 @@
557
  "continuing_subword_prefix": null,
558
  "end_of_word_suffix": null,
559
  "fuse_unk": true,
 
560
  "vocab": {
561
  "<pad>": 0,
562
  "</s>": 1,
 
1
  {
2
  "version": "1.0",
3
  "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
 
550
  "continuing_subword_prefix": null,
551
  "end_of_word_suffix": null,
552
  "fuse_unk": true,
553
+ "byte_fallback": false,
554
  "vocab": {
555
  "<pad>": 0,
556
  "</s>": 1,
tokenizer_config.json CHANGED
@@ -49,13 +49,12 @@
49
  "<extra_id_46>",
50
  "<extra_id_47>"
51
  ],
 
52
  "eos_token": "</s>",
53
  "extra_ids": 48,
54
  "model_max_length": 1000000000000000019884624838656,
55
- "name_or_path": "./trained/transx",
56
  "pad_token": "<pad>",
57
  "sp_model_kwargs": {},
58
- "special_tokens_map_file": "/root/.cache/huggingface/hub/models--VietAI--envit5-translation/snapshots/840bc88104d5a4277af740eaedb024df8c3093e7/special_tokens_map.json",
59
  "tokenizer_class": "T5Tokenizer",
60
  "unk_token": "<unk>"
61
  }
 
49
  "<extra_id_46>",
50
  "<extra_id_47>"
51
  ],
52
+ "clean_up_tokenization_spaces": true,
53
  "eos_token": "</s>",
54
  "extra_ids": 48,
55
  "model_max_length": 1000000000000000019884624838656,
 
56
  "pad_token": "<pad>",
57
  "sp_model_kwargs": {},
 
58
  "tokenizer_class": "T5Tokenizer",
59
  "unk_token": "<unk>"
60
  }