kisejin committed on
Commit
ed38b8c
1 Parent(s): 9b6cdb4

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +6 -6
  2. tokenizer_config.json +1 -1
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 277,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
- "Fixed": 277
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
@@ -958,8 +958,8 @@
958
  {
959
  "type": "Metaspace",
960
  "replacement": "▁",
961
- "add_prefix_space": true,
962
- "prepend_scheme": "always"
963
  }
964
  ]
965
  },
@@ -1020,8 +1020,8 @@
1020
  "decoder": {
1021
  "type": "Metaspace",
1022
  "replacement": "▁",
1023
- "add_prefix_space": true,
1024
- "prepend_scheme": "always"
1025
  },
1026
  "model": {
1027
  "type": "Unigram",
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 295,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
+ "Fixed": 295
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
 
958
  {
959
  "type": "Metaspace",
960
  "replacement": "▁",
961
+ "prepend_scheme": "always",
962
+ "split": true
963
  }
964
  ]
965
  },
 
1020
  "decoder": {
1021
  "type": "Metaspace",
1022
  "replacement": "▁",
1023
+ "prepend_scheme": "always",
1024
+ "split": true
1025
  },
1026
  "model": {
1027
  "type": "Unigram",
tokenizer_config.json CHANGED
@@ -930,7 +930,7 @@
930
  "clean_up_tokenization_spaces": true,
931
  "eos_token": "</s>",
932
  "extra_ids": 100,
933
- "model_max_length": 512,
934
  "pad_token": "<pad>",
935
  "tokenizer_class": "T5Tokenizer",
936
  "unk_token": "<unk>"
 
930
  "clean_up_tokenization_spaces": true,
931
  "eos_token": "</s>",
932
  "extra_ids": 100,
933
+ "model_max_length": 1000000000000000019884624838656,
934
  "pad_token": "<pad>",
935
  "tokenizer_class": "T5Tokenizer",
936
  "unk_token": "<unk>"