ming030890 committed
Commit 5fea977
1 Parent(s): e872323

Upload tokenizer

special_tokens_map.json CHANGED
@@ -1 +1,3 @@
-{}
+{
+  "pad_token": "<pad>"
+}
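
This change registers `<pad>` as the tokenizer's padding token. As a rough sketch of how such a map is typically produced (assuming the `transformers` library and a hypothetical checkpoint path, since the diff does not name the repository), adding the special token and re-saving is what writes `special_tokens_map.json`:

from transformers import AutoTokenizer

# Hypothetical path; the actual repository is not named in the diff.
tokenizer = AutoTokenizer.from_pretrained("path/to/checkpoint")

# Registering <pad> populates special_tokens_map.json with
# {"pad_token": "<pad>"} on the next save.
tokenizer.add_special_tokens({"pad_token": "<pad>"})
tokenizer.save_pretrained("path/to/checkpoint")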
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
 {
   "version": "1.0",
-  "truncation": null,
+  "truncation": {
+    "direction": "Right",
+    "max_length": 512,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
   "padding": null,
   "added_tokens": [
     {
@@ -58,7 +63,17 @@
     "prepend_scheme": "always",
     "split": true
   },
-  "post_processor": null,
+  "post_processor": {
+    "type": "BertProcessing",
+    "sep": [
+      "</s>",
+      2
+    ],
+    "cls": [
+      "<s>",
+      1
+    ]
+  },
   "decoder": {
     "type": "Metaspace",
     "replacement": "▁",
tokenizer_config.json CHANGED
@@ -42,6 +42,12 @@
     }
   },
   "clean_up_tokenization_spaces": true,
-  "model_max_length": 1000000000000000019884624838656,
-  "tokenizer_class": "PreTrainedTokenizerFast"
+  "max_length": 512,
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "stride": 0,
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "truncation": true,
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first"
 }
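
Replacing `1000000000000000019884624838656` (the `VERY_LARGE_INTEGER` sentinel, `int(1e30)`, that `transformers` writes when no maximum length is configured) with 512 means the limit now actually takes effect when the tokenizer is loaded. A rough usage sketch, again with a hypothetical checkpoint path:

from transformers import AutoTokenizer

# Hypothetical path; the diff does not name the repository.
tokenizer = AutoTokenizer.from_pretrained("path/to/checkpoint")

print(tokenizer.model_max_length)  # 512 after this commit
print(tokenizer.pad_token)         # "<pad>" after this commit

# truncation=True now clips to 512 tokens via model_max_length, and
# padding=True works because a pad token is defined.
batch = tokenizer(
    ["a short example", "another, slightly longer example"],
    truncation=True,
    padding=True,
)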