ayjays132 committed
Commit 3c91395
1 Parent(s): e30b2de

Upload 5 files

Files changed (4)
  1. special_tokens_map.json +1 -7
  2. tokenizer.json +13 -6
  3. tokenizer_config.json +16 -1
  4. vocab.json +0 -0
special_tokens_map.json CHANGED
@@ -122,13 +122,7 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": {
-    "content": "<pad>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
+  "pad_token": "</s>",
   "sep_token": {
     "content": "[SEP]",
     "lstrip": false,
tokenizer.json CHANGED
@@ -2,19 +2,17 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 512,
+    "max_length": 1024,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
-    "strategy": {
-      "Fixed": 512
-    },
+    "strategy": "BatchLongest",
     "direction": "Right",
     "pad_to_multiple_of": null,
-    "pad_id": 30534,
+    "pad_id": 0,
     "pad_type_id": 0,
-    "pad_token": "<pad>"
+    "pad_token": "[PAD]"
   },
   "added_tokens": [
     {
@@ -62,6 +60,15 @@
       "normalized": false,
       "special": true
     },
+    {
+      "id": 2204,
+      "content": "good",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
     {
       "id": 3407,
       "content": "happy",
tokenizer_config.json CHANGED
@@ -40,6 +40,14 @@
       "single_word": false,
       "special": true
     },
+    "2204": {
+      "content": "good",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
     "3407": {
       "content": "happy",
       "lstrip": false,
@@ -1084,11 +1092,18 @@
   "do_lower_case": true,
   "eos_token": "</s>",
   "mask_token": "[MASK]",
+  "max_length": 512,
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<pad>",
+  "pad_to_multiple_of": null,
+  "pad_token": "</s>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "[SEP]",
+  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "<unk>"
 }
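
The slow-tokenizer config now records call-time padding and truncation arguments (right-side padding with "</s>", right-side "longest_first" truncation, max_length 512) alongside the newly registered token "good" at id 2204. A minimal usage sketch, with the directory name as a placeholder:

```python
# Sketch only: "./" stands in for a checkout of this repository.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./")
enc = tok(
    ["good", "a much longer example sentence to pad against"],
    padding=True,     # pad to the longest sequence in the batch, on the right
    truncation=True,  # longest_first, as configured
    max_length=512,
)
print([len(ids) for ids in enc["input_ids"]])
print(tok.convert_ids_to_tokens([2204]))  # the token registered at id 2204 ("good")
```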
vocab.json CHANGED
The diff for this file is too large to render. See raw diff