madeinglasgow committed on
Commit
27555c5
1 Parent(s): dc758dc

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +1 -1
  2. tokenizer.json +27 -0
special_tokens_map.json CHANGED
@@ -13,7 +13,7 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "</s>",
17
  "unk_token": {
18
  "content": "<unk>",
19
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "▁***",
17
  "unk_token": {
18
  "content": "<unk>",
19
  "lstrip": false,
tokenizer.json CHANGED
@@ -71,6 +71,12 @@
71
  "id": "A",
72
  "type_id": 0
73
  }
 
 
 
 
 
 
74
  }
75
  ],
76
  "pair": [
@@ -86,6 +92,12 @@
86
  "type_id": 0
87
  }
88
  },
 
 
 
 
 
 
89
  {
90
  "SpecialToken": {
91
  "id": "<s>",
@@ -97,9 +109,24 @@
97
  "id": "B",
98
  "type_id": 1
99
  }
 
 
 
 
 
 
100
  }
101
  ],
102
  "special_tokens": {
 
 
 
 
 
 
 
 
 
103
  "<s>": {
104
  "id": "<s>",
105
  "ids": [
 
71
  "id": "A",
72
  "type_id": 0
73
  }
74
+ },
75
+ {
76
+ "SpecialToken": {
77
+ "id": "</s>",
78
+ "type_id": 0
79
+ }
80
  }
81
  ],
82
  "pair": [
 
92
  "type_id": 0
93
  }
94
  },
95
+ {
96
+ "SpecialToken": {
97
+ "id": "</s>",
98
+ "type_id": 0
99
+ }
100
+ },
101
  {
102
  "SpecialToken": {
103
  "id": "<s>",
 
109
  "id": "B",
110
  "type_id": 1
111
  }
112
+ },
113
+ {
114
+ "SpecialToken": {
115
+ "id": "</s>",
116
+ "type_id": 1
117
+ }
118
  }
119
  ],
120
  "special_tokens": {
121
+ "</s>": {
122
+ "id": "</s>",
123
+ "ids": [
124
+ 2
125
+ ],
126
+ "tokens": [
127
+ "</s>"
128
+ ]
129
+ },
130
  "<s>": {
131
  "id": "<s>",
132
  "ids": [