julio2027 committed on
Commit
8c5f483
1 Parent(s): 93f7ee0

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.json +20 -4
tokenizer.json CHANGED
@@ -1,7 +1,21 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
@@ -89,7 +103,8 @@
89
  {
90
  "type": "Metaspace",
91
  "replacement": "▁",
92
- "add_prefix_space": true
 
93
  }
94
  ]
95
  },
@@ -177,7 +192,8 @@
177
  "decoder": {
178
  "type": "Metaspace",
179
  "replacement": "▁",
180
- "add_prefix_space": true
 
181
  },
182
  "model": {
183
  "type": "Unigram",
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 512
12
+ },
13
+ "direction": "Right",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 1,
16
+ "pad_type_id": 0,
17
+ "pad_token": "<pad>"
18
+ },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
103
  {
104
  "type": "Metaspace",
105
  "replacement": "▁",
106
+ "add_prefix_space": true,
107
+ "prepend_scheme": "always"
108
  }
109
  ]
110
  },
 
192
  "decoder": {
193
  "type": "Metaspace",
194
  "replacement": "▁",
195
+ "add_prefix_space": true,
196
+ "prepend_scheme": "always"
197
  },
198
  "model": {
199
  "type": "Unigram",