SaulLu committed on
Commit
9ff1d40
1 Parent(s): f472ef8

tokenizer v2 - include normalization discussed with Bengali community

Browse files
special_tokens_map.json CHANGED
@@ -8,7 +8,7 @@
8
  "mask_token": {
9
  "content": "[MASK]",
10
  "single_word": false,
11
- "lstrip": false,
12
  "rstrip": false,
13
  "normalized": false
14
  }
8
  "mask_token": {
9
  "content": "[MASK]",
10
  "single_word": false,
11
+ "lstrip": true,
12
  "rstrip": false,
13
  "normalized": false
14
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
tokenizer_config.json CHANGED
@@ -8,9 +8,9 @@
8
  "mask_token": {
9
  "content": "[MASK]",
10
  "single_word": false,
11
- "lstrip": false,
12
  "rstrip": false,
13
- "normalized": false,
14
  "__type": "AddedToken"
15
  },
16
  "model_max_length": 512,
8
  "mask_token": {
9
  "content": "[MASK]",
10
  "single_word": false,
11
+ "lstrip": true,
12
  "rstrip": false,
13
+ "normalized": true,
14
  "__type": "AddedToken"
15
  },
16
  "model_max_length": 512,