thomasht86 committed on
Commit
9ea1ca9
1 Parent(s): 8bf4851

Upload tokenizer

Browse files
Files changed (2) hide show
  1. added_tokens.json +3 -1
  2. tokenizer_config.json +16 -0
added_tokens.json CHANGED
@@ -1,3 +1,5 @@
1
  {
2
- "<unk>": 37
 
 
3
  }
 
1
  {
2
+ "<unk>": 37,
3
+ "æ": 38,
4
+ "ø": 39
5
  }
tokenizer_config.json CHANGED
@@ -16,6 +16,22 @@
16
  "rstrip": false,
17
  "single_word": false,
18
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  },
21
  "clean_up_tokenization_spaces": true,
 
16
  "rstrip": false,
17
  "single_word": false,
18
  "special": true
19
+ },
20
+ "38": {
21
+ "content": "æ",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": false
27
+ },
28
+ "39": {
29
+ "content": "ø",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": false
35
  }
36
  },
37
  "clean_up_tokenization_spaces": true,