Wietse de Vries committed on
Commit
b23d41b
1 Parent(s): 61330c1

add missing char tokens to vocab (with embeddings close to [UNK])

Browse files
Files changed (4) hide show
  1. config.json +6 -3
  2. pytorch_model.bin +2 -2
  3. tf_model.h5 +2 -2
  4. vocab.txt +73 -0
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "wietsedv/bert-base-dutch-cased",
3
  "architectures": [
4
  "BertForMaskedLM"
5
  ],
@@ -16,6 +16,9 @@
16
  "num_attention_heads": 12,
17
  "num_hidden_layers": 12,
18
  "pad_token_id": 3,
 
 
19
  "type_vocab_size": 2,
20
- "vocab_size": 30000
21
- }
 
 
1
  {
2
+ "_name_or_path": "bert-base-dutch-cased",
3
  "architectures": [
4
  "BertForMaskedLM"
5
  ],
 
16
  "num_attention_heads": 12,
17
  "num_hidden_layers": 12,
18
  "pad_token_id": 3,
19
+ "position_embedding_type": "absolute",
20
+ "transformers_version": "4.5.1",
21
  "type_vocab_size": 2,
22
+ "use_cache": true,
23
+ "vocab_size": 30073
24
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6b583cdd24b3628d62041f927b783f432ba9f2ba1f203e7f02218e56ea22025
3
- size 436538834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ffe408c7eea0ffee4c257c6028f8c98146967e3ac3db51dba8e2bc8a4abddf5
3
+ size 436761702
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:315fa75c371599484d77dedbadb0d49048d1aad9d8b2c56598403caa3b3f58c2
3
- size 532840208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88cc47b929d21ed816d6ad8d5abea5c06ccae04a5f04f2d6b07da7d212aa18e1
3
+ size 530923844
vocab.txt CHANGED
@@ -29998,3 +29998,76 @@ zóó
29998
  ##öl
29999
  ##ön
30000
  ##ör
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29998
  ##öl
29999
  ##ön
30000
  ##ör
30001
+ ##Q
30002
+ ##X
30003
+ ##Ç
30004
+ ##Ó
30005
+ ##Ô
30006
+ ##Ú
30007
+ ##Û
30008
+ ##Ü
30009
+ ##à
30010
+ ##á
30011
+ ##â
30012
+ ##ä
30013
+ ##ê
30014
+ ##ì
30015
+ ##í
30016
+ ##î
30017
+ ##ñ
30018
+ ##ò
30019
+ ##ô
30020
+ ##ù
30021
+ ##ú
30022
+ ##û
30023
+ ##ü
30024
+ Q
30025
+ X
30026
+ a
30027
+ c
30028
+ e
30029
+ f
30030
+ g
30031
+ h
30032
+ i
30033
+ j
30034
+ k
30035
+ l
30036
+ m
30037
+ n
30038
+ o
30039
+ p
30040
+ q
30041
+ r
30042
+ s
30043
+ t
30044
+ u
30045
+ x
30046
+ y
30047
+ Ç
30048
+ Ó
30049
+ Ô
30050
+ Ú
30051
+ Û
30052
+ Ü
30053
+ à
30054
+ á
30055
+ â
30056
+ ä
30057
+ è
30058
+ é
30059
+ ê
30060
+ ë
30061
+ ì
30062
+ í
30063
+ î
30064
+ ï
30065
+ ñ
30066
+ ò
30067
+ ó
30068
+ ô
30069
+ ö
30070
+ ù
30071
+ ú
30072
+ û
30073
+ ü