vesteinn committed on
Commit
5ddac6f
1 Parent(s): c0fc129

Model added

Browse files
.gitattributes CHANGED
@@ -25,3 +25,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ rmh.w2v.model.syn1neg.npy filter=lfs diff=lfs merge=lfs -text
29
+ rmh.w2v.model.wv.vectors.npy filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # word2vec model trained on Icelandic
2
+
3
+ This model is trained on the lemmas of the Icelandic Gigaword Corpus, version 20.05. It was trained using the gensim package, version 4.1.0, with parameters left at their defaults (100 dimensions, window size 5).
4
+
5
+ This model cannot be loaded directly since it uses gensim; clone the repository and run the following to use it.
6
+
7
+ ```python
8
+ import gensim
9
+ model = gensim.models.Word2Vec.load("./rmh.w2v.model")
10
+ ```
11
+
12
+ ## Example output
13
+
14
+ ```python
15
+ In [6]: model.wv.most_similar("england")
16
+ Out[6]:
17
+ [('wales', 0.8113704323768616),
18
+ ('skotland', 0.7611601948738098),
19
+ ('bretlandseyjar', 0.7280426621437073),
20
+ ('gateshead', 0.6975484490394592),
21
+ ('ástralía', 0.6963852047920227),
22
+ ('eastbourne', 0.6939234137535095),
23
+ ('englandi', 0.6908402442932129),
24
+ ('bath', 0.6849308013916016),
25
+ ('lynndie', 0.6826340556144714),
26
+ ('glasgow', 0.6815919876098633)]
27
+
28
+ In [7]: model.wv.most_similar("ísland")
29
+ Out[7]:
30
+ [('norðurlönd', 0.6843729615211487),
31
+ ('land', 0.6696498990058899),
32
+ ('íslendingur', 0.6645756959915161),
33
+ ('íslenskur', 0.6627770662307739),
34
+ ('hérlendis', 0.6609933376312256),
35
+ ('íslandi', 0.6514216661453247),
36
+ ('evrópa', 0.6289927959442139),
37
+ ('fróðskaparsetur', 0.6046777367591858),
38
+ ('evrópuland', 0.5911464095115662),
39
+ ('bandaríkin', 0.5906434655189514)]
40
+
41
+ ```
42
+
43
+
rmh.w2v.model ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3b0e9c7653c8eb4ab7a6bf662b6b25db878e520c8829c265d5b9fca072a8d5f
3
+ size 22182144
rmh.w2v.model.syn1neg.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b0050b23d5fd9e44c2b0a8b91584adbd6aa412b6ef7e33df7b7dd08a3ebd8cb
3
+ size 254173328
rmh.w2v.model.wv.vectors.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ad46c1a37641fdc6dafc536a8668e318aafa46abe3a8a98c2b69240b073c002
3
+ size 254173328