vesteinn committed on
Commit
169c8b7
1 Parent(s): d2f7993

Model added

Browse files
.gitattributes CHANGED
@@ -25,3 +25,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ rmh.fasttext.model.wv.vectors_vocab.npy filter=lfs diff=lfs merge=lfs -text
29
+ rmh.fasttext.model.syn1neg.npy filter=lfs diff=lfs merge=lfs -text
30
+ rmh.fasttext.model.wv.vectors_ngrams.npy filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FastText model trained on Icelandic
2
+
3
+ This model is trained on the lemmas of the Icelandic Gigaword Corpus, version 20.05. It was trained using the gensim package, version 4.1.0, with all parameters left at their defaults (100 dimensions, window size 5).
4
+
5
+ This model cannot be loaded directly because it uses gensim. To use it, clone the repository and run the following.
6
+
7
+ ```python
8
+ import gensim
9
+ model = gensim.models.FastText.load("./rmh.fasttext.model")
10
+ ```
11
+
12
+ ## Example output
13
+
14
+ ```bash
15
+ In [1]: model.wv.most_similar("england")
16
+ Out[1]:
17
+ [('englands', 0.8778558969497681),
18
+ ('southland', 0.8573296070098877),
19
+ ('skotland', 0.846065878868103),
20
+ ('englaland', 0.8320872187614441),
21
+ ('hoogland', 0.8299505114555359),
22
+ ('hoagland', 0.8277317881584167),
23
+ ('totland', 0.8265103697776794),
24
+ ('lackland', 0.8234561681747437),
25
+ ('skarpengland', 0.8227219581604004),
26
+ ('langland', 0.8222305774688721)]
27
+
28
+ In [2]: model.wv.most_similar("kanína")
29
+ Out[2]:
30
+ [('loðkanína', 0.9271067976951599),
31
+ ('dvergkanína', 0.9106121063232422),
32
+ ('angórakanína', 0.895512044429779),
33
+ ('angórukanína', 0.8741581439971924),
34
+ ('feldkanína', 0.8696010708808899),
35
+ ('kanínubangsi', 0.8562541604042053),
36
+ ('holdakanína', 0.8543838858604431),
37
+ ('villikanína', 0.8525990843772888),
38
+ ('silkikanína', 0.8515204191207886),
39
+ ('kaníni', 0.8445548415184021)]
40
+ ```
41
+
42
+
rmh.fasttext.model ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3106a906f1e3de15f0288b5c5b22b7483afc3187f35c05f14a796ec9f04f42cf
3
+ size 22182275
rmh.fasttext.model.syn1neg.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6645845c533af0c9fc48fa51bf821c491c3bbabdacfdb5c33944ccfddb767a6
3
+ size 254173328
rmh.fasttext.model.wv.vectors_ngrams.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f20a2a1a6d838a578d5e63a4c99522b38ac9ad7c7975750a7297047292bd9cd
3
+ size 800000128
rmh.fasttext.model.wv.vectors_vocab.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:537302871b15584f270f5e603b22065ec8ff4f0f2d4daffe2380e33d64eaa6d6
3
+ size 254173328