sakelariev
committed on
Commit
•
a287cf6
1
Parent(s):
66ec8c9
Fix missing lemmatizer
Browse files
- bg_news_sm-3.5.4-py3-none-any.whl +2 -2
- config.cfg +26 -7
- meta.json +2 -0
bg_news_sm-3.5.4-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:194f9627e344ed20d7c2ac9d23df68512b14d1461f11c29ec1a1106937d16fa6
|
3 |
+
size 14636081
|
config.cfg
CHANGED
@@ -12,7 +12,7 @@ gpu_allocator = null
|
|
12 |
|
13 |
[nlp]
|
14 |
lang = "bg"
|
15 |
-
pipeline = ["tok2vec","tagger","morphologizer","parser","ner"]
|
16 |
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
|
17 |
disabled = []
|
18 |
before_creation = null
|
@@ -129,6 +129,24 @@ depth = 4
|
|
129 |
window_size = 1
|
130 |
maxout_pieces = 3
|
131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
[corpora]
|
133 |
|
134 |
[corpora.dev]
|
@@ -192,17 +210,18 @@ eps = 0.00000001
|
|
192 |
learn_rate = 0.001
|
193 |
|
194 |
[training.score_weights]
|
195 |
-
tag_acc = 0.
|
196 |
-
pos_acc = 0.
|
197 |
-
morph_acc = 0.
|
198 |
morph_per_feat = null
|
199 |
-
|
200 |
-
|
|
|
201 |
dep_las_per_type = null
|
202 |
sents_p = null
|
203 |
sents_r = null
|
204 |
sents_f = 0.0
|
205 |
-
ents_f = 0.
|
206 |
ents_p = 0.0
|
207 |
ents_r = 0.0
|
208 |
ents_per_type = null
|
|
|
12 |
|
13 |
[nlp]
|
14 |
lang = "bg"
|
15 |
+
pipeline = ["tok2vec","tagger","morphologizer","trainable_lemmatizer","parser","ner"]
|
16 |
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
|
17 |
disabled = []
|
18 |
before_creation = null
|
|
|
129 |
window_size = 1
|
130 |
maxout_pieces = 3
|
131 |
|
132 |
+
[components.trainable_lemmatizer]
|
133 |
+
factory = "trainable_lemmatizer"
|
134 |
+
backoff = "orth"
|
135 |
+
min_tree_freq = 3
|
136 |
+
overwrite = false
|
137 |
+
scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
|
138 |
+
top_k = 1
|
139 |
+
|
140 |
+
[components.trainable_lemmatizer.model]
|
141 |
+
@architectures = "spacy.Tagger.v2"
|
142 |
+
nO = null
|
143 |
+
normalize = false
|
144 |
+
|
145 |
+
[components.trainable_lemmatizer.model.tok2vec]
|
146 |
+
@architectures = "spacy.Tok2VecListener.v1"
|
147 |
+
width = 96
|
148 |
+
upstream = "tok2vec"
|
149 |
+
|
150 |
[corpora]
|
151 |
|
152 |
[corpora.dev]
|
|
|
210 |
learn_rate = 0.001
|
211 |
|
212 |
[training.score_weights]
|
213 |
+
tag_acc = 0.2
|
214 |
+
pos_acc = 0.1
|
215 |
+
morph_acc = 0.1
|
216 |
morph_per_feat = null
|
217 |
+
lemma_acc = 0.2
|
218 |
+
dep_uas = 0.1
|
219 |
+
dep_las = 0.1
|
220 |
dep_las_per_type = null
|
221 |
sents_p = null
|
222 |
sents_r = null
|
223 |
sents_f = 0.0
|
224 |
+
ents_f = 0.2
|
225 |
ents_p = 0.0
|
226 |
ents_r = 0.0
|
227 |
ents_per_type = null
|
meta.json
CHANGED
@@ -1031,6 +1031,7 @@
|
|
1031 |
"tok2vec",
|
1032 |
"tagger",
|
1033 |
"morphologizer",
|
|
|
1034 |
"parser",
|
1035 |
"ner"
|
1036 |
],
|
@@ -1038,6 +1039,7 @@
|
|
1038 |
"tok2vec",
|
1039 |
"tagger",
|
1040 |
"morphologizer",
|
|
|
1041 |
"parser",
|
1042 |
"ner"
|
1043 |
],
|
|
|
1031 |
"tok2vec",
|
1032 |
"tagger",
|
1033 |
"morphologizer",
|
1034 |
+
"trainable_lemmatizer",
|
1035 |
"parser",
|
1036 |
"ner"
|
1037 |
],
|
|
|
1039 |
"tok2vec",
|
1040 |
"tagger",
|
1041 |
"morphologizer",
|
1042 |
+
"trainable_lemmatizer",
|
1043 |
"parser",
|
1044 |
"ner"
|
1045 |
],
|