sakelariev committed on
Commit
a287cf6
1 Parent(s): 66ec8c9

Fix missing lemmatizer

Browse files
Files changed (3) hide show
  1. bg_news_sm-3.5.4-py3-none-any.whl +2 -2
  2. config.cfg +26 -7
  3. meta.json +2 -0
bg_news_sm-3.5.4-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8dbd54a6a84709c1fb367e2287c79bfcd88e53ac325864bba6f14c25648f202
3
- size 14635982
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:194f9627e344ed20d7c2ac9d23df68512b14d1461f11c29ec1a1106937d16fa6
3
+ size 14636081
config.cfg CHANGED
@@ -12,7 +12,7 @@ gpu_allocator = null
12
 
13
  [nlp]
14
  lang = "bg"
15
- pipeline = ["tok2vec","tagger","morphologizer","parser","ner"]
16
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
17
  disabled = []
18
  before_creation = null
@@ -129,6 +129,24 @@ depth = 4
129
  window_size = 1
130
  maxout_pieces = 3
131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  [corpora]
133
 
134
  [corpora.dev]
@@ -192,17 +210,18 @@ eps = 0.00000001
192
  learn_rate = 0.001
193
 
194
  [training.score_weights]
195
- tag_acc = 0.26
196
- pos_acc = 0.12
197
- morph_acc = 0.12
198
  morph_per_feat = null
199
- dep_uas = 0.12
200
- dep_las = 0.12
 
201
  dep_las_per_type = null
202
  sents_p = null
203
  sents_r = null
204
  sents_f = 0.0
205
- ents_f = 0.26
206
  ents_p = 0.0
207
  ents_r = 0.0
208
  ents_per_type = null
 
12
 
13
  [nlp]
14
  lang = "bg"
15
+ pipeline = ["tok2vec","tagger","morphologizer","trainable_lemmatizer","parser","ner"]
16
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
17
  disabled = []
18
  before_creation = null
 
129
  window_size = 1
130
  maxout_pieces = 3
131
 
132
+ [components.trainable_lemmatizer]
133
+ factory = "trainable_lemmatizer"
134
+ backoff = "orth"
135
+ min_tree_freq = 3
136
+ overwrite = false
137
+ scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
138
+ top_k = 1
139
+
140
+ [components.trainable_lemmatizer.model]
141
+ @architectures = "spacy.Tagger.v2"
142
+ nO = null
143
+ normalize = false
144
+
145
+ [components.trainable_lemmatizer.model.tok2vec]
146
+ @architectures = "spacy.Tok2VecListener.v1"
147
+ width = 96
148
+ upstream = "tok2vec"
149
+
150
  [corpora]
151
 
152
  [corpora.dev]
 
210
  learn_rate = 0.001
211
 
212
  [training.score_weights]
213
+ tag_acc = 0.2
214
+ pos_acc = 0.1
215
+ morph_acc = 0.1
216
  morph_per_feat = null
217
+ lemma_acc = 0.2
218
+ dep_uas = 0.1
219
+ dep_las = 0.1
220
  dep_las_per_type = null
221
  sents_p = null
222
  sents_r = null
223
  sents_f = 0.0
224
+ ents_f = 0.2
225
  ents_p = 0.0
226
  ents_r = 0.0
227
  ents_per_type = null
meta.json CHANGED
@@ -1031,6 +1031,7 @@
1031
  "tok2vec",
1032
  "tagger",
1033
  "morphologizer",
 
1034
  "parser",
1035
  "ner"
1036
  ],
@@ -1038,6 +1039,7 @@
1038
  "tok2vec",
1039
  "tagger",
1040
  "morphologizer",
 
1041
  "parser",
1042
  "ner"
1043
  ],
 
1031
  "tok2vec",
1032
  "tagger",
1033
  "morphologizer",
1034
+ "trainable_lemmatizer",
1035
  "parser",
1036
  "ner"
1037
  ],
 
1039
  "tok2vec",
1040
  "tagger",
1041
  "morphologizer",
1042
+ "trainable_lemmatizer",
1043
  "parser",
1044
  "ner"
1045
  ],