Jacobo commited on
Commit
4584a76
1 Parent(s): 969afdd

Update spaCy pipeline

Browse files
README.md CHANGED
The diff for this file is too large to render. See raw diff
 
config.cfg CHANGED
@@ -1,8 +1,8 @@
1
  [paths]
2
- train = "corpus/train/grc_proiel-ud-train.spacy"
3
- dev = "corpus/dev/grc_proiel-ud-dev.spacy"
4
- vectors = "vectors/large"
5
  init_tok2vec = null
 
6
 
7
  [system]
8
  gpu_allocator = "pytorch"
@@ -10,7 +10,7 @@ seed = 1
10
 
11
  [nlp]
12
  lang = "grc"
13
- pipeline = ["transformer","morphologizer","tagger","parser","senter","lemmatizer","attribute_ruler"]
14
  batch_size = 128
15
  disabled = []
16
  before_creation = null
@@ -89,26 +89,6 @@ grad_factor = 1.0
89
  pooling = {"@layers":"reduce_mean.v1"}
90
  upstream = "transformer"
91
 
92
- [components.senter]
93
- factory = "senter"
94
- overwrite = false
95
- scorer = {"@scorers":"spacy.senter_scorer.v1"}
96
-
97
- [components.senter.model]
98
- @architectures = "spacy.Tagger.v2"
99
- nO = null
100
- normalize = false
101
-
102
- [components.senter.model.tok2vec]
103
- @architectures = "spacy.HashEmbedCNN.v2"
104
- pretrained_vectors = true
105
- width = 12
106
- depth = 1
107
- embed_size = 2000
108
- window_size = 1
109
- maxout_pieces = 2
110
- subword_features = true
111
-
112
  [components.tagger]
113
  factory = "tagger"
114
  label_smoothing = 0.0
@@ -133,7 +113,7 @@ set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotati
133
 
134
  [components.transformer.model]
135
  @architectures = "spacy-transformers.TransformerModel.v3"
136
- name = "Jacobo/aristoBERTo"
137
  mixed_precision = false
138
 
139
  [components.transformer.model.get_spans]
@@ -178,7 +158,7 @@ max_epochs = 0
178
  max_steps = 20000
179
  eval_frequency = 200
180
  frozen_components = ["lemmatizer"]
181
- annotating_components = ["lemmatizer"]
182
  before_to_disk = null
183
  before_update = null
184
 
@@ -215,27 +195,27 @@ total_steps = 20000
215
  initial_rate = 0.00005
216
 
217
  [training.score_weights]
218
- pos_acc = 0.07
219
- morph_acc = 0.07
220
  morph_per_feat = null
221
- tag_acc = 0.14
222
- dep_uas = 0.07
223
- dep_las = 0.07
224
  dep_las_per_type = null
225
  sents_p = null
226
  sents_r = null
227
  sents_f = 0.0
228
- lemma_acc = 0.58
229
 
230
  [pretraining]
231
 
232
  [initialize]
233
- vectors = ${paths.vectors}
234
  init_tok2vec = ${paths.init_tok2vec}
235
  vocab_data = null
236
  lookups = null
237
  before_init = null
238
  after_init = null
 
239
 
240
  [initialize.components]
241
 
 
1
  [paths]
2
+ train = "corpus/proiel/train/grc_proiel-ud-train.spacy"
3
+ dev = "corpus/proiel/dev/grc_proiel-ud-dev.spacy"
 
4
  init_tok2vec = null
5
+ vectors = null
6
 
7
  [system]
8
  gpu_allocator = "pytorch"
 
10
 
11
  [nlp]
12
  lang = "grc"
13
+ pipeline = ["transformer","morphologizer","tagger","parser","lemmatizer","attribute_ruler"]
14
  batch_size = 128
15
  disabled = []
16
  before_creation = null
 
89
  pooling = {"@layers":"reduce_mean.v1"}
90
  upstream = "transformer"
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  [components.tagger]
93
  factory = "tagger"
94
  label_smoothing = 0.0
 
113
 
114
  [components.transformer.model]
115
  @architectures = "spacy-transformers.TransformerModel.v3"
116
+ name = "cabrooks/LOGION-50k_wordpiece"
117
  mixed_precision = false
118
 
119
  [components.transformer.model.get_spans]
 
158
  max_steps = 20000
159
  eval_frequency = 200
160
  frozen_components = ["lemmatizer"]
161
+ annotating_components = []
162
  before_to_disk = null
163
  before_update = null
164
 
 
195
  initial_rate = 0.00005
196
 
197
  [training.score_weights]
198
+ pos_acc = 0.06
199
+ morph_acc = 0.06
200
  morph_per_feat = null
201
+ tag_acc = 0.15
202
+ dep_uas = 0.06
203
+ dep_las = 0.06
204
  dep_las_per_type = null
205
  sents_p = null
206
  sents_r = null
207
  sents_f = 0.0
208
+ lemma_acc = 0.61
209
 
210
  [pretraining]
211
 
212
  [initialize]
 
213
  init_tok2vec = ${paths.init_tok2vec}
214
  vocab_data = null
215
  lookups = null
216
  before_init = null
217
  after_init = null
218
+ vectors = ${paths.vectors}
219
 
220
  [initialize.components]
221
 
grc_proiel_trf-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:841ef57502dfd9fb257e699d9aaf215a967b00b9e7c6f69e616eda2d5d8b1a8b
3
- size 666854510
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:555cdbb17adee883902051334423989b70065cbbdad0c37b73e175caacd9ef2b
3
+ size 495436349
lemmatizer/cfg CHANGED
The diff for this file is too large to render. See raw diff
 
lemmatizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1241e6da8d549970b6f2731b833524dcb8b4e45a2aae75972aff868583eb88b
3
- size 24263260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee3567720a57a22b28099f9ccd9c39ee5b455f448e531763e00d59219f0b4671
3
+ size 27089840
lemmatizer/trees CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0b4cacd6282ecfa887b731fe8aed793709e236050f81662e72cd06fe73a6458
3
- size 5318689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a53607dd846f3e82212b030f5fde5e8487e55fe146f7f90da6078e87837bc0a
3
+ size 6009899
meta.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "lang":"grc",
3
  "name":"proiel_trf",
4
- "version":"3.7",
5
  "description":"",
6
  "author":"",
7
  "email":"",
@@ -10,10 +10,10 @@
10
  "spacy_version":">=3.7.4,<3.8.0",
11
  "spacy_git_version":"bff8725f4",
12
  "vectors":{
13
- "width":300,
14
- "vectors":200000,
15
- "keys":-1,
16
- "name":"grc_pipeline.vectors"
17
  },
18
  "labels":{
19
  "transformer":[
@@ -45,6 +45,7 @@
45
  "Case=Acc|Gender=Fem|Number=Sing|POS=NOUN",
46
  "Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act",
47
  "Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp",
 
48
  "Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem",
49
  "Case=Nom|Gender=Masc|Number=Plur|POS=NOUN",
50
  "Case=Acc|Gender=Masc|Number=Plur|POS=NOUN",
@@ -727,6 +728,7 @@
727
  "Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act",
728
  "Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid",
729
  "Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ",
 
730
  "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass",
731
  "Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN",
732
  "Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN",
@@ -1073,7 +1075,8 @@
1073
  "Px",
1074
  "R-",
1075
  "S-",
1076
- "V-"
 
1077
  ],
1078
  "parser":[
1079
  "ROOT",
@@ -1105,6 +1108,7 @@
1105
  "obl:agent",
1106
  "orphan",
1107
  "parataxis",
 
1108
  "vocative",
1109
  "xcomp"
1110
  ],
@@ -1117,7 +1121,6 @@
1117
  "morphologizer",
1118
  "tagger",
1119
  "parser",
1120
- "senter",
1121
  "lemmatizer",
1122
  "attribute_ruler"
1123
  ],
@@ -1126,7 +1129,6 @@
1126
  "morphologizer",
1127
  "tagger",
1128
  "parser",
1129
- "senter",
1130
  "lemmatizer",
1131
  "attribute_ruler"
1132
  ],
@@ -1134,33 +1136,33 @@
1134
 
1135
  ],
1136
  "performance":{
1137
- "pos_acc":0.9838118957,
1138
- "morph_acc":0.9423527688,
1139
  "morph_per_feat":{
1140
  "Case":{
1141
- "p":0.986852086,
1142
- "r":0.985108531,
1143
- "f":0.9859795377
1144
  },
1145
  "Gender":{
1146
- "p":0.9429406532,
1147
- "r":0.9467908639,
1148
- "f":0.9448618362
1149
  },
1150
  "Number":{
1151
- "p":0.9958574979,
1152
- "r":0.9943128942,
1153
- "f":0.9950845967
1154
  },
1155
  "Person":{
1156
- "p":0.9914877868,
1157
- "r":0.9889258029,
1158
- "f":0.9902051377
1159
  },
1160
  "PronType":{
1161
- "p":0.9911879896,
1162
- "r":0.986038961,
1163
- "f":0.9886067708
1164
  },
1165
  "Polarity":{
1166
  "p":1.0,
@@ -1168,39 +1170,39 @@
1168
  "f":0.9895287958
1169
  },
1170
  "Aspect":{
1171
- "p":0.9914236707,
1172
- "r":0.9841089671,
1173
- "f":0.987752777
1174
  },
1175
  "Mood":{
1176
- "p":0.990836197,
1177
- "r":0.9902690326,
1178
- "f":0.9905525336
1179
  },
1180
  "Tense":{
1181
- "p":0.9879890185,
1182
- "r":0.989007214,
1183
- "f":0.9884978541
1184
  },
1185
  "VerbForm":{
1186
- "p":0.9962251201,
1187
- "r":0.9965671129,
1188
- "f":0.9963960872
1189
  },
1190
  "Voice":{
1191
- "p":0.982841455,
1192
- "r":0.9838543456,
1193
- "f":0.9833476395
1194
  },
1195
  "Degree":{
1196
- "p":0.9740437158,
1197
- "r":0.9583333333,
1198
- "f":0.9661246612
1199
  },
1200
  "Definite":{
1201
- "p":0.9956942949,
1202
- "r":0.9994597515,
1203
- "f":0.9975734699
1204
  },
1205
  "Reflex":{
1206
  "p":1.0,
@@ -1209,163 +1211,163 @@
1209
  },
1210
  "Poss":{
1211
  "p":1.0,
1212
- "r":0.8947368421,
1213
- "f":0.9444444444
1214
  }
1215
  },
1216
- "tag_acc":0.9847641371,
1217
- "dep_uas":0.8557720481,
1218
- "dep_las":0.8225168473,
1219
  "dep_las_per_type":{
1220
  "nsubj":{
1221
- "p":0.8533627343,
1222
- "r":0.8385698808,
1223
- "f":0.8459016393
1224
  },
1225
  "discourse":{
1226
- "p":0.853223594,
1227
- "r":0.8543956044,
1228
- "f":0.853809197
1229
  },
1230
  "mark":{
1231
- "p":0.9068825911,
1232
- "r":0.9032258065,
1233
- "f":0.9050505051
1234
  },
1235
  "advmod":{
1236
- "p":0.7938461538,
1237
- "r":0.7667161961,
1238
- "f":0.7800453515
1239
  },
1240
  "advcl":{
1241
- "p":0.7967257844,
1242
- "r":0.7978142077,
1243
- "f":0.7972696246
1244
  },
1245
  "xcomp":{
1246
- "p":0.7148594378,
1247
- "r":0.712,
1248
- "f":0.7134268537
1249
  },
1250
  "cop":{
1251
- "p":0.8224299065,
1252
- "r":0.8224299065,
1253
- "f":0.8224299065
1254
  },
1255
  "root":{
1256
- "p":0.8472222222,
1257
- "r":0.897939156,
1258
- "f":0.8718437351
1259
  },
1260
  "det":{
1261
- "p":0.9480744267,
1262
- "r":0.9501300954,
1263
- "f":0.9491011479
1264
  },
1265
  "nmod":{
1266
- "p":0.7866184448,
1267
- "r":0.7658450704,
1268
- "f":0.7760927743
1269
  },
1270
  "obj":{
1271
- "p":0.881443299,
1272
- "r":0.8878504673,
1273
- "f":0.8846352819
1274
  },
1275
  "case":{
1276
- "p":0.9638157895,
1277
- "r":0.9638157895,
1278
- "f":0.9638157895
1279
  },
1280
  "obl":{
1281
- "p":0.7702539299,
1282
- "r":0.7777777778,
1283
- "f":0.7739975699
1284
  },
1285
  "cc":{
1286
- "p":0.7302904564,
1287
- "r":0.7242798354,
1288
- "f":0.7272727273
1289
  },
1290
  "conj":{
1291
- "p":0.6684709066,
1292
- "r":0.6482939633,
1293
- "f":0.6582278481
1294
  },
1295
  "obl:agent":{
1296
- "p":0.8928571429,
1297
- "r":0.6756756757,
1298
- "f":0.7692307692
1299
  },
1300
  "ccomp":{
1301
- "p":0.6972972973,
1302
- "r":0.6417910448,
1303
- "f":0.6683937824
1304
  },
1305
  "nsubj:pass":{
1306
- "p":0.7777777778,
1307
- "r":0.8504672897,
1308
- "f":0.8125
1309
  },
1310
  "amod":{
1311
- "p":0.7942583732,
1312
- "r":0.779342723,
1313
- "f":0.7867298578
1314
  },
1315
  "acl":{
1316
- "p":0.5414012739,
1317
  "r":0.5151515152,
1318
- "f":0.5279503106
1319
  },
1320
  "iobj":{
1321
- "p":0.7762863535,
1322
- "r":0.8013856813,
1323
- "f":0.7886363636
1324
  },
1325
  "nummod":{
1326
- "p":0.8360655738,
1327
- "r":0.75,
1328
- "f":0.7906976744
1329
  },
1330
  "vocative":{
1331
- "p":0.8153846154,
1332
- "r":0.768115942,
1333
- "f":0.7910447761
1334
  },
1335
  "orphan":{
1336
- "p":0.4,
1337
- "r":0.1860465116,
1338
- "f":0.253968254
1339
  },
1340
  "appos":{
1341
- "p":0.5210084034,
1342
- "r":0.4397163121,
1343
- "f":0.4769230769
1344
  },
1345
- "parataxis":{
1346
- "p":0.2857142857,
1347
- "r":0.1,
1348
- "f":0.1481481481
1349
  },
1350
  "dep":{
1351
  "p":0.0,
1352
  "r":0.0,
1353
  "f":0.0
1354
  },
1355
- "dislocated":{
1356
- "p":0.6363636364,
1357
- "r":0.2692307692,
1358
- "f":0.3783783784
1359
  },
1360
  "csubj:pass":{
1361
- "p":0.25,
1362
- "r":0.2,
1363
- "f":0.2222222222
1364
  },
1365
  "flat:name":{
1366
- "p":0.9285714286,
1367
  "r":0.5909090909,
1368
- "f":0.7222222222
1369
  },
1370
  "aux:pass":{
1371
  "p":0.0,
@@ -1374,8 +1376,8 @@
1374
  },
1375
  "fixed":{
1376
  "p":1.0,
1377
- "r":0.5,
1378
- "f":0.6666666667
1379
  },
1380
  "aux":{
1381
  "p":0.0,
@@ -1383,15 +1385,14 @@
1383
  "f":0.0
1384
  }
1385
  },
1386
- "sents_p":0.6777777778,
1387
- "sents_r":0.7183513248,
1388
- "sents_f":0.6974749881,
1389
- "lemma_acc":0.967111046,
1390
- "transformer_loss":16283.6197895598,
1391
- "morphologizer_loss":319.4725572726,
1392
- "tagger_loss":53.8895646907,
1393
- "parser_loss":24873.6728275445,
1394
- "senter_loss":25970.2717437744
1395
  },
1396
  "requirements":[
1397
  "spacy-transformers>=1.3.4,<1.4.0"
 
1
  {
2
  "lang":"grc",
3
  "name":"proiel_trf",
4
+ "version":"3.7.4",
5
  "description":"",
6
  "author":"",
7
  "email":"",
 
10
  "spacy_version":">=3.7.4,<3.8.0",
11
  "spacy_git_version":"bff8725f4",
12
  "vectors":{
13
+ "width":0,
14
+ "vectors":0,
15
+ "keys":0,
16
+ "name":null
17
  },
18
  "labels":{
19
  "transformer":[
 
45
  "Case=Acc|Gender=Fem|Number=Sing|POS=NOUN",
46
  "Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act",
47
  "Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp",
48
+ "POS=PUNCT",
49
  "Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem",
50
  "Case=Nom|Gender=Masc|Number=Plur|POS=NOUN",
51
  "Case=Acc|Gender=Masc|Number=Plur|POS=NOUN",
 
728
  "Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act",
729
  "Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid",
730
  "Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ",
731
+ "POS=AUX",
732
  "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass",
733
  "Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN",
734
  "Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN",
 
1075
  "Px",
1076
  "R-",
1077
  "S-",
1078
+ "V-",
1079
+ "Z"
1080
  ],
1081
  "parser":[
1082
  "ROOT",
 
1108
  "obl:agent",
1109
  "orphan",
1110
  "parataxis",
1111
+ "punct",
1112
  "vocative",
1113
  "xcomp"
1114
  ],
 
1121
  "morphologizer",
1122
  "tagger",
1123
  "parser",
 
1124
  "lemmatizer",
1125
  "attribute_ruler"
1126
  ],
 
1129
  "morphologizer",
1130
  "tagger",
1131
  "parser",
 
1132
  "lemmatizer",
1133
  "attribute_ruler"
1134
  ],
 
1136
 
1137
  ],
1138
  "performance":{
1139
+ "pos_acc":0.985754209,
1140
+ "morph_acc":0.9466975666,
1141
  "morph_per_feat":{
1142
  "Case":{
1143
+ "p":0.9866161616,
1144
+ "r":0.9861181222,
1145
+ "f":0.986367079
1146
  },
1147
  "Gender":{
1148
+ "p":0.9454891995,
1149
+ "r":0.9494704606,
1150
+ "f":0.9474756478
1151
  },
1152
  "Number":{
1153
+ "p":0.9960678808,
1154
+ "r":0.9953469135,
1155
+ "f":0.9957072666
1156
  },
1157
  "Person":{
1158
+ "p":0.9922423347,
1159
+ "r":0.9915097822,
1160
+ "f":0.9918759232
1161
  },
1162
  "PronType":{
1163
+ "p":0.9921824104,
1164
+ "r":0.988961039,
1165
+ "f":0.9905691057
1166
  },
1167
  "Polarity":{
1168
  "p":1.0,
 
1170
  "f":0.9895287958
1171
  },
1172
  "Aspect":{
1173
+ "p":0.9857305936,
1174
+ "r":0.9801362089,
1175
+ "f":0.9829254411
1176
  },
1177
  "Mood":{
1178
+ "p":0.9913990826,
1179
+ "r":0.9896966228,
1180
+ "f":0.9905471212
1181
  },
1182
  "Tense":{
1183
+ "p":0.9845467033,
1184
+ "r":0.9848849193,
1185
+ "f":0.9847157822
1186
  },
1187
  "VerbForm":{
1188
+ "p":0.9969104016,
1189
+ "r":0.9969104016,
1190
+ "f":0.9969104016
1191
  },
1192
  "Voice":{
1193
+ "p":0.9776785714,
1194
+ "r":0.978014428,
1195
+ "f":0.9778464709
1196
  },
1197
  "Degree":{
1198
+ "p":0.9528936743,
1199
+ "r":0.9516129032,
1200
+ "f":0.9522528581
1201
  },
1202
  "Definite":{
1203
+ "p":0.9956896552,
1204
+ "r":0.9983792545,
1205
+ "f":0.9970326409
1206
  },
1207
  "Reflex":{
1208
  "p":1.0,
 
1211
  },
1212
  "Poss":{
1213
  "p":1.0,
1214
+ "r":0.9473684211,
1215
+ "f":0.972972973
1216
  }
1217
  },
1218
+ "tag_acc":0.9867084725,
1219
+ "dep_uas":0.8896213286,
1220
+ "dep_las":0.8552699041,
1221
  "dep_las_per_type":{
1222
  "nsubj":{
1223
+ "p":0.8678223185,
1224
+ "r":0.8678223185,
1225
+ "f":0.8678223185
1226
  },
1227
  "discourse":{
1228
+ "p":0.8641304348,
1229
+ "r":0.8736263736,
1230
+ "f":0.868852459
1231
  },
1232
  "mark":{
1233
+ "p":0.9047619048,
1234
+ "r":0.9193548387,
1235
+ "f":0.912
1236
  },
1237
  "advmod":{
1238
+ "p":0.8021638331,
1239
+ "r":0.7711738484,
1240
+ "f":0.7863636364
1241
  },
1242
  "advcl":{
1243
+ "p":0.8187830688,
1244
+ "r":0.8456284153,
1245
+ "f":0.8319892473
1246
  },
1247
  "xcomp":{
1248
+ "p":0.7142857143,
1249
+ "r":0.74,
1250
+ "f":0.7269155206
1251
  },
1252
  "cop":{
1253
+ "p":0.8248847926,
1254
+ "r":0.8364485981,
1255
+ "f":0.8306264501
1256
  },
1257
  "root":{
1258
+ "p":0.958863859,
1259
+ "r":0.9607458292,
1260
+ "f":0.9598039216
1261
  },
1262
  "det":{
1263
+ "p":0.9548022599,
1264
+ "r":0.9527320035,
1265
+ "f":0.9537660082
1266
  },
1267
  "nmod":{
1268
+ "p":0.7927927928,
1269
+ "r":0.7746478873,
1270
+ "f":0.7836153161
1271
  },
1272
  "obj":{
1273
+ "p":0.8841025641,
1274
+ "r":0.8951194185,
1275
+ "f":0.8895768834
1276
  },
1277
  "case":{
1278
+ "p":0.9704595186,
1279
+ "r":0.9725877193,
1280
+ "f":0.9715224535
1281
  },
1282
  "obl":{
1283
+ "p":0.800982801,
1284
+ "r":0.7960927961,
1285
+ "f":0.7985303123
1286
  },
1287
  "cc":{
1288
+ "p":0.8436213992,
1289
+ "r":0.8436213992,
1290
+ "f":0.8436213992
1291
  },
1292
  "conj":{
1293
+ "p":0.7836411609,
1294
+ "r":0.7795275591,
1295
+ "f":0.7815789474
1296
  },
1297
  "obl:agent":{
1298
+ "p":0.8461538462,
1299
+ "r":0.5945945946,
1300
+ "f":0.6984126984
1301
  },
1302
  "ccomp":{
1303
+ "p":0.7434554974,
1304
+ "r":0.7064676617,
1305
+ "f":0.7244897959
1306
  },
1307
  "nsubj:pass":{
1308
+ "p":0.7543859649,
1309
+ "r":0.8037383178,
1310
+ "f":0.778280543
1311
  },
1312
  "amod":{
1313
+ "p":0.8118811881,
1314
+ "r":0.7699530516,
1315
+ "f":0.7903614458
1316
  },
1317
  "acl":{
1318
+ "p":0.5448717949,
1319
  "r":0.5151515152,
1320
+ "f":0.5295950156
1321
  },
1322
  "iobj":{
1323
+ "p":0.8048780488,
1324
+ "r":0.8383371824,
1325
+ "f":0.8212669683
1326
  },
1327
  "nummod":{
1328
+ "p":0.873015873,
1329
+ "r":0.8088235294,
1330
+ "f":0.8396946565
1331
  },
1332
  "vocative":{
1333
+ "p":0.935483871,
1334
+ "r":0.8405797101,
1335
+ "f":0.8854961832
1336
  },
1337
  "orphan":{
1338
+ "p":0.3913043478,
1339
+ "r":0.2093023256,
1340
+ "f":0.2727272727
1341
  },
1342
  "appos":{
1343
+ "p":0.5384615385,
1344
+ "r":0.4468085106,
1345
+ "f":0.488372093
1346
  },
1347
+ "dislocated":{
1348
+ "p":0.5714285714,
1349
+ "r":0.3076923077,
1350
+ "f":0.4
1351
  },
1352
  "dep":{
1353
  "p":0.0,
1354
  "r":0.0,
1355
  "f":0.0
1356
  },
1357
+ "parataxis":{
1358
+ "p":0.6666666667,
1359
+ "r":0.2,
1360
+ "f":0.3076923077
1361
  },
1362
  "csubj:pass":{
1363
+ "p":0.0,
1364
+ "r":0.0,
1365
+ "f":0.0
1366
  },
1367
  "flat:name":{
1368
+ "p":0.8666666667,
1369
  "r":0.5909090909,
1370
+ "f":0.7027027027
1371
  },
1372
  "aux:pass":{
1373
  "p":0.0,
 
1376
  },
1377
  "fixed":{
1378
  "p":1.0,
1379
+ "r":0.7,
1380
+ "f":0.8235294118
1381
  },
1382
  "aux":{
1383
  "p":0.0,
 
1385
  "f":0.0
1386
  }
1387
  },
1388
+ "sents_p":0.9970617042,
1389
+ "sents_r":0.9990186457,
1390
+ "sents_f":0.9980392157,
1391
+ "lemma_acc":0.9653057051,
1392
+ "transformer_loss":443.1381021026,
1393
+ "morphologizer_loss":3.3029946992,
1394
+ "tagger_loss":1.3335578433,
1395
+ "parser_loss":19904.7449291433
 
1396
  },
1397
  "requirements":[
1398
  "spacy-transformers>=1.3.4,<1.4.0"
morphologizer/cfg CHANGED
@@ -27,6 +27,7 @@
27
  "Case=Acc|Gender=Fem|Number=Sing|POS=NOUN":"Case=Acc|Gender=Fem|Number=Sing",
28
  "Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act":"Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin|Voice=Act",
29
  "Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp":"Case=Dat|Gender=Masc|Number=Plur|PronType=Rcp",
 
30
  "Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem":"Case=Nom|Definite=Def|Gender=Masc|Number=Plur|PronType=Dem",
31
  "Case=Nom|Gender=Masc|Number=Plur|POS=NOUN":"Case=Nom|Gender=Masc|Number=Plur",
32
  "Case=Acc|Gender=Masc|Number=Plur|POS=NOUN":"Case=Acc|Gender=Masc|Number=Plur",
@@ -709,6 +710,7 @@
709
  "Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act":"Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act",
710
  "Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid":"Aspect=Perf|Mood=Sub|Number=Plur|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid",
711
  "Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ":"Case=Dat|Degree=Sup|Gender=Fem|Number=Sing",
 
712
  "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass":"Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass",
713
  "Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN":"Case=Acc|Gender=Fem,Masc|Number=Sing",
714
  "Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN":"Case=Dat|Gender=Fem,Masc|Number=Sing",
@@ -1058,6 +1060,7 @@
1058
  "Case=Acc|Gender=Fem|Number=Sing|POS=NOUN":92,
1059
  "Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act":100,
1060
  "Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp":95,
 
1061
  "Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem":90,
1062
  "Case=Nom|Gender=Masc|Number=Plur|POS=NOUN":92,
1063
  "Case=Acc|Gender=Masc|Number=Plur|POS=NOUN":92,
@@ -1740,6 +1743,7 @@
1740
  "Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act":100,
1741
  "Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid":100,
1742
  "Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ":84,
 
1743
  "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass":100,
1744
  "Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN":92,
1745
  "Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN":92,
 
27
  "Case=Acc|Gender=Fem|Number=Sing|POS=NOUN":"Case=Acc|Gender=Fem|Number=Sing",
28
  "Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act":"Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin|Voice=Act",
29
  "Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp":"Case=Dat|Gender=Masc|Number=Plur|PronType=Rcp",
30
+ "POS=PUNCT":"",
31
  "Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem":"Case=Nom|Definite=Def|Gender=Masc|Number=Plur|PronType=Dem",
32
  "Case=Nom|Gender=Masc|Number=Plur|POS=NOUN":"Case=Nom|Gender=Masc|Number=Plur",
33
  "Case=Acc|Gender=Masc|Number=Plur|POS=NOUN":"Case=Acc|Gender=Masc|Number=Plur",
 
710
  "Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act":"Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act",
711
  "Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid":"Aspect=Perf|Mood=Sub|Number=Plur|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid",
712
  "Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ":"Case=Dat|Degree=Sup|Gender=Fem|Number=Sing",
713
+ "POS=AUX":"",
714
  "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass":"Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass",
715
  "Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN":"Case=Acc|Gender=Fem,Masc|Number=Sing",
716
  "Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN":"Case=Dat|Gender=Fem,Masc|Number=Sing",
 
1060
  "Case=Acc|Gender=Fem|Number=Sing|POS=NOUN":92,
1061
  "Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act":100,
1062
  "Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp":95,
1063
+ "POS=PUNCT":97,
1064
  "Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem":90,
1065
  "Case=Nom|Gender=Masc|Number=Plur|POS=NOUN":92,
1066
  "Case=Acc|Gender=Masc|Number=Plur|POS=NOUN":92,
 
1743
  "Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act":100,
1744
  "Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid":100,
1745
  "Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ":84,
1746
+ "POS=AUX":87,
1747
  "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass":100,
1748
  "Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN":92,
1749
  "Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN":92,
morphologizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84622808919a33cf8082567f7cc878ece10a773522bdd40b06d13d8fe14519fe
3
- size 3165857
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cf8607c13ee0221b098e501600c2584bfd49f3a0400b02ca7018ffc9dc0a320
3
+ size 3172009
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f5e8adeb2b784c18e3496d3776f53b23ce970dc93290ee83702ee2c7975bf43
3
- size 1906923
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce04316becfea07f15b8b4ac961ae12cc9d1c628e917ea5847ccddfb34677c6d
3
+ size 1919319
parser/moves CHANGED
@@ -1 +1 @@
1
- ��moves�1{"0":{"":99376},"1":{"":72645},"2":{"det":25097,"case":13178,"advmod":8281,"nsubj":8155,"discourse":7820,"advcl":5178,"obj":4646,"obl":4348,"mark":3054,"cc":2782,"iobj":1765,"cop":1464,"nmod":1391,"amod":1350,"nsubj:pass":966,"det||nsubj":961,"xcomp":917,"vocative":752,"nummod":549,"mark||advcl":409,"case||obl":395,"obj||advcl":366,"det||obj":353,"dislocated":261,"acl":254,"obl||advcl":245,"orphan":206,"nmod||nsubj":192,"nsubj||advcl":175,"ccomp":161,"nsubj||ccomp":158,"det||nsubj:pass":140,"advmod||advcl":136,"obj||xcomp":132,"obl:agent":126,"cc||advcl":121,"conj||advcl":118,"det||obl":115,"nmod||obj":108,"parataxis":106,"det||iobj":94,"amod||obj":88,"det||nmod":79,"xcomp||advcl":77,"amod||nsubj":75,"obj||ccomp":71,"iobj||advcl":70,"obl||xcomp":64,"iobj||xcomp":64,"advmod||xcomp":55,"advmod||ccomp":49,"appos||nsubj":47,"obl||ccomp":45,"ccomp||advcl":44,"det||advmod":42,"cc||nsubj":42,"nmod||obl":41,"advmod||advmod":39,"nsubj:pass||advcl":34,"iobj||ccomp":34,"amod||obl":30,"dep":0},"3":{"conj":8819,"cc":8497,"obl":7446,"obj":6636,"nmod":5543,"nsubj":3918,"advcl":3876,"det":3859,"iobj":3825,"xcomp":2226,"ccomp":2144,"discourse":2105,"advmod":1915,"appos":1627,"acl":1443,"amod":1361,"cop":1355,"nsubj:pass":710,"orphan":452,"obl:agent":273,"flat:name":248,"vocative":243,"nummod":240,"acl||obj":174,"acl||nsubj":152,"fixed":148,"appos||nsubj":120,"csubj:pass":118,"nmod||obj":116,"conj||nsubj":113,"parataxis":110,"cc||nsubj":110,"nmod||nsubj":102,"conj||obj":101,"cc||obj":92,"appos||obj":84,"amod||obj":79,"case":69,"conj||obl":65,"cc||obl":63,"amod||nsubj":63,"dislocated":59,"det||obj":54,"acl||obl":53,"appos||obl":48,"conj||xcomp":46,"det||nsubj":45,"cop||xcomp":40,"iobj||xcomp":38,"conj||nmod":37,"obl||xcomp":35,"conj||iobj":35,"cc||nmod":35,"cop||ccomp":34,"cc||iobj":33,"cc||xcomp":32,"dep":0},"4":{"ROOT":15014}}�cfg��neg_key�
 
1
+ ��moves�?{"0":{"":99376},"1":{"":87659},"2":{"det":25097,"case":13178,"advmod":8281,"nsubj":8155,"discourse":7820,"advcl":5178,"obj":4646,"obl":4348,"mark":3054,"cc":2782,"iobj":1765,"cop":1464,"nmod":1391,"amod":1350,"nsubj:pass":966,"det||nsubj":961,"xcomp":917,"vocative":752,"nummod":549,"mark||advcl":409,"case||obl":395,"obj||advcl":366,"det||obj":353,"dislocated":261,"acl":254,"obl||advcl":245,"orphan":206,"nmod||nsubj":192,"nsubj||advcl":175,"ccomp":161,"nsubj||ccomp":158,"det||nsubj:pass":140,"advmod||advcl":136,"obj||xcomp":132,"obl:agent":126,"cc||advcl":121,"conj||advcl":118,"det||obl":115,"nmod||obj":108,"parataxis":106,"det||iobj":94,"amod||obj":88,"det||nmod":79,"xcomp||advcl":77,"amod||nsubj":75,"obj||ccomp":71,"iobj||advcl":70,"obl||xcomp":64,"iobj||xcomp":64,"advmod||xcomp":55,"advmod||ccomp":49,"appos||nsubj":47,"obl||ccomp":45,"ccomp||advcl":44,"det||advmod":42,"cc||nsubj":42,"nmod||obl":41,"advmod||advmod":39,"nsubj:pass||advcl":34,"iobj||ccomp":34,"amod||obl":30,"dep":0},"3":{"punct":15014,"conj":8819,"cc":8497,"obl":7446,"obj":6636,"nmod":5543,"nsubj":3918,"advcl":3876,"det":3859,"iobj":3825,"xcomp":2226,"ccomp":2144,"discourse":2105,"advmod":1915,"appos":1627,"acl":1443,"amod":1361,"cop":1355,"nsubj:pass":710,"orphan":452,"obl:agent":273,"flat:name":248,"vocative":243,"nummod":240,"acl||obj":174,"acl||nsubj":152,"fixed":148,"appos||nsubj":120,"csubj:pass":118,"nmod||obj":116,"conj||nsubj":113,"parataxis":110,"cc||nsubj":110,"nmod||nsubj":102,"conj||obj":101,"cc||obj":92,"appos||obj":84,"amod||obj":79,"case":69,"conj||obl":65,"cc||obl":63,"amod||nsubj":63,"dislocated":59,"det||obj":54,"acl||obl":53,"appos||obl":48,"conj||xcomp":46,"det||nsubj":45,"cop||xcomp":40,"iobj||xcomp":38,"conj||nmod":37,"obl||xcomp":35,"conj||iobj":35,"cc||nmod":35,"cop||ccomp":34,"cc||iobj":33,"cc||xcomp":32,"dep":0},"4":{"ROOT":15014}}�cfg��neg_key�
tagger/cfg CHANGED
@@ -23,7 +23,8 @@
23
  "Px",
24
  "R-",
25
  "S-",
26
- "V-"
 
27
  ],
28
  "neg_prefix":"!",
29
  "overwrite":false
 
23
  "Px",
24
  "R-",
25
  "S-",
26
+ "V-",
27
+ "Z"
28
  ],
29
  "neg_prefix":"!",
30
  "overwrite":false
tagger/model CHANGED
Binary files a/tagger/model and b/tagger/model differ
 
transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0079081301b8dc4832c92400cfea876a8674ea1fb91fef7e5c5cf73cf9e48b06
3
- size 453378326
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:766b290e344b548ea87fae03eb56d5ce4303ceea9fd4081f6921adf08b6ec2e1
3
+ size 500074867
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e7fcdb72c0d1d8f0885eb06657b9f82e90747bbf21062f980600ed2923414f8
3
- size 17973476
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3726a679fe296082a7268b903f16b1a57d2a5451384596394aa60e1309164585
3
+ size 23343529
vocab/vectors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:649fa469a04ae5093d8e61ce90c9a0272ea09ba48f0ed6ec0803ba21021748e9
3
- size 240000128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14772b683e726436d5948ad3fff2b43d036ef2ebbe3458aafed6004e05a40706
3
+ size 128
vocab/vectors.cfg CHANGED
@@ -1,10 +1,3 @@
1
  {
2
- "mode":"floret",
3
- "minn":2,
4
- "maxn":10,
5
- "hash_count":2,
6
- "hash_seed":2166136261,
7
- "bow":"<",
8
- "eow":">",
9
- "attr":65
10
  }
 
1
  {
2
+ "mode":"default"
 
 
 
 
 
 
 
3
  }