Jacobo commited on
Commit
53cd854
1 Parent(s): 54704b5

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -13,57 +13,57 @@ model-index:
13
  metrics:
14
  - name: TAG (XPOS) Accuracy
15
  type: accuracy
16
- value: 0.9602988573
17
  - task:
18
  name: POS
19
  type: token-classification
20
  metrics:
21
  - name: POS (UPOS) Accuracy
22
  type: accuracy
23
- value: 0.9567096396
24
  - task:
25
  name: MORPH
26
  type: token-classification
27
  metrics:
28
  - name: Morph (UFeats) Accuracy
29
  type: accuracy
30
- value: 0.874890126
31
  - task:
32
  name: LEMMA
33
  type: token-classification
34
  metrics:
35
  - name: Lemma Accuracy
36
  type: accuracy
37
- value: 0.9265309112
38
  - task:
39
  name: UNLABELED_DEPENDENCIES
40
  type: token-classification
41
  metrics:
42
  - name: Unlabeled Attachment Score (UAS)
43
  type: f_score
44
- value: 0.7571784354
45
  - task:
46
  name: LABELED_DEPENDENCIES
47
  type: token-classification
48
  metrics:
49
  - name: Labeled Attachment Score (LAS)
50
  type: f_score
51
- value: 0.7027541752
52
  - task:
53
  name: SENTS
54
  type: token-classification
55
  metrics:
56
  - name: Sentences F-Score
57
  type: f_score
58
- value: 0.5178571429
59
  ---
60
  | Feature | Description |
61
  | --- | --- |
62
  | **Name** | `grc_proiel_sm` |
63
- | **Version** | `3.5.2` |
64
- | **spaCy** | `>=3.5.2,<3.6.0` |
65
- | **Default Pipeline** | `tok2vec`, `morphologizer`, `tagger`, `parser`, `senter`, `lemmatizer`, `attribute_ruler` |
66
- | **Components** | `tok2vec`, `morphologizer`, `tagger`, `parser`, `senter`, `lemmatizer`, `attribute_ruler` |
67
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
68
  | **Sources** | n/a |
69
  | **License** | n/a |
@@ -87,16 +87,16 @@ model-index:
87
 
88
  | Type | Score |
89
  | --- | --- |
90
- | `POS_ACC` | 95.67 |
91
- | `MORPH_ACC` | 87.49 |
92
- | `TAG_ACC` | 96.03 |
93
- | `DEP_UAS` | 75.72 |
94
- | `DEP_LAS` | 70.28 |
95
- | `SENTS_P` | 49.68 |
96
- | `SENTS_R` | 54.07 |
97
- | `SENTS_F` | 51.79 |
98
- | `LEMMA_ACC` | 92.65 |
99
- | `TOK2VEC_LOSS` | 3625989.38 |
100
- | `MORPHOLOGIZER_LOSS` | 260805.91 |
101
- | `TAGGER_LOSS` | 82647.60 |
102
- | `PARSER_LOSS` | 2809976.88 |
 
13
  metrics:
14
  - name: TAG (XPOS) Accuracy
15
  type: accuracy
16
+ value: 0.9605186053
17
  - task:
18
  name: POS
19
  type: token-classification
20
  metrics:
21
  - name: POS (UPOS) Accuracy
22
  type: accuracy
23
+ value: 0.9557573982
24
  - task:
25
  name: MORPH
26
  type: token-classification
27
  metrics:
28
  - name: Morph (UFeats) Accuracy
29
  type: accuracy
30
+ value: 0.8745238793
31
  - task:
32
  name: LEMMA
33
  type: token-classification
34
  metrics:
35
  - name: Lemma Accuracy
36
  type: accuracy
37
+ value: 0.966891298
38
  - task:
39
  name: UNLABELED_DEPENDENCIES
40
  type: token-classification
41
  metrics:
42
  - name: Unlabeled Attachment Score (UAS)
43
  type: f_score
44
+ value: 0.7632581307
45
  - task:
46
  name: LABELED_DEPENDENCIES
47
  type: token-classification
48
  metrics:
49
  - name: Labeled Attachment Score (LAS)
50
  type: f_score
51
+ value: 0.706782889
52
  - task:
53
  name: SENTS
54
  type: token-classification
55
  metrics:
56
  - name: Sentences F-Score
57
  type: f_score
58
+ value: 0.5236742424
59
  ---
60
  | Feature | Description |
61
  | --- | --- |
62
  | **Name** | `grc_proiel_sm` |
63
+ | **Version** | `3.5.3` |
64
+ | **spaCy** | `>=3.5.3,<3.6.0` |
65
+ | **Default Pipeline** | `tok2vec`, `morphologizer`, `tagger`, `parser`, `lemmatizer`, `attribute_ruler` |
66
+ | **Components** | `tok2vec`, `morphologizer`, `tagger`, `parser`, `lemmatizer`, `attribute_ruler` |
67
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
68
  | **Sources** | n/a |
69
  | **License** | n/a |
 
87
 
88
  | Type | Score |
89
  | --- | --- |
90
+ | `POS_ACC` | 95.89 |
91
+ | `MORPH_ACC` | 88.10 |
92
+ | `TAG_ACC` | 96.24 |
93
+ | `DEP_UAS` | 76.22 |
94
+ | `DEP_LAS` | 70.53 |
95
+ | `SENTS_P` | 51.87 |
96
+ | `SENTS_R` | 54.37 |
97
+ | `SENTS_F` | 53.09 |
98
+ | `LEMMA_ACC` | 96.69 |
99
+ | `TOK2VEC_LOSS` | 4107659.15 |
100
+ | `MORPHOLOGIZER_LOSS` | 127942.48 |
101
+ | `TAGGER_LOSS` | 38984.64 |
102
+ | `PARSER_LOSS` | 1907099.83 |
accuracy.json CHANGED
@@ -3,81 +3,81 @@
3
  "token_p":1.0,
4
  "token_r":1.0,
5
  "token_f":1.0,
6
- "pos_acc":0.9547093285,
7
- "morph_acc":0.8669070152,
8
- "morph_micro_p":0.9396512578,
9
- "morph_micro_r":0.9344743497,
10
- "morph_micro_f":0.9370556537,
11
  "morph_per_feat":{
12
  "Case":{
13
- "p":0.9481647152,
14
- "r":0.9479152966,
15
- "f":0.9480399895
16
  },
17
  "Gender":{
18
- "p":0.9016111992,
19
- "r":0.9053175971,
20
- "f":0.9034605968
21
  },
22
  "Number":{
23
- "p":0.9768393838,
24
- "r":0.9729613734,
25
- "f":0.9748965221
26
  },
27
  "Aspect":{
28
- "p":0.8729710656,
29
- "r":0.8668535389,
30
- "f":0.8699015471
31
  },
32
  "Mood":{
33
- "p":0.9322235434,
34
- "r":0.9132207338,
35
- "f":0.9226243013
36
  },
37
  "Person":{
38
- "p":0.9563369397,
39
- "r":0.9410646388,
40
- "f":0.9486393254
41
  },
42
  "Tense":{
43
- "p":0.879406308,
44
- "r":0.8649635036,
45
- "f":0.872125115
46
  },
47
  "VerbForm":{
48
- "p":0.9599257885,
49
- "r":0.9438161255,
50
- "f":0.9518027962
51
  },
52
  "Voice":{
53
- "p":0.9087198516,
54
- "r":0.8937956204,
55
- "f":0.9011959522
56
  },
57
  "PronType":{
58
- "p":0.9744795165,
59
- "r":0.9705685619,
60
- "f":0.9725201072
61
  },
62
  "Degree":{
63
- "p":0.8539176627,
64
- "r":0.8416230366,
65
- "f":0.8477257746
66
  },
67
  "Definite":{
68
- "p":0.9839779006,
69
- "r":0.9922005571,
70
- "f":0.9880721221
71
  },
72
  "Reflex":{
73
- "p":1.0,
74
- "r":0.9782608696,
75
- "f":0.989010989
76
  },
77
  "Polarity":{
78
- "p":0.990990991,
79
- "r":0.9954751131,
80
- "f":0.993227991
81
  },
82
  "Poss":{
83
  "p":0.875,
@@ -85,122 +85,122 @@
85
  "f":0.6666666667
86
  }
87
  },
88
- "tag_acc":0.9568123779,
89
- "sents_p":0.4986595174,
90
- "sents_r":0.5329512894,
91
- "sents_f":0.5152354571,
92
- "dep_uas":0.751239297,
93
- "dep_las":0.6945320715,
94
  "dep_las_per_type":{
95
  "iobj":{
96
- "p":0.6298701299,
97
- "r":0.6659038902,
98
- "f":0.6473859844
99
  },
100
  "root":{
101
- "p":0.7176050045,
102
- "r":0.7669531996,
103
- "f":0.7414589104
104
  },
105
  "nsubj":{
106
- "p":0.6647727273,
107
- "r":0.6558295964,
108
- "f":0.6602708804
109
  },
110
  "advmod":{
111
- "p":0.6348314607,
112
- "r":0.6277777778,
113
- "f":0.6312849162
114
  },
115
  "advcl":{
116
- "p":0.53125,
117
- "r":0.5638474295,
118
- "f":0.5470635559
119
  },
120
  "ccomp":{
121
- "p":0.4347826087,
122
- "r":0.4320987654,
123
- "f":0.4334365325
124
  },
125
  "discourse":{
126
- "p":0.7661623109,
127
- "r":0.7661623109,
128
- "f":0.7661623109
129
  },
130
  "obj":{
131
- "p":0.6868905742,
132
- "r":0.6990077178,
133
- "f":0.6928961749
134
  },
135
  "det":{
136
- "p":0.9027595269,
137
- "r":0.9043440105,
138
- "f":0.9035510741
139
  },
140
  "nmod":{
141
- "p":0.6595330739,
142
- "r":0.6544401544,
143
- "f":0.6569767442
144
  },
145
  "cop":{
146
- "p":0.6681222707,
147
- "r":0.7216981132,
148
- "f":0.693877551
149
  },
150
  "appos":{
151
- "p":0.3578947368,
152
- "r":0.2073170732,
153
- "f":0.2625482625
154
  },
155
  "case":{
156
- "p":0.9270482604,
157
- "r":0.9397042093,
158
- "f":0.9333333333
159
  },
160
  "acl":{
161
- "p":0.4322033898,
162
- "r":0.3109756098,
163
- "f":0.3617021277
164
  },
165
  "mark":{
166
- "p":0.7918552036,
167
- "r":0.8101851852,
168
- "f":0.8009153318
169
  },
170
  "obl":{
171
- "p":0.6398514851,
172
  "r":0.6494974874,
173
- "f":0.644638404
174
  },
175
  "nsubj:pass":{
176
- "p":0.5324675325,
177
- "r":0.4315789474,
178
- "f":0.476744186
179
  },
180
  "xcomp":{
181
- "p":0.5265700483,
182
- "r":0.4541666667,
183
- "f":0.4876957494
184
  },
185
  "cc":{
186
- "p":0.5874125874,
187
- "r":0.5821782178,
188
- "f":0.5847836897
189
  },
190
  "conj":{
191
- "p":0.4885496183,
192
- "r":0.4987012987,
193
- "f":0.4935732648
194
  },
195
  "dislocated":{
196
- "p":0.5,
197
- "r":0.0714285714,
198
- "f":0.125
199
  },
200
  "amod":{
201
- "p":0.6739130435,
202
- "r":0.5740740741,
203
- "f":0.62
204
  },
205
  "parataxis":{
206
  "p":0.0,
@@ -213,14 +213,19 @@
213
  "f":0.0
214
  },
215
  "nummod":{
216
- "p":0.7887323944,
217
- "r":0.8,
218
- "f":0.7943262411
219
  },
220
  "fixed":{
221
- "p":0.6666666667,
222
- "r":0.5,
223
- "f":0.5714285714
 
 
 
 
 
224
  },
225
  "csubj:pass":{
226
  "p":0.0,
@@ -228,19 +233,14 @@
228
  "f":0.0
229
  },
230
  "obl:agent":{
231
- "p":1.0,
232
  "r":0.3181818182,
233
- "f":0.4827586207
234
- },
235
- "orphan":{
236
- "p":0.125,
237
- "r":0.0571428571,
238
- "f":0.0784313725
239
  },
240
  "vocative":{
241
- "p":0.7346938776,
242
- "r":0.5901639344,
243
- "f":0.6545454545
244
  },
245
  "aux":{
246
  "p":0.0,
@@ -258,6 +258,6 @@
258
  "f":0.8571428571
259
  }
260
  },
261
- "lemma_acc":0.9247408743,
262
- "speed":6887.5650416149
263
  }
 
3
  "token_p":1.0,
4
  "token_r":1.0,
5
  "token_f":1.0,
6
+ "pos_acc":0.9522307346,
7
+ "morph_acc":0.8679585399,
8
+ "morph_micro_p":0.9386159703,
9
+ "morph_micro_r":0.9353097905,
10
+ "morph_micro_f":0.9369599638,
11
  "morph_per_feat":{
12
  "Case":{
13
+ "p":0.9519483939,
14
+ "r":0.9510719453,
15
+ "f":0.9515099678
16
  },
17
  "Gender":{
18
+ "p":0.8993377483,
19
+ "r":0.9004110861,
20
+ "f":0.8998740971
21
  },
22
  "Number":{
23
+ "p":0.9764389457,
24
+ "r":0.9738197425,
25
+ "f":0.9751275853
26
  },
27
  "Aspect":{
28
+ "p":0.8838028169,
29
+ "r":0.8794674142,
30
+ "f":0.8816297857
31
  },
32
  "Mood":{
33
+ "p":0.9252501471,
34
+ "r":0.9155503786,
35
+ "f":0.9203747073
36
  },
37
  "Person":{
38
+ "p":0.9468003084,
39
+ "r":0.9338403042,
40
+ "f":0.9402756508
41
  },
42
  "Tense":{
43
+ "p":0.8789996322,
44
+ "r":0.8722627737,
45
+ "f":0.8756182451
46
  },
47
  "VerbForm":{
48
+ "p":0.9532916513,
49
+ "r":0.9456402773,
50
+ "f":0.9494505495
51
  },
52
  "Voice":{
53
+ "p":0.9106289077,
54
+ "r":0.903649635,
55
+ "f":0.9071258472
56
  },
57
  "PronType":{
58
+ "p":0.9730549006,
59
+ "r":0.9662207358,
60
+ "f":0.9696257761
61
  },
62
  "Degree":{
63
+ "p":0.8448500652,
64
+ "r":0.8481675393,
65
+ "f":0.8465055519
66
  },
67
  "Definite":{
68
+ "p":0.9812568908,
69
+ "r":0.991643454,
70
+ "f":0.9864228318
71
  },
72
  "Reflex":{
73
+ "p":0.9777777778,
74
+ "r":0.9565217391,
75
+ "f":0.967032967
76
  },
77
  "Polarity":{
78
+ "p":0.995412844,
79
+ "r":0.9819004525,
80
+ "f":0.9886104784
81
  },
82
  "Poss":{
83
  "p":0.875,
 
85
  "f":0.6666666667
86
  }
87
  },
88
+ "tag_acc":0.95598618,
89
+ "sents_p":0.5046040516,
90
+ "sents_r":0.523400191,
91
+ "sents_f":0.513830286,
92
+ "dep_uas":0.7510139703,
93
+ "dep_las":0.6927294577,
94
  "dep_las_per_type":{
95
  "iobj":{
96
+ "p":0.6517647059,
97
+ "r":0.6338672769,
98
+ "f":0.6426914153
99
  },
100
  "root":{
101
+ "p":0.7246777164,
102
+ "r":0.7516714422,
103
+ "f":0.7379278012
104
  },
105
  "nsubj":{
106
+ "p":0.6446927374,
107
+ "r":0.6468609865,
108
+ "f":0.645775042
109
  },
110
  "advmod":{
111
+ "p":0.633744856,
112
+ "r":0.6416666667,
113
+ "f":0.6376811594
114
  },
115
  "advcl":{
116
+ "p":0.5542168675,
117
+ "r":0.6102819237,
118
+ "f":0.5808997632
119
  },
120
  "ccomp":{
121
+ "p":0.4700854701,
122
+ "r":0.3395061728,
123
+ "f":0.394265233
124
  },
125
  "discourse":{
126
+ "p":0.7565698479,
127
+ "r":0.7524071527,
128
+ "f":0.7544827586
129
  },
130
  "obj":{
131
+ "p":0.7187851519,
132
+ "r":0.7045203969,
133
+ "f":0.7115812918
134
  },
135
  "det":{
136
+ "p":0.9036197122,
137
+ "r":0.9091706889,
138
+ "f":0.9063867017
139
  },
140
  "nmod":{
141
+ "p":0.6400742115,
142
+ "r":0.666023166,
143
+ "f":0.6527909177
144
  },
145
  "cop":{
146
+ "p":0.6457399103,
147
+ "r":0.679245283,
148
+ "f":0.6620689655
149
  },
150
  "appos":{
151
+ "p":0.4,
152
+ "r":0.256097561,
153
+ "f":0.312267658
154
  },
155
  "case":{
156
+ "p":0.9377123443,
157
+ "r":0.9419795222,
158
+ "f":0.9398410897
159
  },
160
  "acl":{
161
+ "p":0.4307692308,
162
+ "r":0.3414634146,
163
+ "f":0.380952381
164
  },
165
  "mark":{
166
+ "p":0.7863636364,
167
+ "r":0.8009259259,
168
+ "f":0.7935779817
169
  },
170
  "obl":{
171
+ "p":0.6577608142,
172
  "r":0.6494974874,
173
+ "f":0.6536030341
174
  },
175
  "nsubj:pass":{
176
+ "p":0.5222222222,
177
+ "r":0.4947368421,
178
+ "f":0.5081081081
179
  },
180
  "xcomp":{
181
+ "p":0.5155555556,
182
+ "r":0.4833333333,
183
+ "f":0.4989247312
184
  },
185
  "cc":{
186
+ "p":0.5595595596,
187
+ "r":0.5534653465,
188
+ "f":0.556495769
189
  },
190
  "conj":{
191
+ "p":0.4795783926,
192
+ "r":0.4727272727,
193
+ "f":0.4761281884
194
  },
195
  "dislocated":{
196
+ "p":0.0,
197
+ "r":0.0,
198
+ "f":0.0
199
  },
200
  "amod":{
201
+ "p":0.5900900901,
202
+ "r":0.6064814815,
203
+ "f":0.598173516
204
  },
205
  "parataxis":{
206
  "p":0.0,
 
213
  "f":0.0
214
  },
215
  "nummod":{
216
+ "p":0.8428571429,
217
+ "r":0.8428571429,
218
+ "f":0.8428571429
219
  },
220
  "fixed":{
221
+ "p":0.5,
222
+ "r":0.75,
223
+ "f":0.6
224
+ },
225
+ "orphan":{
226
+ "p":0.0714285714,
227
+ "r":0.0571428571,
228
+ "f":0.0634920635
229
  },
230
  "csubj:pass":{
231
  "p":0.0,
 
233
  "f":0.0
234
  },
235
  "obl:agent":{
236
+ "p":0.7,
237
  "r":0.3181818182,
238
+ "f":0.4375
 
 
 
 
 
239
  },
240
  "vocative":{
241
+ "p":0.74,
242
+ "r":0.606557377,
243
+ "f":0.6666666667
244
  },
245
  "aux":{
246
  "p":0.0,
 
258
  "f":0.8571428571
259
  }
260
  },
261
+ "lemma_acc":0.9646237044,
262
+ "speed":3911.0083510692
263
  }
attribute_ruler/patterns CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
 
config.cfg CHANGED
@@ -10,7 +10,7 @@ seed = 0
10
 
11
  [nlp]
12
  lang = "grc"
13
- pipeline = ["tok2vec","morphologizer","tagger","parser","senter","lemmatizer","attribute_ruler"]
14
  batch_size = 128
15
  disabled = []
16
  before_creation = null
@@ -86,26 +86,6 @@ nO = null
86
  width = ${components.tok2vec.model.encode.width}
87
  upstream = "tok2vec"
88
 
89
- [components.senter]
90
- factory = "senter"
91
- overwrite = false
92
- scorer = {"@scorers":"spacy.senter_scorer.v1"}
93
-
94
- [components.senter.model]
95
- @architectures = "spacy.Tagger.v2"
96
- nO = null
97
- normalize = false
98
-
99
- [components.senter.model.tok2vec]
100
- @architectures = "spacy.HashEmbedCNN.v2"
101
- pretrained_vectors = null
102
- width = 12
103
- depth = 1
104
- embed_size = 2000
105
- window_size = 1
106
- maxout_pieces = 2
107
- subword_features = true
108
-
109
  [components.tagger]
110
  factory = "tagger"
111
  neg_prefix = "!"
@@ -171,7 +151,7 @@ patience = 5000
171
  max_epochs = 0
172
  max_steps = 20000
173
  eval_frequency = 200
174
- frozen_components = ["lemmatizer","senter"]
175
  annotating_components = []
176
  before_to_disk = null
177
  before_update = null
@@ -191,7 +171,7 @@ t = 0.0
191
 
192
  [training.logger]
193
  @loggers = "spacy.WandbLogger.v3"
194
- project_name = "proiel"
195
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
196
  log_dataset_dir = "./corpus"
197
  model_log_interval = 1000
 
10
 
11
  [nlp]
12
  lang = "grc"
13
+ pipeline = ["tok2vec","morphologizer","tagger","parser","lemmatizer","attribute_ruler"]
14
  batch_size = 128
15
  disabled = []
16
  before_creation = null
 
86
  width = ${components.tok2vec.model.encode.width}
87
  upstream = "tok2vec"
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  [components.tagger]
90
  factory = "tagger"
91
  neg_prefix = "!"
 
151
  max_epochs = 0
152
  max_steps = 20000
153
  eval_frequency = 200
154
+ frozen_components = ["lemmatizer"]
155
  annotating_components = []
156
  before_to_disk = null
157
  before_update = null
 
171
 
172
  [training.logger]
173
  @loggers = "spacy.WandbLogger.v3"
174
+ project_name = "greCy"
175
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
176
  log_dataset_dir = "./corpus"
177
  model_log_interval = 1000
grc_proiel_sm-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f268ee61c24883c7957b96c24d93f3a0390e57f6398979a5cef32aea91dbee3
3
- size 59722529
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ca9dc9f460a7ee7eb9c5946a3c15c14d3a3690df38ef2a69693a851620490e6
3
+ size 60071653
lemmatizer/cfg CHANGED
The diff for this file is too large to render. See raw diff
 
lemmatizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:930386d436927c541305cb480afa8a4ca75d03894abfee89a23fd4e76e8c6d73
3
- size 24138324
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7943319b989596449853f63213d3598cab16ee8576e94756e1406f352f95469c
3
+ size 24263260
lemmatizer/trees CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:889486752498ce47ce48b7888f1249c92d5fb5c505a20574ad38a4a77476c9b1
3
- size 4833057
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0b4cacd6282ecfa887b731fe8aed793709e236050f81662e72cd06fe73a6458
3
+ size 5318689
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"grc",
3
  "name":"proiel_sm",
4
- "version":"3.5.2",
5
  "description":"",
6
  "author":"",
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
- "spacy_version":">=3.5.2,<3.6.0",
11
- "spacy_git_version":"Unknown",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
@@ -1117,7 +1117,6 @@
1117
  "morphologizer",
1118
  "tagger",
1119
  "parser",
1120
- "senter",
1121
  "lemmatizer",
1122
  "attribute_ruler"
1123
  ],
@@ -1126,7 +1125,6 @@
1126
  "morphologizer",
1127
  "tagger",
1128
  "parser",
1129
- "senter",
1130
  "lemmatizer",
1131
  "attribute_ruler"
1132
  ],
@@ -1134,33 +1132,33 @@
1134
 
1135
  ],
1136
  "performance":{
1137
- "pos_acc":0.9567096396,
1138
- "morph_acc":0.874890126,
1139
  "morph_per_feat":{
1140
  "Case":{
1141
- "p":0.9508922921,
1142
- "r":0.9481322564,
1143
- "f":0.9495102686
1144
  },
1145
  "Gender":{
1146
- "p":0.9039268014,
1147
- "r":0.9076177109,
1148
- "f":0.9057684961
1149
  },
1150
  "Number":{
1151
- "p":0.9815295216,
1152
- "r":0.9780787923,
1153
- "f":0.9798011187
1154
  },
1155
  "Person":{
1156
- "p":0.9555555556,
1157
- "r":0.9523809524,
1158
- "f":0.9539656129
1159
  },
1160
  "PronType":{
1161
- "p":0.9743005856,
1162
- "r":0.9724025974,
1163
- "f":0.9733506662
1164
  },
1165
  "Polarity":{
1166
  "p":1.0,
@@ -1168,39 +1166,39 @@
1168
  "f":0.9921671018
1169
  },
1170
  "Aspect":{
1171
- "p":0.9124343257,
1172
- "r":0.8870601589,
1173
- "f":0.8995683453
1174
  },
1175
  "Mood":{
1176
- "p":0.948156682,
1177
- "r":0.9421866056,
1178
- "f":0.9451622165
1179
  },
1180
  "Tense":{
1181
- "p":0.8952446589,
1182
- "r":0.8924768121,
1183
- "f":0.8938585928
1184
  },
1185
  "VerbForm":{
1186
- "p":0.9634734666,
1187
- "r":0.9598352214,
1188
- "f":0.9616509028
1189
  },
1190
  "Voice":{
1191
- "p":0.916609235,
1192
- "r":0.9137753349,
1193
- "f":0.9151900912
1194
  },
1195
  "Degree":{
1196
- "p":0.8770131772,
1197
- "r":0.8051075269,
1198
- "f":0.8395234758
1199
  },
1200
  "Definite":{
1201
- "p":0.9855537721,
1202
- "r":0.9951377634,
1203
- "f":0.9903225806
1204
  },
1205
  "Reflex":{
1206
  "p":1.0,
@@ -1208,164 +1206,164 @@
1208
  "f":1.0
1209
  },
1210
  "Poss":{
1211
- "p":0.9230769231,
1212
- "r":0.6315789474,
1213
- "f":0.75
1214
  }
1215
  },
1216
- "tag_acc":0.9602988573,
1217
- "dep_uas":0.7571784354,
1218
- "dep_las":0.7027541752,
1219
  "dep_las_per_type":{
1220
  "nsubj":{
1221
- "p":0.6351351351,
1222
- "r":0.661971831,
1223
- "f":0.6482758621
1224
  },
1225
  "discourse":{
1226
- "p":0.7537619699,
1227
- "r":0.7568681319,
1228
- "f":0.7553118574
1229
  },
1230
  "mark":{
1231
- "p":0.8008298755,
1232
- "r":0.7782258065,
1233
- "f":0.7893660532
1234
  },
1235
  "advmod":{
1236
- "p":0.6583941606,
1237
- "r":0.6701337296,
1238
- "f":0.6642120766
1239
  },
1240
  "advcl":{
1241
- "p":0.6144578313,
1242
- "r":0.6270491803,
1243
- "f":0.6206896552
1244
  },
1245
  "xcomp":{
1246
- "p":0.476635514,
1247
- "r":0.408,
1248
- "f":0.4396551724
1249
  },
1250
  "cop":{
1251
- "p":0.668161435,
1252
- "r":0.6962616822,
1253
- "f":0.6819221968
1254
  },
1255
  "root":{
1256
- "p":0.7114517583,
1257
- "r":0.7742885182,
1258
- "f":0.7415413534
1259
  },
1260
  "det":{
1261
- "p":0.8999133449,
1262
- "r":0.9006938422,
1263
- "f":0.9003034244
1264
  },
1265
  "nmod":{
1266
- "p":0.6956521739,
1267
  "r":0.6478873239,
1268
- "f":0.6709206928
1269
  },
1270
  "obj":{
1271
- "p":0.7286432161,
1272
- "r":0.7528556594,
1273
- "f":0.7405515832
1274
  },
1275
  "case":{
1276
- "p":0.939524838,
1277
- "r":0.9539473684,
1278
- "f":0.9466811752
1279
  },
1280
  "obl":{
1281
- "p":0.6840826245,
1282
- "r":0.6874236874,
1283
- "f":0.6857490865
1284
  },
1285
  "cc":{
1286
- "p":0.5803757829,
1287
- "r":0.5720164609,
1288
- "f":0.5761658031
1289
  },
1290
  "conj":{
1291
- "p":0.5060240964,
1292
  "r":0.4960629921,
1293
- "f":0.5009940358
1294
  },
1295
  "obl:agent":{
1296
- "p":0.5909090909,
1297
- "r":0.3513513514,
1298
- "f":0.4406779661
1299
  },
1300
  "ccomp":{
1301
- "p":0.48,
1302
- "r":0.4179104478,
1303
- "f":0.4468085106
1304
  },
1305
  "nsubj:pass":{
1306
- "p":0.5479452055,
1307
- "r":0.3738317757,
1308
- "f":0.4444444444
1309
  },
1310
  "amod":{
1311
- "p":0.6871165644,
1312
- "r":0.5258215962,
1313
- "f":0.5957446809
1314
  },
1315
  "acl":{
1316
- "p":0.4227642276,
1317
- "r":0.3151515152,
1318
- "f":0.3611111111
1319
  },
1320
  "iobj":{
1321
- "p":0.6423982869,
1322
- "r":0.6928406467,
1323
  "f":0.6666666667
1324
  },
1325
- "appos":{
1326
- "p":0.3425925926,
1327
- "r":0.2624113475,
1328
- "f":0.297188755
1329
  },
1330
  "dep":{
1331
  "p":0.0,
1332
  "r":0.0,
1333
  "f":0.0
1334
  },
 
 
 
 
 
1335
  "nummod":{
1336
- "p":0.8032786885,
1337
- "r":0.7205882353,
1338
- "f":0.7596899225
1339
  },
1340
  "vocative":{
1341
- "p":0.7636363636,
1342
- "r":0.6086956522,
1343
- "f":0.6774193548
1344
- },
1345
- "orphan":{
1346
- "p":0.1428571429,
1347
- "r":0.0465116279,
1348
- "f":0.0701754386
1349
  },
1350
- "parataxis":{
1351
- "p":0.0,
1352
- "r":0.0,
1353
- "f":0.0
1354
  },
1355
  "flat:name":{
1356
- "p":0.7142857143,
1357
- "r":0.4545454545,
1358
- "f":0.5555555556
1359
  },
1360
  "dislocated":{
1361
- "p":0.4,
1362
- "r":0.1538461538,
1363
- "f":0.2222222222
1364
  },
1365
  "csubj:pass":{
1366
- "p":0.25,
1367
- "r":0.2,
1368
- "f":0.2222222222
1369
  },
1370
  "aux:pass":{
1371
  "p":0.0,
@@ -1373,9 +1371,9 @@
1373
  "f":0.0
1374
  },
1375
  "fixed":{
1376
- "p":0.75,
1377
  "r":0.6,
1378
- "f":0.6666666667
1379
  },
1380
  "aux":{
1381
  "p":0.0,
@@ -1383,14 +1381,14 @@
1383
  "f":0.0
1384
  }
1385
  },
1386
- "sents_p":0.4968440036,
1387
- "sents_r":0.5407262022,
1388
- "sents_f":0.5178571429,
1389
- "lemma_acc":0.9265309112,
1390
- "tok2vec_loss":36259.8938428532,
1391
- "morphologizer_loss":2608.0591230392,
1392
- "tagger_loss":826.4760194123,
1393
- "parser_loss":28099.7688408634
1394
  },
1395
  "requirements":[
1396
 
 
1
  {
2
  "lang":"grc",
3
  "name":"proiel_sm",
4
+ "version":"3.5.3",
5
  "description":"",
6
  "author":"",
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
+ "spacy_version":">=3.5.3,<3.6.0",
11
+ "spacy_git_version":"512241e12",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
 
1117
  "morphologizer",
1118
  "tagger",
1119
  "parser",
 
1120
  "lemmatizer",
1121
  "attribute_ruler"
1122
  ],
 
1125
  "morphologizer",
1126
  "tagger",
1127
  "parser",
 
1128
  "lemmatizer",
1129
  "attribute_ruler"
1130
  ],
 
1132
 
1133
  ],
1134
  "performance":{
1135
+ "pos_acc":0.9557573982,
1136
+ "morph_acc":0.8745238793,
1137
  "morph_per_feat":{
1138
  "Case":{
1139
+ "p":0.9518209408,
1140
+ "r":0.9498990409,
1141
+ "f":0.9508590197
1142
  },
1143
  "Gender":{
1144
+ "p":0.905669972,
1145
+ "r":0.9090213092,
1146
+ "f":0.907342546
1147
  },
1148
  "Number":{
1149
+ "p":0.9802863665,
1150
+ "r":0.9769413711,
1151
+ "f":0.9786110104
1152
  },
1153
  "Person":{
1154
+ "p":0.9588169225,
1155
+ "r":0.9453672942,
1156
+ "f":0.9520446097
1157
  },
1158
  "PronType":{
1159
+ "p":0.9773918742,
1160
+ "r":0.9685064935,
1161
+ "f":0.9729288976
1162
  },
1163
  "Polarity":{
1164
  "p":1.0,
 
1166
  "f":0.9921671018
1167
  },
1168
  "Aspect":{
1169
+ "p":0.9186460808,
1170
+ "r":0.8779795687,
1171
+ "f":0.8978525827
1172
  },
1173
  "Mood":{
1174
+ "p":0.9375722543,
1175
+ "r":0.9284487693,
1176
+ "f":0.9329882082
1177
  },
1178
  "Tense":{
1179
+ "p":0.891966759,
1180
+ "r":0.8849192717,
1181
+ "f":0.8884290395
1182
  },
1183
  "VerbForm":{
1184
+ "p":0.9650277008,
1185
+ "r":0.9567456231,
1186
+ "f":0.9608688157
1187
  },
1188
  "Voice":{
1189
+ "p":0.9193213296,
1190
+ "r":0.9120577121,
1191
+ "f":0.9156751164
1192
  },
1193
  "Degree":{
1194
+ "p":0.8700564972,
1195
+ "r":0.8279569892,
1196
+ "f":0.8484848485
1197
  },
1198
  "Definite":{
1199
+ "p":0.9829333333,
1200
+ "r":0.9956780119,
1201
+ "f":0.9892646269
1202
  },
1203
  "Reflex":{
1204
  "p":1.0,
 
1206
  "f":1.0
1207
  },
1208
  "Poss":{
1209
+ "p":1.0,
1210
+ "r":0.6842105263,
1211
+ "f":0.8125
1212
  }
1213
  },
1214
+ "tag_acc":0.9605186053,
1215
+ "dep_uas":0.7632581307,
1216
+ "dep_las":0.706782889,
1217
  "dep_las_per_type":{
1218
  "nsubj":{
1219
+ "p":0.664910432,
1220
+ "r":0.6836403034,
1221
+ "f":0.6741452991
1222
  },
1223
  "discourse":{
1224
+ "p":0.7728531856,
1225
+ "r":0.7664835165,
1226
+ "f":0.7696551724
1227
  },
1228
  "mark":{
1229
+ "p":0.756302521,
1230
+ "r":0.7258064516,
1231
+ "f":0.7407407407
1232
  },
1233
  "advmod":{
1234
+ "p":0.6637931034,
1235
+ "r":0.6864784547,
1236
+ "f":0.6749452155
1237
  },
1238
  "advcl":{
1239
+ "p":0.5973509934,
1240
+ "r":0.6161202186,
1241
+ "f":0.6065904506
1242
  },
1243
  "xcomp":{
1244
+ "p":0.5022222222,
1245
+ "r":0.452,
1246
+ "f":0.4757894737
1247
  },
1248
  "cop":{
1249
+ "p":0.7417840376,
1250
+ "r":0.738317757,
1251
+ "f":0.7400468384
1252
  },
1253
  "root":{
1254
+ "p":0.725526075,
1255
+ "r":0.7782139352,
1256
+ "f":0.7509469697
1257
  },
1258
  "det":{
1259
+ "p":0.9003864319,
1260
+ "r":0.909366869,
1261
+ "f":0.9048543689
1262
  },
1263
  "nmod":{
1264
+ "p":0.64,
1265
  "r":0.6478873239,
1266
+ "f":0.6439195101
1267
  },
1268
  "obj":{
1269
+ "p":0.7393117831,
1270
+ "r":0.7362409138,
1271
+ "f":0.737773153
1272
  },
1273
  "case":{
1274
+ "p":0.9390642002,
1275
+ "r":0.9462719298,
1276
+ "f":0.9426542873
1277
  },
1278
  "obl":{
1279
+ "p":0.6793611794,
1280
+ "r":0.6752136752,
1281
+ "f":0.6772810778
1282
  },
1283
  "cc":{
1284
+ "p":0.5799793602,
1285
+ "r":0.5781893004,
1286
+ "f":0.5790829469
1287
  },
1288
  "conj":{
1289
+ "p":0.5361702128,
1290
  "r":0.4960629921,
1291
+ "f":0.5153374233
1292
  },
1293
  "obl:agent":{
1294
+ "p":0.8888888889,
1295
+ "r":0.4324324324,
1296
+ "f":0.5818181818
1297
  },
1298
  "ccomp":{
1299
+ "p":0.432748538,
1300
+ "r":0.368159204,
1301
+ "f":0.3978494624
1302
  },
1303
  "nsubj:pass":{
1304
+ "p":0.5274725275,
1305
+ "r":0.4485981308,
1306
+ "f":0.4848484848
1307
  },
1308
  "amod":{
1309
+ "p":0.6176470588,
1310
+ "r":0.5915492958,
1311
+ "f":0.6043165468
1312
  },
1313
  "acl":{
1314
+ "p":0.4066666667,
1315
+ "r":0.3696969697,
1316
+ "f":0.3873015873
1317
  },
1318
  "iobj":{
1319
+ "p":0.6753554502,
1320
+ "r":0.6581986143,
1321
  "f":0.6666666667
1322
  },
1323
+ "parataxis":{
1324
+ "p":0.125,
1325
+ "r":0.05,
1326
+ "f":0.0714285714
1327
  },
1328
  "dep":{
1329
  "p":0.0,
1330
  "r":0.0,
1331
  "f":0.0
1332
  },
1333
+ "orphan":{
1334
+ "p":0.1578947368,
1335
+ "r":0.0697674419,
1336
+ "f":0.0967741935
1337
+ },
1338
  "nummod":{
1339
+ "p":0.78125,
1340
+ "r":0.7352941176,
1341
+ "f":0.7575757576
1342
  },
1343
  "vocative":{
1344
+ "p":0.6923076923,
1345
+ "r":0.652173913,
1346
+ "f":0.671641791
 
 
 
 
 
1347
  },
1348
+ "appos":{
1349
+ "p":0.3596491228,
1350
+ "r":0.2907801418,
1351
+ "f":0.3215686275
1352
  },
1353
  "flat:name":{
1354
+ "p":0.8,
1355
+ "r":0.5454545455,
1356
+ "f":0.6486486486
1357
  },
1358
  "dislocated":{
1359
+ "p":0.4545454545,
1360
+ "r":0.1923076923,
1361
+ "f":0.2702702703
1362
  },
1363
  "csubj:pass":{
1364
+ "p":0.0,
1365
+ "r":0.0,
1366
+ "f":0.0
1367
  },
1368
  "aux:pass":{
1369
  "p":0.0,
 
1371
  "f":0.0
1372
  },
1373
  "fixed":{
1374
+ "p":0.5454545455,
1375
  "r":0.6,
1376
+ "f":0.5714285714
1377
  },
1378
  "aux":{
1379
  "p":0.0,
 
1381
  "f":0.0
1382
  }
1383
  },
1384
+ "sents_p":0.505946935,
1385
+ "sents_r":0.5426889107,
1386
+ "sents_f":0.5236742424,
1387
+ "lemma_acc":0.966891298,
1388
+ "tok2vec_loss":41113.3756506196,
1389
+ "morphologizer_loss":1305.2082611322,
1390
+ "tagger_loss":386.3325280659,
1391
+ "parser_loss":19471.5545363693
1392
  },
1393
  "requirements":[
1394
 
morphologizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e85469af6ab0c82c2e26235682a66ce6f5f11c361ad71e5ed8e919d05ff5093
3
  size 1058262
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b7ee6c20379d3c111219186138f1d3c73cfc003b04be262c3d2b24639b0eb1f
3
  size 1058262
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7d14f0614908730204629df551833e7ce45ae35e285793657ac3cc4306b9eec
3
  size 1782009
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3eb61c9d2bc2165c69fec042210aca3c5e21f3eaa13802090e1517c0142dffb
3
  size 1782009
tagger/model CHANGED
Binary files a/tagger/model and b/tagger/model differ
 
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:556f6f56b28473ed6c184b7f0cbf67c70093298e74bece927d83f37897138345
3
  size 34875837
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cb4218a151193e173376f503b898f17b2f7072d1bd1e0eef0d44b26f732cbd2
3
  size 34875837
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8576f3f8599b2427e01b94d56a4dff756c38b196234b22bbdb2aa05a7990523
3
- size 12456672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6495dc4bdfaf7e158ce3d30a90606674a3df4dcd5b13bb716ff2ad1fca297a65
3
+ size 17984960