Jacobo committed on
Commit
0b87d4f
1 Parent(s): 1f93c5c

Update spaCy pipeline

Browse files
.gitattributes CHANGED
@@ -39,3 +39,4 @@ morphologizer/model filter=lfs diff=lfs merge=lfs -text
39
  parser/model filter=lfs diff=lfs merge=lfs -text
40
  transformer/model filter=lfs diff=lfs merge=lfs -text
41
  vocab/strings.json filter=lfs diff=lfs merge=lfs -text
 
 
39
  parser/model filter=lfs diff=lfs merge=lfs -text
40
  transformer/model filter=lfs diff=lfs merge=lfs -text
41
  vocab/strings.json filter=lfs diff=lfs merge=lfs -text
42
+ vocab/vectors filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -13,35 +13,35 @@ model-index:
13
  metrics:
14
  - name: TAG (XPOS) Accuracy
15
  type: accuracy
16
- value: 0.9821271609
17
  - task:
18
  name: POS
19
  type: token-classification
20
  metrics:
21
  - name: POS (UPOS) Accuracy
22
  type: accuracy
23
- value: 0.9811016701
24
  - task:
25
  name: MORPH
26
  type: token-classification
27
  metrics:
28
  - name: Morph (UFeats) Accuracy
29
  type: accuracy
30
- value: 0.9390565485
31
  - task:
32
  name: LEMMA
33
  type: token-classification
34
  metrics:
35
  - name: Lemma Accuracy
36
  type: accuracy
37
- value: 0.966891298
38
  - task:
39
  name: UNLABELED_DEPENDENCIES
40
  type: token-classification
41
  metrics:
42
  - name: Unlabeled Attachment Score (UAS)
43
  type: f_score
44
- value: 0.8559185467
45
  - task:
46
  name: LABELED_DEPENDENCIES
47
  type: token-classification
@@ -55,16 +55,16 @@ model-index:
55
  metrics:
56
  - name: Sentences F-Score
57
  type: f_score
58
- value: 0.6955280685
59
  ---
60
  | Feature | Description |
61
  | --- | --- |
62
  | **Name** | `grc_proiel_trf` |
63
- | **Version** | `3.5.3` |
64
- | **spaCy** | `>=3.5.3,<3.6.0` |
65
  | **Default Pipeline** | `transformer`, `morphologizer`, `tagger`, `parser`, `senter`, `lemmatizer`, `attribute_ruler` |
66
  | **Components** | `transformer`, `morphologizer`, `tagger`, `parser`, `senter`, `lemmatizer`, `attribute_ruler` |
67
- | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
68
  | **Sources** | n/a |
69
  | **License** | n/a |
70
  | **Author** | [n/a]() |
@@ -87,17 +87,17 @@ model-index:
87
 
88
  | Type | Score |
89
  | --- | --- |
90
- | `POS_ACC` | 98.11 |
91
- | `MORPH_ACC` | 93.91 |
92
- | `TAG_ACC` | 98.21 |
93
- | `DEP_UAS` | 85.59 |
94
  | `DEP_LAS` | 82.30 |
95
- | `SENTS_P` | 67.50 |
96
- | `SENTS_R` | 71.74 |
97
- | `SENTS_F` | 69.55 |
98
- | `LEMMA_ACC` | 96.69 |
99
- | `TRANSFORMER_LOSS` | 1805484.27 |
100
- | `MORPHOLOGIZER_LOSS` | 332984.27 |
101
- | `TAGGER_LOSS` | 281293.80 |
102
- | `PARSER_LOSS` | 2539504.11 |
103
- | `SENTER_LOSS` | 2409406.81 |
 
13
  metrics:
14
  - name: TAG (XPOS) Accuracy
15
  type: accuracy
16
+ value: 0.9852768825
17
  - task:
18
  name: POS
19
  type: token-classification
20
  metrics:
21
  - name: POS (UPOS) Accuracy
22
  type: accuracy
23
+ value: 0.9845443891
24
  - task:
25
  name: MORPH
26
  type: token-classification
27
  metrics:
28
  - name: Morph (UFeats) Accuracy
29
  type: accuracy
30
+ value: 0.9417667741
31
  - task:
32
  name: LEMMA
33
  type: token-classification
34
  metrics:
35
  - name: Lemma Accuracy
36
  type: accuracy
37
+ value: 0.9656460592
38
  - task:
39
  name: UNLABELED_DEPENDENCIES
40
  type: token-classification
41
  metrics:
42
  - name: Unlabeled Attachment Score (UAS)
43
  type: f_score
44
+ value: 0.8578962789
45
  - task:
46
  name: LABELED_DEPENDENCIES
47
  type: token-classification
 
55
  metrics:
56
  - name: Sentences F-Score
57
  type: f_score
58
+ value: 0.7273612463
59
  ---
60
  | Feature | Description |
61
  | --- | --- |
62
  | **Name** | `grc_proiel_trf` |
63
+ | **Version** | `3.6.0` |
64
+ | **spaCy** | `>=3.6.0,<3.7.0` |
65
  | **Default Pipeline** | `transformer`, `morphologizer`, `tagger`, `parser`, `senter`, `lemmatizer`, `attribute_ruler` |
66
  | **Components** | `transformer`, `morphologizer`, `tagger`, `parser`, `senter`, `lemmatizer`, `attribute_ruler` |
67
+ | **Vectors** | -1 keys, 200000 unique vectors (300 dimensions) |
68
  | **Sources** | n/a |
69
  | **License** | n/a |
70
  | **Author** | [n/a]() |
 
87
 
88
  | Type | Score |
89
  | --- | --- |
90
+ | `POS_ACC` | 98.45 |
91
+ | `MORPH_ACC` | 94.18 |
92
+ | `TAG_ACC` | 98.53 |
93
+ | `DEP_UAS` | 85.79 |
94
  | `DEP_LAS` | 82.30 |
95
+ | `SENTS_P` | 72.17 |
96
+ | `SENTS_R` | 73.31 |
97
+ | `SENTS_F` | 72.74 |
98
+ | `LEMMA_ACC` | 96.56 |
99
+ | `TRANSFORMER_LOSS` | 1073869.67 |
100
+ | `MORPHOLOGIZER_LOSS` | 10669.96 |
101
+ | `TAGGER_LOSS` | 3055.26 |
102
+ | `PARSER_LOSS` | 2280839.70 |
103
+ | `SENTER_LOSS` | 2448545.61 |
accuracy.json CHANGED
@@ -3,51 +3,51 @@
3
  "token_p":1.0,
4
  "token_r":1.0,
5
  "token_f":1.0,
6
- "pos_acc":0.9794201592,
7
- "morph_acc":0.9387111311,
8
- "morph_micro_p":0.980821794,
9
- "morph_micro_r":0.9804010116,
10
- "morph_micro_f":0.9806113577,
11
  "morph_per_feat":{
12
  "Case":{
13
- "p":0.9874934176,
14
- "r":0.9865842431,
15
- "f":0.987038621
16
  },
17
  "Gender":{
18
- "p":0.9403991014,
19
- "r":0.9436414269,
20
- "f":0.9420174742
21
  },
22
  "Number":{
23
- "p":0.9954887218,
24
- "r":0.9944206009,
25
- "f":0.9949543747
26
  },
27
  "Aspect":{
28
- "p":0.9852320675,
29
- "r":0.981779958,
30
- "f":0.9835029835
31
  },
32
  "Mood":{
33
- "p":0.9906651109,
34
- "r":0.9889341875,
35
- "f":0.9897988925
36
  },
37
  "Person":{
38
- "p":0.9904543719,
39
- "r":0.9863117871,
40
- "f":0.9883787388
41
  },
42
  "Tense":{
43
- "p":0.9850146199,
44
- "r":0.9835766423,
45
- "f":0.9842951059
46
  },
47
  "VerbForm":{
48
- "p":0.9963450292,
49
- "r":0.9945275447,
50
- "f":0.9954354574
51
  },
52
  "Voice":{
53
  "p":0.9773391813,
@@ -60,14 +60,14 @@
60
  "f":0.9862554475
61
  },
62
  "Degree":{
63
- "p":0.9476372925,
64
- "r":0.9712041885,
65
- "f":0.9592760181
66
  },
67
  "Definite":{
68
- "p":0.9944320713,
69
- "r":0.9949860724,
70
- "f":0.9947089947
71
  },
72
  "Reflex":{
73
  "p":1.0,
@@ -81,126 +81,126 @@
81
  },
82
  "Poss":{
83
  "p":1.0,
84
- "r":1.0,
85
- "f":1.0
86
  }
87
  },
88
- "tag_acc":0.9804716839,
89
- "sents_p":0.6654445463,
90
- "sents_r":0.6934097421,
91
- "sents_f":0.6791393826,
92
- "dep_uas":0.8455760853,
93
- "dep_las":0.8112513144,
94
  "dep_las_per_type":{
95
  "iobj":{
96
- "p":0.7904328018,
97
- "r":0.7940503432,
98
- "f":0.7922374429
99
  },
100
  "root":{
101
- "p":0.8350137489,
102
- "r":0.8701050621,
103
- "f":0.8521983162
104
  },
105
  "nsubj":{
106
- "p":0.8205714286,
107
- "r":0.8049327354,
108
- "f":0.8126768534
109
  },
110
  "advmod":{
111
- "p":0.7709251101,
112
- "r":0.7291666667,
113
- "f":0.7494646681
114
  },
115
  "advcl":{
116
  "p":0.75,
117
- "r":0.7711442786,
118
- "f":0.760425184
119
  },
120
  "ccomp":{
121
- "p":0.7222222222,
122
- "r":0.6419753086,
123
- "f":0.6797385621
124
  },
125
  "discourse":{
126
- "p":0.8557558946,
127
- "r":0.84869326,
128
- "f":0.8522099448
129
  },
130
  "obj":{
131
- "p":0.8727876106,
132
- "r":0.8699007718,
133
- "f":0.8713418001
134
  },
135
  "det":{
136
- "p":0.9402722881,
137
- "r":0.9394471259,
138
- "f":0.9398595259
139
  },
140
  "nmod":{
141
- "p":0.772815534,
142
- "r":0.7683397683,
143
- "f":0.770571152
144
  },
145
  "cop":{
146
- "p":0.7882882883,
147
- "r":0.8254716981,
148
- "f":0.8064516129
149
  },
150
  "appos":{
151
- "p":0.4867256637,
152
- "r":0.3353658537,
153
- "f":0.3971119134
154
  },
155
  "case":{
156
- "p":0.9615384615,
157
- "r":0.9670079636,
158
- "f":0.9642654566
159
  },
160
  "acl":{
161
- "p":0.6298701299,
162
- "r":0.5914634146,
163
- "f":0.6100628931
164
  },
165
  "mark":{
166
- "p":0.8904109589,
167
- "r":0.9027777778,
168
- "f":0.8965517241
169
  },
170
  "obl":{
171
- "p":0.7578419072,
172
- "r":0.7587939698,
173
- "f":0.7583176397
174
  },
175
  "nsubj:pass":{
176
- "p":0.7653061224,
177
- "r":0.7894736842,
178
- "f":0.7772020725
179
  },
180
  "xcomp":{
181
- "p":0.7179487179,
182
- "r":0.7,
183
- "f":0.7088607595
184
  },
185
  "cc":{
186
- "p":0.7248255234,
187
- "r":0.7198019802,
188
- "f":0.7223050174
189
  },
190
  "conj":{
191
- "p":0.6662337662,
192
- "r":0.6662337662,
193
- "f":0.6662337662
194
  },
195
  "dislocated":{
196
- "p":0.5555555556,
197
- "r":0.1785714286,
198
- "f":0.2702702703
199
  },
200
  "amod":{
201
- "p":0.7652173913,
202
- "r":0.8148148148,
203
- "f":0.7892376682
204
  },
205
  "parataxis":{
206
  "p":0.0,
@@ -213,34 +213,34 @@
213
  "f":0.0
214
  },
215
  "orphan":{
216
- "p":0.2105263158,
217
  "r":0.2285714286,
218
- "f":0.2191780822
219
  },
220
  "nummod":{
221
- "p":0.875,
222
- "r":0.9,
223
- "f":0.8873239437
224
  },
225
- "fixed":{
226
- "p":0.7777777778,
227
- "r":0.875,
228
- "f":0.8235294118
229
  },
230
  "obl:agent":{
231
- "p":0.75,
232
- "r":0.5454545455,
233
- "f":0.6315789474
234
- },
235
- "csubj:pass":{
236
- "p":1.0,
237
- "r":0.2857142857,
238
- "f":0.4444444444
239
  },
240
  "vocative":{
241
- "p":0.7868852459,
242
  "r":0.7868852459,
243
- "f":0.7868852459
 
 
 
 
 
244
  },
245
  "aux":{
246
  "p":0.0,
@@ -258,6 +258,6 @@
258
  "f":0.9230769231
259
  }
260
  },
261
- "lemma_acc":0.9646237044,
262
- "speed":1064.2582610019
263
  }
 
3
  "token_p":1.0,
4
  "token_r":1.0,
5
  "token_f":1.0,
6
+ "pos_acc":0.9824996245,
7
+ "morph_acc":0.9411897251,
8
+ "morph_micro_p":0.9815321323,
9
+ "morph_micro_r":0.9804461705,
10
+ "morph_micro_f":0.9809888509,
11
  "morph_per_feat":{
12
  "Case":{
13
+ "p":0.988937179,
14
+ "r":0.9876364593,
15
+ "f":0.9882863912
16
  },
17
  "Gender":{
18
+ "p":0.9431442549,
19
+ "r":0.9458957698,
20
+ "f":0.9445180085
21
  },
22
  "Number":{
23
+ "p":0.9958091554,
24
+ "r":0.9943133047,
25
+ "f":0.9950606679
26
  },
27
  "Aspect":{
28
+ "p":0.9838709677,
29
+ "r":0.9831814996,
30
+ "f":0.9835261129
31
  },
32
  "Mood":{
33
+ "p":0.9883245768,
34
+ "r":0.9860221316,
35
+ "f":0.9871720117
36
  },
37
  "Person":{
38
+ "p":0.990797546,
39
+ "r":0.9825095057,
40
+ "f":0.9866361207
41
  },
42
  "Tense":{
43
+ "p":0.9831871345,
44
+ "r":0.9817518248,
45
+ "f":0.9824689554
46
  },
47
  "VerbForm":{
48
+ "p":0.9974424552,
49
+ "r":0.9959868661,
50
+ "f":0.9967141292
51
  },
52
  "Voice":{
53
  "p":0.9773391813,
 
60
  "f":0.9862554475
61
  },
62
  "Degree":{
63
+ "p":0.9509043928,
64
+ "r":0.9633507853,
65
+ "f":0.9570871261
66
  },
67
  "Definite":{
68
+ "p":0.9949832776,
69
+ "r":0.9944289694,
70
+ "f":0.9947060463
71
  },
72
  "Reflex":{
73
  "p":1.0,
 
81
  },
82
  "Poss":{
83
  "p":1.0,
84
+ "r":0.9230769231,
85
+ "f":0.96
86
  }
87
  },
88
+ "tag_acc":0.9825747334,
89
+ "sents_p":0.6873212583,
90
+ "sents_r":0.6886341929,
91
+ "sents_f":0.6879770992,
92
+ "dep_uas":0.8455009764,
93
+ "dep_las":0.8129788193,
94
  "dep_las_per_type":{
95
  "iobj":{
96
+ "p":0.8152424942,
97
+ "r":0.8077803204,
98
+ "f":0.8114942529
99
  },
100
  "root":{
101
+ "p":0.8551000953,
102
+ "r":0.8567335244,
103
+ "f":0.8559160305
104
  },
105
  "nsubj":{
106
+ "p":0.8157303371,
107
+ "r":0.8139013453,
108
+ "f":0.8148148148
109
  },
110
  "advmod":{
111
+ "p":0.7598828697,
112
+ "r":0.7208333333,
113
+ "f":0.7398431932
114
  },
115
  "advcl":{
116
  "p":0.75,
117
+ "r":0.7810945274,
118
+ "f":0.7652315191
119
  },
120
  "ccomp":{
121
+ "p":0.6858974359,
122
+ "r":0.6604938272,
123
+ "f":0.6729559748
124
  },
125
  "discourse":{
126
+ "p":0.8534246575,
127
+ "r":0.8569463549,
128
+ "f":0.8551818806
129
  },
130
  "obj":{
131
+ "p":0.8644808743,
132
+ "r":0.8721058434,
133
+ "f":0.8682766191
134
  },
135
  "det":{
136
+ "p":0.9452780229,
137
+ "r":0.9398859149,
138
+ "f":0.9425742574
139
  },
140
  "nmod":{
141
+ "p":0.7895791583,
142
+ "r":0.7606177606,
143
+ "f":0.7748279253
144
  },
145
  "cop":{
146
+ "p":0.7772727273,
147
+ "r":0.8066037736,
148
+ "f":0.7916666667
149
  },
150
  "appos":{
151
+ "p":0.5130434783,
152
+ "r":0.3597560976,
153
+ "f":0.4229390681
154
  },
155
  "case":{
156
+ "p":0.9704880817,
157
+ "r":0.9726962457,
158
+ "f":0.9715909091
159
  },
160
  "acl":{
161
+ "p":0.5894039735,
162
+ "r":0.5426829268,
163
+ "f":0.5650793651
164
  },
165
  "mark":{
166
+ "p":0.8933333333,
167
+ "r":0.9305555556,
168
+ "f":0.9115646259
169
  },
170
  "obl":{
171
+ "p":0.7699876999,
172
+ "r":0.7864321608,
173
+ "f":0.7781230578
174
  },
175
  "nsubj:pass":{
176
+ "p":0.7920792079,
177
+ "r":0.8421052632,
178
+ "f":0.8163265306
179
  },
180
  "xcomp":{
181
+ "p":0.7203389831,
182
+ "r":0.7083333333,
183
+ "f":0.7142857143
184
  },
185
  "cc":{
186
+ "p":0.7224975223,
187
+ "r":0.7217821782,
188
+ "f":0.7221396731
189
  },
190
  "conj":{
191
+ "p":0.6566579634,
192
+ "r":0.6532467532,
193
+ "f":0.6549479167
194
  },
195
  "dislocated":{
196
+ "p":0.2307692308,
197
+ "r":0.1071428571,
198
+ "f":0.1463414634
199
  },
200
  "amod":{
201
+ "p":0.8027522936,
202
+ "r":0.8101851852,
203
+ "f":0.8064516129
204
  },
205
  "parataxis":{
206
  "p":0.0,
 
213
  "f":0.0
214
  },
215
  "orphan":{
216
+ "p":0.2962962963,
217
  "r":0.2285714286,
218
+ "f":0.2580645161
219
  },
220
  "nummod":{
221
+ "p":0.8333333333,
222
+ "r":0.8571428571,
223
+ "f":0.8450704225
224
  },
225
+ "csubj:pass":{
226
+ "p":0.6,
227
+ "r":0.4285714286,
228
+ "f":0.5
229
  },
230
  "obl:agent":{
231
+ "p":0.6428571429,
232
+ "r":0.4090909091,
233
+ "f":0.5
 
 
 
 
 
234
  },
235
  "vocative":{
236
+ "p":0.8421052632,
237
  "r":0.7868852459,
238
+ "f":0.813559322
239
+ },
240
+ "fixed":{
241
+ "p":0.75,
242
+ "r":0.75,
243
+ "f":0.75
244
  },
245
  "aux":{
246
  "p":0.0,
 
258
  "f":0.9230769231
259
  }
260
  },
261
+ "lemma_acc":0.9643232687,
262
+ "speed":1260.1395624501
263
  }
attribute_ruler/patterns CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
 
config.cfg CHANGED
@@ -1,12 +1,12 @@
1
  [paths]
2
  train = "corpus/train/grc_proiel-ud-train.spacy"
3
  dev = "corpus/dev/grc_proiel-ud-dev.spacy"
4
- vectors = null
5
  init_tok2vec = null
6
 
7
  [system]
8
  gpu_allocator = "pytorch"
9
- seed = 0
10
 
11
  [nlp]
12
  lang = "grc"
@@ -51,19 +51,19 @@ subword_features = true
51
  [components.morphologizer]
52
  factory = "morphologizer"
53
  extend = false
 
54
  overwrite = true
55
  scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
56
 
57
  [components.morphologizer.model]
58
- @architectures = "spacy.Tagger.v2"
59
  nO = null
60
- normalize = false
61
 
62
  [components.morphologizer.model.tok2vec]
63
  @architectures = "spacy-transformers.TransformerListener.v1"
64
  grad_factor = 1.0
65
  pooling = {"@layers":"reduce_mean.v1"}
66
- upstream = "*"
67
 
68
  [components.parser]
69
  factory = "parser"
@@ -86,7 +86,7 @@ nO = null
86
  @architectures = "spacy-transformers.TransformerListener.v1"
87
  grad_factor = 1.0
88
  pooling = {"@layers":"reduce_mean.v1"}
89
- upstream = "*"
90
 
91
  [components.senter]
92
  factory = "senter"
@@ -110,20 +110,20 @@ subword_features = true
110
 
111
  [components.tagger]
112
  factory = "tagger"
 
113
  neg_prefix = "!"
114
  overwrite = false
115
  scorer = {"@scorers":"spacy.tagger_scorer.v1"}
116
 
117
  [components.tagger.model]
118
- @architectures = "spacy.Tagger.v2"
119
  nO = null
120
- normalize = false
121
 
122
  [components.tagger.model.tok2vec]
123
  @architectures = "spacy-transformers.TransformerListener.v1"
124
  grad_factor = 1.0
125
  pooling = {"@layers":"reduce_mean.v1"}
126
- upstream = "*"
127
 
128
  [components.transformer]
129
  factory = "transformer"
@@ -177,7 +177,7 @@ max_epochs = 0
177
  max_steps = 20000
178
  eval_frequency = 200
179
  frozen_components = ["lemmatizer"]
180
- annotating_components = []
181
  before_to_disk = null
182
  before_update = null
183
 
 
1
  [paths]
2
  train = "corpus/train/grc_proiel-ud-train.spacy"
3
  dev = "corpus/dev/grc_proiel-ud-dev.spacy"
4
+ vectors = "vectors/large"
5
  init_tok2vec = null
6
 
7
  [system]
8
  gpu_allocator = "pytorch"
9
+ seed = 1
10
 
11
  [nlp]
12
  lang = "grc"
 
51
  [components.morphologizer]
52
  factory = "morphologizer"
53
  extend = false
54
+ label_smoothing = 0.0
55
  overwrite = true
56
  scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
57
 
58
  [components.morphologizer.model]
59
+ @architectures = "spacy.Tagger.v1"
60
  nO = null
 
61
 
62
  [components.morphologizer.model.tok2vec]
63
  @architectures = "spacy-transformers.TransformerListener.v1"
64
  grad_factor = 1.0
65
  pooling = {"@layers":"reduce_mean.v1"}
66
+ upstream = "transformer"
67
 
68
  [components.parser]
69
  factory = "parser"
 
86
  @architectures = "spacy-transformers.TransformerListener.v1"
87
  grad_factor = 1.0
88
  pooling = {"@layers":"reduce_mean.v1"}
89
+ upstream = "transformer"
90
 
91
  [components.senter]
92
  factory = "senter"
 
110
 
111
  [components.tagger]
112
  factory = "tagger"
113
+ label_smoothing = 0.0
114
  neg_prefix = "!"
115
  overwrite = false
116
  scorer = {"@scorers":"spacy.tagger_scorer.v1"}
117
 
118
  [components.tagger.model]
119
+ @architectures = "spacy.Tagger.v1"
120
  nO = null
 
121
 
122
  [components.tagger.model.tok2vec]
123
  @architectures = "spacy-transformers.TransformerListener.v1"
124
  grad_factor = 1.0
125
  pooling = {"@layers":"reduce_mean.v1"}
126
+ upstream = "transformer"
127
 
128
  [components.transformer]
129
  factory = "transformer"
 
177
  max_steps = 20000
178
  eval_frequency = 200
179
  frozen_components = ["lemmatizer"]
180
+ annotating_components = ["lemmatizer"]
181
  before_to_disk = null
182
  before_update = null
183
 
grc_proiel_trf-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5f50028ae2e0dc052a6c7e6891d1ce4977ae81cd19f413075c0d2fd984188cf
3
- size 448766207
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37e1947146c2f6560f3dfc53e527fea434c6b33b3bfe358380e5741dbf64ce0a
3
+ size 666859704
lemmatizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7943319b989596449853f63213d3598cab16ee8576e94756e1406f352f95469c
3
  size 24263260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47c2c081c979e7cb5953f63ec0f8c30fb603c65e6cb7315730d37776e684fa40
3
  size 24263260
meta.json CHANGED
@@ -1,19 +1,19 @@
1
  {
2
  "lang":"grc",
3
  "name":"proiel_trf",
4
- "version":"3.5.3",
5
  "description":"",
6
  "author":"",
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
- "spacy_version":">=3.5.3,<3.6.0",
11
- "spacy_git_version":"512241e12",
12
  "vectors":{
13
- "width":0,
14
- "vectors":0,
15
- "keys":0,
16
- "name":null
17
  },
18
  "labels":{
19
  "transformer":[
@@ -1134,73 +1134,73 @@
1134
 
1135
  ],
1136
  "performance":{
1137
- "pos_acc":0.9811016701,
1138
- "morph_acc":0.9390565485,
1139
  "morph_per_feat":{
1140
  "Case":{
1141
- "p":0.9864796563,
1142
- "r":0.9852347299,
1143
- "f":0.9858568001
1144
  },
1145
  "Gender":{
1146
- "p":0.9393554935,
1147
- "r":0.9447492663,
1148
- "f":0.9420446593
1149
  },
1150
  "Number":{
1151
- "p":0.9964792379,
1152
- "r":0.9950367077,
1153
- "f":0.9957574503
1154
  },
1155
  "Person":{
1156
- "p":0.9925871016,
1157
- "r":0.988556663,
1158
- "f":0.9905677825
1159
  },
1160
  "PronType":{
1161
- "p":0.9911908646,
1162
- "r":0.9863636364,
1163
- "f":0.9887713588
1164
  },
1165
  "Polarity":{
1166
  "p":1.0,
1167
- "r":0.9844559585,
1168
- "f":0.9921671018
1169
  },
1170
  "Aspect":{
1171
- "p":0.9908727895,
1172
  "r":0.9858115778,
1173
- "f":0.9883357041
1174
  },
1175
  "Mood":{
1176
- "p":0.9902467011,
1177
- "r":0.9879793932,
1178
- "f":0.9891117479
1179
  },
1180
  "Tense":{
1181
- "p":0.9903780069,
1182
- "r":0.9900377877,
1183
- "f":0.9902078681
1184
  },
1185
  "VerbForm":{
1186
- "p":0.9972518035,
1187
  "r":0.9965671129,
1188
- "f":0.9969093407
1189
  },
1190
  "Voice":{
1191
- "p":0.9817869416,
1192
- "r":0.9814496737,
1193
- "f":0.9816182786
1194
  },
1195
  "Degree":{
1196
- "p":0.960916442,
1197
- "r":0.9583333333,
1198
- "f":0.9596231494
1199
  },
1200
  "Definite":{
1201
- "p":0.9962325081,
1202
- "r":1.0,
1203
- "f":0.9981126988
1204
  },
1205
  "Reflex":{
1206
  "p":1.0,
@@ -1213,134 +1213,134 @@
1213
  "f":0.972972973
1214
  }
1215
  },
1216
- "tag_acc":0.9821271609,
1217
- "dep_uas":0.8559185467,
1218
  "dep_las":0.8229563434,
1219
  "dep_las_per_type":{
1220
  "nsubj":{
1221
- "p":0.8456449835,
1222
- "r":0.8309859155,
1223
- "f":0.8382513661
1224
  },
1225
  "discourse":{
1226
- "p":0.8473177442,
1227
- "r":0.8461538462,
1228
- "f":0.8467353952
1229
  },
1230
  "mark":{
1231
- "p":0.9173553719,
1232
- "r":0.8951612903,
1233
- "f":0.906122449
1234
  },
1235
  "advmod":{
1236
- "p":0.7729323308,
1237
- "r":0.7637444279,
1238
- "f":0.7683109118
1239
  },
1240
  "advcl":{
1241
- "p":0.7948717949,
1242
- "r":0.8046448087,
1243
- "f":0.7997284453
1244
  },
1245
  "xcomp":{
1246
- "p":0.7335907336,
1247
- "r":0.76,
1248
- "f":0.7465618861
1249
  },
1250
  "cop":{
1251
- "p":0.7945205479,
1252
- "r":0.8130841121,
1253
- "f":0.8036951501
1254
  },
1255
  "root":{
1256
- "p":0.8430286242,
1257
- "r":0.8959764475,
1258
- "f":0.8686964795
1259
  },
1260
  "det":{
1261
- "p":0.9446366782,
1262
- "r":0.947094536,
1263
- "f":0.9458640104
1264
  },
1265
  "nmod":{
1266
- "p":0.800744879,
1267
- "r":0.7570422535,
1268
- "f":0.778280543
1269
  },
1270
  "obj":{
1271
- "p":0.8803329865,
1272
- "r":0.8785046729,
1273
- "f":0.8794178794
1274
  },
1275
  "case":{
1276
- "p":0.9607843137,
1277
- "r":0.9671052632,
1278
- "f":0.9639344262
1279
  },
1280
  "obl":{
1281
- "p":0.7754137116,
1282
- "r":0.800976801,
1283
- "f":0.787987988
1284
  },
1285
  "cc":{
1286
- "p":0.7473903967,
1287
- "r":0.7366255144,
1288
- "f":0.7419689119
1289
  },
1290
  "conj":{
1291
- "p":0.7067773167,
1292
- "r":0.6706036745,
1293
- "f":0.6882154882
1294
  },
1295
  "obl:agent":{
1296
- "p":0.7777777778,
1297
- "r":0.5675675676,
1298
- "f":0.65625
1299
  },
1300
  "ccomp":{
1301
- "p":0.7093023256,
1302
- "r":0.6069651741,
1303
- "f":0.654155496
1304
  },
1305
  "nsubj:pass":{
1306
- "p":0.7416666667,
1307
- "r":0.8317757009,
1308
- "f":0.7841409692
1309
  },
1310
  "amod":{
1311
- "p":0.7663551402,
1312
- "r":0.7699530516,
1313
- "f":0.7681498829
1314
  },
1315
  "acl":{
1316
- "p":0.5844155844,
1317
- "r":0.5454545455,
1318
- "f":0.5642633229
1319
  },
1320
  "iobj":{
1321
- "p":0.7899543379,
1322
- "r":0.7990762125,
1323
- "f":0.794489093
1324
- },
1325
- "appos":{
1326
- "p":0.5135135135,
1327
- "r":0.4042553191,
1328
- "f":0.4523809524
1329
  },
1330
  "nummod":{
1331
- "p":0.8387096774,
1332
  "r":0.7647058824,
1333
- "f":0.8
1334
  },
1335
  "vocative":{
1336
- "p":0.7878787879,
1337
- "r":0.7536231884,
1338
- "f":0.7703703704
1339
  },
1340
  "orphan":{
1341
- "p":0.4285714286,
1342
  "r":0.2093023256,
1343
- "f":0.28125
 
 
 
 
 
1344
  },
1345
  "dep":{
1346
  "p":0.0,
@@ -1352,20 +1352,20 @@
1352
  "r":0.0,
1353
  "f":0.0
1354
  },
1355
- "flat:name":{
1356
- "p":0.8125,
1357
- "r":0.5909090909,
1358
- "f":0.6842105263
1359
- },
1360
  "dislocated":{
1361
- "p":0.5,
1362
- "r":0.1538461538,
1363
- "f":0.2352941176
1364
  },
1365
  "csubj:pass":{
1366
  "p":0.5,
1367
- "r":0.2,
1368
- "f":0.2857142857
 
 
 
 
 
1369
  },
1370
  "aux:pass":{
1371
  "p":0.0,
@@ -1373,9 +1373,9 @@
1373
  "f":0.0
1374
  },
1375
  "fixed":{
1376
- "p":0.8333333333,
1377
  "r":0.5,
1378
- "f":0.625
1379
  },
1380
  "aux":{
1381
  "p":0.0,
@@ -1383,17 +1383,17 @@
1383
  "f":0.0
1384
  }
1385
  },
1386
- "sents_p":0.674976916,
1387
- "sents_r":0.7173699706,
1388
- "sents_f":0.6955280685,
1389
- "lemma_acc":0.966891298,
1390
- "transformer_loss":18054.842733931,
1391
- "morphologizer_loss":3329.8427455486,
1392
- "tagger_loss":2812.9380097912,
1393
- "parser_loss":25395.0410599641,
1394
- "senter_loss":24094.0680769682
1395
  },
1396
  "requirements":[
1397
- "spacy-transformers>=1.1.9,<1.2.0"
1398
  ]
1399
  }
 
1
  {
2
  "lang":"grc",
3
  "name":"proiel_trf",
4
+ "version":"3.6.0",
5
  "description":"",
6
  "author":"",
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
+ "spacy_version":">=3.6.0,<3.7.0",
11
+ "spacy_git_version":"6fc153a26",
12
  "vectors":{
13
+ "width":300,
14
+ "vectors":200000,
15
+ "keys":-1,
16
+ "name":"grc_pipeline.vectors"
17
  },
18
  "labels":{
19
  "transformer":[
 
1134
 
1135
  ],
1136
  "performance":{
1137
+ "pos_acc":0.9845443891,
1138
+ "morph_acc":0.9417667741,
1139
  "morph_per_feat":{
1140
  "Case":{
1141
+ "p":0.9865992415,
1142
+ "r":0.9848561333,
1143
+ "f":0.9857269168
1144
  },
1145
  "Gender":{
1146
+ "p":0.9410943253,
1147
+ "r":0.9458976649,
1148
+ "f":0.9434898816
1149
  },
1150
  "Number":{
1151
+ "p":0.9962717481,
1152
+ "r":0.9947265019,
1153
+ "f":0.9954985254
1154
  },
1155
  "Person":{
1156
+ "p":0.9933382679,
1157
+ "r":0.9907715024,
1158
+ "f":0.9920532249
1159
  },
1160
  "PronType":{
1161
+ "p":0.9921722114,
1162
+ "r":0.9876623377,
1163
+ "f":0.989912138
1164
  },
1165
  "Polarity":{
1166
  "p":1.0,
1167
+ "r":0.9792746114,
1168
+ "f":0.9895287958
1169
  },
1170
  "Aspect":{
1171
+ "p":0.9931389365,
1172
  "r":0.9858115778,
1173
+ "f":0.9894616918
1174
  },
1175
  "Mood":{
1176
+ "p":0.9914089347,
1177
+ "r":0.9908414425,
1178
+ "f":0.9911251074
1179
  },
1180
  "Tense":{
1181
+ "p":0.9893543956,
1182
+ "r":0.9896942631,
1183
+ "f":0.9895243002
1184
  },
1185
  "VerbForm":{
1186
+ "p":0.9965671129,
1187
  "r":0.9965671129,
1188
+ "f":0.9965671129
1189
  },
1190
  "Voice":{
1191
+ "p":0.9821428571,
1192
+ "r":0.9824802473,
1193
+ "f":0.9823115233
1194
  },
1195
  "Degree":{
1196
+ "p":0.9752747253,
1197
+ "r":0.9543010753,
1198
+ "f":0.964673913
1199
  },
1200
  "Definite":{
1201
+ "p":0.9946236559,
1202
+ "r":0.9994597515,
1203
+ "f":0.9970358394
1204
  },
1205
  "Reflex":{
1206
  "p":1.0,
 
1213
  "f":0.972972973
1214
  }
1215
  },
1216
+ "tag_acc":0.9852768825,
1217
+ "dep_uas":0.8578962789,
1218
  "dep_las":0.8229563434,
1219
  "dep_las_per_type":{
1220
  "nsubj":{
1221
+ "p":0.8499452355,
1222
+ "r":0.8407367281,
1223
+ "f":0.8453159041
1224
  },
1225
  "discourse":{
1226
+ "p":0.8567530696,
1227
+ "r":0.8626373626,
1228
+ "f":0.8596851472
1229
  },
1230
  "mark":{
1231
+ "p":0.9156626506,
1232
+ "r":0.9193548387,
1233
+ "f":0.9175050302
1234
  },
1235
  "advmod":{
1236
+ "p":0.8061538462,
1237
+ "r":0.7786032689,
1238
+ "f":0.7921390779
1239
  },
1240
  "advcl":{
1241
+ "p":0.7795484728,
1242
+ "r":0.8019125683,
1243
+ "f":0.7905723906
1244
  },
1245
  "xcomp":{
1246
+ "p":0.7081712062,
1247
+ "r":0.728,
1248
+ "f":0.7179487179
1249
  },
1250
  "cop":{
1251
+ "p":0.823255814,
1252
+ "r":0.8271028037,
1253
+ "f":0.8251748252
1254
  },
1255
  "root":{
1256
+ "p":0.877294686,
1257
+ "r":0.8910696762,
1258
+ "f":0.8841285297
1259
  },
1260
  "det":{
1261
+ "p":0.9444685466,
1262
+ "r":0.9440589766,
1263
+ "f":0.9442637172
1264
  },
1265
  "nmod":{
1266
+ "p":0.7996254682,
1267
+ "r":0.7517605634,
1268
+ "f":0.7749546279
1269
  },
1270
  "obj":{
1271
+ "p":0.8683673469,
1272
+ "r":0.8836967809,
1273
+ "f":0.8759650026
1274
  },
1275
  "case":{
1276
+ "p":0.96069869,
1277
+ "r":0.9649122807,
1278
+ "f":0.9628008753
1279
  },
1280
  "obl":{
1281
+ "p":0.7661574618,
1282
+ "r":0.7960927961,
1283
+ "f":0.7808383234
1284
  },
1285
  "cc":{
1286
+ "p":0.7338877339,
1287
+ "r":0.7263374486,
1288
+ "f":0.7300930714
1289
  },
1290
  "conj":{
1291
+ "p":0.6679841897,
1292
+ "r":0.6653543307,
1293
+ "f":0.6666666667
1294
  },
1295
  "obl:agent":{
1296
+ "p":0.8275862069,
1297
+ "r":0.6486486486,
1298
+ "f":0.7272727273
1299
  },
1300
  "ccomp":{
1301
+ "p":0.687150838,
1302
+ "r":0.6119402985,
1303
+ "f":0.6473684211
1304
  },
1305
  "nsubj:pass":{
1306
+ "p":0.7479674797,
1307
+ "r":0.8598130841,
1308
+ "f":0.8
1309
  },
1310
  "amod":{
1311
+ "p":0.7960199005,
1312
+ "r":0.7511737089,
1313
+ "f":0.7729468599
1314
  },
1315
  "acl":{
1316
+ "p":0.5957446809,
1317
+ "r":0.5090909091,
1318
+ "f":0.5490196078
1319
  },
1320
  "iobj":{
1321
+ "p":0.7871853547,
1322
+ "r":0.7944572748,
1323
+ "f":0.7908045977
 
 
 
 
 
1324
  },
1325
  "nummod":{
1326
+ "p":0.8253968254,
1327
  "r":0.7647058824,
1328
+ "f":0.7938931298
1329
  },
1330
  "vocative":{
1331
+ "p":0.8307692308,
1332
+ "r":0.7826086957,
1333
+ "f":0.8059701493
1334
  },
1335
  "orphan":{
1336
+ "p":0.375,
1337
  "r":0.2093023256,
1338
+ "f":0.2686567164
1339
+ },
1340
+ "appos":{
1341
+ "p":0.4956521739,
1342
+ "r":0.4042553191,
1343
+ "f":0.4453125
1344
  },
1345
  "dep":{
1346
  "p":0.0,
 
1352
  "r":0.0,
1353
  "f":0.0
1354
  },
 
 
 
 
 
1355
  "dislocated":{
1356
+ "p":0.6,
1357
+ "r":0.2307692308,
1358
+ "f":0.3333333333
1359
  },
1360
  "csubj:pass":{
1361
  "p":0.5,
1362
+ "r":0.4,
1363
+ "f":0.4444444444
1364
+ },
1365
+ "flat:name":{
1366
+ "p":0.7647058824,
1367
+ "r":0.5909090909,
1368
+ "f":0.6666666667
1369
  },
1370
  "aux:pass":{
1371
  "p":0.0,
 
1373
  "f":0.0
1374
  },
1375
  "fixed":{
1376
+ "p":1.0,
1377
  "r":0.5,
1378
+ "f":0.6666666667
1379
  },
1380
  "aux":{
1381
  "p":0.0,
 
1383
  "f":0.0
1384
  }
1385
  },
1386
+ "sents_p":0.7217391304,
1387
+ "sents_r":0.7330716389,
1388
+ "sents_f":0.7273612463,
1389
+ "lemma_acc":0.9656460592,
1390
+ "transformer_loss":10738.6967398962,
1391
+ "morphologizer_loss":106.6996396534,
1392
+ "tagger_loss":30.5525559076,
1393
+ "parser_loss":22808.396971178,
1394
+ "senter_loss":24485.4560953379
1395
  },
1396
  "requirements":[
1397
+ "spacy-transformers>=1.2.5,<1.3.0"
1398
  ]
1399
  }
morphologizer/cfg CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "extend":false,
 
3
  "labels_morph":{
4
  "Case=Gen|Gender=Masc|Number=Sing|POS=PROPN":"Case=Gen|Gender=Masc|Number=Sing",
5
  "Case=Gen|Gender=Masc|Number=Sing|POS=NOUN":"Case=Gen|Gender=Masc|Number=Sing",
 
1
  {
2
  "extend":false,
3
+ "label_smoothing":0.0,
4
  "labels_morph":{
5
  "Case=Gen|Gender=Masc|Number=Sing|POS=PROPN":"Case=Gen|Gender=Masc|Number=Sing",
6
  "Case=Gen|Gender=Masc|Number=Sing|POS=NOUN":"Case=Gen|Gender=Masc|Number=Sing",
morphologizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d44360c7ffba8d8f88a5cdf683e7cbb2076948c5341c221635e1bc28540cb553
3
  size 3165857
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37b378d7100e403a6bf0cac91c43aa63eb2282340a25508745f4c509e59aacac
3
  size 3165857
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a749ed09c980bff4a38a0ea6c7cd612fbe62f0f18f5113ef51382b15834a0af
3
  size 1906923
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ade37f2c48e4e494a1b433aeea742403875b4bc0ff276248319227f89efc68b
3
  size 1906923
senter/model CHANGED
Binary files a/senter/model and b/senter/model differ
 
tagger/cfg CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "labels":[
3
  "A-",
4
  "C-",
 
1
  {
2
+ "label_smoothing":0.0,
3
  "labels":[
4
  "A-",
5
  "C-",
tagger/model CHANGED
Binary files a/tagger/model and b/tagger/model differ
 
transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55a8d769cf27518483e23cf1f7b1be39653e2ea008f8f91793e5ecf40300355e
3
- size 453382057
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10fa825955db9adbe09690195bcc4e48d99b29859cbb03e3bbc20e2276639448
3
+ size 453382075
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6495dc4bdfaf7e158ce3d30a90606674a3df4dcd5b13bb716ff2ad1fca297a65
3
- size 17984960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2421d0e7e603eec3b52a74f6e7fb725aa230d010823a6341ac1c90715d843d7
3
+ size 17991543
vocab/vectors CHANGED
Binary files a/vocab/vectors and b/vocab/vectors differ
 
vocab/vectors.cfg CHANGED
@@ -1,3 +1,10 @@
1
  {
2
- "mode":"default"
 
 
 
 
 
 
 
3
  }
 
1
  {
2
+ "mode":"floret",
3
+ "minn":2,
4
+ "maxn":10,
5
+ "hash_count":2,
6
+ "hash_seed":2166136261,
7
+ "bow":"<",
8
+ "eow":">",
9
+ "attr":65
10
  }