adrianeboyd committed on
Commit
d658625
1 Parent(s): 287eea0

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -14,59 +14,59 @@ model-index:
14
  metrics:
15
  - name: TAG (XPOS) Accuracy
16
  type: accuracy
17
- value: 0.9676386572
18
  - task:
19
  name: POS
20
  type: token-classification
21
  metrics:
22
  - name: POS (UPOS) Accuracy
23
  type: accuracy
24
- value: 0.9914495504
25
  - task:
26
  name: MORPH
27
  type: token-classification
28
  metrics:
29
  - name: Morph (UFeats) Accuracy
30
  type: accuracy
31
- value: 0.988017638
32
  - task:
33
  name: LEMMA
34
  type: token-classification
35
  metrics:
36
  - name: Lemma Accuracy
37
  type: accuracy
38
- value: 0.9678853802
39
  - task:
40
  name: UNLABELED_DEPENDENCIES
41
  type: token-classification
42
  metrics:
43
  - name: Unlabeled Attachment Score (UAS)
44
  type: f_score
45
- value: 0.9467628408
46
  - task:
47
  name: LABELED_DEPENDENCIES
48
  type: token-classification
49
  metrics:
50
  - name: Labeled Attachment Score (LAS)
51
  type: f_score
52
- value: 0.9292321773
53
  - task:
54
  name: SENTS
55
  type: token-classification
56
  metrics:
57
  - name: Sentences F-Score
58
  type: f_score
59
- value: 0.9700059988
60
  ---
61
  ### Details: https://spacy.io/models/es#es_dep_news_trf
62
 
63
- Spanish transformer pipeline (dccuchile/bert-base-spanish-wwm-cased). Components: transformer, morphologizer, parser, attribute_ruler, lemmatizer.
64
 
65
  | Feature | Description |
66
  | --- | --- |
67
  | **Name** | `es_dep_news_trf` |
68
- | **Version** | `3.6.1` |
69
- | **spaCy** | `>=3.6.0,<3.7.0` |
70
  | **Default Pipeline** | `transformer`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer` |
71
  | **Components** | `transformer`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer` |
72
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
@@ -95,15 +95,15 @@ Spanish transformer pipeline (dccuchile/bert-base-spanish-wwm-cased). Components
95
  | `TOKEN_P` | 99.89 |
96
  | `TOKEN_R` | 99.95 |
97
  | `TOKEN_F` | 99.92 |
98
- | `POS_ACC` | 99.14 |
99
- | `MORPH_ACC` | 98.80 |
100
- | `MORPH_MICRO_P` | 99.67 |
101
- | `MORPH_MICRO_R` | 99.36 |
102
  | `MORPH_MICRO_F` | 99.51 |
103
- | `SENTS_P` | 96.25 |
104
- | `SENTS_R` | 97.76 |
105
- | `SENTS_F` | 97.00 |
106
- | `DEP_UAS` | 94.68 |
107
- | `DEP_LAS` | 92.92 |
108
- | `TAG_ACC` | 96.76 |
109
- | `LEMMA_ACC` | 96.79 |
14
  metrics:
15
  - name: TAG (XPOS) Accuracy
16
  type: accuracy
17
+ value: 0.9674401664
18
  - task:
19
  name: POS
20
  type: token-classification
21
  metrics:
22
  - name: POS (UPOS) Accuracy
23
  type: accuracy
24
+ value: 0.9913065148
25
  - task:
26
  name: MORPH
27
  type: token-classification
28
  metrics:
29
  - name: Morph (UFeats) Accuracy
30
  type: accuracy
31
+ value: 0.9877497028
32
  - task:
33
  name: LEMMA
34
  type: token-classification
35
  metrics:
36
  - name: Lemma Accuracy
37
  type: accuracy
38
+ value: 0.9685370645
39
  - task:
40
  name: UNLABELED_DEPENDENCIES
41
  type: token-classification
42
  metrics:
43
  - name: Unlabeled Attachment Score (UAS)
44
  type: f_score
45
+ value: 0.9484729289
46
  - task:
47
  name: LABELED_DEPENDENCIES
48
  type: token-classification
49
  metrics:
50
  - name: Labeled Attachment Score (LAS)
51
  type: f_score
52
+ value: 0.9310393672
53
  - task:
54
  name: SENTS
55
  type: token-classification
56
  metrics:
57
  - name: Sentences F-Score
58
  type: f_score
59
+ value: 0.9712057588
60
  ---
61
  ### Details: https://spacy.io/models/es#es_dep_news_trf
62
 
63
+ Spanish transformer pipeline (Transformer(name='dccuchile/bert-base-spanish-wwm-cased', piece_encoder='bert-wordpiece', stride=112, type='bert', width=768, window=158, vocab_size=31002)). Components: transformer, morphologizer, parser, attribute_ruler, lemmatizer.
64
 
65
  | Feature | Description |
66
  | --- | --- |
67
  | **Name** | `es_dep_news_trf` |
68
+ | **Version** | `3.7.2` |
69
+ | **spaCy** | `>=3.7.0,<3.8.0` |
70
  | **Default Pipeline** | `transformer`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer` |
71
  | **Components** | `transformer`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer` |
72
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
95
  | `TOKEN_P` | 99.89 |
96
  | `TOKEN_R` | 99.95 |
97
  | `TOKEN_F` | 99.92 |
98
+ | `POS_ACC` | 99.13 |
99
+ | `MORPH_ACC` | 98.77 |
100
+ | `MORPH_MICRO_P` | 99.66 |
101
+ | `MORPH_MICRO_R` | 99.37 |
102
  | `MORPH_MICRO_F` | 99.51 |
103
+ | `SENTS_P` | 96.37 |
104
+ | `SENTS_R` | 97.88 |
105
+ | `SENTS_F` | 97.12 |
106
+ | `DEP_UAS` | 94.85 |
107
+ | `DEP_LAS` | 93.10 |
108
+ | `TAG_ACC` | 96.74 |
109
+ | `LEMMA_ACC` | 96.85 |
accuracy.json CHANGED
@@ -3,21 +3,21 @@
3
  "token_p": 0.9989078786,
4
  "token_r": 0.9995398685,
5
  "token_f": 0.9992237736,
6
- "pos_acc": 0.9914495504,
7
- "morph_acc": 0.988017638,
8
- "morph_micro_p": 0.9966965648,
9
- "morph_micro_r": 0.9935805248,
10
- "morph_micro_f": 0.9951361055,
11
  "morph_per_feat": {
12
  "Definite": {
13
- "p": 0.9985744833,
14
- "r": 0.9992867332,
15
- "f": 0.9989304813
16
  },
17
  "Gender": {
18
- "p": 0.9970247416,
19
- "r": 0.9924144022,
20
- "f": 0.9947142299
21
  },
22
  "Number": {
23
  "p": 0.998570932,
@@ -25,44 +25,44 @@
25
  "f": 0.9966270262
26
  },
27
  "PronType": {
28
- "p": 0.9979894686,
29
- "r": 0.9956064947,
30
- "f": 0.9967965575
31
  },
32
  "PunctType": {
33
- "p": 0.9998403321,
34
- "r": 0.9982464531,
35
- "f": 0.9990427569
36
  },
37
  "VerbForm": {
38
- "p": 0.9956195244,
39
- "r": 0.994375,
40
- "f": 0.994996873
41
  },
42
  "Mood": {
43
- "p": 0.9935210566,
44
- "r": 0.9910514541,
45
- "f": 0.9922847188
46
  },
47
  "Person": {
48
- "p": 0.9961592179,
49
- "r": 0.9932114883,
50
- "f": 0.9946831692
51
  },
52
  "Tense": {
53
- "p": 0.9927189989,
54
- "r": 0.9943026436,
55
- "f": 0.9935101901
56
  },
57
  "NumForm": {
58
- "p": 0.9917218543,
59
  "r": 0.9708265802,
60
- "f": 0.9811629812
61
  },
62
  "NumType": {
63
- "p": 0.9846022241,
64
- "r": 0.9680403701,
65
- "f": 0.9762510602
66
  },
67
  "Poss": {
68
  "p": 1.0,
@@ -75,34 +75,34 @@
75
  "f": 0.998998999
76
  },
77
  "Case": {
78
- "p": 0.9686137751,
79
- "r": 0.9610726644,
80
- "f": 0.9648284846
81
  },
82
  "PrepCase": {
83
- "p": 1.0,
84
- "r": 0.9927536232,
85
- "f": 0.9963636364
86
  },
87
  "Reflex": {
88
- "p": 0.9824561404,
89
- "r": 0.9903536977,
90
- "f": 0.9863891113
91
  },
92
  "Degree": {
93
- "p": 0.9963898917,
94
- "r": 0.9822064057,
95
- "f": 0.9892473118
96
  },
97
  "Polarity": {
98
  "p": 1.0,
99
- "r": 0.9935064935,
100
- "f": 0.996742671
101
  },
102
  "AdvType": {
103
- "p": 0.9414893617,
104
  "r": 0.9414893617,
105
- "f": 0.9414893617
106
  },
107
  "Number[psor]": {
108
  "p": 1.0,
@@ -115,156 +115,156 @@
115
  "f": 1.0
116
  }
117
  },
118
- "sents_p": 0.9625,
119
- "sents_r": 0.9776299879,
120
- "sents_f": 0.9700059988,
121
- "dep_uas": 0.9467628408,
122
- "dep_las": 0.9292321773,
123
  "dep_las_per_type": {
124
  "det": {
125
- "p": 0.9823645598,
126
- "r": 0.9851443124,
127
- "f": 0.9837524724
128
  },
129
  "nsubj": {
130
- "p": 0.9590792839,
131
- "r": 0.9601316752,
132
- "f": 0.959605191
133
  },
134
  "case": {
135
- "p": 0.9725823879,
136
- "r": 0.970746832,
137
- "f": 0.9716637431
138
  },
139
  "amod": {
140
- "p": 0.9549581006,
141
- "r": 0.9519665855,
142
- "f": 0.9534599965
143
  },
144
  "nmod": {
145
- "p": 0.897606383,
146
- "r": 0.8935663225,
147
- "f": 0.8955817965
148
  },
149
  "mark": {
150
- "p": 0.9343544858,
151
- "r": 0.9510022272,
152
- "f": 0.9426048565
153
  },
154
  "acl": {
155
- "p": 0.8706563707,
156
- "r": 0.859047619,
157
- "f": 0.8648130393
158
  },
159
  "advcl": {
160
- "p": 0.7264038232,
161
- "r": 0.7496917386,
162
- "f": 0.7378640777
163
  },
164
  "nummod": {
165
- "p": 0.9379562044,
166
- "r": 0.9065255732,
167
- "f": 0.9219730942
168
  },
169
  "obj": {
170
- "p": 0.9256061109,
171
- "r": 0.9277629827,
172
- "f": 0.9266832918
173
  },
174
  "obl": {
175
- "p": 0.8297690334,
176
- "r": 0.8372895986,
177
- "f": 0.8335123523
178
  },
179
  "fixed": {
180
- "p": 0.8718291055,
181
- "r": 0.8788694482,
182
- "f": 0.8753351206
183
  },
184
  "cc": {
185
- "p": 0.967630854,
186
- "r": 0.9656357388,
187
- "f": 0.9666322669
188
  },
189
  "conj": {
190
- "p": 0.8573273079,
191
- "r": 0.8474792597,
192
- "f": 0.8523748395
193
  },
194
  "root": {
195
- "p": 0.9494047619,
196
- "r": 0.9643288996,
197
- "f": 0.9568086383
198
  },
199
  "flat": {
200
- "p": 0.9544095665,
201
- "r": 0.9594290008,
202
- "f": 0.9569127014
203
  },
204
  "xcomp": {
205
- "p": 0.9116883117,
206
- "r": 0.8624078624,
207
- "f": 0.8863636364
208
  },
209
  "advmod": {
210
- "p": 0.8787198203,
211
- "r": 0.8821871477,
212
- "f": 0.8804500703
213
  },
214
  "compound": {
215
- "p": 0.8854625551,
216
- "r": 0.8072289157,
217
- "f": 0.8445378151
218
  },
219
  "expl:pv": {
220
- "p": 0.8854961832,
221
- "r": 0.8888888889,
222
- "f": 0.8871892925
223
  },
224
  "cop": {
225
- "p": 0.9267241379,
226
- "r": 0.9227467811,
227
- "f": 0.9247311828
228
  },
229
  "ccomp": {
230
- "p": 0.897810219,
231
- "r": 0.9230769231,
232
- "f": 0.9102682701
233
  },
234
  "iobj": {
235
- "p": 0.8288770053,
236
- "r": 0.7711442786,
237
- "f": 0.7989690722
238
  },
239
  "aux": {
240
- "p": 0.9615931721,
241
- "r": 0.9615931721,
242
- "f": 0.9615931721
243
  },
244
  "appos": {
245
- "p": 0.8324396783,
246
- "r": 0.8661087866,
247
- "f": 0.8489405332
248
  },
249
  "expl:pass": {
250
- "p": 0.9397590361,
251
- "r": 0.8432432432,
252
- "f": 0.8888888889
253
  },
254
  "csubj": {
255
- "p": 0.8775510204,
256
- "r": 0.9052631579,
257
- "f": 0.8911917098
258
  },
259
  "parataxis": {
260
- "p": 0.7818181818,
261
- "r": 0.5119047619,
262
- "f": 0.618705036
263
  },
264
  "expl:impers": {
265
- "p": 0.7142857143,
266
- "r": 0.9375,
267
- "f": 0.8108108108
268
  },
269
  "orphan": {
270
  "p": 0.0,
@@ -272,9 +272,9 @@
272
  "f": 0.0
273
  },
274
  "dep": {
275
- "p": 0.0882352941,
276
- "r": 0.3333333333,
277
- "f": 0.1395348837
278
  },
279
  "nsubj:pass": {
280
  "p": 0.0,
@@ -292,7 +292,7 @@
292
  "f": 0.0
293
  }
294
  },
295
- "tag_acc": 0.9676386572,
296
- "lemma_acc": 0.9678853802,
297
- "speed": 2487.5064877036
298
  }
3
  "token_p": 0.9989078786,
4
  "token_r": 0.9995398685,
5
  "token_f": 0.9992237736,
6
+ "pos_acc": 0.9913065148,
7
+ "morph_acc": 0.9877497028,
8
+ "morph_micro_p": 0.9966031189,
9
+ "morph_micro_r": 0.9936743158,
10
+ "morph_micro_f": 0.9951365624,
11
  "morph_per_feat": {
12
  "Definite": {
13
+ "p": 0.998716303,
14
+ "r": 0.9988587732,
15
+ "f": 0.998787533
16
  },
17
  "Gender": {
18
+ "p": 0.9968154529,
19
+ "r": 0.9920507092,
20
+ "f": 0.9944273736
21
  },
22
  "Number": {
23
  "p": 0.998570932,
25
  "f": 0.9966270262
26
  },
27
  "PronType": {
28
+ "p": 0.998181122,
29
+ "r": 0.9958930277,
30
+ "f": 0.9970357621
31
  },
32
  "PunctType": {
33
+ "p": 0.9998403576,
34
+ "r": 0.9984058664,
35
+ "f": 0.9991225971
36
  },
37
  "VerbForm": {
38
+ "p": 0.9953110347,
39
+ "r": 0.995,
40
+ "f": 0.995155493
41
  },
42
  "Mood": {
43
+ "p": 0.9925280199,
44
+ "r": 0.9905543127,
45
+ "f": 0.9915401841
46
  },
47
  "Person": {
48
+ "p": 0.9959888385,
49
+ "r": 0.9940818103,
50
+ "f": 0.9950344107
51
  },
52
  "Tense": {
53
+ "p": 0.9931693989,
54
+ "r": 0.9940747493,
55
+ "f": 0.9936218679
56
  },
57
  "NumForm": {
58
+ "p": 0.9900826446,
59
  "r": 0.9708265802,
60
+ "f": 0.9803600655
61
  },
62
  "NumType": {
63
+ "p": 0.9811965812,
64
+ "r": 0.9655172414,
65
+ "f": 0.9732937685
66
  },
67
  "Poss": {
68
  "p": 1.0,
75
  "f": 0.998998999
76
  },
77
  "Case": {
78
+ "p": 0.9713043478,
79
+ "r": 0.9662629758,
80
+ "f": 0.9687771032
81
  },
82
  "PrepCase": {
83
+ "p": 0.998960499,
84
+ "r": 0.9948240166,
85
+ "f": 0.9968879668
86
  },
87
  "Reflex": {
88
+ "p": 0.9778830964,
89
+ "r": 0.9951768489,
90
+ "f": 0.9864541833
91
  },
92
  "Degree": {
93
+ "p": 0.9928571429,
94
+ "r": 0.9893238434,
95
+ "f": 0.991087344
96
  },
97
  "Polarity": {
98
  "p": 1.0,
99
+ "r": 0.9902597403,
100
+ "f": 0.9951060359
101
  },
102
  "AdvType": {
103
+ "p": 0.9619565217,
104
  "r": 0.9414893617,
105
+ "f": 0.9516129032
106
  },
107
  "Number[psor]": {
108
  "p": 1.0,
115
  "f": 1.0
116
  }
117
  },
118
+ "sents_p": 0.9636904762,
119
+ "sents_r": 0.9788391778,
120
+ "sents_f": 0.9712057588,
121
+ "dep_uas": 0.9484729289,
122
+ "dep_las": 0.9310393672,
123
  "dep_las_per_type": {
124
  "det": {
125
+ "p": 0.984183025,
126
+ "r": 0.9859932088,
127
+ "f": 0.9850872853
128
  },
129
  "nsubj": {
130
+ "p": 0.9609203798,
131
+ "r": 0.9623262619,
132
+ "f": 0.961622807
133
  },
134
  "case": {
135
+ "p": 0.9732649203,
136
+ "r": 0.9716904826,
137
+ "f": 0.9724770642
138
  },
139
  "amod": {
140
+ "p": 0.9577218728,
141
+ "r": 0.9540549948,
142
+ "f": 0.9558849172
143
  },
144
  "nmod": {
145
+ "p": 0.9085300429,
146
+ "r": 0.8967434472,
147
+ "f": 0.9025982678
148
  },
149
  "mark": {
150
+ "p": 0.9381838074,
151
+ "r": 0.9548997773,
152
+ "f": 0.9464679912
153
  },
154
  "acl": {
155
+ "p": 0.8763285024,
156
+ "r": 0.8638095238,
157
+ "f": 0.8700239808
158
  },
159
  "advcl": {
160
+ "p": 0.7511961722,
161
+ "r": 0.774352651,
162
+ "f": 0.7625986642
163
  },
164
  "nummod": {
165
+ "p": 0.9267857143,
166
+ "r": 0.9153439153,
167
+ "f": 0.9210292813
168
  },
169
  "obj": {
170
+ "p": 0.925545996,
171
+ "r": 0.9310918775,
172
+ "f": 0.9283106538
173
  },
174
  "obl": {
175
+ "p": 0.8356282272,
176
+ "r": 0.8381527838,
177
+ "f": 0.8368886016
178
  },
179
  "fixed": {
180
+ "p": 0.875502008,
181
+ "r": 0.8802153432,
182
+ "f": 0.877852349
183
  },
184
  "cc": {
185
+ "p": 0.9649965683,
186
+ "r": 0.9663230241,
187
+ "f": 0.9656593407
188
  },
189
  "conj": {
190
+ "p": 0.8464516129,
191
+ "r": 0.8372686662,
192
+ "f": 0.8418350979
193
  },
194
  "root": {
195
+ "p": 0.9505952381,
196
+ "r": 0.9655380895,
197
+ "f": 0.9580083983
198
  },
199
  "flat": {
200
+ "p": 0.9494047619,
201
+ "r": 0.958677686,
202
+ "f": 0.9540186916
203
  },
204
  "xcomp": {
205
+ "p": 0.8974358974,
206
+ "r": 0.85995086,
207
+ "f": 0.878293601
208
  },
209
  "advmod": {
210
+ "p": 0.8801120448,
211
+ "r": 0.8855693348,
212
+ "f": 0.8828322563
213
  },
214
  "compound": {
215
+ "p": 0.8986784141,
216
+ "r": 0.8192771084,
217
+ "f": 0.8571428571
218
  },
219
  "expl:pv": {
220
+ "p": 0.8943396226,
221
+ "r": 0.908045977,
222
+ "f": 0.9011406844
223
  },
224
  "cop": {
225
+ "p": 0.9191489362,
226
+ "r": 0.9270386266,
227
+ "f": 0.9230769231
228
  },
229
  "ccomp": {
230
+ "p": 0.8882882883,
231
+ "r": 0.9249530957,
232
+ "f": 0.90625
233
  },
234
  "iobj": {
235
+ "p": 0.8020833333,
236
+ "r": 0.7661691542,
237
+ "f": 0.7837150127
238
  },
239
  "aux": {
240
+ "p": 0.9671428571,
241
+ "r": 0.9630156472,
242
+ "f": 0.9650748396
243
  },
244
  "appos": {
245
+ "p": 0.8389715832,
246
+ "r": 0.8647140865,
247
+ "f": 0.8516483516
248
  },
249
  "expl:pass": {
250
+ "p": 0.9401197605,
251
+ "r": 0.8486486486,
252
+ "f": 0.8920454545
253
  },
254
  "csubj": {
255
+ "p": 0.902173913,
256
+ "r": 0.8736842105,
257
+ "f": 0.8877005348
258
  },
259
  "parataxis": {
260
+ "p": 0.7,
261
+ "r": 0.5,
262
+ "f": 0.5833333333
263
  },
264
  "expl:impers": {
265
+ "p": 0.6086956522,
266
+ "r": 0.875,
267
+ "f": 0.7179487179
268
  },
269
  "orphan": {
270
  "p": 0.0,
272
  "f": 0.0
273
  },
274
  "dep": {
275
+ "p": 0.0322580645,
276
+ "r": 0.1111111111,
277
+ "f": 0.05
278
  },
279
  "nsubj:pass": {
280
  "p": 0.0,
292
  "f": 0.0
293
  }
294
  },
295
+ "tag_acc": 0.9674401664,
296
+ "lemma_acc": 0.9685370645,
297
+ "speed": 2047.9632793631
298
  }
config.cfg CHANGED
@@ -17,6 +17,7 @@ after_creation = null
17
  after_pipeline_creation = null
18
  batch_size = 64
19
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
 
20
 
21
  [components]
22
 
@@ -45,10 +46,11 @@ nO = null
45
  normalize = false
46
 
47
  [components.morphologizer.model.tok2vec]
48
- @architectures = "spacy-transformers.TransformerListener.v1"
49
- grad_factor = 1.0
50
  upstream = "transformer"
51
  pooling = {"@layers":"reduce_mean.v1"}
 
52
 
53
  [components.parser]
54
  factory = "parser"
@@ -68,32 +70,44 @@ use_upper = false
68
  nO = null
69
 
70
  [components.parser.model.tok2vec]
71
- @architectures = "spacy-transformers.TransformerListener.v1"
72
- grad_factor = 1.0
73
  upstream = "transformer"
74
  pooling = {"@layers":"reduce_mean.v1"}
 
75
 
76
  [components.transformer]
77
- factory = "transformer"
78
- max_batch_items = 4096
79
- set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
80
 
81
  [components.transformer.model]
82
- name = "dccuchile/bert-base-spanish-wwm-cased"
83
- @architectures = "spacy-transformers.TransformerModel.v3"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  mixed_precision = false
85
-
86
- [components.transformer.model.get_spans]
87
- @span_getters = "spacy-transformers.strided_spans.v1"
88
- window = 128
89
- stride = 96
90
 
91
  [components.transformer.model.grad_scaler_config]
92
 
93
- [components.transformer.model.tokenizer_config]
94
- use_fast = true
95
-
96
- [components.transformer.model.transformer_config]
 
97
 
98
  [corpora]
99
 
@@ -130,11 +144,11 @@ annotating_components = []
130
  before_update = null
131
 
132
  [training.batcher]
133
- @batchers = "spacy.batch_by_padded.v1"
134
- discard_oversize = true
135
- get_length = null
136
  size = 2000
137
- buffer = 256
 
138
 
139
  [training.logger]
140
  @loggers = "spacy.ConsoleLogger.v1"
@@ -197,6 +211,18 @@ require = false
197
  path = "corpus/labels/parser.json"
198
  require = false
199
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  [initialize.lookups]
201
  @misc = "spacy.LookupsDataLoader.v1"
202
  lang = ${nlp.lang}
17
  after_pipeline_creation = null
18
  batch_size = 64
19
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+ vectors = {"@vectors":"spacy.Vectors.v1"}
21
 
22
  [components]
23
 
46
  normalize = false
47
 
48
  [components.morphologizer.model.tok2vec]
49
+ @architectures = "spacy-curated-transformers.LastTransformerLayerListener.v1"
50
+ width = ${components.transformer.model.hidden_width}
51
  upstream = "transformer"
52
  pooling = {"@layers":"reduce_mean.v1"}
53
+ grad_factor = 1.0
54
 
55
  [components.parser]
56
  factory = "parser"
70
  nO = null
71
 
72
  [components.parser.model.tok2vec]
73
+ @architectures = "spacy-curated-transformers.LastTransformerLayerListener.v1"
74
+ width = ${components.transformer.model.hidden_width}
75
  upstream = "transformer"
76
  pooling = {"@layers":"reduce_mean.v1"}
77
+ grad_factor = 1.0
78
 
79
  [components.transformer]
80
+ factory = "curated_transformer"
81
+ all_layer_outputs = false
82
+ frozen = false
83
 
84
  [components.transformer.model]
85
+ @architectures = "spacy-curated-transformers.BertTransformer.v1"
86
+ vocab_size = 31002
87
+ hidden_width = 768
88
+ piece_encoder = {"@architectures":"spacy-curated-transformers.BertWordpieceEncoder.v1"}
89
+ attention_probs_dropout_prob = 0.1
90
+ hidden_act = "gelu"
91
+ hidden_dropout_prob = 0.1
92
+ intermediate_width = 3072
93
+ layer_norm_eps = 0.0
94
+ max_position_embeddings = 512
95
+ model_max_length = 512
96
+ num_attention_heads = 12
97
+ num_hidden_layers = 12
98
+ padding_idx = 0
99
+ type_vocab_size = 2
100
+ torchscript = false
101
  mixed_precision = false
102
+ wrapped_listener = null
 
 
 
 
103
 
104
  [components.transformer.model.grad_scaler_config]
105
 
106
+ [components.transformer.model.with_spans]
107
+ @architectures = "spacy-curated-transformers.WithStridedSpans.v1"
108
+ stride = 112
109
+ window = 158
110
+ batch_size = 384
111
 
112
  [corpora]
113
 
144
  before_update = null
145
 
146
  [training.batcher]
147
+ @batchers = "spacy.batch_by_words.v1"
148
+ discard_oversize = false
 
149
  size = 2000
150
+ tolerance = 0.2
151
+ get_length = null
152
 
153
  [training.logger]
154
  @loggers = "spacy.ConsoleLogger.v1"
211
  path = "corpus/labels/parser.json"
212
  require = false
213
 
214
+ [initialize.components.transformer]
215
+
216
+ [initialize.components.transformer.encoder_loader]
217
+ @model_loaders = "spacy-curated-transformers.HFTransformerEncoderLoader.v1"
218
+ name = "dccuchile/bert-base-spanish-wwm-cased"
219
+ revision = "main"
220
+
221
+ [initialize.components.transformer.piecer_loader]
222
+ @model_loaders = "spacy-curated-transformers.HFPieceEncoderLoader.v1"
223
+ name = "dccuchile/bert-base-spanish-wwm-cased"
224
+ revision = "main"
225
+
226
  [initialize.lookups]
227
  @misc = "spacy.LookupsDataLoader.v1"
228
  lang = ${nlp.lang}
es_dep_news_trf-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edabd463df12371e90bcf6a67f8aeb6e110b75a9aa6ce800ed4b2f081f5bbabe
3
- size 410207101
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03117d52d9077a5af65a8163b419fb94569e2048039bdbacd6484b27bfcbd272
3
+ size 407804601
lemmatizer/lookups/lookups.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be833a634de5b55fa529aceee97fcc09a193e2017b5e4b34bafe103ad97600aa
3
- size 165037
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee3740fdad2ebc1cf79a63a8e5b2d2f3dd47b33c3a7a0bc9351ba5a1246b6a07
3
+ size 165225
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"es",
3
  "name":"dep_news_trf",
4
- "version":"3.6.1",
5
- "description":"Spanish transformer pipeline (dccuchile/bert-base-spanish-wwm-cased). Components: transformer, morphologizer, parser, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"GNU GPL 3.0",
10
- "spacy_version":">=3.6.0,<3.7.0",
11
- "spacy_git_version":"c067b5264",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
@@ -515,21 +515,21 @@
515
  "token_p":0.9989078786,
516
  "token_r":0.9995398685,
517
  "token_f":0.9992237736,
518
- "pos_acc":0.9914495504,
519
- "morph_acc":0.988017638,
520
- "morph_micro_p":0.9966965648,
521
- "morph_micro_r":0.9935805248,
522
- "morph_micro_f":0.9951361055,
523
  "morph_per_feat":{
524
  "Definite":{
525
- "p":0.9985744833,
526
- "r":0.9992867332,
527
- "f":0.9989304813
528
  },
529
  "Gender":{
530
- "p":0.9970247416,
531
- "r":0.9924144022,
532
- "f":0.9947142299
533
  },
534
  "Number":{
535
  "p":0.998570932,
@@ -537,44 +537,44 @@
537
  "f":0.9966270262
538
  },
539
  "PronType":{
540
- "p":0.9979894686,
541
- "r":0.9956064947,
542
- "f":0.9967965575
543
  },
544
  "PunctType":{
545
- "p":0.9998403321,
546
- "r":0.9982464531,
547
- "f":0.9990427569
548
  },
549
  "VerbForm":{
550
- "p":0.9956195244,
551
- "r":0.994375,
552
- "f":0.994996873
553
  },
554
  "Mood":{
555
- "p":0.9935210566,
556
- "r":0.9910514541,
557
- "f":0.9922847188
558
  },
559
  "Person":{
560
- "p":0.9961592179,
561
- "r":0.9932114883,
562
- "f":0.9946831692
563
  },
564
  "Tense":{
565
- "p":0.9927189989,
566
- "r":0.9943026436,
567
- "f":0.9935101901
568
  },
569
  "NumForm":{
570
- "p":0.9917218543,
571
  "r":0.9708265802,
572
- "f":0.9811629812
573
  },
574
  "NumType":{
575
- "p":0.9846022241,
576
- "r":0.9680403701,
577
- "f":0.9762510602
578
  },
579
  "Poss":{
580
  "p":1.0,
@@ -587,34 +587,34 @@
587
  "f":0.998998999
588
  },
589
  "Case":{
590
- "p":0.9686137751,
591
- "r":0.9610726644,
592
- "f":0.9648284846
593
  },
594
  "PrepCase":{
595
- "p":1.0,
596
- "r":0.9927536232,
597
- "f":0.9963636364
598
  },
599
  "Reflex":{
600
- "p":0.9824561404,
601
- "r":0.9903536977,
602
- "f":0.9863891113
603
  },
604
  "Degree":{
605
- "p":0.9963898917,
606
- "r":0.9822064057,
607
- "f":0.9892473118
608
  },
609
  "Polarity":{
610
  "p":1.0,
611
- "r":0.9935064935,
612
- "f":0.996742671
613
  },
614
  "AdvType":{
615
- "p":0.9414893617,
616
  "r":0.9414893617,
617
- "f":0.9414893617
618
  },
619
  "Number[psor]":{
620
  "p":1.0,
@@ -627,156 +627,156 @@
627
  "f":1.0
628
  }
629
  },
630
- "sents_p":0.9625,
631
- "sents_r":0.9776299879,
632
- "sents_f":0.9700059988,
633
- "dep_uas":0.9467628408,
634
- "dep_las":0.9292321773,
635
  "dep_las_per_type":{
636
  "det":{
637
- "p":0.9823645598,
638
- "r":0.9851443124,
639
- "f":0.9837524724
640
  },
641
  "nsubj":{
642
- "p":0.9590792839,
643
- "r":0.9601316752,
644
- "f":0.959605191
645
  },
646
  "case":{
647
- "p":0.9725823879,
648
- "r":0.970746832,
649
- "f":0.9716637431
650
  },
651
  "amod":{
652
- "p":0.9549581006,
653
- "r":0.9519665855,
654
- "f":0.9534599965
655
  },
656
  "nmod":{
657
- "p":0.897606383,
658
- "r":0.8935663225,
659
- "f":0.8955817965
660
  },
661
  "mark":{
662
- "p":0.9343544858,
663
- "r":0.9510022272,
664
- "f":0.9426048565
665
  },
666
  "acl":{
667
- "p":0.8706563707,
668
- "r":0.859047619,
669
- "f":0.8648130393
670
  },
671
  "advcl":{
672
- "p":0.7264038232,
673
- "r":0.7496917386,
674
- "f":0.7378640777
675
  },
676
  "nummod":{
677
- "p":0.9379562044,
678
- "r":0.9065255732,
679
- "f":0.9219730942
680
  },
681
  "obj":{
682
- "p":0.9256061109,
683
- "r":0.9277629827,
684
- "f":0.9266832918
685
  },
686
  "obl":{
687
- "p":0.8297690334,
688
- "r":0.8372895986,
689
- "f":0.8335123523
690
  },
691
  "fixed":{
692
- "p":0.8718291055,
693
- "r":0.8788694482,
694
- "f":0.8753351206
695
  },
696
  "cc":{
697
- "p":0.967630854,
698
- "r":0.9656357388,
699
- "f":0.9666322669
700
  },
701
  "conj":{
702
- "p":0.8573273079,
703
- "r":0.8474792597,
704
- "f":0.8523748395
705
  },
706
  "root":{
707
- "p":0.9494047619,
708
- "r":0.9643288996,
709
- "f":0.9568086383
710
  },
711
  "flat":{
712
- "p":0.9544095665,
713
- "r":0.9594290008,
714
- "f":0.9569127014
715
  },
716
  "xcomp":{
717
- "p":0.9116883117,
718
- "r":0.8624078624,
719
- "f":0.8863636364
720
  },
721
  "advmod":{
722
- "p":0.8787198203,
723
- "r":0.8821871477,
724
- "f":0.8804500703
725
  },
726
  "compound":{
727
- "p":0.8854625551,
728
- "r":0.8072289157,
729
- "f":0.8445378151
730
  },
731
  "expl:pv":{
732
- "p":0.8854961832,
733
- "r":0.8888888889,
734
- "f":0.8871892925
735
  },
736
  "cop":{
737
- "p":0.9267241379,
738
- "r":0.9227467811,
739
- "f":0.9247311828
740
  },
741
  "ccomp":{
742
- "p":0.897810219,
743
- "r":0.9230769231,
744
- "f":0.9102682701
745
  },
746
  "iobj":{
747
- "p":0.8288770053,
748
- "r":0.7711442786,
749
- "f":0.7989690722
750
  },
751
  "aux":{
752
- "p":0.9615931721,
753
- "r":0.9615931721,
754
- "f":0.9615931721
755
  },
756
  "appos":{
757
- "p":0.8324396783,
758
- "r":0.8661087866,
759
- "f":0.8489405332
760
  },
761
  "expl:pass":{
762
- "p":0.9397590361,
763
- "r":0.8432432432,
764
- "f":0.8888888889
765
  },
766
  "csubj":{
767
- "p":0.8775510204,
768
- "r":0.9052631579,
769
- "f":0.8911917098
770
  },
771
  "parataxis":{
772
- "p":0.7818181818,
773
- "r":0.5119047619,
774
- "f":0.618705036
775
  },
776
  "expl:impers":{
777
- "p":0.7142857143,
778
- "r":0.9375,
779
- "f":0.8108108108
780
  },
781
  "orphan":{
782
  "p":0.0,
@@ -784,9 +784,9 @@
784
  "f":0.0
785
  },
786
  "dep":{
787
- "p":0.0882352941,
788
- "r":0.3333333333,
789
- "f":0.1395348837
790
  },
791
  "nsubj:pass":{
792
  "p":0.0,
@@ -804,9 +804,9 @@
804
  "f":0.0
805
  }
806
  },
807
- "tag_acc":0.9676386572,
808
- "lemma_acc":0.9678853802,
809
- "speed":2487.5064877036
810
  },
811
  "sources":[
812
  {
@@ -829,6 +829,6 @@
829
  }
830
  ],
831
  "requirements":[
832
- "spacy-transformers>=1.2.2,<1.3.0"
833
  ]
834
  }
1
  {
2
  "lang":"es",
3
  "name":"dep_news_trf",
4
+ "version":"3.7.2",
5
+ "description":"Spanish transformer pipeline (Transformer(name='dccuchile/bert-base-spanish-wwm-cased', piece_encoder='bert-wordpiece', stride=112, type='bert', width=768, window=158, vocab_size=31002)). Components: transformer, morphologizer, parser, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"GNU GPL 3.0",
10
+ "spacy_version":">=3.7.0,<3.8.0",
11
+ "spacy_git_version":"6b4f77441",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
515
  "token_p":0.9989078786,
516
  "token_r":0.9995398685,
517
  "token_f":0.9992237736,
518
+ "pos_acc":0.9913065148,
519
+ "morph_acc":0.9877497028,
520
+ "morph_micro_p":0.9966031189,
521
+ "morph_micro_r":0.9936743158,
522
+ "morph_micro_f":0.9951365624,
523
  "morph_per_feat":{
524
  "Definite":{
525
+ "p":0.998716303,
526
+ "r":0.9988587732,
527
+ "f":0.998787533
528
  },
529
  "Gender":{
530
+ "p":0.9968154529,
531
+ "r":0.9920507092,
532
+ "f":0.9944273736
533
  },
534
  "Number":{
535
  "p":0.998570932,
537
  "f":0.9966270262
538
  },
539
  "PronType":{
540
+ "p":0.998181122,
541
+ "r":0.9958930277,
542
+ "f":0.9970357621
543
  },
544
  "PunctType":{
545
+ "p":0.9998403576,
546
+ "r":0.9984058664,
547
+ "f":0.9991225971
548
  },
549
  "VerbForm":{
550
+ "p":0.9953110347,
551
+ "r":0.995,
552
+ "f":0.995155493
553
  },
554
  "Mood":{
555
+ "p":0.9925280199,
556
+ "r":0.9905543127,
557
+ "f":0.9915401841
558
  },
559
  "Person":{
560
+ "p":0.9959888385,
561
+ "r":0.9940818103,
562
+ "f":0.9950344107
563
  },
564
  "Tense":{
565
+ "p":0.9931693989,
566
+ "r":0.9940747493,
567
+ "f":0.9936218679
568
  },
569
  "NumForm":{
570
+ "p":0.9900826446,
571
  "r":0.9708265802,
572
+ "f":0.9803600655
573
  },
574
  "NumType":{
575
+ "p":0.9811965812,
576
+ "r":0.9655172414,
577
+ "f":0.9732937685
578
  },
579
  "Poss":{
580
  "p":1.0,
587
  "f":0.998998999
588
  },
589
  "Case":{
590
+ "p":0.9713043478,
591
+ "r":0.9662629758,
592
+ "f":0.9687771032
593
  },
594
  "PrepCase":{
595
+ "p":0.998960499,
596
+ "r":0.9948240166,
597
+ "f":0.9968879668
598
  },
599
  "Reflex":{
600
+ "p":0.9778830964,
601
+ "r":0.9951768489,
602
+ "f":0.9864541833
603
  },
604
  "Degree":{
605
+ "p":0.9928571429,
606
+ "r":0.9893238434,
607
+ "f":0.991087344
608
  },
609
  "Polarity":{
610
  "p":1.0,
611
+ "r":0.9902597403,
612
+ "f":0.9951060359
613
  },
614
  "AdvType":{
615
+ "p":0.9619565217,
616
  "r":0.9414893617,
617
+ "f":0.9516129032
618
  },
619
  "Number[psor]":{
620
  "p":1.0,
627
  "f":1.0
628
  }
629
  },
630
+ "sents_p":0.9636904762,
631
+ "sents_r":0.9788391778,
632
+ "sents_f":0.9712057588,
633
+ "dep_uas":0.9484729289,
634
+ "dep_las":0.9310393672,
635
  "dep_las_per_type":{
636
  "det":{
637
+ "p":0.984183025,
638
+ "r":0.9859932088,
639
+ "f":0.9850872853
640
  },
641
  "nsubj":{
642
+ "p":0.9609203798,
643
+ "r":0.9623262619,
644
+ "f":0.961622807
645
  },
646
  "case":{
647
+ "p":0.9732649203,
648
+ "r":0.9716904826,
649
+ "f":0.9724770642
650
  },
651
  "amod":{
652
+ "p":0.9577218728,
653
+ "r":0.9540549948,
654
+ "f":0.9558849172
655
  },
656
  "nmod":{
657
+ "p":0.9085300429,
658
+ "r":0.8967434472,
659
+ "f":0.9025982678
660
  },
661
  "mark":{
662
+ "p":0.9381838074,
663
+ "r":0.9548997773,
664
+ "f":0.9464679912
665
  },
666
  "acl":{
667
+ "p":0.8763285024,
668
+ "r":0.8638095238,
669
+ "f":0.8700239808
670
  },
671
  "advcl":{
672
+ "p":0.7511961722,
673
+ "r":0.774352651,
674
+ "f":0.7625986642
675
  },
676
  "nummod":{
677
+ "p":0.9267857143,
678
+ "r":0.9153439153,
679
+ "f":0.9210292813
680
  },
681
  "obj":{
682
+ "p":0.925545996,
683
+ "r":0.9310918775,
684
+ "f":0.9283106538
685
  },
686
  "obl":{
687
+ "p":0.8356282272,
688
+ "r":0.8381527838,
689
+ "f":0.8368886016
690
  },
691
  "fixed":{
692
+ "p":0.875502008,
693
+ "r":0.8802153432,
694
+ "f":0.877852349
695
  },
696
  "cc":{
697
+ "p":0.9649965683,
698
+ "r":0.9663230241,
699
+ "f":0.9656593407
700
  },
701
  "conj":{
702
+ "p":0.8464516129,
703
+ "r":0.8372686662,
704
+ "f":0.8418350979
705
  },
706
  "root":{
707
+ "p":0.9505952381,
708
+ "r":0.9655380895,
709
+ "f":0.9580083983
710
  },
711
  "flat":{
712
+ "p":0.9494047619,
713
+ "r":0.958677686,
714
+ "f":0.9540186916
715
  },
716
  "xcomp":{
717
+ "p":0.8974358974,
718
+ "r":0.85995086,
719
+ "f":0.878293601
720
  },
721
  "advmod":{
722
+ "p":0.8801120448,
723
+ "r":0.8855693348,
724
+ "f":0.8828322563
725
  },
726
  "compound":{
727
+ "p":0.8986784141,
728
+ "r":0.8192771084,
729
+ "f":0.8571428571
730
  },
731
  "expl:pv":{
732
+ "p":0.8943396226,
733
+ "r":0.908045977,
734
+ "f":0.9011406844
735
  },
736
  "cop":{
737
+ "p":0.9191489362,
738
+ "r":0.9270386266,
739
+ "f":0.9230769231
740
  },
741
  "ccomp":{
742
+ "p":0.8882882883,
743
+ "r":0.9249530957,
744
+ "f":0.90625
745
  },
746
  "iobj":{
747
+ "p":0.8020833333,
748
+ "r":0.7661691542,
749
+ "f":0.7837150127
750
  },
751
  "aux":{
752
+ "p":0.9671428571,
753
+ "r":0.9630156472,
754
+ "f":0.9650748396
755
  },
756
  "appos":{
757
+ "p":0.8389715832,
758
+ "r":0.8647140865,
759
+ "f":0.8516483516
760
  },
761
  "expl:pass":{
762
+ "p":0.9401197605,
763
+ "r":0.8486486486,
764
+ "f":0.8920454545
765
  },
766
  "csubj":{
767
+ "p":0.902173913,
768
+ "r":0.8736842105,
769
+ "f":0.8877005348
770
  },
771
  "parataxis":{
772
+ "p":0.7,
773
+ "r":0.5,
774
+ "f":0.5833333333
775
  },
776
  "expl:impers":{
777
+ "p":0.6086956522,
778
+ "r":0.875,
779
+ "f":0.7179487179
780
  },
781
  "orphan":{
782
  "p":0.0,
784
  "f":0.0
785
  },
786
  "dep":{
787
+ "p":0.0322580645,
788
+ "r":0.1111111111,
789
+ "f":0.05
790
  },
791
  "nsubj:pass":{
792
  "p":0.0,
804
  "f":0.0
805
  }
806
  },
807
+ "tag_acc":0.9674401664,
808
+ "lemma_acc":0.9685370645,
809
+ "speed":2047.9632793631
810
  },
811
  "sources":[
812
  {
829
  }
830
  ],
831
  "requirements":[
832
+ "spacy-curated-transformers>=0.2.0,<0.3.0"
833
  ]
834
  }
morphologizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d9d3c89534287b6a031248afcc8d007b6213c56d47113fe9ddaae5a8e23ffbb
3
- size 1329485
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef01402807b06a2f5ae708c2a377e7130393032599f0c93cf7b1175126bca4d6
3
+ size 1329569
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94c08627fa1306458dfa33a31a3c7561d3febc636dcde41b87894f53dac8a1b6
3
- size 464493
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8cadd705d14793e0a1f9b6a7dab9a78785c4b42ac4a6cdcaeb7737f68ebfffb
3
+ size 464577
transformer/cfg CHANGED
@@ -1,3 +1,3 @@
1
  {
2
- "max_batch_items":4096
3
  }
1
  {
2
+
3
  }
transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e51841a3d228516ec97dddeec216f5a72d7d8f78ec9bc8f66a3cbb26d498d84
3
- size 440456650
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2333301bf914bbeeeac13a27a6df66038ece5b01e411290c400866058d852ab6
3
+ size 437341445
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:325e7c86e20cec77e5c3769e682211918f21e4896793c79877cf40fa143ddadc
3
- size 892561
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa8314114de40d04550419b5adec7bf2fb0e50c090a1ef19c440ace7948a8ea
3
+ size 892571