adrianeboyd committed on
Commit
3ea0468
1 Parent(s): 37c84bc

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -14,59 +14,59 @@ model-index:
14
  metrics:
15
  - name: TAG (XPOS) Accuracy
16
  type: accuracy
17
- value: 0.9571539056
18
  - task:
19
  name: POS
20
  type: token-classification
21
  metrics:
22
  - name: POS (UPOS) Accuracy
23
  type: accuracy
24
- value: 0.9862335654
25
  - task:
26
  name: MORPH
27
  type: token-classification
28
  metrics:
29
  - name: Morph (UFeats) Accuracy
30
  type: accuracy
31
- value: 0.9794824209
32
  - task:
33
  name: LEMMA
34
  type: token-classification
35
  metrics:
36
  - name: Lemma Accuracy
37
  type: accuracy
38
- value: 0.9171804357
39
  - task:
40
  name: UNLABELED_DEPENDENCIES
41
  type: token-classification
42
  metrics:
43
  - name: Unlabeled Attachment Score (UAS)
44
  type: f_score
45
- value: 0.9435381233
46
  - task:
47
  name: LABELED_DEPENDENCIES
48
  type: token-classification
49
  metrics:
50
  - name: Labeled Attachment Score (LAS)
51
  type: f_score
52
- value: 0.9227477738
53
  - task:
54
  name: SENTS
55
  type: token-classification
56
  metrics:
57
  - name: Sentences F-Score
58
  type: f_score
59
- value: 0.9390681004
60
  ---
61
  ### Details: https://spacy.io/models/fr#fr_dep_news_trf
62
 
63
- French transformer pipeline (camembert-base). Components: transformer, morphologizer, parser, attribute_ruler, lemmatizer.
64
 
65
  | Feature | Description |
66
  | --- | --- |
67
  | **Name** | `fr_dep_news_trf` |
68
- | **Version** | `3.6.1` |
69
- | **spaCy** | `>=3.6.0,<3.7.0` |
70
  | **Default Pipeline** | `transformer`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer` |
71
  | **Components** | `transformer`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer` |
72
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
@@ -95,15 +95,15 @@ French transformer pipeline (camembert-base). Components: transformer, morpholog
95
  | `TOKEN_P` | 98.44 |
96
  | `TOKEN_R` | 98.96 |
97
  | `TOKEN_F` | 98.70 |
98
- | `POS_ACC` | 98.62 |
99
- | `MORPH_ACC` | 97.95 |
100
- | `MORPH_MICRO_P` | 99.40 |
101
- | `MORPH_MICRO_R` | 99.09 |
102
- | `MORPH_MICRO_F` | 99.25 |
103
- | `SENTS_P` | 92.47 |
104
- | `SENTS_R` | 95.39 |
105
- | `SENTS_F` | 93.91 |
106
- | `DEP_UAS` | 94.35 |
107
- | `DEP_LAS` | 92.27 |
108
- | `TAG_ACC` | 95.72 |
109
- | `LEMMA_ACC` | 91.72 |
 
14
  metrics:
15
  - name: TAG (XPOS) Accuracy
16
  type: accuracy
17
+ value: 0.9595339966
18
  - task:
19
  name: POS
20
  type: token-classification
21
  metrics:
22
  - name: POS (UPOS) Accuracy
23
  type: accuracy
24
+ value: 0.9886076602
25
  - task:
26
  name: MORPH
27
  type: token-classification
28
  metrics:
29
  - name: Morph (UFeats) Accuracy
30
  type: accuracy
31
+ value: 0.9816475925
32
  - task:
33
  name: LEMMA
34
  type: token-classification
35
  metrics:
36
  - name: Lemma Accuracy
37
  type: accuracy
38
+ value: 0.9172831895
39
  - task:
40
  name: UNLABELED_DEPENDENCIES
41
  type: token-classification
42
  metrics:
43
  - name: Unlabeled Attachment Score (UAS)
44
  type: f_score
45
+ value: 0.9483856035
46
  - task:
47
  name: LABELED_DEPENDENCIES
48
  type: token-classification
49
  metrics:
50
  - name: Labeled Attachment Score (LAS)
51
  type: f_score
52
+ value: 0.9265922369
53
  - task:
54
  name: SENTS
55
  type: token-classification
56
  metrics:
57
  - name: Sentences F-Score
58
  type: f_score
59
+ value: 0.9269461078
60
  ---
61
  ### Details: https://spacy.io/models/fr#fr_dep_news_trf
62
 
63
+ French transformer pipeline (Transformer(name='camembert-base', piece_encoder='camembert-sentencepiece', stride=128, type='camembert', width=768, window=168, vocab_size=32005)). Components: transformer, morphologizer, parser, attribute_ruler, lemmatizer.
64
 
65
  | Feature | Description |
66
  | --- | --- |
67
  | **Name** | `fr_dep_news_trf` |
68
+ | **Version** | `3.7.2` |
69
+ | **spaCy** | `>=3.7.0,<3.8.0` |
70
  | **Default Pipeline** | `transformer`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer` |
71
  | **Components** | `transformer`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer` |
72
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 
95
  | `TOKEN_P` | 98.44 |
96
  | `TOKEN_R` | 98.96 |
97
  | `TOKEN_F` | 98.70 |
98
+ | `POS_ACC` | 98.86 |
99
+ | `MORPH_ACC` | 98.16 |
100
+ | `MORPH_MICRO_P` | 99.45 |
101
+ | `MORPH_MICRO_R` | 99.26 |
102
+ | `MORPH_MICRO_F` | 99.36 |
103
+ | `SENTS_P` | 91.49 |
104
+ | `SENTS_R` | 93.93 |
105
+ | `SENTS_F` | 92.69 |
106
+ | `DEP_UAS` | 94.84 |
107
+ | `DEP_LAS` | 92.66 |
108
+ | `TAG_ACC` | 95.95 |
109
+ | `LEMMA_ACC` | 91.73 |
accuracy.json CHANGED
@@ -3,56 +3,56 @@
3
  "token_p": 0.9844389844,
4
  "token_r": 0.9896058454,
5
  "token_f": 0.9870156531,
6
- "pos_acc": 0.9862335654,
7
- "morph_acc": 0.9794824209,
8
- "morph_micro_p": 0.9940108782,
9
- "morph_micro_r": 0.9909223833,
10
- "morph_micro_f": 0.992464228,
11
  "morph_per_feat": {
12
  "Definite": {
13
- "p": 0.9992679356,
14
  "r": 0.996350365,
15
- "f": 0.9978070175
16
  },
17
  "Number": {
18
- "p": 0.9957572404,
19
- "r": 0.9937407953,
20
- "f": 0.9947479959
21
  },
22
  "PronType": {
23
- "p": 0.9993564994,
24
- "r": 0.9936020473,
25
- "f": 0.9964709657
26
  },
27
  "Gender": {
28
- "p": 0.990223823,
29
- "r": 0.9836442627,
30
- "f": 0.9869230769
31
  },
32
  "Mood": {
33
- "p": 0.9964412811,
34
- "r": 0.9946714032,
35
- "f": 0.9955555556
36
  },
37
  "Person": {
38
- "p": 0.9962121212,
39
- "r": 0.9924528302,
40
- "f": 0.9943289225
41
  },
42
  "Tense": {
43
- "p": 0.9878419453,
44
  "r": 0.9959141982,
45
- "f": 0.991861648
46
  },
47
  "VerbForm": {
48
- "p": 0.9909315746,
49
- "r": 0.9950331126,
50
- "f": 0.9929781082
51
  },
52
  "NumType": {
53
- "p": 0.9928315412,
54
- "r": 0.9453924915,
55
- "f": 0.9685314685
56
  },
57
  "Reflex": {
58
  "p": 1.0,
@@ -60,9 +60,9 @@
60
  "f": 1.0
61
  },
62
  "Voice": {
63
- "p": 0.9568965517,
64
  "r": 0.9910714286,
65
- "f": 0.9736842105
66
  },
67
  "Poss": {
68
  "p": 1.0,
@@ -75,76 +75,76 @@
75
  "f": 1.0
76
  }
77
  },
78
- "sents_p": 0.9247058824,
79
- "sents_r": 0.9538834951,
80
- "sents_f": 0.9390681004,
81
- "dep_uas": 0.9435381233,
82
- "dep_las": 0.9227477738,
83
  "dep_las_per_type": {
84
  "det": {
85
- "p": 0.9903069467,
86
- "r": 0.9895076675,
87
- "f": 0.9899071457
88
  },
89
  "nsubj": {
90
- "p": 0.9587378641,
91
- "r": 0.9518072289,
92
- "f": 0.9552599758
93
  },
94
  "aux:tense": {
95
- "p": 0.968503937,
96
  "r": 0.984,
97
- "f": 0.9761904762
98
  },
99
  "root": {
100
- "p": 0.9411764706,
101
- "r": 0.9708737864,
102
- "f": 0.9557945042
103
  },
104
  "obj": {
105
- "p": 0.9447852761,
106
- "r": 0.9139465875,
107
- "f": 0.9291101056
108
  },
109
  "cc": {
110
- "p": 0.9403669725,
111
- "r": 0.9447004608,
112
- "f": 0.9425287356
113
  },
114
  "case": {
115
- "p": 0.9823609227,
116
- "r": 0.9863760218,
117
- "f": 0.984364378
118
  },
119
  "obl:mod": {
120
- "p": 0.8204334365,
121
- "r": 0.7910447761,
122
- "f": 0.8054711246
123
  },
124
  "nmod": {
125
- "p": 0.8779296875,
126
- "r": 0.8981018981,
127
- "f": 0.8879012346
128
  },
129
  "conj": {
130
- "p": 0.76171875,
131
- "r": 0.7677165354,
132
- "f": 0.7647058824
133
  },
134
  "nummod": {
135
- "p": 0.9101796407,
136
- "r": 0.899408284,
137
- "f": 0.9047619048
138
  },
139
  "amod": {
140
- "p": 0.9666048237,
141
- "r": 0.9489981785,
142
- "f": 0.9577205882
143
  },
144
  "acl": {
145
- "p": 0.8620689655,
146
- "r": 0.8670520231,
147
- "f": 0.8645533141
148
  },
149
  "mark": {
150
  "p": 0.9688888889,
@@ -152,99 +152,99 @@
152
  "f": 0.9646017699
153
  },
154
  "xcomp": {
155
- "p": 0.9090909091,
156
- "r": 0.9271523179,
157
- "f": 0.9180327869
158
  },
159
  "flat:name": {
160
- "p": 0.9619047619,
161
- "r": 0.9619047619,
162
- "f": 0.9619047619
163
  },
164
  "cop": {
165
- "p": 0.9550561798,
166
  "r": 0.9444444444,
167
- "f": 0.9497206704
168
  },
169
  "advmod": {
170
- "p": 0.9344262295,
171
- "r": 0.8934169279,
172
- "f": 0.9134615385
173
  },
174
  "obl:arg": {
175
- "p": 0.8873873874,
176
- "r": 0.8954545455,
177
- "f": 0.8914027149
178
  },
179
  "appos": {
180
- "p": 0.6987951807,
181
- "r": 0.6987951807,
182
- "f": 0.6987951807
183
  },
184
  "nsubj:pass": {
185
- "p": 1.0,
186
- "r": 0.9647058824,
187
- "f": 0.9820359281
188
  },
189
  "aux:pass": {
190
- "p": 0.9910714286,
191
  "r": 0.9910714286,
192
- "f": 0.9910714286
193
  },
194
  "acl:relcl": {
195
- "p": 0.8588235294,
196
- "r": 0.8488372093,
197
- "f": 0.8538011696
198
  },
199
  "advcl": {
200
- "p": 0.7179487179,
201
- "r": 0.7179487179,
202
- "f": 0.7179487179
203
  },
204
  "fixed": {
205
- "p": 0.9186046512,
206
- "r": 0.79,
207
- "f": 0.8494623656
208
  },
209
  "dep": {
210
- "p": 0.2537313433,
211
  "r": 0.5862068966,
212
- "f": 0.3541666667
213
  },
214
  "expl:subj": {
215
- "p": 0.8235294118,
216
- "r": 0.875,
217
- "f": 0.8484848485
218
  },
219
  "expl:comp": {
220
- "p": 0.7435897436,
221
- "r": 0.9666666667,
222
- "f": 0.8405797101
223
  },
224
  "expl:pass": {
225
- "p": 0.75,
226
- "r": 0.4285714286,
227
- "f": 0.5454545455
228
- },
229
- "obl:agent": {
230
- "p": 0.8695652174,
231
- "r": 0.9523809524,
232
- "f": 0.9090909091
233
  },
234
  "ccomp": {
235
- "p": 0.8888888889,
236
- "r": 0.9411764706,
237
- "f": 0.9142857143
238
  },
239
  "parataxis": {
240
- "p": 0.75,
241
- "r": 0.6428571429,
242
- "f": 0.6923076923
243
  },
244
  "iobj": {
245
- "p": 0.8,
246
- "r": 0.64,
247
- "f": 0.7111111111
 
 
 
 
 
248
  },
249
  "nsubj:caus": {
250
  "p": 0.0,
@@ -267,9 +267,9 @@
267
  "f": 0.0
268
  },
269
  "vocative": {
270
- "p": 0.7142857143,
271
  "r": 0.625,
272
- "f": 0.6666666667
273
  },
274
  "dislocated": {
275
  "p": 0.0,
@@ -277,9 +277,9 @@
277
  "f": 0.0
278
  },
279
  "flat:foreign": {
280
- "p": 0.75,
281
- "r": 0.4285714286,
282
- "f": 0.5454545455
283
  },
284
  "orphan": {
285
  "p": 0.0,
@@ -297,7 +297,7 @@
297
  "f": 0.0
298
  }
299
  },
300
- "tag_acc": 0.9571539056,
301
- "lemma_acc": 0.9171804357,
302
- "speed": 601.7641603853
303
  }
 
3
  "token_p": 0.9844389844,
4
  "token_r": 0.9896058454,
5
  "token_f": 0.9870156531,
6
+ "pos_acc": 0.9886076602,
7
+ "morph_acc": 0.9816475925,
8
+ "morph_micro_p": 0.9945065006,
9
+ "morph_micro_r": 0.9926282442,
10
+ "morph_micro_f": 0.9935664847,
11
  "morph_per_feat": {
12
  "Definite": {
13
+ "p": 0.9978070175,
14
  "r": 0.996350365,
15
+ "f": 0.9970781592
16
  },
17
  "Number": {
18
+ "p": 0.996126176,
19
+ "r": 0.9941089838,
20
+ "f": 0.9951165576
21
  },
22
  "PronType": {
23
+ "p": 0.9980756895,
24
+ "r": 0.9955214331,
25
+ "f": 0.996796925
26
  },
27
  "Gender": {
28
+ "p": 0.9907668633,
29
+ "r": 0.9872220802,
30
+ "f": 0.9889912954
31
  },
32
  "Mood": {
33
+ "p": 0.9893428064,
34
+ "r": 0.9893428064,
35
+ "f": 0.9893428064
36
  },
37
  "Person": {
38
+ "p": 0.998738966,
39
+ "r": 0.9962264151,
40
+ "f": 0.9974811083
41
  },
42
  "Tense": {
43
+ "p": 0.9888438134,
44
  "r": 0.9959141982,
45
+ "f": 0.9923664122
46
  },
47
  "VerbForm": {
48
+ "p": 0.9933938893,
49
+ "r": 0.9958609272,
50
+ "f": 0.9946258785
51
  },
52
  "NumType": {
53
+ "p": 1.0,
54
+ "r": 0.9726962457,
55
+ "f": 0.9861591696
56
  },
57
  "Reflex": {
58
  "p": 1.0,
 
60
  "f": 1.0
61
  },
62
  "Voice": {
63
+ "p": 0.9910714286,
64
  "r": 0.9910714286,
65
+ "f": 0.9910714286
66
  },
67
  "Poss": {
68
  "p": 1.0,
 
75
  "f": 1.0
76
  }
77
  },
78
+ "sents_p": 0.914893617,
79
+ "sents_r": 0.9393203883,
80
+ "sents_f": 0.9269461078,
81
+ "dep_uas": 0.9483856035,
82
+ "dep_las": 0.9265922369,
83
  "dep_las_per_type": {
84
  "det": {
85
+ "p": 0.99031477,
86
+ "r": 0.99031477,
87
+ "f": 0.99031477
88
  },
89
  "nsubj": {
90
+ "p": 0.95215311,
91
+ "r": 0.9590361446,
92
+ "f": 0.9555822329
93
  },
94
  "aux:tense": {
95
+ "p": 0.9389312977,
96
  "r": 0.984,
97
+ "f": 0.9609375
98
  },
99
  "root": {
100
+ "p": 0.9524940618,
101
+ "r": 0.9733009709,
102
+ "f": 0.962785114
103
  },
104
  "obj": {
105
+ "p": 0.9420731707,
106
+ "r": 0.9169139466,
107
+ "f": 0.9293233083
108
  },
109
  "cc": {
110
+ "p": 0.9497716895,
111
+ "r": 0.9585253456,
112
+ "f": 0.9541284404
113
  },
114
  "case": {
115
+ "p": 0.987704918,
116
+ "r": 0.985013624,
117
+ "f": 0.9863574352
118
  },
119
  "obl:mod": {
120
+ "p": 0.7959183673,
121
+ "r": 0.8149253731,
122
+ "f": 0.8053097345
123
  },
124
  "nmod": {
125
+ "p": 0.8931750742,
126
+ "r": 0.9020979021,
127
+ "f": 0.8976143141
128
  },
129
  "conj": {
130
+ "p": 0.7890625,
131
+ "r": 0.7952755906,
132
+ "f": 0.7921568627
133
  },
134
  "nummod": {
135
+ "p": 0.921686747,
136
+ "r": 0.9053254438,
137
+ "f": 0.9134328358
138
  },
139
  "amod": {
140
+ "p": 0.9626865672,
141
+ "r": 0.9398907104,
142
+ "f": 0.9511520737
143
  },
144
  "acl": {
145
+ "p": 0.8728323699,
146
+ "r": 0.8728323699,
147
+ "f": 0.8728323699
148
  },
149
  "mark": {
150
  "p": 0.9688888889,
 
152
  "f": 0.9646017699
153
  },
154
  "xcomp": {
155
+ "p": 0.9276315789,
156
+ "r": 0.9337748344,
157
+ "f": 0.9306930693
158
  },
159
  "flat:name": {
160
+ "p": 0.9519230769,
161
+ "r": 0.9428571429,
162
+ "f": 0.9473684211
163
  },
164
  "cop": {
165
+ "p": 0.9444444444,
166
  "r": 0.9444444444,
167
+ "f": 0.9444444444
168
  },
169
  "advmod": {
170
+ "p": 0.9417475728,
171
+ "r": 0.9122257053,
172
+ "f": 0.9267515924
173
  },
174
  "obl:arg": {
175
+ "p": 0.8976744186,
176
+ "r": 0.8772727273,
177
+ "f": 0.8873563218
178
  },
179
  "appos": {
180
+ "p": 0.7058823529,
181
+ "r": 0.7228915663,
182
+ "f": 0.7142857143
183
  },
184
  "nsubj:pass": {
185
+ "p": 0.9880952381,
186
+ "r": 0.9764705882,
187
+ "f": 0.9822485207
188
  },
189
  "aux:pass": {
190
+ "p": 0.982300885,
191
  "r": 0.9910714286,
192
+ "f": 0.9866666667
193
  },
194
  "acl:relcl": {
195
+ "p": 0.8863636364,
196
+ "r": 0.9069767442,
197
+ "f": 0.8965517241
198
  },
199
  "advcl": {
200
+ "p": 0.7375,
201
+ "r": 0.7564102564,
202
+ "f": 0.746835443
203
  },
204
  "fixed": {
205
+ "p": 0.9130434783,
206
+ "r": 0.84,
207
+ "f": 0.875
208
  },
209
  "dep": {
210
+ "p": 0.298245614,
211
  "r": 0.5862068966,
212
+ "f": 0.3953488372
213
  },
214
  "expl:subj": {
215
+ "p": 0.8888888889,
216
+ "r": 0.75,
217
+ "f": 0.813559322
218
  },
219
  "expl:comp": {
220
+ "p": 0.7058823529,
221
+ "r": 0.8,
222
+ "f": 0.75
223
  },
224
  "expl:pass": {
225
+ "p": 0.2222222222,
226
+ "r": 0.2857142857,
227
+ "f": 0.25
 
 
 
 
 
228
  },
229
  "ccomp": {
230
+ "p": 0.98,
231
+ "r": 0.9607843137,
232
+ "f": 0.9702970297
233
  },
234
  "parataxis": {
235
+ "p": 0.6538461538,
236
+ "r": 0.6071428571,
237
+ "f": 0.6296296296
238
  },
239
  "iobj": {
240
+ "p": 0.65,
241
+ "r": 0.52,
242
+ "f": 0.5777777778
243
+ },
244
+ "obl:agent": {
245
+ "p": 1.0,
246
+ "r": 0.9285714286,
247
+ "f": 0.962962963
248
  },
249
  "nsubj:caus": {
250
  "p": 0.0,
 
267
  "f": 0.0
268
  },
269
  "vocative": {
270
+ "p": 1.0,
271
  "r": 0.625,
272
+ "f": 0.7692307692
273
  },
274
  "dislocated": {
275
  "p": 0.0,
 
277
  "f": 0.0
278
  },
279
  "flat:foreign": {
280
+ "p": 1.0,
281
+ "r": 1.0,
282
+ "f": 1.0
283
  },
284
  "orphan": {
285
  "p": 0.0,
 
297
  "f": 0.0
298
  }
299
  },
300
+ "tag_acc": 0.9595339966,
301
+ "lemma_acc": 0.9172831895,
302
+ "speed": 483.9219438961
303
  }
config.cfg CHANGED
@@ -17,6 +17,7 @@ after_creation = null
17
  after_pipeline_creation = null
18
  batch_size = 64
19
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
 
20
 
21
  [components]
22
 
@@ -45,10 +46,11 @@ nO = null
45
  normalize = false
46
 
47
  [components.morphologizer.model.tok2vec]
48
- @architectures = "spacy-transformers.TransformerListener.v1"
49
- grad_factor = 1.0
50
  upstream = "transformer"
51
  pooling = {"@layers":"reduce_mean.v1"}
 
52
 
53
  [components.parser]
54
  factory = "parser"
@@ -68,32 +70,44 @@ use_upper = false
68
  nO = null
69
 
70
  [components.parser.model.tok2vec]
71
- @architectures = "spacy-transformers.TransformerListener.v1"
72
- grad_factor = 1.0
73
  upstream = "transformer"
74
  pooling = {"@layers":"reduce_mean.v1"}
 
75
 
76
  [components.transformer]
77
- factory = "transformer"
78
- max_batch_items = 4096
79
- set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
80
 
81
  [components.transformer.model]
82
- name = "camembert-base"
83
- @architectures = "spacy-transformers.TransformerModel.v3"
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  mixed_precision = false
85
-
86
- [components.transformer.model.get_spans]
87
- @span_getters = "spacy-transformers.strided_spans.v1"
88
- window = 128
89
- stride = 96
90
 
91
  [components.transformer.model.grad_scaler_config]
92
 
93
- [components.transformer.model.tokenizer_config]
94
- use_fast = true
95
-
96
- [components.transformer.model.transformer_config]
 
97
 
98
  [corpora]
99
 
@@ -130,11 +144,11 @@ annotating_components = []
130
  before_update = null
131
 
132
  [training.batcher]
133
- @batchers = "spacy.batch_by_padded.v1"
134
- discard_oversize = true
135
- get_length = null
136
  size = 2000
137
- buffer = 256
 
138
 
139
  [training.logger]
140
  @loggers = "spacy.ConsoleLogger.v1"
@@ -197,6 +211,18 @@ require = false
197
  path = "corpus/labels/parser.json"
198
  require = false
199
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  [initialize.lookups]
201
  @misc = "spacy.LookupsDataLoader.v1"
202
  lang = ${nlp.lang}
 
17
  after_pipeline_creation = null
18
  batch_size = 64
19
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+ vectors = {"@vectors":"spacy.Vectors.v1"}
21
 
22
  [components]
23
 
 
46
  normalize = false
47
 
48
  [components.morphologizer.model.tok2vec]
49
+ @architectures = "spacy-curated-transformers.LastTransformerLayerListener.v1"
50
+ width = ${components.transformer.model.hidden_width}
51
  upstream = "transformer"
52
  pooling = {"@layers":"reduce_mean.v1"}
53
+ grad_factor = 1.0
54
 
55
  [components.parser]
56
  factory = "parser"
 
70
  nO = null
71
 
72
  [components.parser.model.tok2vec]
73
+ @architectures = "spacy-curated-transformers.LastTransformerLayerListener.v1"
74
+ width = ${components.transformer.model.hidden_width}
75
  upstream = "transformer"
76
  pooling = {"@layers":"reduce_mean.v1"}
77
+ grad_factor = 1.0
78
 
79
  [components.transformer]
80
+ factory = "curated_transformer"
81
+ all_layer_outputs = false
82
+ frozen = false
83
 
84
  [components.transformer.model]
85
+ @architectures = "spacy-curated-transformers.CamembertTransformer.v1"
86
+ vocab_size = 32005
87
+ hidden_width = 768
88
+ piece_encoder = {"@architectures":"spacy-curated-transformers.CamembertSentencepieceEncoder.v1"}
89
+ attention_probs_dropout_prob = 0.1
90
+ hidden_act = "gelu"
91
+ hidden_dropout_prob = 0.1
92
+ intermediate_width = 3072
93
+ layer_norm_eps = 0.00001
94
+ max_position_embeddings = 514
95
+ model_max_length = 512
96
+ num_attention_heads = 12
97
+ num_hidden_layers = 12
98
+ padding_idx = 1
99
+ type_vocab_size = 1
100
  mixed_precision = false
101
+ torchscript = false
102
+ wrapped_listener = null
 
 
 
103
 
104
  [components.transformer.model.grad_scaler_config]
105
 
106
+ [components.transformer.model.with_spans]
107
+ @architectures = "spacy-curated-transformers.WithStridedSpans.v1"
108
+ stride = 128
109
+ window = 168
110
+ batch_size = 384
111
 
112
  [corpora]
113
 
 
144
  before_update = null
145
 
146
  [training.batcher]
147
+ @batchers = "spacy.batch_by_words.v1"
148
+ discard_oversize = false
 
149
  size = 2000
150
+ tolerance = 0.2
151
+ get_length = null
152
 
153
  [training.logger]
154
  @loggers = "spacy.ConsoleLogger.v1"
 
211
  path = "corpus/labels/parser.json"
212
  require = false
213
 
214
+ [initialize.components.transformer]
215
+
216
+ [initialize.components.transformer.encoder_loader]
217
+ @model_loaders = "spacy-curated-transformers.HFTransformerEncoderLoader.v1"
218
+ name = "camembert-base"
219
+ revision = "main"
220
+
221
+ [initialize.components.transformer.piecer_loader]
222
+ @model_loaders = "spacy-curated-transformers.HFPieceEncoderLoader.v1"
223
+ name = "camembert-base"
224
+ revision = "main"
225
+
226
  [initialize.lookups]
227
  @misc = "spacy.LookupsDataLoader.v1"
228
  lang = ${nlp.lang}
fr_dep_news_trf-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7632cf11044038b8428e50ebad9a022393ec8304e80379222ad839d28789895c
3
- size 400732603
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9977592dee6ec11b9e0d11489b1df1cfd292a78b14b0b630d28993510e21faf8
3
+ size 397752313
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"fr",
3
  "name":"dep_news_trf",
4
- "version":"3.6.1",
5
- "description":"French transformer pipeline (camembert-base). Components: transformer, morphologizer, parser, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"LGPL-LR",
10
- "spacy_version":">=3.6.0,<3.7.0",
11
- "spacy_git_version":"c067b5264",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
@@ -284,56 +284,56 @@
284
  "token_p":0.9844389844,
285
  "token_r":0.9896058454,
286
  "token_f":0.9870156531,
287
- "pos_acc":0.9862335654,
288
- "morph_acc":0.9794824209,
289
- "morph_micro_p":0.9940108782,
290
- "morph_micro_r":0.9909223833,
291
- "morph_micro_f":0.992464228,
292
  "morph_per_feat":{
293
  "Definite":{
294
- "p":0.9992679356,
295
  "r":0.996350365,
296
- "f":0.9978070175
297
  },
298
  "Number":{
299
- "p":0.9957572404,
300
- "r":0.9937407953,
301
- "f":0.9947479959
302
  },
303
  "PronType":{
304
- "p":0.9993564994,
305
- "r":0.9936020473,
306
- "f":0.9964709657
307
  },
308
  "Gender":{
309
- "p":0.990223823,
310
- "r":0.9836442627,
311
- "f":0.9869230769
312
  },
313
  "Mood":{
314
- "p":0.9964412811,
315
- "r":0.9946714032,
316
- "f":0.9955555556
317
  },
318
  "Person":{
319
- "p":0.9962121212,
320
- "r":0.9924528302,
321
- "f":0.9943289225
322
  },
323
  "Tense":{
324
- "p":0.9878419453,
325
  "r":0.9959141982,
326
- "f":0.991861648
327
  },
328
  "VerbForm":{
329
- "p":0.9909315746,
330
- "r":0.9950331126,
331
- "f":0.9929781082
332
  },
333
  "NumType":{
334
- "p":0.9928315412,
335
- "r":0.9453924915,
336
- "f":0.9685314685
337
  },
338
  "Reflex":{
339
  "p":1.0,
@@ -341,9 +341,9 @@
341
  "f":1.0
342
  },
343
  "Voice":{
344
- "p":0.9568965517,
345
  "r":0.9910714286,
346
- "f":0.9736842105
347
  },
348
  "Poss":{
349
  "p":1.0,
@@ -356,76 +356,76 @@
356
  "f":1.0
357
  }
358
  },
359
- "sents_p":0.9247058824,
360
- "sents_r":0.9538834951,
361
- "sents_f":0.9390681004,
362
- "dep_uas":0.9435381233,
363
- "dep_las":0.9227477738,
364
  "dep_las_per_type":{
365
  "det":{
366
- "p":0.9903069467,
367
- "r":0.9895076675,
368
- "f":0.9899071457
369
  },
370
  "nsubj":{
371
- "p":0.9587378641,
372
- "r":0.9518072289,
373
- "f":0.9552599758
374
  },
375
  "aux:tense":{
376
- "p":0.968503937,
377
  "r":0.984,
378
- "f":0.9761904762
379
  },
380
  "root":{
381
- "p":0.9411764706,
382
- "r":0.9708737864,
383
- "f":0.9557945042
384
  },
385
  "obj":{
386
- "p":0.9447852761,
387
- "r":0.9139465875,
388
- "f":0.9291101056
389
  },
390
  "cc":{
391
- "p":0.9403669725,
392
- "r":0.9447004608,
393
- "f":0.9425287356
394
  },
395
  "case":{
396
- "p":0.9823609227,
397
- "r":0.9863760218,
398
- "f":0.984364378
399
  },
400
  "obl:mod":{
401
- "p":0.8204334365,
402
- "r":0.7910447761,
403
- "f":0.8054711246
404
  },
405
  "nmod":{
406
- "p":0.8779296875,
407
- "r":0.8981018981,
408
- "f":0.8879012346
409
  },
410
  "conj":{
411
- "p":0.76171875,
412
- "r":0.7677165354,
413
- "f":0.7647058824
414
  },
415
  "nummod":{
416
- "p":0.9101796407,
417
- "r":0.899408284,
418
- "f":0.9047619048
419
  },
420
  "amod":{
421
- "p":0.9666048237,
422
- "r":0.9489981785,
423
- "f":0.9577205882
424
  },
425
  "acl":{
426
- "p":0.8620689655,
427
- "r":0.8670520231,
428
- "f":0.8645533141
429
  },
430
  "mark":{
431
  "p":0.9688888889,
@@ -433,99 +433,99 @@
433
  "f":0.9646017699
434
  },
435
  "xcomp":{
436
- "p":0.9090909091,
437
- "r":0.9271523179,
438
- "f":0.9180327869
439
  },
440
  "flat:name":{
441
- "p":0.9619047619,
442
- "r":0.9619047619,
443
- "f":0.9619047619
444
  },
445
  "cop":{
446
- "p":0.9550561798,
447
  "r":0.9444444444,
448
- "f":0.9497206704
449
  },
450
  "advmod":{
451
- "p":0.9344262295,
452
- "r":0.8934169279,
453
- "f":0.9134615385
454
  },
455
  "obl:arg":{
456
- "p":0.8873873874,
457
- "r":0.8954545455,
458
- "f":0.8914027149
459
  },
460
  "appos":{
461
- "p":0.6987951807,
462
- "r":0.6987951807,
463
- "f":0.6987951807
464
  },
465
  "nsubj:pass":{
466
- "p":1.0,
467
- "r":0.9647058824,
468
- "f":0.9820359281
469
  },
470
  "aux:pass":{
471
- "p":0.9910714286,
472
  "r":0.9910714286,
473
- "f":0.9910714286
474
  },
475
  "acl:relcl":{
476
- "p":0.8588235294,
477
- "r":0.8488372093,
478
- "f":0.8538011696
479
  },
480
  "advcl":{
481
- "p":0.7179487179,
482
- "r":0.7179487179,
483
- "f":0.7179487179
484
  },
485
  "fixed":{
486
- "p":0.9186046512,
487
- "r":0.79,
488
- "f":0.8494623656
489
  },
490
  "dep":{
491
- "p":0.2537313433,
492
  "r":0.5862068966,
493
- "f":0.3541666667
494
  },
495
  "expl:subj":{
496
- "p":0.8235294118,
497
- "r":0.875,
498
- "f":0.8484848485
499
  },
500
  "expl:comp":{
501
- "p":0.7435897436,
502
- "r":0.9666666667,
503
- "f":0.8405797101
504
  },
505
  "expl:pass":{
506
- "p":0.75,
507
- "r":0.4285714286,
508
- "f":0.5454545455
509
- },
510
- "obl:agent":{
511
- "p":0.8695652174,
512
- "r":0.9523809524,
513
- "f":0.9090909091
514
  },
515
  "ccomp":{
516
- "p":0.8888888889,
517
- "r":0.9411764706,
518
- "f":0.9142857143
519
  },
520
  "parataxis":{
521
- "p":0.75,
522
- "r":0.6428571429,
523
- "f":0.6923076923
524
  },
525
  "iobj":{
526
- "p":0.8,
527
- "r":0.64,
528
- "f":0.7111111111
 
 
 
 
 
529
  },
530
  "nsubj:caus":{
531
  "p":0.0,
@@ -548,9 +548,9 @@
548
  "f":0.0
549
  },
550
  "vocative":{
551
- "p":0.7142857143,
552
  "r":0.625,
553
- "f":0.6666666667
554
  },
555
  "dislocated":{
556
  "p":0.0,
@@ -558,9 +558,9 @@
558
  "f":0.0
559
  },
560
  "flat:foreign":{
561
- "p":0.75,
562
- "r":0.4285714286,
563
- "f":0.5454545455
564
  },
565
  "orphan":{
566
  "p":0.0,
@@ -578,9 +578,9 @@
578
  "f":0.0
579
  }
580
  },
581
- "tag_acc":0.9571539056,
582
- "lemma_acc":0.9171804357,
583
- "speed":601.7641603853
584
  },
585
  "sources":[
586
  {
@@ -603,7 +603,7 @@
603
  }
604
  ],
605
  "requirements":[
606
- "spacy-transformers>=1.2.2,<1.3.0",
607
  "sentencepiece>=0.1.91,!=0.1.92",
608
  "protobuf<3.21.0"
609
  ]
 
1
  {
2
  "lang":"fr",
3
  "name":"dep_news_trf",
4
+ "version":"3.7.2",
5
+ "description":"French transformer pipeline (Transformer(name='camembert-base', piece_encoder='camembert-sentencepiece', stride=128, type='camembert', width=768, window=168, vocab_size=32005)). Components: transformer, morphologizer, parser, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"LGPL-LR",
10
+ "spacy_version":">=3.7.0,<3.8.0",
11
+ "spacy_git_version":"6b4f77441",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
 
284
  "token_p":0.9844389844,
285
  "token_r":0.9896058454,
286
  "token_f":0.9870156531,
287
+ "pos_acc":0.9886076602,
288
+ "morph_acc":0.9816475925,
289
+ "morph_micro_p":0.9945065006,
290
+ "morph_micro_r":0.9926282442,
291
+ "morph_micro_f":0.9935664847,
292
  "morph_per_feat":{
293
  "Definite":{
294
+ "p":0.9978070175,
295
  "r":0.996350365,
296
+ "f":0.9970781592
297
  },
298
  "Number":{
299
+ "p":0.996126176,
300
+ "r":0.9941089838,
301
+ "f":0.9951165576
302
  },
303
  "PronType":{
304
+ "p":0.9980756895,
305
+ "r":0.9955214331,
306
+ "f":0.996796925
307
  },
308
  "Gender":{
309
+ "p":0.9907668633,
310
+ "r":0.9872220802,
311
+ "f":0.9889912954
312
  },
313
  "Mood":{
314
+ "p":0.9893428064,
315
+ "r":0.9893428064,
316
+ "f":0.9893428064
317
  },
318
  "Person":{
319
+ "p":0.998738966,
320
+ "r":0.9962264151,
321
+ "f":0.9974811083
322
  },
323
  "Tense":{
324
+ "p":0.9888438134,
325
  "r":0.9959141982,
326
+ "f":0.9923664122
327
  },
328
  "VerbForm":{
329
+ "p":0.9933938893,
330
+ "r":0.9958609272,
331
+ "f":0.9946258785
332
  },
333
  "NumType":{
334
+ "p":1.0,
335
+ "r":0.9726962457,
336
+ "f":0.9861591696
337
  },
338
  "Reflex":{
339
  "p":1.0,
 
341
  "f":1.0
342
  },
343
  "Voice":{
344
+ "p":0.9910714286,
345
  "r":0.9910714286,
346
+ "f":0.9910714286
347
  },
348
  "Poss":{
349
  "p":1.0,
 
356
  "f":1.0
357
  }
358
  },
359
+ "sents_p":0.914893617,
360
+ "sents_r":0.9393203883,
361
+ "sents_f":0.9269461078,
362
+ "dep_uas":0.9483856035,
363
+ "dep_las":0.9265922369,
364
  "dep_las_per_type":{
365
  "det":{
366
+ "p":0.99031477,
367
+ "r":0.99031477,
368
+ "f":0.99031477
369
  },
370
  "nsubj":{
371
+ "p":0.95215311,
372
+ "r":0.9590361446,
373
+ "f":0.9555822329
374
  },
375
  "aux:tense":{
376
+ "p":0.9389312977,
377
  "r":0.984,
378
+ "f":0.9609375
379
  },
380
  "root":{
381
+ "p":0.9524940618,
382
+ "r":0.9733009709,
383
+ "f":0.962785114
384
  },
385
  "obj":{
386
+ "p":0.9420731707,
387
+ "r":0.9169139466,
388
+ "f":0.9293233083
389
  },
390
  "cc":{
391
+ "p":0.9497716895,
392
+ "r":0.9585253456,
393
+ "f":0.9541284404
394
  },
395
  "case":{
396
+ "p":0.987704918,
397
+ "r":0.985013624,
398
+ "f":0.9863574352
399
  },
400
  "obl:mod":{
401
+ "p":0.7959183673,
402
+ "r":0.8149253731,
403
+ "f":0.8053097345
404
  },
405
  "nmod":{
406
+ "p":0.8931750742,
407
+ "r":0.9020979021,
408
+ "f":0.8976143141
409
  },
410
  "conj":{
411
+ "p":0.7890625,
412
+ "r":0.7952755906,
413
+ "f":0.7921568627
414
  },
415
  "nummod":{
416
+ "p":0.921686747,
417
+ "r":0.9053254438,
418
+ "f":0.9134328358
419
  },
420
  "amod":{
421
+ "p":0.9626865672,
422
+ "r":0.9398907104,
423
+ "f":0.9511520737
424
  },
425
  "acl":{
426
+ "p":0.8728323699,
427
+ "r":0.8728323699,
428
+ "f":0.8728323699
429
  },
430
  "mark":{
431
  "p":0.9688888889,
 
433
  "f":0.9646017699
434
  },
435
  "xcomp":{
436
+ "p":0.9276315789,
437
+ "r":0.9337748344,
438
+ "f":0.9306930693
439
  },
440
  "flat:name":{
441
+ "p":0.9519230769,
442
+ "r":0.9428571429,
443
+ "f":0.9473684211
444
  },
445
  "cop":{
446
+ "p":0.9444444444,
447
  "r":0.9444444444,
448
+ "f":0.9444444444
449
  },
450
  "advmod":{
451
+ "p":0.9417475728,
452
+ "r":0.9122257053,
453
+ "f":0.9267515924
454
  },
455
  "obl:arg":{
456
+ "p":0.8976744186,
457
+ "r":0.8772727273,
458
+ "f":0.8873563218
459
  },
460
  "appos":{
461
+ "p":0.7058823529,
462
+ "r":0.7228915663,
463
+ "f":0.7142857143
464
  },
465
  "nsubj:pass":{
466
+ "p":0.9880952381,
467
+ "r":0.9764705882,
468
+ "f":0.9822485207
469
  },
470
  "aux:pass":{
471
+ "p":0.982300885,
472
  "r":0.9910714286,
473
+ "f":0.9866666667
474
  },
475
  "acl:relcl":{
476
+ "p":0.8863636364,
477
+ "r":0.9069767442,
478
+ "f":0.8965517241
479
  },
480
  "advcl":{
481
+ "p":0.7375,
482
+ "r":0.7564102564,
483
+ "f":0.746835443
484
  },
485
  "fixed":{
486
+ "p":0.9130434783,
487
+ "r":0.84,
488
+ "f":0.875
489
  },
490
  "dep":{
491
+ "p":0.298245614,
492
  "r":0.5862068966,
493
+ "f":0.3953488372
494
  },
495
  "expl:subj":{
496
+ "p":0.8888888889,
497
+ "r":0.75,
498
+ "f":0.813559322
499
  },
500
  "expl:comp":{
501
+ "p":0.7058823529,
502
+ "r":0.8,
503
+ "f":0.75
504
  },
505
  "expl:pass":{
506
+ "p":0.2222222222,
507
+ "r":0.2857142857,
508
+ "f":0.25
 
 
 
 
 
509
  },
510
  "ccomp":{
511
+ "p":0.98,
512
+ "r":0.9607843137,
513
+ "f":0.9702970297
514
  },
515
  "parataxis":{
516
+ "p":0.6538461538,
517
+ "r":0.6071428571,
518
+ "f":0.6296296296
519
  },
520
  "iobj":{
521
+ "p":0.65,
522
+ "r":0.52,
523
+ "f":0.5777777778
524
+ },
525
+ "obl:agent":{
526
+ "p":1.0,
527
+ "r":0.9285714286,
528
+ "f":0.962962963
529
  },
530
  "nsubj:caus":{
531
  "p":0.0,
 
548
  "f":0.0
549
  },
550
  "vocative":{
551
+ "p":1.0,
552
  "r":0.625,
553
+ "f":0.7692307692
554
  },
555
  "dislocated":{
556
  "p":0.0,
 
558
  "f":0.0
559
  },
560
  "flat:foreign":{
561
+ "p":1.0,
562
+ "r":1.0,
563
+ "f":1.0
564
  },
565
  "orphan":{
566
  "p":0.0,
 
578
  "f":0.0
579
  }
580
  },
581
+ "tag_acc":0.9595339966,
582
+ "lemma_acc":0.9172831895,
583
+ "speed":483.9219438961
584
  },
585
  "sources":[
586
  {
 
603
  }
604
  ],
605
  "requirements":[
606
+ "spacy-curated-transformers>=0.2.0,<0.3.0",
607
  "sentencepiece>=0.1.91,!=0.1.92",
608
  "protobuf<3.21.0"
609
  ]
morphologizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63a60dbc3fbb4d0a57e829c797e4b41d6c75c510badafaad2a62c356d28151bd
3
- size 603544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ba70ca568390ce201ca97738dfa5b8af389f207b50a7d258a67650618f1ac86
3
+ size 603628
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16f1f86e9c4ac971cf95cc93e0e3642972756d7a0403eec5ccbef3cb165d7464
3
- size 397805
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0d206798eb8c88b358f212f9b220e28d210afd26f2573076de8d0a8bd8655a5
3
+ size 397889
transformer/cfg CHANGED
@@ -1,3 +1,3 @@
1
  {
2
- "max_batch_items":4096
3
  }
 
1
  {
2
+
3
  }
transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0fec091d1dcbc56e6f8a6502ec41bbd53598a8cd1b647722277b0e554f86bd7
3
- size 445799893
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8caad1ae9022468a0be9e2ab76b0dbaeffb3aebd846435dc175dc81b3a06692
3
+ size 440995064
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97ef211af70fba595843574fdd67f48f9683a67804bceca3df092c3d907fe6c3
3
- size 228640
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87f36f8467d267264a8c5b4f2878759c28044581851ba683f04220eb4c8e524c
3
+ size 228660