adrianeboyd committed on
Commit
4752428
1 Parent(s): 6185031

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -14,62 +14,62 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.812993431
18
  - name: NER Recall
19
  type: recall
20
- value: 0.8100156119
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.8115017896
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
- value: 0.9334226528
31
  - task:
32
  name: POS
33
  type: token-classification
34
  metrics:
35
  - name: POS (UPOS) Accuracy
36
  type: accuracy
37
- value: 0.9620735855
38
  - task:
39
  name: MORPH
40
  type: token-classification
41
  metrics:
42
  - name: Morph (UFeats) Accuracy
43
  type: accuracy
44
- value: 0.952582208
45
  - task:
46
  name: LEMMA
47
  type: token-classification
48
  metrics:
49
  - name: Lemma Accuracy
50
  type: accuracy
51
- value: 0.9028976572
52
  - task:
53
  name: UNLABELED_DEPENDENCIES
54
  type: token-classification
55
  metrics:
56
  - name: Unlabeled Attachment Score (UAS)
57
  type: f_score
58
- value: 0.8742764529
59
  - task:
60
  name: LABELED_DEPENDENCIES
61
  type: token-classification
62
  metrics:
63
  - name: Labeled Attachment Score (LAS)
64
  type: f_score
65
- value: 0.8295618959
66
  - task:
67
  name: SENTS
68
  type: token-classification
69
  metrics:
70
  - name: Sentences F-Score
71
  type: f_score
72
- value: 0.8719806763
73
  ---
74
  ### Details: https://spacy.io/models/fr#fr_core_news_sm
75
 
@@ -78,8 +78,8 @@ French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, s
78
  | Feature | Description |
79
  | --- | --- |
80
  | **Name** | `fr_core_news_sm` |
81
- | **Version** | `3.4.0` |
82
- | **spaCy** | `>=3.4.0,<3.5.0` |
83
  | **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
84
  | **Components** | `tok2vec`, `morphologizer`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
85
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
@@ -105,22 +105,22 @@ French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, s
105
 
106
  | Type | Score |
107
  | --- | --- |
108
- | `TOKEN_ACC` | 99.90 |
109
  | `TOKEN_P` | 98.44 |
110
  | `TOKEN_R` | 98.96 |
111
  | `TOKEN_F` | 98.70 |
112
- | `POS_ACC` | 96.21 |
113
- | `MORPH_ACC` | 95.26 |
114
- | `MORPH_MICRO_P` | 97.82 |
115
- | `MORPH_MICRO_R` | 96.71 |
116
- | `MORPH_MICRO_F` | 97.26 |
117
- | `SENTS_P` | 86.78 |
118
- | `SENTS_R` | 87.92 |
119
- | `SENTS_F` | 87.20 |
120
- | `DEP_UAS` | 87.43 |
121
- | `DEP_LAS` | 82.96 |
122
- | `TAG_ACC` | 93.34 |
123
- | `LEMMA_ACC` | 90.29 |
124
- | `ENTS_P` | 81.30 |
125
- | `ENTS_R` | 81.00 |
126
- | `ENTS_F` | 81.15 |
 
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.8148438757
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.8106360834
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.8127345333
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
+ value: 0.933216531
31
  - task:
32
  name: POS
33
  type: token-classification
34
  metrics:
35
  - name: POS (UPOS) Accuracy
36
  type: accuracy
37
+ value: 0.9617644028
38
  - task:
39
  name: MORPH
40
  type: token-classification
41
  metrics:
42
  - name: Morph (UFeats) Accuracy
43
  type: accuracy
44
+ value: 0.9529502705
45
  - task:
46
  name: LEMMA
47
  type: token-classification
48
  metrics:
49
  - name: Lemma Accuracy
50
  type: accuracy
51
+ value: 0.9084463625
52
  - task:
53
  name: UNLABELED_DEPENDENCIES
54
  type: token-classification
55
  metrics:
56
  - name: Unlabeled Attachment Score (UAS)
57
  type: f_score
58
+ value: 0.8781984485
59
  - task:
60
  name: LABELED_DEPENDENCIES
61
  type: token-classification
62
  metrics:
63
  - name: Labeled Attachment Score (LAS)
64
  type: f_score
65
+ value: 0.8347514036
66
  - task:
67
  name: SENTS
68
  type: token-classification
69
  metrics:
70
  - name: Sentences F-Score
71
  type: f_score
72
+ value: 0.861278649
73
  ---
74
  ### Details: https://spacy.io/models/fr#fr_core_news_sm
75
 
 
78
  | Feature | Description |
79
  | --- | --- |
80
  | **Name** | `fr_core_news_sm` |
81
+ | **Version** | `3.5.0` |
82
+ | **spaCy** | `>=3.5.0,<3.6.0` |
83
  | **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
84
  | **Components** | `tok2vec`, `morphologizer`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
85
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 
105
 
106
  | Type | Score |
107
  | --- | --- |
108
+ | `TOKEN_ACC` | 99.80 |
109
  | `TOKEN_P` | 98.44 |
110
  | `TOKEN_R` | 98.96 |
111
  | `TOKEN_F` | 98.70 |
112
+ | `POS_ACC` | 96.18 |
113
+ | `MORPH_ACC` | 95.30 |
114
+ | `MORPH_MICRO_P` | 97.96 |
115
+ | `MORPH_MICRO_R` | 96.64 |
116
+ | `MORPH_MICRO_F` | 97.29 |
117
+ | `SENTS_P` | 85.61 |
118
+ | `SENTS_R` | 86.65 |
119
+ | `SENTS_F` | 86.13 |
120
+ | `DEP_UAS` | 87.82 |
121
+ | `DEP_LAS` | 83.48 |
122
+ | `TAG_ACC` | 93.32 |
123
+ | `LEMMA_ACC` | 90.84 |
124
+ | `ENTS_P` | 81.48 |
125
+ | `ENTS_R` | 81.06 |
126
+ | `ENTS_F` | 81.27 |
accuracy.json CHANGED
@@ -1,68 +1,68 @@
1
  {
2
- "token_acc": 0.9989751998,
3
  "token_p": 0.9844389844,
4
  "token_r": 0.9896058454,
5
  "token_f": 0.9870156531,
6
- "pos_acc": 0.9620735855,
7
- "morph_acc": 0.952582208,
8
- "morph_micro_p": 0.978246133,
9
- "morph_micro_r": 0.967101255,
10
- "morph_micro_f": 0.9726417696,
11
  "morph_per_feat": {
12
  "Definite": {
13
- "p": 0.9846378932,
14
- "r": 0.9824817518,
15
- "f": 0.9835586408
16
  },
17
  "Number": {
18
- "p": 0.9907063197,
19
- "r": 0.9812223859,
20
- "f": 0.9859415464
21
  },
22
  "PronType": {
23
- "p": 0.995483871,
24
- "r": 0.9872040947,
25
- "f": 0.9913266945
26
  },
27
  "Gender": {
28
- "p": 0.9706563707,
29
- "r": 0.9637107079,
30
- "f": 0.9671710695
31
  },
32
  "Mood": {
33
- "p": 0.9539594843,
34
- "r": 0.920071048,
35
- "f": 0.9367088608
36
  },
37
  "Person": {
38
- "p": 0.9779507134,
39
- "r": 0.948427673,
40
- "f": 0.962962963
41
  },
42
  "Tense": {
43
- "p": 0.9447916667,
44
- "r": 0.9264555669,
45
- "f": 0.9355337803
46
  },
47
  "VerbForm": {
48
- "p": 0.9586846543,
49
- "r": 0.9412251656,
50
- "f": 0.9498746867
51
  },
52
  "NumType": {
53
- "p": 0.9929328622,
54
  "r": 0.9590443686,
55
- "f": 0.9756944444
56
  },
57
  "Reflex": {
58
- "p": 1.0,
59
- "r": 1.0,
60
- "f": 1.0
61
  },
62
  "Voice": {
63
- "p": 0.8793103448,
64
- "r": 0.9107142857,
65
- "f": 0.8947368421
66
  },
67
  "Poss": {
68
  "p": 1.0,
@@ -70,181 +70,181 @@
70
  "f": 1.0
71
  },
72
  "Polarity": {
73
- "p": 0.9882352941,
74
  "r": 0.9882352941,
75
- "f": 0.9882352941
76
  }
77
  },
78
- "sents_p": 0.8677884615,
79
- "sents_r": 0.8791762895,
80
- "sents_f": 0.8719806763,
81
- "dep_uas": 0.8742764529,
82
- "dep_las": 0.8295618959,
83
  "dep_las_per_type": {
84
  "det": {
85
- "p": 0.9675587997,
86
  "r": 0.9628732849,
87
- "f": 0.965210356
88
  },
89
  "nsubj": {
90
- "p": 0.8341463415,
91
- "r": 0.8240963855,
92
- "f": 0.8290909091
93
  },
94
  "aux:tense": {
95
- "p": 0.8976377953,
96
- "r": 0.912,
97
- "f": 0.9047619048
98
  },
99
  "root": {
100
- "p": 0.8474576271,
101
- "r": 0.8495145631,
102
- "f": 0.8484848485
103
  },
104
  "obj": {
105
- "p": 0.8058823529,
106
- "r": 0.8130563798,
107
- "f": 0.8094534712
108
  },
109
  "cc": {
110
- "p": 0.8630136986,
111
- "r": 0.8709677419,
112
- "f": 0.8669724771
113
  },
114
  "case": {
115
- "p": 0.9569603228,
116
- "r": 0.969346049,
117
- "f": 0.9631133672
118
  },
119
  "obl:mod": {
120
- "p": 0.6314102564,
121
  "r": 0.5880597015,
122
- "f": 0.6089644513
123
  },
124
  "nmod": {
125
- "p": 0.7830985915,
126
  "r": 0.8331668332,
127
- "f": 0.807357212
128
  },
129
  "conj": {
130
- "p": 0.4836065574,
131
- "r": 0.4645669291,
132
- "f": 0.4738955823
133
  },
134
  "nummod": {
135
- "p": 0.917721519,
136
- "r": 0.8579881657,
137
- "f": 0.8868501529
138
  },
139
  "amod": {
140
- "p": 0.8621323529,
141
- "r": 0.85428051,
142
- "f": 0.8581884721
143
  },
144
  "acl": {
145
- "p": 0.6503067485,
146
- "r": 0.612716763,
147
- "f": 0.630952381
148
  },
149
  "mark": {
150
- "p": 0.8812785388,
151
- "r": 0.8502202643,
152
- "f": 0.865470852
153
  },
154
  "xcomp": {
155
- "p": 0.7388535032,
156
- "r": 0.7682119205,
157
- "f": 0.7532467532
158
  },
159
  "flat:name": {
160
- "p": 0.8878504673,
161
- "r": 0.9047619048,
162
- "f": 0.8962264151
163
  },
164
  "cop": {
165
- "p": 0.8505747126,
166
- "r": 0.8222222222,
167
- "f": 0.8361581921
168
  },
169
  "advmod": {
170
- "p": 0.8161290323,
171
- "r": 0.7931034483,
172
- "f": 0.8044515103
173
  },
174
  "obl:arg": {
175
- "p": 0.6601941748,
176
- "r": 0.6181818182,
177
- "f": 0.6384976526
178
  },
179
  "appos": {
180
- "p": 0.4444444444,
181
- "r": 0.4337349398,
182
- "f": 0.4390243902
183
  },
184
  "nsubj:pass": {
185
- "p": 0.8292682927,
186
- "r": 0.8,
187
- "f": 0.8143712575
188
  },
189
  "aux:pass": {
190
- "p": 0.9035087719,
191
- "r": 0.9196428571,
192
- "f": 0.9115044248
193
  },
194
  "acl:relcl": {
195
- "p": 0.6626506024,
196
- "r": 0.6395348837,
197
- "f": 0.650887574
198
  },
199
  "advcl": {
200
- "p": 0.5057471264,
201
- "r": 0.5641025641,
202
- "f": 0.5333333333
203
  },
204
  "fixed": {
205
- "p": 0.8452380952,
206
- "r": 0.71,
207
- "f": 0.7717391304
208
  },
209
  "dep": {
210
- "p": 0.2777777778,
211
- "r": 0.5172413793,
212
- "f": 0.3614457831
213
  },
214
  "expl:subj": {
215
- "p": 0.7647058824,
216
- "r": 0.8125,
217
- "f": 0.7878787879
218
  },
219
  "expl:comp": {
220
- "p": 0.6666666667,
221
- "r": 0.8666666667,
222
- "f": 0.7536231884
223
  },
224
  "expl:pass": {
225
- "p": 0.25,
226
- "r": 0.1428571429,
227
- "f": 0.1818181818
228
- },
229
- "obl:agent": {
230
- "p": 0.8139534884,
231
- "r": 0.8333333333,
232
- "f": 0.8235294118
233
  },
234
  "ccomp": {
235
- "p": 0.7391304348,
236
- "r": 0.6666666667,
237
- "f": 0.7010309278
 
 
 
 
 
238
  },
239
  "parataxis": {
240
- "p": 0.5416666667,
241
- "r": 0.4642857143,
242
- "f": 0.5
243
  },
244
  "iobj": {
245
- "p": 0.6,
246
- "r": 0.48,
247
- "f": 0.5333333333
248
  },
249
  "nsubj:caus": {
250
  "p": 0.0,
@@ -267,9 +267,9 @@
267
  "f": 0.0
268
  },
269
  "vocative": {
270
- "p": 1.0,
271
  "r": 0.625,
272
- "f": 0.7692307692
273
  },
274
  "dislocated": {
275
  "p": 0.0,
@@ -277,9 +277,9 @@
277
  "f": 0.0
278
  },
279
  "flat:foreign": {
280
- "p": 1.0,
281
- "r": 0.2857142857,
282
- "f": 0.4444444444
283
  },
284
  "orphan": {
285
  "p": 0.0,
@@ -297,32 +297,32 @@
297
  "f": 0.0
298
  }
299
  },
300
- "tag_acc": 0.9334226528,
301
- "lemma_acc": 0.9028976572,
302
- "ents_p": 0.812993431,
303
- "ents_r": 0.8100156119,
304
- "ents_f": 0.8115017896,
305
  "ents_per_type": {
306
- "LOC": {
307
- "p": 0.8247443931,
308
- "r": 0.8368192086,
309
- "f": 0.8307379262
310
- },
311
  "PER": {
312
- "p": 0.8687411598,
313
- "r": 0.8801318335,
314
- "f": 0.8743994021
 
 
 
 
 
315
  },
316
  "ORG": {
317
- "p": 0.7622446956,
318
- "r": 0.7335877863,
319
- "f": 0.7476417388
320
  },
321
  "MISC": {
322
- "p": 0.6840694006,
323
- "r": 0.6323079166,
324
- "f": 0.6571709978
325
  }
326
  },
327
- "speed": 4128.958361591
328
  }
 
1
  {
2
+ "token_acc": 0.997952498,
3
  "token_p": 0.9844389844,
4
  "token_r": 0.9896058454,
5
  "token_f": 0.9870156531,
6
+ "pos_acc": 0.9617644028,
7
+ "morph_acc": 0.9529502705,
8
+ "morph_micro_p": 0.9796195652,
9
+ "morph_micro_r": 0.9663701718,
10
+ "morph_micro_f": 0.9729497638,
11
  "morph_per_feat": {
12
  "Definite": {
13
+ "p": 0.986090776,
14
+ "r": 0.9832116788,
15
+ "f": 0.9846491228
16
  },
17
  "Number": {
18
+ "p": 0.9897655378,
19
+ "r": 0.979197349,
20
+ "f": 0.9844530816
21
  },
22
  "PronType": {
23
+ "p": 0.9935525467,
24
+ "r": 0.9859245042,
25
+ "f": 0.9897238279
26
  },
27
  "Gender": {
28
+ "p": 0.9710519514,
29
+ "r": 0.9601328904,
30
+ "f": 0.9655615523
31
  },
32
  "Mood": {
33
+ "p": 0.9597806216,
34
+ "r": 0.9325044405,
35
+ "f": 0.9459459459
36
  },
37
  "Person": {
38
+ "p": 0.9818181818,
39
+ "r": 0.9509433962,
40
+ "f": 0.9661341853
41
  },
42
  "Tense": {
43
+ "p": 0.9538784067,
44
+ "r": 0.9295199183,
45
+ "f": 0.9415416451
46
  },
47
  "VerbForm": {
48
+ "p": 0.9686174724,
49
+ "r": 0.9453642384,
50
+ "f": 0.956849602
51
  },
52
  "NumType": {
53
+ "p": 0.9790940767,
54
  "r": 0.9590443686,
55
+ "f": 0.9689655172
56
  },
57
  "Reflex": {
58
+ "p": 0.9772727273,
59
+ "r": 0.9772727273,
60
+ "f": 0.9772727273
61
  },
62
  "Voice": {
63
+ "p": 0.9090909091,
64
+ "r": 0.8928571429,
65
+ "f": 0.9009009009
66
  },
67
  "Poss": {
68
  "p": 1.0,
 
70
  "f": 1.0
71
  },
72
  "Polarity": {
73
+ "p": 1.0,
74
  "r": 0.9882352941,
75
+ "f": 0.9940828402
76
  }
77
  },
78
+ "sents_p": 0.8561151079,
79
+ "sents_r": 0.8665048544,
80
+ "sents_f": 0.861278649,
81
+ "dep_uas": 0.8781984485,
82
+ "dep_las": 0.8347514036,
83
  "dep_las_per_type": {
84
  "det": {
85
+ "p": 0.9707078926,
86
  "r": 0.9628732849,
87
+ "f": 0.9667747164
88
  },
89
  "nsubj": {
90
+ "p": 0.853960396,
91
+ "r": 0.8313253012,
92
+ "f": 0.8424908425
93
  },
94
  "aux:tense": {
95
+ "p": 0.905511811,
96
+ "r": 0.92,
97
+ "f": 0.9126984127
98
  },
99
  "root": {
100
+ "p": 0.8490566038,
101
+ "r": 0.8737864078,
102
+ "f": 0.8612440191
103
  },
104
  "obj": {
105
+ "p": 0.8179012346,
106
+ "r": 0.7863501484,
107
+ "f": 0.8018154312
108
  },
109
  "cc": {
110
+ "p": 0.866359447,
111
+ "r": 0.866359447,
112
+ "f": 0.866359447
113
  },
114
  "case": {
115
+ "p": 0.9595959596,
116
+ "r": 0.9707084469,
117
+ "f": 0.9651202167
118
  },
119
  "obl:mod": {
120
+ "p": 0.6396103896,
121
  "r": 0.5880597015,
122
+ "f": 0.6127527216
123
  },
124
  "nmod": {
125
+ "p": 0.7996164909,
126
  "r": 0.8331668332,
127
+ "f": 0.8160469667
128
  },
129
  "conj": {
130
+ "p": 0.5,
131
+ "r": 0.5,
132
+ "f": 0.5
133
  },
134
  "nummod": {
135
+ "p": 0.9079754601,
136
+ "r": 0.875739645,
137
+ "f": 0.8915662651
138
  },
139
  "amod": {
140
+ "p": 0.8850364964,
141
+ "r": 0.883424408,
142
+ "f": 0.8842297174
143
  },
144
  "acl": {
145
+ "p": 0.6445783133,
146
+ "r": 0.6184971098,
147
+ "f": 0.6312684366
148
  },
149
  "mark": {
150
+ "p": 0.8590909091,
151
+ "r": 0.8325991189,
152
+ "f": 0.8456375839
153
  },
154
  "xcomp": {
155
+ "p": 0.8068965517,
156
+ "r": 0.7748344371,
157
+ "f": 0.7905405405
158
  },
159
  "flat:name": {
160
+ "p": 0.8691588785,
161
+ "r": 0.8857142857,
162
+ "f": 0.8773584906
163
  },
164
  "cop": {
165
+ "p": 0.8636363636,
166
+ "r": 0.8444444444,
167
+ "f": 0.8539325843
168
  },
169
  "advmod": {
170
+ "p": 0.8264984227,
171
+ "r": 0.8213166144,
172
+ "f": 0.8238993711
173
  },
174
  "obl:arg": {
175
+ "p": 0.6409090909,
176
+ "r": 0.6409090909,
177
+ "f": 0.6409090909
178
  },
179
  "appos": {
180
+ "p": 0.5063291139,
181
+ "r": 0.4819277108,
182
+ "f": 0.4938271605
183
  },
184
  "nsubj:pass": {
185
+ "p": 0.8295454545,
186
+ "r": 0.8588235294,
187
+ "f": 0.8439306358
188
  },
189
  "aux:pass": {
190
+ "p": 0.905982906,
191
+ "r": 0.9464285714,
192
+ "f": 0.9257641921
193
  },
194
  "acl:relcl": {
195
+ "p": 0.5697674419,
196
+ "r": 0.5697674419,
197
+ "f": 0.5697674419
198
  },
199
  "advcl": {
200
+ "p": 0.475,
201
+ "r": 0.4871794872,
202
+ "f": 0.4810126582
203
  },
204
  "fixed": {
205
+ "p": 0.8372093023,
206
+ "r": 0.72,
207
+ "f": 0.7741935484
208
  },
209
  "dep": {
210
+ "p": 0.2388059701,
211
+ "r": 0.5517241379,
212
+ "f": 0.3333333333
213
  },
214
  "expl:subj": {
215
+ "p": 0.6857142857,
216
+ "r": 0.75,
217
+ "f": 0.7164179104
218
  },
219
  "expl:comp": {
220
+ "p": 0.6388888889,
221
+ "r": 0.7666666667,
222
+ "f": 0.696969697
223
  },
224
  "expl:pass": {
225
+ "p": 0.5,
226
+ "r": 0.2857142857,
227
+ "f": 0.3636363636
 
 
 
 
 
228
  },
229
  "ccomp": {
230
+ "p": 0.7307692308,
231
+ "r": 0.7450980392,
232
+ "f": 0.7378640777
233
+ },
234
+ "obl:agent": {
235
+ "p": 0.8717948718,
236
+ "r": 0.8095238095,
237
+ "f": 0.8395061728
238
  },
239
  "parataxis": {
240
+ "p": 0.4285714286,
241
+ "r": 0.3214285714,
242
+ "f": 0.3673469388
243
  },
244
  "iobj": {
245
+ "p": 0.75,
246
+ "r": 0.6,
247
+ "f": 0.6666666667
248
  },
249
  "nsubj:caus": {
250
  "p": 0.0,
 
267
  "f": 0.0
268
  },
269
  "vocative": {
270
+ "p": 0.8333333333,
271
  "r": 0.625,
272
+ "f": 0.7142857143
273
  },
274
  "dislocated": {
275
  "p": 0.0,
 
277
  "f": 0.0
278
  },
279
  "flat:foreign": {
280
+ "p": 0.0,
281
+ "r": 0.0,
282
+ "f": 0.0
283
  },
284
  "orphan": {
285
  "p": 0.0,
 
297
  "f": 0.0
298
  }
299
  },
300
+ "tag_acc": 0.933216531,
301
+ "lemma_acc": 0.9084463625,
302
+ "ents_p": 0.8148438757,
303
+ "ents_r": 0.8106360834,
304
+ "ents_f": 0.8127345333,
305
  "ents_per_type": {
 
 
 
 
 
306
  "PER": {
307
+ "p": 0.8671110481,
308
+ "r": 0.8761195099,
309
+ "f": 0.8715920026
310
+ },
311
+ "LOC": {
312
+ "p": 0.8253083637,
313
+ "r": 0.8424663264,
314
+ "f": 0.8337990851
315
  },
316
  "ORG": {
317
+ "p": 0.7624443545,
318
+ "r": 0.7190839695,
319
+ "f": 0.7401296405
320
  },
321
  "MISC": {
322
+ "p": 0.6976186671,
323
+ "r": 0.6363901443,
324
+ "f": 0.6655992681
325
  }
326
  },
327
+ "speed": 4063.2423336374
328
  }
attribute_ruler/patterns CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
 
config.cfg CHANGED
@@ -70,8 +70,8 @@ nO = null
70
  [components.ner.model.tok2vec.embed]
71
  @architectures = "spacy.MultiHashEmbed.v2"
72
  width = 96
73
- attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
74
- rows = [5000,1000,2500,2500,50]
75
  include_static_vectors = false
76
 
77
  [components.ner.model.tok2vec.encode]
@@ -139,8 +139,8 @@ factory = "tok2vec"
139
  [components.tok2vec.model.embed]
140
  @architectures = "spacy.MultiHashEmbed.v2"
141
  width = ${components.tok2vec.model.encode:width}
142
- attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
143
- rows = [5000,1000,2500,2500,50]
144
  include_static_vectors = false
145
 
146
  [components.tok2vec.model.encode]
@@ -182,6 +182,7 @@ eval_frequency = 1000
182
  frozen_components = []
183
  before_to_disk = null
184
  annotating_components = []
 
185
 
186
  [training.batcher]
187
  @batchers = "spacy.batch_by_words.v1"
 
70
  [components.ner.model.tok2vec.embed]
71
  @architectures = "spacy.MultiHashEmbed.v2"
72
  width = 96
73
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
74
+ rows = [5000,1000,2500,2500]
75
  include_static_vectors = false
76
 
77
  [components.ner.model.tok2vec.encode]
 
139
  [components.tok2vec.model.embed]
140
  @architectures = "spacy.MultiHashEmbed.v2"
141
  width = ${components.tok2vec.model.encode:width}
142
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY","IS_SPACE"]
143
+ rows = [5000,1000,2500,2500,50,50]
144
  include_static_vectors = false
145
 
146
  [components.tok2vec.model.encode]
 
182
  frozen_components = []
183
  before_to_disk = null
184
  annotating_components = []
185
+ before_update = null
186
 
187
  [training.batcher]
188
  @batchers = "spacy.batch_by_words.v1"
fr_core_news_sm-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2f89186633f13e2726c32bb032d8162dc6c0324241af16fc2c79533a9528dab
3
- size 16263913
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:632607dab3a5253e5079e1b1a094cedc87185c2930e4405080d2e1810fbf9800
3
+ size 16263906
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"fr",
3
  "name":"core_news_sm",
4
- "version":"3.4.0",
5
  "description":"French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"LGPL-LR",
10
- "spacy_version":">=3.4.0,<3.5.0",
11
- "spacy_git_version":"dd038b536",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
@@ -290,70 +290,70 @@
290
  "senter"
291
  ],
292
  "performance":{
293
- "token_acc":0.9989751998,
294
  "token_p":0.9844389844,
295
  "token_r":0.9896058454,
296
  "token_f":0.9870156531,
297
- "pos_acc":0.9620735855,
298
- "morph_acc":0.952582208,
299
- "morph_micro_p":0.978246133,
300
- "morph_micro_r":0.967101255,
301
- "morph_micro_f":0.9726417696,
302
  "morph_per_feat":{
303
  "Definite":{
304
- "p":0.9846378932,
305
- "r":0.9824817518,
306
- "f":0.9835586408
307
  },
308
  "Number":{
309
- "p":0.9907063197,
310
- "r":0.9812223859,
311
- "f":0.9859415464
312
  },
313
  "PronType":{
314
- "p":0.995483871,
315
- "r":0.9872040947,
316
- "f":0.9913266945
317
  },
318
  "Gender":{
319
- "p":0.9706563707,
320
- "r":0.9637107079,
321
- "f":0.9671710695
322
  },
323
  "Mood":{
324
- "p":0.9539594843,
325
- "r":0.920071048,
326
- "f":0.9367088608
327
  },
328
  "Person":{
329
- "p":0.9779507134,
330
- "r":0.948427673,
331
- "f":0.962962963
332
  },
333
  "Tense":{
334
- "p":0.9447916667,
335
- "r":0.9264555669,
336
- "f":0.9355337803
337
  },
338
  "VerbForm":{
339
- "p":0.9586846543,
340
- "r":0.9412251656,
341
- "f":0.9498746867
342
  },
343
  "NumType":{
344
- "p":0.9929328622,
345
  "r":0.9590443686,
346
- "f":0.9756944444
347
  },
348
  "Reflex":{
349
- "p":1.0,
350
- "r":1.0,
351
- "f":1.0
352
  },
353
  "Voice":{
354
- "p":0.8793103448,
355
- "r":0.9107142857,
356
- "f":0.8947368421
357
  },
358
  "Poss":{
359
  "p":1.0,
@@ -361,181 +361,181 @@
361
  "f":1.0
362
  },
363
  "Polarity":{
364
- "p":0.9882352941,
365
  "r":0.9882352941,
366
- "f":0.9882352941
367
  }
368
  },
369
- "sents_p":0.8677884615,
370
- "sents_r":0.8791762895,
371
- "sents_f":0.8719806763,
372
- "dep_uas":0.8742764529,
373
- "dep_las":0.8295618959,
374
  "dep_las_per_type":{
375
  "det":{
376
- "p":0.9675587997,
377
  "r":0.9628732849,
378
- "f":0.965210356
379
  },
380
  "nsubj":{
381
- "p":0.8341463415,
382
- "r":0.8240963855,
383
- "f":0.8290909091
384
  },
385
  "aux:tense":{
386
- "p":0.8976377953,
387
- "r":0.912,
388
- "f":0.9047619048
389
  },
390
  "root":{
391
- "p":0.8474576271,
392
- "r":0.8495145631,
393
- "f":0.8484848485
394
  },
395
  "obj":{
396
- "p":0.8058823529,
397
- "r":0.8130563798,
398
- "f":0.8094534712
399
  },
400
  "cc":{
401
- "p":0.8630136986,
402
- "r":0.8709677419,
403
- "f":0.8669724771
404
  },
405
  "case":{
406
- "p":0.9569603228,
407
- "r":0.969346049,
408
- "f":0.9631133672
409
  },
410
  "obl:mod":{
411
- "p":0.6314102564,
412
  "r":0.5880597015,
413
- "f":0.6089644513
414
  },
415
  "nmod":{
416
- "p":0.7830985915,
417
  "r":0.8331668332,
418
- "f":0.807357212
419
  },
420
  "conj":{
421
- "p":0.4836065574,
422
- "r":0.4645669291,
423
- "f":0.4738955823
424
  },
425
  "nummod":{
426
- "p":0.917721519,
427
- "r":0.8579881657,
428
- "f":0.8868501529
429
  },
430
  "amod":{
431
- "p":0.8621323529,
432
- "r":0.85428051,
433
- "f":0.8581884721
434
  },
435
  "acl":{
436
- "p":0.6503067485,
437
- "r":0.612716763,
438
- "f":0.630952381
439
  },
440
  "mark":{
441
- "p":0.8812785388,
442
- "r":0.8502202643,
443
- "f":0.865470852
444
  },
445
  "xcomp":{
446
- "p":0.7388535032,
447
- "r":0.7682119205,
448
- "f":0.7532467532
449
  },
450
  "flat:name":{
451
- "p":0.8878504673,
452
- "r":0.9047619048,
453
- "f":0.8962264151
454
  },
455
  "cop":{
456
- "p":0.8505747126,
457
- "r":0.8222222222,
458
- "f":0.8361581921
459
  },
460
  "advmod":{
461
- "p":0.8161290323,
462
- "r":0.7931034483,
463
- "f":0.8044515103
464
  },
465
  "obl:arg":{
466
- "p":0.6601941748,
467
- "r":0.6181818182,
468
- "f":0.6384976526
469
  },
470
  "appos":{
471
- "p":0.4444444444,
472
- "r":0.4337349398,
473
- "f":0.4390243902
474
  },
475
  "nsubj:pass":{
476
- "p":0.8292682927,
477
- "r":0.8,
478
- "f":0.8143712575
479
  },
480
  "aux:pass":{
481
- "p":0.9035087719,
482
- "r":0.9196428571,
483
- "f":0.9115044248
484
  },
485
  "acl:relcl":{
486
- "p":0.6626506024,
487
- "r":0.6395348837,
488
- "f":0.650887574
489
  },
490
  "advcl":{
491
- "p":0.5057471264,
492
- "r":0.5641025641,
493
- "f":0.5333333333
494
  },
495
  "fixed":{
496
- "p":0.8452380952,
497
- "r":0.71,
498
- "f":0.7717391304
499
  },
500
  "dep":{
501
- "p":0.2777777778,
502
- "r":0.5172413793,
503
- "f":0.3614457831
504
  },
505
  "expl:subj":{
506
- "p":0.7647058824,
507
- "r":0.8125,
508
- "f":0.7878787879
509
  },
510
  "expl:comp":{
511
- "p":0.6666666667,
512
- "r":0.8666666667,
513
- "f":0.7536231884
514
  },
515
  "expl:pass":{
516
- "p":0.25,
517
- "r":0.1428571429,
518
- "f":0.1818181818
519
- },
520
- "obl:agent":{
521
- "p":0.8139534884,
522
- "r":0.8333333333,
523
- "f":0.8235294118
524
  },
525
  "ccomp":{
526
- "p":0.7391304348,
527
- "r":0.6666666667,
528
- "f":0.7010309278
 
 
 
 
 
529
  },
530
  "parataxis":{
531
- "p":0.5416666667,
532
- "r":0.4642857143,
533
- "f":0.5
534
  },
535
  "iobj":{
536
- "p":0.6,
537
- "r":0.48,
538
- "f":0.5333333333
539
  },
540
  "nsubj:caus":{
541
  "p":0.0,
@@ -558,9 +558,9 @@
558
  "f":0.0
559
  },
560
  "vocative":{
561
- "p":1.0,
562
  "r":0.625,
563
- "f":0.7692307692
564
  },
565
  "dislocated":{
566
  "p":0.0,
@@ -568,9 +568,9 @@
568
  "f":0.0
569
  },
570
  "flat:foreign":{
571
- "p":1.0,
572
- "r":0.2857142857,
573
- "f":0.4444444444
574
  },
575
  "orphan":{
576
  "p":0.0,
@@ -588,34 +588,34 @@
588
  "f":0.0
589
  }
590
  },
591
- "tag_acc":0.9334226528,
592
- "lemma_acc":0.9028976572,
593
- "ents_p":0.812993431,
594
- "ents_r":0.8100156119,
595
- "ents_f":0.8115017896,
596
  "ents_per_type":{
597
- "LOC":{
598
- "p":0.8247443931,
599
- "r":0.8368192086,
600
- "f":0.8307379262
601
- },
602
  "PER":{
603
- "p":0.8687411598,
604
- "r":0.8801318335,
605
- "f":0.8743994021
 
 
 
 
 
606
  },
607
  "ORG":{
608
- "p":0.7622446956,
609
- "r":0.7335877863,
610
- "f":0.7476417388
611
  },
612
  "MISC":{
613
- "p":0.6840694006,
614
- "r":0.6323079166,
615
- "f":0.6571709978
616
  }
617
  },
618
- "speed":4128.958361591
619
  },
620
  "sources":[
621
  {
 
1
  {
2
  "lang":"fr",
3
  "name":"core_news_sm",
4
+ "version":"3.5.0",
5
  "description":"French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"LGPL-LR",
10
+ "spacy_version":">=3.5.0,<3.6.0",
11
+ "spacy_git_version":"9e0322de1",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
 
290
  "senter"
291
  ],
292
  "performance":{
293
+ "token_acc":0.997952498,
294
  "token_p":0.9844389844,
295
  "token_r":0.9896058454,
296
  "token_f":0.9870156531,
297
+ "pos_acc":0.9617644028,
298
+ "morph_acc":0.9529502705,
299
+ "morph_micro_p":0.9796195652,
300
+ "morph_micro_r":0.9663701718,
301
+ "morph_micro_f":0.9729497638,
302
  "morph_per_feat":{
303
  "Definite":{
304
+ "p":0.986090776,
305
+ "r":0.9832116788,
306
+ "f":0.9846491228
307
  },
308
  "Number":{
309
+ "p":0.9897655378,
310
+ "r":0.979197349,
311
+ "f":0.9844530816
312
  },
313
  "PronType":{
314
+ "p":0.9935525467,
315
+ "r":0.9859245042,
316
+ "f":0.9897238279
317
  },
318
  "Gender":{
319
+ "p":0.9710519514,
320
+ "r":0.9601328904,
321
+ "f":0.9655615523
322
  },
323
  "Mood":{
324
+ "p":0.9597806216,
325
+ "r":0.9325044405,
326
+ "f":0.9459459459
327
  },
328
  "Person":{
329
+ "p":0.9818181818,
330
+ "r":0.9509433962,
331
+ "f":0.9661341853
332
  },
333
  "Tense":{
334
+ "p":0.9538784067,
335
+ "r":0.9295199183,
336
+ "f":0.9415416451
337
  },
338
  "VerbForm":{
339
+ "p":0.9686174724,
340
+ "r":0.9453642384,
341
+ "f":0.956849602
342
  },
343
  "NumType":{
344
+ "p":0.9790940767,
345
  "r":0.9590443686,
346
+ "f":0.9689655172
347
  },
348
  "Reflex":{
349
+ "p":0.9772727273,
350
+ "r":0.9772727273,
351
+ "f":0.9772727273
352
  },
353
  "Voice":{
354
+ "p":0.9090909091,
355
+ "r":0.8928571429,
356
+ "f":0.9009009009
357
  },
358
  "Poss":{
359
  "p":1.0,
 
361
  "f":1.0
362
  },
363
  "Polarity":{
364
+ "p":1.0,
365
  "r":0.9882352941,
366
+ "f":0.9940828402
367
  }
368
  },
369
+ "sents_p":0.8561151079,
370
+ "sents_r":0.8665048544,
371
+ "sents_f":0.861278649,
372
+ "dep_uas":0.8781984485,
373
+ "dep_las":0.8347514036,
374
  "dep_las_per_type":{
375
  "det":{
376
+ "p":0.9707078926,
377
  "r":0.9628732849,
378
+ "f":0.9667747164
379
  },
380
  "nsubj":{
381
+ "p":0.853960396,
382
+ "r":0.8313253012,
383
+ "f":0.8424908425
384
  },
385
  "aux:tense":{
386
+ "p":0.905511811,
387
+ "r":0.92,
388
+ "f":0.9126984127
389
  },
390
  "root":{
391
+ "p":0.8490566038,
392
+ "r":0.8737864078,
393
+ "f":0.8612440191
394
  },
395
  "obj":{
396
+ "p":0.8179012346,
397
+ "r":0.7863501484,
398
+ "f":0.8018154312
399
  },
400
  "cc":{
401
+ "p":0.866359447,
402
+ "r":0.866359447,
403
+ "f":0.866359447
404
  },
405
  "case":{
406
+ "p":0.9595959596,
407
+ "r":0.9707084469,
408
+ "f":0.9651202167
409
  },
410
  "obl:mod":{
411
+ "p":0.6396103896,
412
  "r":0.5880597015,
413
+ "f":0.6127527216
414
  },
415
  "nmod":{
416
+ "p":0.7996164909,
417
  "r":0.8331668332,
418
+ "f":0.8160469667
419
  },
420
  "conj":{
421
+ "p":0.5,
422
+ "r":0.5,
423
+ "f":0.5
424
  },
425
  "nummod":{
426
+ "p":0.9079754601,
427
+ "r":0.875739645,
428
+ "f":0.8915662651
429
  },
430
  "amod":{
431
+ "p":0.8850364964,
432
+ "r":0.883424408,
433
+ "f":0.8842297174
434
  },
435
  "acl":{
436
+ "p":0.6445783133,
437
+ "r":0.6184971098,
438
+ "f":0.6312684366
439
  },
440
  "mark":{
441
+ "p":0.8590909091,
442
+ "r":0.8325991189,
443
+ "f":0.8456375839
444
  },
445
  "xcomp":{
446
+ "p":0.8068965517,
447
+ "r":0.7748344371,
448
+ "f":0.7905405405
449
  },
450
  "flat:name":{
451
+ "p":0.8691588785,
452
+ "r":0.8857142857,
453
+ "f":0.8773584906
454
  },
455
  "cop":{
456
+ "p":0.8636363636,
457
+ "r":0.8444444444,
458
+ "f":0.8539325843
459
  },
460
  "advmod":{
461
+ "p":0.8264984227,
462
+ "r":0.8213166144,
463
+ "f":0.8238993711
464
  },
465
  "obl:arg":{
466
+ "p":0.6409090909,
467
+ "r":0.6409090909,
468
+ "f":0.6409090909
469
  },
470
  "appos":{
471
+ "p":0.5063291139,
472
+ "r":0.4819277108,
473
+ "f":0.4938271605
474
  },
475
  "nsubj:pass":{
476
+ "p":0.8295454545,
477
+ "r":0.8588235294,
478
+ "f":0.8439306358
479
  },
480
  "aux:pass":{
481
+ "p":0.905982906,
482
+ "r":0.9464285714,
483
+ "f":0.9257641921
484
  },
485
  "acl:relcl":{
486
+ "p":0.5697674419,
487
+ "r":0.5697674419,
488
+ "f":0.5697674419
489
  },
490
  "advcl":{
491
+ "p":0.475,
492
+ "r":0.4871794872,
493
+ "f":0.4810126582
494
  },
495
  "fixed":{
496
+ "p":0.8372093023,
497
+ "r":0.72,
498
+ "f":0.7741935484
499
  },
500
  "dep":{
501
+ "p":0.2388059701,
502
+ "r":0.5517241379,
503
+ "f":0.3333333333
504
  },
505
  "expl:subj":{
506
+ "p":0.6857142857,
507
+ "r":0.75,
508
+ "f":0.7164179104
509
  },
510
  "expl:comp":{
511
+ "p":0.6388888889,
512
+ "r":0.7666666667,
513
+ "f":0.696969697
514
  },
515
  "expl:pass":{
516
+ "p":0.5,
517
+ "r":0.2857142857,
518
+ "f":0.3636363636
 
 
 
 
 
519
  },
520
  "ccomp":{
521
+ "p":0.7307692308,
522
+ "r":0.7450980392,
523
+ "f":0.7378640777
524
+ },
525
+ "obl:agent":{
526
+ "p":0.8717948718,
527
+ "r":0.8095238095,
528
+ "f":0.8395061728
529
  },
530
  "parataxis":{
531
+ "p":0.4285714286,
532
+ "r":0.3214285714,
533
+ "f":0.3673469388
534
  },
535
  "iobj":{
536
+ "p":0.75,
537
+ "r":0.6,
538
+ "f":0.6666666667
539
  },
540
  "nsubj:caus":{
541
  "p":0.0,
 
558
  "f":0.0
559
  },
560
  "vocative":{
561
+ "p":0.8333333333,
562
  "r":0.625,
563
+ "f":0.7142857143
564
  },
565
  "dislocated":{
566
  "p":0.0,
 
568
  "f":0.0
569
  },
570
  "flat:foreign":{
571
+ "p":0.0,
572
+ "r":0.0,
573
+ "f":0.0
574
  },
575
  "orphan":{
576
  "p":0.0,
 
588
  "f":0.0
589
  }
590
  },
591
+ "tag_acc":0.933216531,
592
+ "lemma_acc":0.9084463625,
593
+ "ents_p":0.8148438757,
594
+ "ents_r":0.8106360834,
595
+ "ents_f":0.8127345333,
596
  "ents_per_type":{
 
 
 
 
 
597
  "PER":{
598
+ "p":0.8671110481,
599
+ "r":0.8761195099,
600
+ "f":0.8715920026
601
+ },
602
+ "LOC":{
603
+ "p":0.8253083637,
604
+ "r":0.8424663264,
605
+ "f":0.8337990851
606
  },
607
  "ORG":{
608
+ "p":0.7624443545,
609
+ "r":0.7190839695,
610
+ "f":0.7401296405
611
  },
612
  "MISC":{
613
+ "p":0.6976186671,
614
+ "r":0.6363901443,
615
+ "f":0.6655992681
616
  }
617
  },
618
+ "speed":4063.2423336374
619
  },
620
  "sources":[
621
  {
morphologizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:806f02132eb3b11950cbf3639226b162f85e4a2171cd24d92f49b474e9ee617a
3
  size 76873
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89b107090256b81828760423ae7cdff7cef8d7f2aabca750ca52c903a0c8cfc3
3
  size 76873
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac9bc4c8202e30bf81dc9f41f65f2cbabdfc9f6ad9f5df0b7f04d5660f599555
3
- size 6270202
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0436bc30aba4abc3d00eadc7906c2fcf6dbeaa23ffbfb3ff4242b2cc83e15539
3
+ size 6140040
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41755be5b0060b60302900a469c6121a19acfa15a832cfe9c2cf81cd2a1c5fea
3
  size 304828
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50a5f61d6c7eee53601a1beb27b2c66bf994c6da4044be6e0cec24539cf65ab4
3
  size 304828
senter/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d7ce6a8f4cad0a3b9a5f950e826fe8765b70f8750cce21a79546119075ac652
3
  size 197089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85402d9a62f28e4fda385d3e25a8f8bb0c94e6face773e7a3110f2d488ce6c1b
3
  size 197089
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d42e67a30c10ea413262981f099c5f8d5789c6e5aa5aa7a9387552afd1534178
3
- size 6139229
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16f8372daec050731a15a34630ba399e60b9b2e2ed9fc8a571a4ef207e172c90
3
+ size 6269370
tokenizer CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d399a4e4c03d3ff8f9f14701678b318876e8f6799c64e01f4d342fa86cf6ac3
3
- size 1515364
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c5994780fdf69912547eff13b24e61764efefae670274e56112354430afe0f4
3
+ size 1515440