adrianeboyd committed on
Commit
a012f8c
1 Parent(s): 545dda3

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -14,62 +14,62 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.8336070499
18
  - name: NER Recall
19
  type: recall
20
- value: 0.8330731356
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.8333400072
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
- value: 0.9432561975
31
  - task:
32
  name: POS
33
  type: token-classification
34
  metrics:
35
  - name: POS (UPOS) Accuracy
36
  type: accuracy
37
- value: 0.9720146369
38
  - task:
39
  name: MORPH
40
  type: token-classification
41
  metrics:
42
  - name: Morph (UFeats) Accuracy
43
  type: accuracy
44
- value: 0.9609157471
45
  - task:
46
  name: LEMMA
47
  type: token-classification
48
  metrics:
49
  - name: Lemma Accuracy
50
  type: accuracy
51
- value: 0.9076243321
52
  - task:
53
  name: UNLABELED_DEPENDENCIES
54
  type: token-classification
55
  metrics:
56
  - name: Unlabeled Attachment Score (UAS)
57
  type: f_score
58
- value: 0.8998842593
59
  - task:
60
  name: LABELED_DEPENDENCIES
61
  type: token-classification
62
  metrics:
63
  - name: Labeled Attachment Score (LAS)
64
  type: f_score
65
- value: 0.8596552123
66
  - task:
67
  name: SENTS
68
  type: token-classification
69
  metrics:
70
  - name: Sentences F-Score
71
  type: f_score
72
- value: 0.9097472924
73
  ---
74
  ### Details: https://spacy.io/models/fr#fr_core_news_md
75
 
@@ -78,8 +78,8 @@ French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, s
78
  | Feature | Description |
79
  | --- | --- |
80
  | **Name** | `fr_core_news_md` |
81
- | **Version** | `3.4.0` |
82
- | **spaCy** | `>=3.4.0,<3.5.0` |
83
  | **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
84
  | **Components** | `tok2vec`, `morphologizer`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
85
  | **Vectors** | 500000 keys, 20000 unique vectors (300 dimensions) |
@@ -105,22 +105,22 @@ French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, s
105
 
106
  | Type | Score |
107
  | --- | --- |
108
- | `TOKEN_ACC` | 99.90 |
109
  | `TOKEN_P` | 98.44 |
110
  | `TOKEN_R` | 98.96 |
111
  | `TOKEN_F` | 98.70 |
112
- | `POS_ACC` | 97.20 |
113
- | `MORPH_ACC` | 96.09 |
114
- | `MORPH_MICRO_P` | 98.55 |
115
- | `MORPH_MICRO_R` | 97.49 |
116
- | `MORPH_MICRO_F` | 98.02 |
117
- | `SENTS_P` | 90.21 |
118
- | `SENTS_R` | 91.75 |
119
- | `SENTS_F` | 90.97 |
120
- | `DEP_UAS` | 89.99 |
121
- | `DEP_LAS` | 85.97 |
122
- | `TAG_ACC` | 94.33 |
123
- | `LEMMA_ACC` | 90.76 |
124
- | `ENTS_P` | 83.36 |
125
- | `ENTS_R` | 83.31 |
126
- | `ENTS_F` | 83.33 |
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.8317031703
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.8322525119
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.8319777504
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
+ value: 0.9450741962
31
  - task:
32
  name: POS
33
  type: token-classification
34
  metrics:
35
  - name: POS (UPOS) Accuracy
36
  type: accuracy
37
+ value: 0.9737221764
38
  - task:
39
  name: MORPH
40
  type: token-classification
41
  metrics:
42
  - name: Morph (UFeats) Accuracy
43
  type: accuracy
44
+ value: 0.9648987166
45
  - task:
46
  name: LEMMA
47
  type: token-classification
48
  metrics:
49
  - name: Lemma Accuracy
50
  type: accuracy
51
+ value: 0.9134812988
52
  - task:
53
  name: UNLABELED_DEPENDENCIES
54
  type: token-classification
55
  metrics:
56
  - name: Unlabeled Attachment Score (UAS)
57
  type: f_score
58
+ value: 0.8947398877
59
  - task:
60
  name: LABELED_DEPENDENCIES
61
  type: token-classification
62
  metrics:
63
  - name: Labeled Attachment Score (LAS)
64
  type: f_score
65
+ value: 0.8562998959
66
  - task:
67
  name: SENTS
68
  type: token-classification
69
  metrics:
70
  - name: Sentences F-Score
71
  type: f_score
72
+ value: 0.8851269649
73
  ---
74
  ### Details: https://spacy.io/models/fr#fr_core_news_md
75
 
78
  | Feature | Description |
79
  | --- | --- |
80
  | **Name** | `fr_core_news_md` |
81
+ | **Version** | `3.5.0` |
82
+ | **spaCy** | `>=3.5.0,<3.6.0` |
83
  | **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
84
  | **Components** | `tok2vec`, `morphologizer`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
85
  | **Vectors** | 500000 keys, 20000 unique vectors (300 dimensions) |
105
 
106
  | Type | Score |
107
  | --- | --- |
108
+ | `TOKEN_ACC` | 99.80 |
109
  | `TOKEN_P` | 98.44 |
110
  | `TOKEN_R` | 98.96 |
111
  | `TOKEN_F` | 98.70 |
112
+ | `POS_ACC` | 97.37 |
113
+ | `MORPH_ACC` | 96.49 |
114
+ | `MORPH_MICRO_P` | 98.68 |
115
+ | `MORPH_MICRO_R` | 97.98 |
116
+ | `MORPH_MICRO_F` | 98.33 |
117
+ | `SENTS_P` | 88.19 |
118
+ | `SENTS_R` | 89.46 |
119
+ | `SENTS_F` | 88.51 |
120
+ | `DEP_UAS` | 89.47 |
121
+ | `DEP_LAS` | 85.63 |
122
+ | `TAG_ACC` | 94.51 |
123
+ | `LEMMA_ACC` | 91.35 |
124
+ | `ENTS_P` | 83.17 |
125
+ | `ENTS_R` | 83.23 |
126
+ | `ENTS_F` | 83.20 |
accuracy.json CHANGED
@@ -1,68 +1,68 @@
1
  {
2
- "token_acc": 0.9989751998,
3
  "token_p": 0.9844389844,
4
  "token_r": 0.9896058454,
5
  "token_f": 0.9870156531,
6
- "pos_acc": 0.9720146369,
7
- "morph_acc": 0.9609157471,
8
- "morph_micro_p": 0.9854661904,
9
- "morph_micro_r": 0.9748994761,
10
- "morph_micro_f": 0.980154355,
11
  "morph_per_feat": {
12
  "Definite": {
13
- "p": 0.989010989,
14
- "r": 0.9854014599,
15
- "f": 0.987202925
16
  },
17
  "Number": {
18
- "p": 0.9931239547,
19
- "r": 0.9837997054,
20
- "f": 0.9884398409
21
  },
22
  "PronType": {
23
- "p": 0.9961265332,
24
- "r": 0.9872040947,
25
- "f": 0.9916452442
26
  },
27
  "Gender": {
28
- "p": 0.9808884298,
29
- "r": 0.9706107846,
30
- "f": 0.9757225434
31
  },
32
  "Mood": {
33
- "p": 0.9654545455,
34
- "r": 0.9431616341,
35
- "f": 0.9541778976
36
  },
37
  "Person": {
38
- "p": 0.9858611825,
39
- "r": 0.9647798742,
40
- "f": 0.9752066116
41
  },
42
  "Tense": {
43
- "p": 0.9637681159,
44
- "r": 0.9509703779,
45
- "f": 0.9573264781
46
  },
47
  "VerbForm": {
48
- "p": 0.9765886288,
49
- "r": 0.9668874172,
50
- "f": 0.9717138103
51
  },
52
  "NumType": {
53
  "p": 1.0,
54
- "r": 0.95221843,
55
- "f": 0.9755244755
56
  },
57
  "Reflex": {
58
- "p": 1.0,
59
  "r": 1.0,
60
- "f": 1.0
61
  },
62
  "Voice": {
63
- "p": 0.905982906,
64
- "r": 0.9464285714,
65
- "f": 0.9257641921
66
  },
67
  "Poss": {
68
  "p": 1.0,
@@ -70,181 +70,181 @@
70
  "f": 1.0
71
  },
72
  "Polarity": {
73
- "p": 1.0,
74
  "r": 0.9882352941,
75
- "f": 0.9940828402
76
  }
77
  },
78
- "sents_p": 0.9021479714,
79
- "sents_r": 0.9174757282,
80
- "sents_f": 0.9097472924,
81
- "dep_uas": 0.8998842593,
82
- "dep_las": 0.8596552123,
83
  "dep_las_per_type": {
84
  "det": {
85
- "p": 0.9813463098,
86
- "r": 0.9765940274,
87
- "f": 0.9789644013
88
  },
89
  "nsubj": {
90
- "p": 0.8962962963,
91
  "r": 0.8746987952,
92
- "f": 0.8853658537
93
  },
94
  "aux:tense": {
95
- "p": 0.9596774194,
96
  "r": 0.952,
97
- "f": 0.9558232932
98
  },
99
  "root": {
100
- "p": 0.8875598086,
101
- "r": 0.9004854369,
102
- "f": 0.8939759036
103
  },
104
  "obj": {
105
- "p": 0.8618618619,
106
- "r": 0.8516320475,
107
- "f": 0.8567164179
108
  },
109
  "cc": {
110
- "p": 0.8894009217,
111
- "r": 0.8894009217,
112
- "f": 0.8894009217
113
  },
114
  "case": {
115
- "p": 0.9682860999,
116
- "r": 0.977520436,
117
- "f": 0.9728813559
118
  },
119
  "obl:mod": {
120
- "p": 0.6645768025,
121
- "r": 0.6328358209,
122
- "f": 0.6483180428
123
  },
124
  "nmod": {
125
- "p": 0.8249027237,
126
- "r": 0.8471528472,
127
- "f": 0.8358797437
128
  },
129
  "conj": {
130
- "p": 0.5983606557,
131
- "r": 0.5748031496,
132
- "f": 0.5863453815
133
  },
134
  "nummod": {
135
- "p": 0.9108280255,
136
- "r": 0.8461538462,
137
- "f": 0.8773006135
138
  },
139
  "amod": {
140
- "p": 0.9257884972,
141
  "r": 0.9089253188,
142
- "f": 0.9172794118
143
  },
144
  "acl": {
145
- "p": 0.6971428571,
146
- "r": 0.7052023121,
147
- "f": 0.7011494253
148
  },
149
  "mark": {
150
- "p": 0.872246696,
151
- "r": 0.872246696,
152
- "f": 0.872246696
153
  },
154
  "xcomp": {
155
- "p": 0.8943661972,
156
- "r": 0.8410596026,
157
- "f": 0.866894198
158
  },
159
  "flat:name": {
160
- "p": 0.9285714286,
161
- "r": 0.8666666667,
162
- "f": 0.8965517241
163
  },
164
  "cop": {
165
- "p": 0.8876404494,
166
  "r": 0.8777777778,
167
- "f": 0.8826815642
168
  },
169
  "advmod": {
170
- "p": 0.8307210031,
171
- "r": 0.8307210031,
172
- "f": 0.8307210031
173
  },
174
  "obl:arg": {
175
- "p": 0.6767241379,
176
- "r": 0.7136363636,
177
- "f": 0.6946902655
178
  },
179
  "appos": {
180
- "p": 0.5657894737,
181
- "r": 0.5180722892,
182
- "f": 0.5408805031
183
  },
184
  "nsubj:pass": {
185
- "p": 0.8720930233,
186
- "r": 0.8823529412,
187
- "f": 0.8771929825
188
  },
189
  "aux:pass": {
190
- "p": 0.9310344828,
191
  "r": 0.9642857143,
192
- "f": 0.9473684211
193
  },
194
  "acl:relcl": {
195
- "p": 0.7108433735,
196
- "r": 0.6860465116,
197
- "f": 0.6982248521
198
  },
199
  "advcl": {
200
- "p": 0.488372093,
201
- "r": 0.5384615385,
202
- "f": 0.512195122
203
  },
204
  "fixed": {
205
- "p": 0.7425742574,
206
- "r": 0.75,
207
- "f": 0.7462686567
208
  },
209
  "dep": {
210
- "p": 0.1971830986,
211
  "r": 0.4827586207,
212
- "f": 0.28
213
  },
214
  "expl:subj": {
215
- "p": 0.7575757576,
216
- "r": 0.78125,
217
- "f": 0.7692307692
218
  },
219
  "expl:comp": {
220
- "p": 0.625,
221
- "r": 0.8333333333,
222
- "f": 0.7142857143
223
  },
224
  "expl:pass": {
225
- "p": 0.6666666667,
226
  "r": 0.2857142857,
227
- "f": 0.4
228
  },
229
  "ccomp": {
230
- "p": 0.7254901961,
231
- "r": 0.7254901961,
232
- "f": 0.7254901961
233
  },
234
  "parataxis": {
235
- "p": 0.45,
236
- "r": 0.3214285714,
237
- "f": 0.375
238
  },
239
  "iobj": {
240
- "p": 0.6666666667,
241
- "r": 0.56,
242
- "f": 0.6086956522
243
  },
244
  "obl:agent": {
245
- "p": 0.8333333333,
246
- "r": 0.7142857143,
247
- "f": 0.7692307692
248
  },
249
  "nsubj:caus": {
250
  "p": 0.0,
@@ -267,9 +267,9 @@
267
  "f": 0.0
268
  },
269
  "vocative": {
270
- "p": 0.8333333333,
271
  "r": 0.625,
272
- "f": 0.7142857143
273
  },
274
  "dislocated": {
275
  "p": 0.0,
@@ -277,9 +277,9 @@
277
  "f": 0.0
278
  },
279
  "flat:foreign": {
280
- "p": 0.5,
281
  "r": 0.2857142857,
282
- "f": 0.3636363636
283
  },
284
  "orphan": {
285
  "p": 0.0,
@@ -297,32 +297,32 @@
297
  "f": 0.0
298
  }
299
  },
300
- "tag_acc": 0.9432561975,
301
- "lemma_acc": 0.9076243321,
302
- "ents_p": 0.8336070499,
303
- "ents_r": 0.8330731356,
304
- "ents_f": 0.8333400072,
305
  "ents_per_type": {
306
  "PER": {
307
- "p": 0.8969594595,
308
- "r": 0.9130902056,
309
- "f": 0.9049529558
310
  },
311
  "LOC": {
312
- "p": 0.8416312027,
313
- "r": 0.8520873421,
314
- "f": 0.846826997
315
  },
316
  "ORG": {
317
- "p": 0.7680062124,
318
- "r": 0.7549618321,
319
- "f": 0.761428159
320
  },
321
  "MISC": {
322
- "p": 0.7148241206,
323
- "r": 0.6636535938,
324
- "f": 0.6882891056
325
  }
326
  },
327
- "speed": 4200.9996712213
328
  }
1
  {
2
+ "token_acc": 0.997952498,
3
  "token_p": 0.9844389844,
4
  "token_r": 0.9896058454,
5
  "token_f": 0.9870156531,
6
+ "pos_acc": 0.9737221764,
7
+ "morph_acc": 0.9648987166,
8
+ "morph_micro_p": 0.9868073879,
9
+ "morph_micro_r": 0.9797733642,
10
+ "morph_micro_f": 0.9832777965,
11
  "morph_per_feat": {
12
  "Definite": {
13
+ "p": 0.9890430972,
14
+ "r": 0.9883211679,
15
+ "f": 0.9886820007
16
  },
17
  "Number": {
18
+ "p": 0.9927751019,
19
+ "r": 0.9865611193,
20
+ "f": 0.9896583564
21
  },
22
  "PronType": {
23
+ "p": 0.9954954955,
24
+ "r": 0.9897632758,
25
+ "f": 0.99262111
26
  },
27
  "Gender": {
28
+ "p": 0.9822164948,
29
+ "r": 0.9739330437,
30
+ "f": 0.9780572308
31
  },
32
  "Mood": {
33
+ "p": 0.9712230216,
34
+ "r": 0.9591474245,
35
+ "f": 0.9651474531
36
  },
37
  "Person": {
38
+ "p": 0.9897959184,
39
+ "r": 0.9761006289,
40
+ "f": 0.98290057
41
  },
42
  "Tense": {
43
+ "p": 0.9691358025,
44
+ "r": 0.962206333,
45
+ "f": 0.9656586366
46
  },
47
  "VerbForm": {
48
+ "p": 0.9849498328,
49
+ "r": 0.9751655629,
50
+ "f": 0.9800332779
51
  },
52
  "NumType": {
53
  "p": 1.0,
54
+ "r": 0.9658703072,
55
+ "f": 0.9826388889
56
  },
57
  "Reflex": {
58
+ "p": 0.9777777778,
59
  "r": 1.0,
60
+ "f": 0.9887640449
61
  },
62
  "Voice": {
63
+ "p": 0.906779661,
64
+ "r": 0.9553571429,
65
+ "f": 0.9304347826
66
  },
67
  "Poss": {
68
  "p": 1.0,
70
  "f": 1.0
71
  },
72
  "Polarity": {
73
+ "p": 0.9882352941,
74
  "r": 0.9882352941,
75
+ "f": 0.9882352941
76
  }
77
  },
78
+ "sents_p": 0.8819277108,
79
+ "sents_r": 0.8946497306,
80
+ "sents_f": 0.8851269649,
81
+ "dep_uas": 0.8947398877,
82
+ "dep_las": 0.8562998959,
83
  "dep_las_per_type": {
84
  "det": {
85
+ "p": 0.9790153349,
86
+ "r": 0.9790153349,
87
+ "f": 0.9790153349
88
  },
89
  "nsubj": {
90
+ "p": 0.8663484487,
91
  "r": 0.8746987952,
92
+ "f": 0.8705035971
93
  },
94
  "aux:tense": {
95
+ "p": 0.952,
96
  "r": 0.952,
97
+ "f": 0.952
98
  },
99
  "root": {
100
+ "p": 0.8671497585,
101
+ "r": 0.8713592233,
102
+ "f": 0.8692493947
103
  },
104
  "obj": {
105
+ "p": 0.8450292398,
106
+ "r": 0.8575667656,
107
+ "f": 0.8512518409
108
  },
109
  "cc": {
110
+ "p": 0.8858447489,
111
+ "r": 0.8940092166,
112
+ "f": 0.8899082569
113
  },
114
  "case": {
115
+ "p": 0.9687712152,
116
+ "r": 0.9720708447,
117
+ "f": 0.9704182251
118
  },
119
  "obl:mod": {
120
+ "p": 0.6698412698,
121
+ "r": 0.6298507463,
122
+ "f": 0.6492307692
123
  },
124
  "nmod": {
125
+ "p": 0.812319538,
126
+ "r": 0.8431568432,
127
+ "f": 0.8274509804
128
  },
129
  "conj": {
130
+ "p": 0.5533596838,
131
+ "r": 0.5511811024,
132
+ "f": 0.5522682446
133
  },
134
  "nummod": {
135
+ "p": 0.9141104294,
136
+ "r": 0.8816568047,
137
+ "f": 0.8975903614
138
  },
139
  "amod": {
140
+ "p": 0.9189686924,
141
  "r": 0.9089253188,
142
+ "f": 0.9139194139
143
  },
144
  "acl": {
145
+ "p": 0.7100591716,
146
+ "r": 0.6936416185,
147
+ "f": 0.701754386
148
  },
149
  "mark": {
150
+ "p": 0.8883928571,
151
+ "r": 0.8766519824,
152
+ "f": 0.8824833703
153
  },
154
  "xcomp": {
155
+ "p": 0.8620689655,
156
+ "r": 0.8278145695,
157
+ "f": 0.8445945946
158
  },
159
  "flat:name": {
160
+ "p": 0.8952380952,
161
+ "r": 0.8952380952,
162
+ "f": 0.8952380952
163
  },
164
  "cop": {
165
+ "p": 0.8681318681,
166
  "r": 0.8777777778,
167
+ "f": 0.8729281768
168
  },
169
  "advmod": {
170
+ "p": 0.8553054662,
171
+ "r": 0.8338557994,
172
+ "f": 0.8444444444
173
  },
174
  "obl:arg": {
175
+ "p": 0.6772727273,
176
+ "r": 0.6772727273,
177
+ "f": 0.6772727273
178
  },
179
  "appos": {
180
+ "p": 0.5227272727,
181
+ "r": 0.5542168675,
182
+ "f": 0.5380116959
183
  },
184
  "nsubj:pass": {
185
+ "p": 0.8604651163,
186
+ "r": 0.8705882353,
187
+ "f": 0.865497076
188
  },
189
  "aux:pass": {
190
+ "p": 0.9557522124,
191
  "r": 0.9642857143,
192
+ "f": 0.96
193
  },
194
  "acl:relcl": {
195
+ "p": 0.6329113924,
196
+ "r": 0.5813953488,
197
+ "f": 0.6060606061
198
  },
199
  "advcl": {
200
+ "p": 0.4756097561,
201
+ "r": 0.5,
202
+ "f": 0.4875
203
  },
204
  "fixed": {
205
+ "p": 0.8202247191,
206
+ "r": 0.73,
207
+ "f": 0.7724867725
208
  },
209
  "dep": {
210
+ "p": 0.25,
211
  "r": 0.4827586207,
212
+ "f": 0.3294117647
213
  },
214
  "expl:subj": {
215
+ "p": 0.7647058824,
216
+ "r": 0.8125,
217
+ "f": 0.7878787879
218
  },
219
  "expl:comp": {
220
+ "p": 0.6666666667,
221
+ "r": 0.9333333333,
222
+ "f": 0.7777777778
223
  },
224
  "expl:pass": {
225
+ "p": 0.4,
226
  "r": 0.2857142857,
227
+ "f": 0.3333333333
228
  },
229
  "ccomp": {
230
+ "p": 0.7037037037,
231
+ "r": 0.7450980392,
232
+ "f": 0.7238095238
233
  },
234
  "parataxis": {
235
+ "p": 0.6875,
236
+ "r": 0.3928571429,
237
+ "f": 0.5
238
  },
239
  "iobj": {
240
+ "p": 0.7857142857,
241
+ "r": 0.44,
242
+ "f": 0.5641025641
243
  },
244
  "obl:agent": {
245
+ "p": 0.8974358974,
246
+ "r": 0.8333333333,
247
+ "f": 0.8641975309
248
  },
249
  "nsubj:caus": {
250
  "p": 0.0,
267
  "f": 0.0
268
  },
269
  "vocative": {
270
+ "p": 1.0,
271
  "r": 0.625,
272
+ "f": 0.7692307692
273
  },
274
  "dislocated": {
275
  "p": 0.0,
277
  "f": 0.0
278
  },
279
  "flat:foreign": {
280
+ "p": 1.0,
281
  "r": 0.2857142857,
282
+ "f": 0.4444444444
283
  },
284
  "orphan": {
285
  "p": 0.0,
297
  "f": 0.0
298
  }
299
  },
300
+ "tag_acc": 0.9450741962,
301
+ "lemma_acc": 0.9134812988,
302
+ "ents_p": 0.8317031703,
303
+ "ents_r": 0.8322525119,
304
+ "ents_f": 0.8319777504,
305
  "ents_per_type": {
306
  "PER": {
307
+ "p": 0.8970691842,
308
+ "r": 0.9123020706,
309
+ "f": 0.9046215055
310
  },
311
  "LOC": {
312
+ "p": 0.8359820952,
313
+ "r": 0.8515435456,
314
+ "f": 0.8436910707
315
  },
316
  "ORG": {
317
+ "p": 0.767491717,
318
+ "r": 0.7515267176,
319
+ "f": 0.7594253206
320
  },
321
  "MISC": {
322
+ "p": 0.7205253996,
323
+ "r": 0.6637993877,
324
+ "f": 0.6910001518
325
  }
326
  },
327
+ "speed": 4167.4553797123
328
  }
attribute_ruler/patterns CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
config.cfg CHANGED
@@ -70,8 +70,8 @@ nO = null
70
  [components.ner.model.tok2vec.embed]
71
  @architectures = "spacy.MultiHashEmbed.v2"
72
  width = 96
73
- attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
74
- rows = [5000,1000,2500,2500,50]
75
  include_static_vectors = true
76
 
77
  [components.ner.model.tok2vec.encode]
@@ -139,8 +139,8 @@ factory = "tok2vec"
139
  [components.tok2vec.model.embed]
140
  @architectures = "spacy.MultiHashEmbed.v2"
141
  width = ${components.tok2vec.model.encode:width}
142
- attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
143
- rows = [5000,1000,2500,2500,50]
144
  include_static_vectors = true
145
 
146
  [components.tok2vec.model.encode]
@@ -182,6 +182,7 @@ eval_frequency = 1000
182
  frozen_components = []
183
  before_to_disk = null
184
  annotating_components = []
 
185
 
186
  [training.batcher]
187
  @batchers = "spacy.batch_by_words.v1"
70
  [components.ner.model.tok2vec.embed]
71
  @architectures = "spacy.MultiHashEmbed.v2"
72
  width = 96
73
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
74
+ rows = [5000,1000,2500,2500]
75
  include_static_vectors = true
76
 
77
  [components.ner.model.tok2vec.encode]
139
  [components.tok2vec.model.embed]
140
  @architectures = "spacy.MultiHashEmbed.v2"
141
  width = ${components.tok2vec.model.encode:width}
142
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY","IS_SPACE"]
143
+ rows = [5000,1000,2500,2500,50,50]
144
  include_static_vectors = true
145
 
146
  [components.tok2vec.model.encode]
182
  frozen_components = []
183
  before_to_disk = null
184
  annotating_components = []
185
+ before_update = null
186
 
187
  [training.batcher]
188
  @batchers = "spacy.batch_by_words.v1"
fr_core_news_md-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7af020a5be75d7537ded4043390a7082a60cf51ec5177271eac940814215c6a5
3
- size 45833710
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc4fe173a387407e6a830914e483d9de12938cc648befca35fa016903a5b2e3d
3
+ size 45830715
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"fr",
3
  "name":"core_news_md",
4
- "version":"3.4.0",
5
  "description":"French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"LGPL-LR",
10
- "spacy_version":">=3.4.0,<3.5.0",
11
- "spacy_git_version":"dd038b536",
12
  "vectors":{
13
  "width":300,
14
  "vectors":20000,
@@ -290,70 +290,70 @@
290
  "senter"
291
  ],
292
  "performance":{
293
- "token_acc":0.9989751998,
294
  "token_p":0.9844389844,
295
  "token_r":0.9896058454,
296
  "token_f":0.9870156531,
297
- "pos_acc":0.9720146369,
298
- "morph_acc":0.9609157471,
299
- "morph_micro_p":0.9854661904,
300
- "morph_micro_r":0.9748994761,
301
- "morph_micro_f":0.980154355,
302
  "morph_per_feat":{
303
  "Definite":{
304
- "p":0.989010989,
305
- "r":0.9854014599,
306
- "f":0.987202925
307
  },
308
  "Number":{
309
- "p":0.9931239547,
310
- "r":0.9837997054,
311
- "f":0.9884398409
312
  },
313
  "PronType":{
314
- "p":0.9961265332,
315
- "r":0.9872040947,
316
- "f":0.9916452442
317
  },
318
  "Gender":{
319
- "p":0.9808884298,
320
- "r":0.9706107846,
321
- "f":0.9757225434
322
  },
323
  "Mood":{
324
- "p":0.9654545455,
325
- "r":0.9431616341,
326
- "f":0.9541778976
327
  },
328
  "Person":{
329
- "p":0.9858611825,
330
- "r":0.9647798742,
331
- "f":0.9752066116
332
  },
333
  "Tense":{
334
- "p":0.9637681159,
335
- "r":0.9509703779,
336
- "f":0.9573264781
337
  },
338
  "VerbForm":{
339
- "p":0.9765886288,
340
- "r":0.9668874172,
341
- "f":0.9717138103
342
  },
343
  "NumType":{
344
  "p":1.0,
345
- "r":0.95221843,
346
- "f":0.9755244755
347
  },
348
  "Reflex":{
349
- "p":1.0,
350
  "r":1.0,
351
- "f":1.0
352
  },
353
  "Voice":{
354
- "p":0.905982906,
355
- "r":0.9464285714,
356
- "f":0.9257641921
357
  },
358
  "Poss":{
359
  "p":1.0,
@@ -361,181 +361,181 @@
361
  "f":1.0
362
  },
363
  "Polarity":{
364
- "p":1.0,
365
  "r":0.9882352941,
366
- "f":0.9940828402
367
  }
368
  },
369
- "sents_p":0.9021479714,
370
- "sents_r":0.9174757282,
371
- "sents_f":0.9097472924,
372
- "dep_uas":0.8998842593,
373
- "dep_las":0.8596552123,
374
  "dep_las_per_type":{
375
  "det":{
376
- "p":0.9813463098,
377
- "r":0.9765940274,
378
- "f":0.9789644013
379
  },
380
  "nsubj":{
381
- "p":0.8962962963,
382
  "r":0.8746987952,
383
- "f":0.8853658537
384
  },
385
  "aux:tense":{
386
- "p":0.9596774194,
387
  "r":0.952,
388
- "f":0.9558232932
389
  },
390
  "root":{
391
- "p":0.8875598086,
392
- "r":0.9004854369,
393
- "f":0.8939759036
394
  },
395
  "obj":{
396
- "p":0.8618618619,
397
- "r":0.8516320475,
398
- "f":0.8567164179
399
  },
400
  "cc":{
401
- "p":0.8894009217,
402
- "r":0.8894009217,
403
- "f":0.8894009217
404
  },
405
  "case":{
406
- "p":0.9682860999,
407
- "r":0.977520436,
408
- "f":0.9728813559
409
  },
410
  "obl:mod":{
411
- "p":0.6645768025,
412
- "r":0.6328358209,
413
- "f":0.6483180428
414
  },
415
  "nmod":{
416
- "p":0.8249027237,
417
- "r":0.8471528472,
418
- "f":0.8358797437
419
  },
420
  "conj":{
421
- "p":0.5983606557,
422
- "r":0.5748031496,
423
- "f":0.5863453815
424
  },
425
  "nummod":{
426
- "p":0.9108280255,
427
- "r":0.8461538462,
428
- "f":0.8773006135
429
  },
430
  "amod":{
431
- "p":0.9257884972,
432
  "r":0.9089253188,
433
- "f":0.9172794118
434
  },
435
  "acl":{
436
- "p":0.6971428571,
437
- "r":0.7052023121,
438
- "f":0.7011494253
439
  },
440
  "mark":{
441
- "p":0.872246696,
442
- "r":0.872246696,
443
- "f":0.872246696
444
  },
445
  "xcomp":{
446
- "p":0.8943661972,
447
- "r":0.8410596026,
448
- "f":0.866894198
449
  },
450
  "flat:name":{
451
- "p":0.9285714286,
452
- "r":0.8666666667,
453
- "f":0.8965517241
454
  },
455
  "cop":{
456
- "p":0.8876404494,
457
  "r":0.8777777778,
458
- "f":0.8826815642
459
  },
460
  "advmod":{
461
- "p":0.8307210031,
462
- "r":0.8307210031,
463
- "f":0.8307210031
464
  },
465
  "obl:arg":{
466
- "p":0.6767241379,
467
- "r":0.7136363636,
468
- "f":0.6946902655
469
  },
470
  "appos":{
471
- "p":0.5657894737,
472
- "r":0.5180722892,
473
- "f":0.5408805031
474
  },
475
  "nsubj:pass":{
476
- "p":0.8720930233,
477
- "r":0.8823529412,
478
- "f":0.8771929825
479
  },
480
  "aux:pass":{
481
- "p":0.9310344828,
482
  "r":0.9642857143,
483
- "f":0.9473684211
484
  },
485
  "acl:relcl":{
486
- "p":0.7108433735,
487
- "r":0.6860465116,
488
- "f":0.6982248521
489
  },
490
  "advcl":{
491
- "p":0.488372093,
492
- "r":0.5384615385,
493
- "f":0.512195122
494
  },
495
  "fixed":{
496
- "p":0.7425742574,
497
- "r":0.75,
498
- "f":0.7462686567
499
  },
500
  "dep":{
501
- "p":0.1971830986,
502
  "r":0.4827586207,
503
- "f":0.28
504
  },
505
  "expl:subj":{
506
- "p":0.7575757576,
507
- "r":0.78125,
508
- "f":0.7692307692
509
  },
510
  "expl:comp":{
511
- "p":0.625,
512
- "r":0.8333333333,
513
- "f":0.7142857143
514
  },
515
  "expl:pass":{
516
- "p":0.6666666667,
517
  "r":0.2857142857,
518
- "f":0.4
519
  },
520
  "ccomp":{
521
- "p":0.7254901961,
522
- "r":0.7254901961,
523
- "f":0.7254901961
524
  },
525
  "parataxis":{
526
- "p":0.45,
527
- "r":0.3214285714,
528
- "f":0.375
529
  },
530
  "iobj":{
531
- "p":0.6666666667,
532
- "r":0.56,
533
- "f":0.6086956522
534
  },
535
  "obl:agent":{
536
- "p":0.8333333333,
537
- "r":0.7142857143,
538
- "f":0.7692307692
539
  },
540
  "nsubj:caus":{
541
  "p":0.0,
@@ -558,9 +558,9 @@
558
  "f":0.0
559
  },
560
  "vocative":{
561
- "p":0.8333333333,
562
  "r":0.625,
563
- "f":0.7142857143
564
  },
565
  "dislocated":{
566
  "p":0.0,
@@ -568,9 +568,9 @@
568
  "f":0.0
569
  },
570
  "flat:foreign":{
571
- "p":0.5,
572
  "r":0.2857142857,
573
- "f":0.3636363636
574
  },
575
  "orphan":{
576
  "p":0.0,
@@ -588,34 +588,34 @@
588
  "f":0.0
589
  }
590
  },
591
- "tag_acc":0.9432561975,
592
- "lemma_acc":0.9076243321,
593
- "ents_p":0.8336070499,
594
- "ents_r":0.8330731356,
595
- "ents_f":0.8333400072,
596
  "ents_per_type":{
597
  "PER":{
598
- "p":0.8969594595,
599
- "r":0.9130902056,
600
- "f":0.9049529558
601
  },
602
  "LOC":{
603
- "p":0.8416312027,
604
- "r":0.8520873421,
605
- "f":0.846826997
606
  },
607
  "ORG":{
608
- "p":0.7680062124,
609
- "r":0.7549618321,
610
- "f":0.761428159
611
  },
612
  "MISC":{
613
- "p":0.7148241206,
614
- "r":0.6636535938,
615
- "f":0.6882891056
616
  }
617
  },
618
- "speed":4200.9996712213
619
  },
620
  "sources":[
621
  {
1
  {
2
  "lang":"fr",
3
  "name":"core_news_md",
4
+ "version":"3.5.0",
5
  "description":"French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"LGPL-LR",
10
+ "spacy_version":">=3.5.0,<3.6.0",
11
+ "spacy_git_version":"9e0322de1",
12
  "vectors":{
13
  "width":300,
14
  "vectors":20000,
290
  "senter"
291
  ],
292
  "performance":{
293
+ "token_acc":0.997952498,
294
  "token_p":0.9844389844,
295
  "token_r":0.9896058454,
296
  "token_f":0.9870156531,
297
+ "pos_acc":0.9737221764,
298
+ "morph_acc":0.9648987166,
299
+ "morph_micro_p":0.9868073879,
300
+ "morph_micro_r":0.9797733642,
301
+ "morph_micro_f":0.9832777965,
302
  "morph_per_feat":{
303
  "Definite":{
304
+ "p":0.9890430972,
305
+ "r":0.9883211679,
306
+ "f":0.9886820007
307
  },
308
  "Number":{
309
+ "p":0.9927751019,
310
+ "r":0.9865611193,
311
+ "f":0.9896583564
312
  },
313
  "PronType":{
314
+ "p":0.9954954955,
315
+ "r":0.9897632758,
316
+ "f":0.99262111
317
  },
318
  "Gender":{
319
+ "p":0.9822164948,
320
+ "r":0.9739330437,
321
+ "f":0.9780572308
322
  },
323
  "Mood":{
324
+ "p":0.9712230216,
325
+ "r":0.9591474245,
326
+ "f":0.9651474531
327
  },
328
  "Person":{
329
+ "p":0.9897959184,
330
+ "r":0.9761006289,
331
+ "f":0.98290057
332
  },
333
  "Tense":{
334
+ "p":0.9691358025,
335
+ "r":0.962206333,
336
+ "f":0.9656586366
337
  },
338
  "VerbForm":{
339
+ "p":0.9849498328,
340
+ "r":0.9751655629,
341
+ "f":0.9800332779
342
  },
343
  "NumType":{
344
  "p":1.0,
345
+ "r":0.9658703072,
346
+ "f":0.9826388889
347
  },
348
  "Reflex":{
349
+ "p":0.9777777778,
350
  "r":1.0,
351
+ "f":0.9887640449
352
  },
353
  "Voice":{
354
+ "p":0.906779661,
355
+ "r":0.9553571429,
356
+ "f":0.9304347826
357
  },
358
  "Poss":{
359
  "p":1.0,
361
  "f":1.0
362
  },
363
  "Polarity":{
364
+ "p":0.9882352941,
365
  "r":0.9882352941,
366
+ "f":0.9882352941
367
  }
368
  },
369
+ "sents_p":0.8819277108,
370
+ "sents_r":0.8946497306,
371
+ "sents_f":0.8851269649,
372
+ "dep_uas":0.8947398877,
373
+ "dep_las":0.8562998959,
374
  "dep_las_per_type":{
375
  "det":{
376
+ "p":0.9790153349,
377
+ "r":0.9790153349,
378
+ "f":0.9790153349
379
  },
380
  "nsubj":{
381
+ "p":0.8663484487,
382
  "r":0.8746987952,
383
+ "f":0.8705035971
384
  },
385
  "aux:tense":{
386
+ "p":0.952,
387
  "r":0.952,
388
+ "f":0.952
389
  },
390
  "root":{
391
+ "p":0.8671497585,
392
+ "r":0.8713592233,
393
+ "f":0.8692493947
394
  },
395
  "obj":{
396
+ "p":0.8450292398,
397
+ "r":0.8575667656,
398
+ "f":0.8512518409
399
  },
400
  "cc":{
401
+ "p":0.8858447489,
402
+ "r":0.8940092166,
403
+ "f":0.8899082569
404
  },
405
  "case":{
406
+ "p":0.9687712152,
407
+ "r":0.9720708447,
408
+ "f":0.9704182251
409
  },
410
  "obl:mod":{
411
+ "p":0.6698412698,
412
+ "r":0.6298507463,
413
+ "f":0.6492307692
414
  },
415
  "nmod":{
416
+ "p":0.812319538,
417
+ "r":0.8431568432,
418
+ "f":0.8274509804
419
  },
420
  "conj":{
421
+ "p":0.5533596838,
422
+ "r":0.5511811024,
423
+ "f":0.5522682446
424
  },
425
  "nummod":{
426
+ "p":0.9141104294,
427
+ "r":0.8816568047,
428
+ "f":0.8975903614
429
  },
430
  "amod":{
431
+ "p":0.9189686924,
432
  "r":0.9089253188,
433
+ "f":0.9139194139
434
  },
435
  "acl":{
436
+ "p":0.7100591716,
437
+ "r":0.6936416185,
438
+ "f":0.701754386
439
  },
440
  "mark":{
441
+ "p":0.8883928571,
442
+ "r":0.8766519824,
443
+ "f":0.8824833703
444
  },
445
  "xcomp":{
446
+ "p":0.8620689655,
447
+ "r":0.8278145695,
448
+ "f":0.8445945946
449
  },
450
  "flat:name":{
451
+ "p":0.8952380952,
452
+ "r":0.8952380952,
453
+ "f":0.8952380952
454
  },
455
  "cop":{
456
+ "p":0.8681318681,
457
  "r":0.8777777778,
458
+ "f":0.8729281768
459
  },
460
  "advmod":{
461
+ "p":0.8553054662,
462
+ "r":0.8338557994,
463
+ "f":0.8444444444
464
  },
465
  "obl:arg":{
466
+ "p":0.6772727273,
467
+ "r":0.6772727273,
468
+ "f":0.6772727273
469
  },
470
  "appos":{
471
+ "p":0.5227272727,
472
+ "r":0.5542168675,
473
+ "f":0.5380116959
474
  },
475
  "nsubj:pass":{
476
+ "p":0.8604651163,
477
+ "r":0.8705882353,
478
+ "f":0.865497076
479
  },
480
  "aux:pass":{
481
+ "p":0.9557522124,
482
  "r":0.9642857143,
483
+ "f":0.96
484
  },
485
  "acl:relcl":{
486
+ "p":0.6329113924,
487
+ "r":0.5813953488,
488
+ "f":0.6060606061
489
  },
490
  "advcl":{
491
+ "p":0.4756097561,
492
+ "r":0.5,
493
+ "f":0.4875
494
  },
495
  "fixed":{
496
+ "p":0.8202247191,
497
+ "r":0.73,
498
+ "f":0.7724867725
499
  },
500
  "dep":{
501
+ "p":0.25,
502
  "r":0.4827586207,
503
+ "f":0.3294117647
504
  },
505
  "expl:subj":{
506
+ "p":0.7647058824,
507
+ "r":0.8125,
508
+ "f":0.7878787879
509
  },
510
  "expl:comp":{
511
+ "p":0.6666666667,
512
+ "r":0.9333333333,
513
+ "f":0.7777777778
514
  },
515
  "expl:pass":{
516
+ "p":0.4,
517
  "r":0.2857142857,
518
+ "f":0.3333333333
519
  },
520
  "ccomp":{
521
+ "p":0.7037037037,
522
+ "r":0.7450980392,
523
+ "f":0.7238095238
524
  },
525
  "parataxis":{
526
+ "p":0.6875,
527
+ "r":0.3928571429,
528
+ "f":0.5
529
  },
530
  "iobj":{
531
+ "p":0.7857142857,
532
+ "r":0.44,
533
+ "f":0.5641025641
534
  },
535
  "obl:agent":{
536
+ "p":0.8974358974,
537
+ "r":0.8333333333,
538
+ "f":0.8641975309
539
  },
540
  "nsubj:caus":{
541
  "p":0.0,
558
  "f":0.0
559
  },
560
  "vocative":{
561
+ "p":1.0,
562
  "r":0.625,
563
+ "f":0.7692307692
564
  },
565
  "dislocated":{
566
  "p":0.0,
568
  "f":0.0
569
  },
570
  "flat:foreign":{
571
+ "p":1.0,
572
  "r":0.2857142857,
573
+ "f":0.4444444444
574
  },
575
  "orphan":{
576
  "p":0.0,
588
  "f":0.0
589
  }
590
  },
591
+ "tag_acc":0.9450741962,
592
+ "lemma_acc":0.9134812988,
593
+ "ents_p":0.8317031703,
594
+ "ents_r":0.8322525119,
595
+ "ents_f":0.8319777504,
596
  "ents_per_type":{
597
  "PER":{
598
+ "p":0.8970691842,
599
+ "r":0.9123020706,
600
+ "f":0.9046215055
601
  },
602
  "LOC":{
603
+ "p":0.8359820952,
604
+ "r":0.8515435456,
605
+ "f":0.8436910707
606
  },
607
  "ORG":{
608
+ "p":0.767491717,
609
+ "r":0.7515267176,
610
+ "f":0.7594253206
611
  },
612
  "MISC":{
613
+ "p":0.7205253996,
614
+ "r":0.6637993877,
615
+ "f":0.6910001518
616
  }
617
  },
618
+ "speed":4167.4553797123
619
  },
620
  "sources":[
621
  {
morphologizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b623888817c7f06c946dfff3db4dc07150916f9f5cdc41ee7ca1ea629eac2aed
3
  size 76873
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99a5aac09fe92ef869720f50a8eb849ff8eb447bc96efb5d7180c3375f20b406
3
  size 76873
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:001ed69c144ec0e00dcdf836d92860048eb08f5e87fe45dd73b93454bafdd647
3
- size 6496592
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc3d51bd1b57a4cc2812e4d202974a07800e678f916ee0e784d5b5df4c147016
3
+ size 6366382
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94a09cff55600c9285f0251f9b83fd6ed667ea1cd376eaba485693839d8a0d15
3
  size 304828
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f712371f7f93ae3cb2693484bc3241f73bd9af7b59f9a04c42387a1b69fe47a
3
  size 304828
senter/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1913b6e05150cd4e36490694cd24437d917dbb85e486e4a949f5d9cbc32a26c7
3
  size 219953
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc95dd3f757619b1ba0c252e77f59f5b98522fae7699c9b617a8e2ff8afd6723
3
  size 219953
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:639c4da8bd6d4af1f467a5a6b60b32f6ca3c330c163961c4d2ac70c55e43e749
3
- size 6365604
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73894f8d0b96d81b471fd094069c55c0861d18e7113c17731ba55c054c0b8f5d
3
+ size 6495793
tokenizer CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d399a4e4c03d3ff8f9f14701678b318876e8f6799c64e01f4d342fa86cf6ac3
3
- size 1515364
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c5994780fdf69912547eff13b24e61764efefae670274e56112354430afe0f4
3
+ size 1515440