adrianeboyd commited on
Commit
904faac
1 Parent(s): d996fdb

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -14,62 +14,62 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.8411510446
18
  - name: NER Recall
19
  type: recall
20
- value: 0.8413194027
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.8412352152
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
- value: 0.9442383014
31
  - task:
32
  name: POS
33
  type: token-classification
34
  metrics:
35
  - name: POS (UPOS) Accuracy
36
  type: accuracy
37
- value: 0.9729952587
38
  - task:
39
  name: MORPH
40
  type: token-classification
41
  metrics:
42
  - name: Morph (UFeats) Accuracy
43
  type: accuracy
44
- value: 0.9678251005
45
  - task:
46
  name: LEMMA
47
  type: token-classification
48
  metrics:
49
  - name: Lemma Accuracy
50
  type: accuracy
51
- value: 0.9071105631
52
  - task:
53
  name: UNLABELED_DEPENDENCIES
54
  type: token-classification
55
  metrics:
56
  - name: Unlabeled Attachment Score (UAS)
57
  type: f_score
58
- value: 0.8966076184
59
  - task:
60
  name: LABELED_DEPENDENCIES
61
  type: token-classification
62
  metrics:
63
  - name: Labeled Attachment Score (LAS)
64
  type: f_score
65
- value: 0.8581018519
66
  - task:
67
  name: SENTS
68
  type: token-classification
69
  metrics:
70
  - name: Sentences F-Score
71
  type: f_score
72
- value: 0.8963855422
73
  ---
74
  ### Details: https://spacy.io/models/fr#fr_core_news_lg
75
 
@@ -78,8 +78,8 @@ French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, s
78
  | Feature | Description |
79
  | --- | --- |
80
  | **Name** | `fr_core_news_lg` |
81
- | **Version** | `3.4.0` |
82
- | **spaCy** | `>=3.4.0,<3.5.0` |
83
  | **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
84
  | **Components** | `tok2vec`, `morphologizer`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
85
  | **Vectors** | 500000 keys, 500000 unique vectors (300 dimensions) |
@@ -105,22 +105,22 @@ French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, s
105
 
106
  | Type | Score |
107
  | --- | --- |
108
- | `TOKEN_ACC` | 99.90 |
109
  | `TOKEN_P` | 98.44 |
110
  | `TOKEN_R` | 98.96 |
111
  | `TOKEN_F` | 98.70 |
112
- | `POS_ACC` | 97.30 |
113
- | `MORPH_ACC` | 96.78 |
114
- | `MORPH_MICRO_P` | 98.89 |
115
- | `MORPH_MICRO_R` | 98.02 |
116
- | `MORPH_MICRO_F` | 98.45 |
117
- | `SENTS_P` | 89.00 |
118
- | `SENTS_R` | 90.29 |
119
- | `SENTS_F` | 89.64 |
120
- | `DEP_UAS` | 89.66 |
121
- | `DEP_LAS` | 85.81 |
122
- | `TAG_ACC` | 94.42 |
123
- | `LEMMA_ACC` | 90.71 |
124
- | `ENTS_P` | 84.12 |
125
- | `ENTS_R` | 84.13 |
126
- | `ENTS_F` | 84.12 |
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.8398572946
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.83869741
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.8392769516
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
+ value: 0.9446562919
31
  - task:
32
  name: POS
33
  type: token-classification
34
  metrics:
35
  - name: POS (UPOS) Accuracy
36
  type: accuracy
37
+ value: 0.9734102855
38
  - task:
39
  name: MORPH
40
  type: token-classification
41
  metrics:
42
  - name: Morph (UFeats) Accuracy
43
  type: accuracy
44
+ value: 0.9674260386
45
  - task:
46
  name: LEMMA
47
  type: token-classification
48
  metrics:
49
  - name: Lemma Accuracy
50
  type: accuracy
51
+ value: 0.9135840526
52
  - task:
53
  name: UNLABELED_DEPENDENCIES
54
  type: token-classification
55
  metrics:
56
  - name: Unlabeled Attachment Score (UAS)
57
  type: f_score
58
+ value: 0.9028935185
59
  - task:
60
  name: LABELED_DEPENDENCIES
61
  type: token-classification
62
  metrics:
63
  - name: Labeled Attachment Score (LAS)
64
  type: f_score
65
+ value: 0.8654090962
66
  - task:
67
  name: SENTS
68
  type: token-classification
69
  metrics:
70
  - name: Sentences F-Score
71
  type: f_score
72
+ value: 0.8735083532
73
  ---
74
  ### Details: https://spacy.io/models/fr#fr_core_news_lg
75
 
78
  | Feature | Description |
79
  | --- | --- |
80
  | **Name** | `fr_core_news_lg` |
81
+ | **Version** | `3.5.0` |
82
+ | **spaCy** | `>=3.5.0,<3.6.0` |
83
  | **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
84
  | **Components** | `tok2vec`, `morphologizer`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
85
  | **Vectors** | 500000 keys, 500000 unique vectors (300 dimensions) |
105
 
106
  | Type | Score |
107
  | --- | --- |
108
+ | `TOKEN_ACC` | 99.80 |
109
  | `TOKEN_P` | 98.44 |
110
  | `TOKEN_R` | 98.96 |
111
  | `TOKEN_F` | 98.70 |
112
+ | `POS_ACC` | 97.34 |
113
+ | `MORPH_ACC` | 96.74 |
114
+ | `MORPH_MICRO_P` | 98.91 |
115
+ | `MORPH_MICRO_R` | 98.17 |
116
+ | `MORPH_MICRO_F` | 98.54 |
117
+ | `SENTS_P` | 85.92 |
118
+ | `SENTS_R` | 89.26 |
119
+ | `SENTS_F` | 87.35 |
120
+ | `DEP_UAS` | 90.29 |
121
+ | `DEP_LAS` | 86.54 |
122
+ | `TAG_ACC` | 94.47 |
123
+ | `LEMMA_ACC` | 91.36 |
124
+ | `ENTS_P` | 83.99 |
125
+ | `ENTS_R` | 83.87 |
126
+ | `ENTS_F` | 83.93 |
accuracy.json CHANGED
@@ -1,73 +1,73 @@
1
  {
2
- "token_acc": 0.9989751998,
3
  "token_p": 0.9844389844,
4
  "token_r": 0.9896058454,
5
  "token_f": 0.9870156531,
6
- "pos_acc": 0.9729952587,
7
- "morph_acc": 0.9678251005,
8
- "morph_micro_p": 0.9889360133,
9
- "morph_micro_r": 0.9801998294,
10
- "morph_micro_f": 0.9845485421,
11
  "morph_per_feat": {
12
  "Definite": {
13
- "p": 0.9890029326,
14
- "r": 0.9846715328,
15
- "f": 0.9868324799
16
  },
17
  "Number": {
18
- "p": 0.9955464836,
19
- "r": 0.9876656848,
20
- "f": 0.991590426
21
  },
22
  "PronType": {
23
- "p": 0.9961265332,
24
- "r": 0.9872040947,
25
- "f": 0.9916452442
26
  },
27
  "Gender": {
28
- "p": 0.9855595668,
29
- "r": 0.976744186,
30
- "f": 0.9811320755
31
  },
32
  "Mood": {
33
- "p": 0.98,
34
- "r": 0.9573712256,
35
- "f": 0.9685534591
36
  },
37
  "Person": {
38
- "p": 0.9871794872,
39
- "r": 0.9685534591,
40
- "f": 0.9777777778
41
  },
42
  "Tense": {
43
- "p": 0.97327852,
44
- "r": 0.9673135853,
45
- "f": 0.9702868852
46
  },
47
  "VerbForm": {
48
- "p": 0.9824854045,
49
- "r": 0.9751655629,
50
- "f": 0.9788117989
51
  },
52
  "NumType": {
53
  "p": 1.0,
54
- "r": 0.9692832765,
55
- "f": 0.9844020797
56
  },
57
  "Reflex": {
58
- "p": 1.0,
59
  "r": 1.0,
60
- "f": 1.0
61
  },
62
  "Voice": {
63
- "p": 0.9145299145,
64
  "r": 0.9553571429,
65
- "f": 0.9344978166
66
  },
67
  "Poss": {
68
- "p": 1.0,
69
  "r": 1.0,
70
- "f": 1.0
71
  },
72
  "Polarity": {
73
  "p": 0.9882352941,
@@ -75,36 +75,36 @@
75
  "f": 0.9882352941
76
  }
77
  },
78
- "sents_p": 0.8899521531,
79
- "sents_r": 0.9029126214,
80
- "sents_f": 0.8963855422,
81
- "dep_uas": 0.8966076184,
82
- "dep_las": 0.8581018519,
83
  "dep_las_per_type": {
84
  "det": {
85
- "p": 0.9805668016,
86
- "r": 0.9774011299,
87
- "f": 0.9789814066
88
  },
89
  "nsubj": {
90
- "p": 0.8886198547,
91
- "r": 0.8843373494,
92
- "f": 0.88647343
93
  },
94
  "aux:tense": {
95
- "p": 0.936,
96
- "r": 0.936,
97
- "f": 0.936
98
  },
99
  "root": {
100
- "p": 0.8758949881,
101
- "r": 0.890776699,
102
- "f": 0.8832731649
103
  },
104
  "obj": {
105
- "p": 0.8362573099,
106
- "r": 0.8486646884,
107
- "f": 0.8424153166
108
  },
109
  "cc": {
110
  "p": 0.8812785388,
@@ -112,139 +112,139 @@
112
  "f": 0.8853211009
113
  },
114
  "case": {
115
- "p": 0.9669365722,
116
- "r": 0.9761580381,
117
- "f": 0.9715254237
118
  },
119
  "obl:mod": {
120
- "p": 0.6948051948,
121
- "r": 0.6388059701,
122
- "f": 0.66562986
123
  },
124
  "nmod": {
125
- "p": 0.8057142857,
126
- "r": 0.8451548452,
127
- "f": 0.8249634325
128
  },
129
  "conj": {
130
- "p": 0.5555555556,
131
- "r": 0.531496063,
132
- "f": 0.5432595573
133
  },
134
  "nummod": {
135
- "p": 0.9119496855,
136
- "r": 0.8579881657,
137
- "f": 0.8841463415
138
  },
139
  "amod": {
140
- "p": 0.9539347409,
141
- "r": 0.9052823315,
142
- "f": 0.9289719626
143
  },
144
  "acl": {
145
- "p": 0.6971428571,
146
- "r": 0.7052023121,
147
- "f": 0.7011494253
148
  },
149
  "mark": {
150
- "p": 0.8839285714,
151
- "r": 0.872246696,
152
- "f": 0.8780487805
153
  },
154
  "xcomp": {
155
- "p": 0.8671328671,
156
- "r": 0.821192053,
157
- "f": 0.843537415
158
  },
159
  "flat:name": {
160
- "p": 0.9393939394,
161
- "r": 0.8857142857,
162
- "f": 0.9117647059
163
  },
164
  "cop": {
165
- "p": 0.9101123596,
166
- "r": 0.9,
167
- "f": 0.905027933
168
  },
169
  "advmod": {
170
- "p": 0.8525641026,
171
- "r": 0.8338557994,
172
- "f": 0.8431061807
173
  },
174
  "obl:arg": {
175
- "p": 0.704845815,
176
- "r": 0.7272727273,
177
- "f": 0.7158836689
178
  },
179
  "appos": {
180
- "p": 0.5056179775,
181
- "r": 0.5421686747,
182
- "f": 0.523255814
183
  },
184
  "nsubj:pass": {
185
- "p": 0.869047619,
186
- "r": 0.8588235294,
187
- "f": 0.8639053254
188
  },
189
  "aux:pass": {
190
- "p": 0.9230769231,
191
  "r": 0.9642857143,
192
- "f": 0.943231441
193
  },
194
  "acl:relcl": {
195
- "p": 0.5833333333,
196
- "r": 0.5697674419,
197
- "f": 0.5764705882
198
  },
199
  "advcl": {
200
- "p": 0.4831460674,
201
- "r": 0.5512820513,
202
- "f": 0.5149700599
203
  },
204
  "fixed": {
205
- "p": 0.8295454545,
206
- "r": 0.73,
207
- "f": 0.7765957447
208
  },
209
  "dep": {
210
- "p": 0.2372881356,
211
- "r": 0.4827586207,
212
- "f": 0.3181818182
213
  },
214
  "expl:subj": {
215
- "p": 0.8181818182,
216
- "r": 0.84375,
217
- "f": 0.8307692308
218
  },
219
  "expl:comp": {
220
- "p": 0.6097560976,
221
- "r": 0.8333333333,
222
- "f": 0.7042253521
223
  },
224
  "expl:pass": {
225
- "p": 0.25,
226
  "r": 0.1428571429,
227
- "f": 0.1818181818
228
  },
229
  "ccomp": {
230
- "p": 0.7391304348,
231
- "r": 0.6666666667,
232
- "f": 0.7010309278
233
  },
234
  "parataxis": {
235
- "p": 0.6,
236
- "r": 0.4285714286,
237
- "f": 0.5
238
  },
239
  "iobj": {
240
- "p": 0.7058823529,
241
- "r": 0.48,
242
- "f": 0.5714285714
243
  },
244
  "obl:agent": {
245
- "p": 0.8974358974,
246
- "r": 0.8333333333,
247
- "f": 0.8641975309
248
  },
249
  "nsubj:caus": {
250
  "p": 0.0,
@@ -277,9 +277,9 @@
277
  "f": 0.0
278
  },
279
  "flat:foreign": {
280
- "p": 1.0,
281
- "r": 0.4285714286,
282
- "f": 0.6
283
  },
284
  "orphan": {
285
  "p": 0.0,
@@ -297,32 +297,32 @@
297
  "f": 0.0
298
  }
299
  },
300
- "tag_acc": 0.9442383014,
301
- "lemma_acc": 0.9071105631,
302
- "ents_p": 0.8411510446,
303
- "ents_r": 0.8413194027,
304
- "ents_f": 0.8412352152,
305
  "ents_per_type": {
306
  "PER": {
307
- "p": 0.9102898653,
308
- "r": 0.9247689332,
309
- "f": 0.9174722775
310
  },
311
  "LOC": {
312
- "p": 0.8429506981,
313
- "r": 0.8561030704,
314
- "f": 0.849475978
315
  },
316
  "ORG": {
317
- "p": 0.7854503015,
318
- "r": 0.770610687,
319
- "f": 0.7779597341
320
  },
321
  "MISC": {
322
- "p": 0.7254040483,
323
- "r": 0.674004957,
324
- "f": 0.6987605804
325
  }
326
  },
327
- "speed": 4391.3854054034
328
  }
1
  {
2
+ "token_acc": 0.997952498,
3
  "token_p": 0.9844389844,
4
  "token_r": 0.9896058454,
5
  "token_f": 0.9870156531,
6
+ "pos_acc": 0.9734102855,
7
+ "morph_acc": 0.9674260386,
8
+ "morph_micro_p": 0.9891344383,
9
+ "morph_micro_r": 0.9816619959,
10
+ "morph_micro_f": 0.9853840509,
11
  "morph_per_feat": {
12
  "Definite": {
13
+ "p": 0.9890350877,
14
+ "r": 0.9875912409,
15
+ "f": 0.988312637
16
  },
17
  "Number": {
18
+ "p": 0.9935149157,
19
+ "r": 0.9871134021,
20
+ "f": 0.9903038138
21
  },
22
  "PronType": {
23
+ "p": 0.9954867827,
24
+ "r": 0.98784389,
25
+ "f": 0.9916506101
26
  },
27
  "Gender": {
28
+ "p": 0.9845559846,
29
+ "r": 0.9775108612,
30
+ "f": 0.9810207746
31
  },
32
  "Mood": {
33
+ "p": 0.9801444043,
34
+ "r": 0.9644760213,
35
+ "f": 0.9722470904
36
  },
37
  "Person": {
38
+ "p": 0.993622449,
39
+ "r": 0.9798742138,
40
+ "f": 0.9867004433
41
  },
42
  "Tense": {
43
+ "p": 0.9834710744,
44
+ "r": 0.9724208376,
45
+ "f": 0.9779147406
46
  },
47
  "VerbForm": {
48
+ "p": 0.9866220736,
49
+ "r": 0.9768211921,
50
+ "f": 0.9816971714
51
  },
52
  "NumType": {
53
  "p": 1.0,
54
+ "r": 0.9658703072,
55
+ "f": 0.9826388889
56
  },
57
  "Reflex": {
58
+ "p": 0.9777777778,
59
  "r": 1.0,
60
+ "f": 0.9887640449
61
  },
62
  "Voice": {
63
+ "p": 0.9224137931,
64
  "r": 0.9553571429,
65
+ "f": 0.9385964912
66
  },
67
  "Poss": {
68
+ "p": 0.9827586207,
69
  "r": 1.0,
70
+ "f": 0.9913043478
71
  },
72
  "Polarity": {
73
  "p": 0.9882352941,
75
  "f": 0.9882352941
76
  }
77
  },
78
+ "sents_p": 0.8591549296,
79
+ "sents_r": 0.8926096998,
80
+ "sents_f": 0.8735083532,
81
+ "dep_uas": 0.9028935185,
82
+ "dep_las": 0.8654090962,
83
  "dep_las_per_type": {
84
  "det": {
85
+ "p": 0.9805982215,
86
+ "r": 0.9790153349,
87
+ "f": 0.9798061389
88
  },
89
  "nsubj": {
90
+ "p": 0.8985148515,
91
+ "r": 0.8746987952,
92
+ "f": 0.8864468864
93
  },
94
  "aux:tense": {
95
+ "p": 0.96,
96
+ "r": 0.96,
97
+ "f": 0.96
98
  },
99
  "root": {
100
+ "p": 0.8700696056,
101
+ "r": 0.9101941748,
102
+ "f": 0.8896797153
103
  },
104
  "obj": {
105
+ "p": 0.8433048433,
106
+ "r": 0.8783382789,
107
+ "f": 0.8604651163
108
  },
109
  "cc": {
110
  "p": 0.8812785388,
112
  "f": 0.8853211009
113
  },
114
  "case": {
115
+ "p": 0.9695121951,
116
+ "r": 0.9747956403,
117
+ "f": 0.9721467391
118
  },
119
  "obl:mod": {
120
+ "p": 0.6866666667,
121
+ "r": 0.6149253731,
122
+ "f": 0.6488188976
123
  },
124
  "nmod": {
125
+ "p": 0.8181818182,
126
+ "r": 0.8541458541,
127
+ "f": 0.8357771261
128
  },
129
  "conj": {
130
+ "p": 0.5809128631,
131
+ "r": 0.5511811024,
132
+ "f": 0.5656565657
133
  },
134
  "nummod": {
135
+ "p": 0.9079754601,
136
+ "r": 0.875739645,
137
+ "f": 0.8915662651
138
  },
139
  "amod": {
140
+ "p": 0.9242144177,
141
+ "r": 0.9107468124,
142
+ "f": 0.9174311927
143
  },
144
  "acl": {
145
+ "p": 0.7076023392,
146
+ "r": 0.6994219653,
147
+ "f": 0.7034883721
148
  },
149
  "mark": {
150
+ "p": 0.8826086957,
151
+ "r": 0.8942731278,
152
+ "f": 0.8884026258
153
  },
154
  "xcomp": {
155
+ "p": 0.8445945946,
156
+ "r": 0.8278145695,
157
+ "f": 0.8361204013
158
  },
159
  "flat:name": {
160
+ "p": 0.9292929293,
161
+ "r": 0.8761904762,
162
+ "f": 0.9019607843
163
  },
164
  "cop": {
165
+ "p": 0.8913043478,
166
+ "r": 0.9111111111,
167
+ "f": 0.9010989011
168
  },
169
  "advmod": {
170
+ "p": 0.8817891374,
171
+ "r": 0.8652037618,
172
+ "f": 0.8734177215
173
  },
174
  "obl:arg": {
175
+ "p": 0.7056074766,
176
+ "r": 0.6863636364,
177
+ "f": 0.6958525346
178
  },
179
  "appos": {
180
+ "p": 0.5222222222,
181
+ "r": 0.5662650602,
182
+ "f": 0.5433526012
183
  },
184
  "nsubj:pass": {
185
+ "p": 0.9024390244,
186
+ "r": 0.8705882353,
187
+ "f": 0.8862275449
188
  },
189
  "aux:pass": {
190
+ "p": 0.9642857143,
191
  "r": 0.9642857143,
192
+ "f": 0.9642857143
193
  },
194
  "acl:relcl": {
195
+ "p": 0.7341772152,
196
+ "r": 0.6744186047,
197
+ "f": 0.703030303
198
  },
199
  "advcl": {
200
+ "p": 0.5632183908,
201
+ "r": 0.6282051282,
202
+ "f": 0.5939393939
203
  },
204
  "fixed": {
205
+ "p": 0.8,
206
+ "r": 0.76,
207
+ "f": 0.7794871795
208
  },
209
  "dep": {
210
+ "p": 0.3,
211
+ "r": 0.6206896552,
212
+ "f": 0.404494382
213
  },
214
  "expl:subj": {
215
+ "p": 0.8387096774,
216
+ "r": 0.8125,
217
+ "f": 0.8253968254
218
  },
219
  "expl:comp": {
220
+ "p": 0.675,
221
+ "r": 0.9,
222
+ "f": 0.7714285714
223
  },
224
  "expl:pass": {
225
+ "p": 0.3333333333,
226
  "r": 0.1428571429,
227
+ "f": 0.2
228
  },
229
  "ccomp": {
230
+ "p": 0.8,
231
+ "r": 0.7843137255,
232
+ "f": 0.7920792079
233
  },
234
  "parataxis": {
235
+ "p": 0.55,
236
+ "r": 0.3928571429,
237
+ "f": 0.4583333333
238
  },
239
  "iobj": {
240
+ "p": 0.7222222222,
241
+ "r": 0.52,
242
+ "f": 0.6046511628
243
  },
244
  "obl:agent": {
245
+ "p": 0.8947368421,
246
+ "r": 0.8095238095,
247
+ "f": 0.85
248
  },
249
  "nsubj:caus": {
250
  "p": 0.0,
277
  "f": 0.0
278
  },
279
  "flat:foreign": {
280
+ "p": 0.5,
281
+ "r": 0.1428571429,
282
+ "f": 0.2222222222
283
  },
284
  "orphan": {
285
  "p": 0.0,
297
  "f": 0.0
298
  }
299
  },
300
+ "tag_acc": 0.9446562919,
301
+ "lemma_acc": 0.9135840526,
302
+ "ents_p": 0.8398572946,
303
+ "ents_r": 0.83869741,
304
+ "ents_f": 0.8392769516,
305
  "ents_per_type": {
306
  "PER": {
307
+ "p": 0.9071242337,
308
+ "r": 0.9223328796,
309
+ "f": 0.9146653403
310
  },
311
  "LOC": {
312
+ "p": 0.8418339999,
313
+ "r": 0.8540533757,
314
+ "f": 0.8478996657
315
  },
316
  "ORG": {
317
+ "p": 0.7824390244,
318
+ "r": 0.7652671756,
319
+ "f": 0.7737578389
320
  },
321
  "MISC": {
322
+ "p": 0.7278621126,
323
+ "r": 0.67108908,
324
+ "f": 0.6983235986
325
  }
326
  },
327
+ "speed": 3541.0086678905
328
  }
attribute_ruler/patterns CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
config.cfg CHANGED
@@ -70,8 +70,8 @@ nO = null
70
  [components.ner.model.tok2vec.embed]
71
  @architectures = "spacy.MultiHashEmbed.v2"
72
  width = 96
73
- attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
74
- rows = [5000,1000,2500,2500,50]
75
  include_static_vectors = true
76
 
77
  [components.ner.model.tok2vec.encode]
@@ -139,8 +139,8 @@ factory = "tok2vec"
139
  [components.tok2vec.model.embed]
140
  @architectures = "spacy.MultiHashEmbed.v2"
141
  width = ${components.tok2vec.model.encode:width}
142
- attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
143
- rows = [5000,1000,2500,2500,50]
144
  include_static_vectors = true
145
 
146
  [components.tok2vec.model.encode]
@@ -182,6 +182,7 @@ eval_frequency = 1000
182
  frozen_components = []
183
  before_to_disk = null
184
  annotating_components = []
 
185
 
186
  [training.batcher]
187
  @batchers = "spacy.batch_by_words.v1"
70
  [components.ner.model.tok2vec.embed]
71
  @architectures = "spacy.MultiHashEmbed.v2"
72
  width = 96
73
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
74
+ rows = [5000,1000,2500,2500]
75
  include_static_vectors = true
76
 
77
  [components.ner.model.tok2vec.encode]
139
  [components.tok2vec.model.embed]
140
  @architectures = "spacy.MultiHashEmbed.v2"
141
  width = ${components.tok2vec.model.encode:width}
142
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY","IS_SPACE"]
143
+ rows = [5000,1000,2500,2500,50,50]
144
  include_static_vectors = true
145
 
146
  [components.tok2vec.model.encode]
182
  frozen_components = []
183
  before_to_disk = null
184
  annotating_components = []
185
+ before_update = null
186
 
187
  [training.batcher]
188
  @batchers = "spacy.batch_by_words.v1"
fr_core_news_lg-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b68c2e0472a66287690293cd08b47a7947a1df4ae4f729de1fd48983dd0beaf9
3
- size 571829411
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bcfe20998d29f134e5c87cb543a1142a46c29b2d2a5773e1e178b7812854ff9
3
+ size 571836624
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"fr",
3
  "name":"core_news_lg",
4
- "version":"3.4.0",
5
  "description":"French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"LGPL-LR",
10
- "spacy_version":">=3.4.0,<3.5.0",
11
- "spacy_git_version":"dd038b536",
12
  "vectors":{
13
  "width":300,
14
  "vectors":500000,
@@ -290,75 +290,75 @@
290
  "senter"
291
  ],
292
  "performance":{
293
- "token_acc":0.9989751998,
294
  "token_p":0.9844389844,
295
  "token_r":0.9896058454,
296
  "token_f":0.9870156531,
297
- "pos_acc":0.9729952587,
298
- "morph_acc":0.9678251005,
299
- "morph_micro_p":0.9889360133,
300
- "morph_micro_r":0.9801998294,
301
- "morph_micro_f":0.9845485421,
302
  "morph_per_feat":{
303
  "Definite":{
304
- "p":0.9890029326,
305
- "r":0.9846715328,
306
- "f":0.9868324799
307
  },
308
  "Number":{
309
- "p":0.9955464836,
310
- "r":0.9876656848,
311
- "f":0.991590426
312
  },
313
  "PronType":{
314
- "p":0.9961265332,
315
- "r":0.9872040947,
316
- "f":0.9916452442
317
  },
318
  "Gender":{
319
- "p":0.9855595668,
320
- "r":0.976744186,
321
- "f":0.9811320755
322
  },
323
  "Mood":{
324
- "p":0.98,
325
- "r":0.9573712256,
326
- "f":0.9685534591
327
  },
328
  "Person":{
329
- "p":0.9871794872,
330
- "r":0.9685534591,
331
- "f":0.9777777778
332
  },
333
  "Tense":{
334
- "p":0.97327852,
335
- "r":0.9673135853,
336
- "f":0.9702868852
337
  },
338
  "VerbForm":{
339
- "p":0.9824854045,
340
- "r":0.9751655629,
341
- "f":0.9788117989
342
  },
343
  "NumType":{
344
  "p":1.0,
345
- "r":0.9692832765,
346
- "f":0.9844020797
347
  },
348
  "Reflex":{
349
- "p":1.0,
350
  "r":1.0,
351
- "f":1.0
352
  },
353
  "Voice":{
354
- "p":0.9145299145,
355
  "r":0.9553571429,
356
- "f":0.9344978166
357
  },
358
  "Poss":{
359
- "p":1.0,
360
  "r":1.0,
361
- "f":1.0
362
  },
363
  "Polarity":{
364
  "p":0.9882352941,
@@ -366,36 +366,36 @@
366
  "f":0.9882352941
367
  }
368
  },
369
- "sents_p":0.8899521531,
370
- "sents_r":0.9029126214,
371
- "sents_f":0.8963855422,
372
- "dep_uas":0.8966076184,
373
- "dep_las":0.8581018519,
374
  "dep_las_per_type":{
375
  "det":{
376
- "p":0.9805668016,
377
- "r":0.9774011299,
378
- "f":0.9789814066
379
  },
380
  "nsubj":{
381
- "p":0.8886198547,
382
- "r":0.8843373494,
383
- "f":0.88647343
384
  },
385
  "aux:tense":{
386
- "p":0.936,
387
- "r":0.936,
388
- "f":0.936
389
  },
390
  "root":{
391
- "p":0.8758949881,
392
- "r":0.890776699,
393
- "f":0.8832731649
394
  },
395
  "obj":{
396
- "p":0.8362573099,
397
- "r":0.8486646884,
398
- "f":0.8424153166
399
  },
400
  "cc":{
401
  "p":0.8812785388,
@@ -403,139 +403,139 @@
403
  "f":0.8853211009
404
  },
405
  "case":{
406
- "p":0.9669365722,
407
- "r":0.9761580381,
408
- "f":0.9715254237
409
  },
410
  "obl:mod":{
411
- "p":0.6948051948,
412
- "r":0.6388059701,
413
- "f":0.66562986
414
  },
415
  "nmod":{
416
- "p":0.8057142857,
417
- "r":0.8451548452,
418
- "f":0.8249634325
419
  },
420
  "conj":{
421
- "p":0.5555555556,
422
- "r":0.531496063,
423
- "f":0.5432595573
424
  },
425
  "nummod":{
426
- "p":0.9119496855,
427
- "r":0.8579881657,
428
- "f":0.8841463415
429
  },
430
  "amod":{
431
- "p":0.9539347409,
432
- "r":0.9052823315,
433
- "f":0.9289719626
434
  },
435
  "acl":{
436
- "p":0.6971428571,
437
- "r":0.7052023121,
438
- "f":0.7011494253
439
  },
440
  "mark":{
441
- "p":0.8839285714,
442
- "r":0.872246696,
443
- "f":0.8780487805
444
  },
445
  "xcomp":{
446
- "p":0.8671328671,
447
- "r":0.821192053,
448
- "f":0.843537415
449
  },
450
  "flat:name":{
451
- "p":0.9393939394,
452
- "r":0.8857142857,
453
- "f":0.9117647059
454
  },
455
  "cop":{
456
- "p":0.9101123596,
457
- "r":0.9,
458
- "f":0.905027933
459
  },
460
  "advmod":{
461
- "p":0.8525641026,
462
- "r":0.8338557994,
463
- "f":0.8431061807
464
  },
465
  "obl:arg":{
466
- "p":0.704845815,
467
- "r":0.7272727273,
468
- "f":0.7158836689
469
  },
470
  "appos":{
471
- "p":0.5056179775,
472
- "r":0.5421686747,
473
- "f":0.523255814
474
  },
475
  "nsubj:pass":{
476
- "p":0.869047619,
477
- "r":0.8588235294,
478
- "f":0.8639053254
479
  },
480
  "aux:pass":{
481
- "p":0.9230769231,
482
  "r":0.9642857143,
483
- "f":0.943231441
484
  },
485
  "acl:relcl":{
486
- "p":0.5833333333,
487
- "r":0.5697674419,
488
- "f":0.5764705882
489
  },
490
  "advcl":{
491
- "p":0.4831460674,
492
- "r":0.5512820513,
493
- "f":0.5149700599
494
  },
495
  "fixed":{
496
- "p":0.8295454545,
497
- "r":0.73,
498
- "f":0.7765957447
499
  },
500
  "dep":{
501
- "p":0.2372881356,
502
- "r":0.4827586207,
503
- "f":0.3181818182
504
  },
505
  "expl:subj":{
506
- "p":0.8181818182,
507
- "r":0.84375,
508
- "f":0.8307692308
509
  },
510
  "expl:comp":{
511
- "p":0.6097560976,
512
- "r":0.8333333333,
513
- "f":0.7042253521
514
  },
515
  "expl:pass":{
516
- "p":0.25,
517
  "r":0.1428571429,
518
- "f":0.1818181818
519
  },
520
  "ccomp":{
521
- "p":0.7391304348,
522
- "r":0.6666666667,
523
- "f":0.7010309278
524
  },
525
  "parataxis":{
526
- "p":0.6,
527
- "r":0.4285714286,
528
- "f":0.5
529
  },
530
  "iobj":{
531
- "p":0.7058823529,
532
- "r":0.48,
533
- "f":0.5714285714
534
  },
535
  "obl:agent":{
536
- "p":0.8974358974,
537
- "r":0.8333333333,
538
- "f":0.8641975309
539
  },
540
  "nsubj:caus":{
541
  "p":0.0,
@@ -568,9 +568,9 @@
568
  "f":0.0
569
  },
570
  "flat:foreign":{
571
- "p":1.0,
572
- "r":0.4285714286,
573
- "f":0.6
574
  },
575
  "orphan":{
576
  "p":0.0,
@@ -588,34 +588,34 @@
588
  "f":0.0
589
  }
590
  },
591
- "tag_acc":0.9442383014,
592
- "lemma_acc":0.9071105631,
593
- "ents_p":0.8411510446,
594
- "ents_r":0.8413194027,
595
- "ents_f":0.8412352152,
596
  "ents_per_type":{
597
  "PER":{
598
- "p":0.9102898653,
599
- "r":0.9247689332,
600
- "f":0.9174722775
601
  },
602
  "LOC":{
603
- "p":0.8429506981,
604
- "r":0.8561030704,
605
- "f":0.849475978
606
  },
607
  "ORG":{
608
- "p":0.7854503015,
609
- "r":0.770610687,
610
- "f":0.7779597341
611
  },
612
  "MISC":{
613
- "p":0.7254040483,
614
- "r":0.674004957,
615
- "f":0.6987605804
616
  }
617
  },
618
- "speed":4391.3854054034
619
  },
620
  "sources":[
621
  {
1
  {
2
  "lang":"fr",
3
  "name":"core_news_lg",
4
+ "version":"3.5.0",
5
  "description":"French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"LGPL-LR",
10
+ "spacy_version":">=3.5.0,<3.6.0",
11
+ "spacy_git_version":"9e0322de1",
12
  "vectors":{
13
  "width":300,
14
  "vectors":500000,
290
  "senter"
291
  ],
292
  "performance":{
293
+ "token_acc":0.997952498,
294
  "token_p":0.9844389844,
295
  "token_r":0.9896058454,
296
  "token_f":0.9870156531,
297
+ "pos_acc":0.9734102855,
298
+ "morph_acc":0.9674260386,
299
+ "morph_micro_p":0.9891344383,
300
+ "morph_micro_r":0.9816619959,
301
+ "morph_micro_f":0.9853840509,
302
  "morph_per_feat":{
303
  "Definite":{
304
+ "p":0.9890350877,
305
+ "r":0.9875912409,
306
+ "f":0.988312637
307
  },
308
  "Number":{
309
+ "p":0.9935149157,
310
+ "r":0.9871134021,
311
+ "f":0.9903038138
312
  },
313
  "PronType":{
314
+ "p":0.9954867827,
315
+ "r":0.98784389,
316
+ "f":0.9916506101
317
  },
318
  "Gender":{
319
+ "p":0.9845559846,
320
+ "r":0.9775108612,
321
+ "f":0.9810207746
322
  },
323
  "Mood":{
324
+ "p":0.9801444043,
325
+ "r":0.9644760213,
326
+ "f":0.9722470904
327
  },
328
  "Person":{
329
+ "p":0.993622449,
330
+ "r":0.9798742138,
331
+ "f":0.9867004433
332
  },
333
  "Tense":{
334
+ "p":0.9834710744,
335
+ "r":0.9724208376,
336
+ "f":0.9779147406
337
  },
338
  "VerbForm":{
339
+ "p":0.9866220736,
340
+ "r":0.9768211921,
341
+ "f":0.9816971714
342
  },
343
  "NumType":{
344
  "p":1.0,
345
+ "r":0.9658703072,
346
+ "f":0.9826388889
347
  },
348
  "Reflex":{
349
+ "p":0.9777777778,
350
  "r":1.0,
351
+ "f":0.9887640449
352
  },
353
  "Voice":{
354
+ "p":0.9224137931,
355
  "r":0.9553571429,
356
+ "f":0.9385964912
357
  },
358
  "Poss":{
359
+ "p":0.9827586207,
360
  "r":1.0,
361
+ "f":0.9913043478
362
  },
363
  "Polarity":{
364
  "p":0.9882352941,
366
  "f":0.9882352941
367
  }
368
  },
369
+ "sents_p":0.8591549296,
370
+ "sents_r":0.8926096998,
371
+ "sents_f":0.8735083532,
372
+ "dep_uas":0.9028935185,
373
+ "dep_las":0.8654090962,
374
  "dep_las_per_type":{
375
  "det":{
376
+ "p":0.9805982215,
377
+ "r":0.9790153349,
378
+ "f":0.9798061389
379
  },
380
  "nsubj":{
381
+ "p":0.8985148515,
382
+ "r":0.8746987952,
383
+ "f":0.8864468864
384
  },
385
  "aux:tense":{
386
+ "p":0.96,
387
+ "r":0.96,
388
+ "f":0.96
389
  },
390
  "root":{
391
+ "p":0.8700696056,
392
+ "r":0.9101941748,
393
+ "f":0.8896797153
394
  },
395
  "obj":{
396
+ "p":0.8433048433,
397
+ "r":0.8783382789,
398
+ "f":0.8604651163
399
  },
400
  "cc":{
401
  "p":0.8812785388,
403
  "f":0.8853211009
404
  },
405
  "case":{
406
+ "p":0.9695121951,
407
+ "r":0.9747956403,
408
+ "f":0.9721467391
409
  },
410
  "obl:mod":{
411
+ "p":0.6866666667,
412
+ "r":0.6149253731,
413
+ "f":0.6488188976
414
  },
415
  "nmod":{
416
+ "p":0.8181818182,
417
+ "r":0.8541458541,
418
+ "f":0.8357771261
419
  },
420
  "conj":{
421
+ "p":0.5809128631,
422
+ "r":0.5511811024,
423
+ "f":0.5656565657
424
  },
425
  "nummod":{
426
+ "p":0.9079754601,
427
+ "r":0.875739645,
428
+ "f":0.8915662651
429
  },
430
  "amod":{
431
+ "p":0.9242144177,
432
+ "r":0.9107468124,
433
+ "f":0.9174311927
434
  },
435
  "acl":{
436
+ "p":0.7076023392,
437
+ "r":0.6994219653,
438
+ "f":0.7034883721
439
  },
440
  "mark":{
441
+ "p":0.8826086957,
442
+ "r":0.8942731278,
443
+ "f":0.8884026258
444
  },
445
  "xcomp":{
446
+ "p":0.8445945946,
447
+ "r":0.8278145695,
448
+ "f":0.8361204013
449
  },
450
  "flat:name":{
451
+ "p":0.9292929293,
452
+ "r":0.8761904762,
453
+ "f":0.9019607843
454
  },
455
  "cop":{
456
+ "p":0.8913043478,
457
+ "r":0.9111111111,
458
+ "f":0.9010989011
459
  },
460
  "advmod":{
461
+ "p":0.8817891374,
462
+ "r":0.8652037618,
463
+ "f":0.8734177215
464
  },
465
  "obl:arg":{
466
+ "p":0.7056074766,
467
+ "r":0.6863636364,
468
+ "f":0.6958525346
469
  },
470
  "appos":{
471
+ "p":0.5222222222,
472
+ "r":0.5662650602,
473
+ "f":0.5433526012
474
  },
475
  "nsubj:pass":{
476
+ "p":0.9024390244,
477
+ "r":0.8705882353,
478
+ "f":0.8862275449
479
  },
480
  "aux:pass":{
481
+ "p":0.9642857143,
482
  "r":0.9642857143,
483
+ "f":0.9642857143
484
  },
485
  "acl:relcl":{
486
+ "p":0.7341772152,
487
+ "r":0.6744186047,
488
+ "f":0.703030303
489
  },
490
  "advcl":{
491
+ "p":0.5632183908,
492
+ "r":0.6282051282,
493
+ "f":0.5939393939
494
  },
495
  "fixed":{
496
+ "p":0.8,
497
+ "r":0.76,
498
+ "f":0.7794871795
499
  },
500
  "dep":{
501
+ "p":0.3,
502
+ "r":0.6206896552,
503
+ "f":0.404494382
504
  },
505
  "expl:subj":{
506
+ "p":0.8387096774,
507
+ "r":0.8125,
508
+ "f":0.8253968254
509
  },
510
  "expl:comp":{
511
+ "p":0.675,
512
+ "r":0.9,
513
+ "f":0.7714285714
514
  },
515
  "expl:pass":{
516
+ "p":0.3333333333,
517
  "r":0.1428571429,
518
+ "f":0.2
519
  },
520
  "ccomp":{
521
+ "p":0.8,
522
+ "r":0.7843137255,
523
+ "f":0.7920792079
524
  },
525
  "parataxis":{
526
+ "p":0.55,
527
+ "r":0.3928571429,
528
+ "f":0.4583333333
529
  },
530
  "iobj":{
531
+ "p":0.7222222222,
532
+ "r":0.52,
533
+ "f":0.6046511628
534
  },
535
  "obl:agent":{
536
+ "p":0.8947368421,
537
+ "r":0.8095238095,
538
+ "f":0.85
539
  },
540
  "nsubj:caus":{
541
  "p":0.0,
568
  "f":0.0
569
  },
570
  "flat:foreign":{
571
+ "p":0.5,
572
+ "r":0.1428571429,
573
+ "f":0.2222222222
574
  },
575
  "orphan":{
576
  "p":0.0,
588
  "f":0.0
589
  }
590
  },
591
+ "tag_acc":0.9446562919,
592
+ "lemma_acc":0.9135840526,
593
+ "ents_p":0.8398572946,
594
+ "ents_r":0.83869741,
595
+ "ents_f":0.8392769516,
596
  "ents_per_type":{
597
  "PER":{
598
+ "p":0.9071242337,
599
+ "r":0.9223328796,
600
+ "f":0.9146653403
601
  },
602
  "LOC":{
603
+ "p":0.8418339999,
604
+ "r":0.8540533757,
605
+ "f":0.8478996657
606
  },
607
  "ORG":{
608
+ "p":0.7824390244,
609
+ "r":0.7652671756,
610
+ "f":0.7737578389
611
  },
612
  "MISC":{
613
+ "p":0.7278621126,
614
+ "r":0.67108908,
615
+ "f":0.6983235986
616
  }
617
  },
618
+ "speed":3541.0086678905
619
  },
620
  "sources":[
621
  {
morphologizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d46c83825f1999a17e964aef93563d16d383df12270c970ac6d6057cb22302c0
3
  size 76873
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e82ff8b331dfc57ed791df76c4f9d46fd6d37d271c643aded6a7daaac00273c0
3
  size 76873
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7522e1471cd1f86d3e8cc97155e8a262c392901f9a6fa9c7d44b192ebf8483f0
3
- size 6496592
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:995e2b905da0af420147b77850a1aabb9217f84991be9f28da6ddc3f0980e093
3
+ size 6366382
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36f99f7025175037574a1a4f4d0130aa3e3ee5c94d905467972f40c4e5d74696
3
  size 304828
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2357dda36d5a288f85d18a1a29523386ef131e023110e8f5a6cfd3278b14357c
3
  size 304828
senter/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06327e2dca59945abc27c6b8e51f16edd2b995e9c298ca4ab2d34652e734ec12
3
  size 219953
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3cfe07c332e8821824d0f0100b567e9950a3cb967d64a654f63aafa865b9d69
3
  size 219953
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d54ef02d60b59a22848a202ed1373e2bf5b4727162db32f07bbd728869829402
3
- size 6365604
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa9ecd03343dcf94e83b0a91997927930da93acec62f52b9f664c07800c91980
3
+ size 6495793
tokenizer CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d399a4e4c03d3ff8f9f14701678b318876e8f6799c64e01f4d342fa86cf6ac3
3
- size 1515364
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c5994780fdf69912547eff13b24e61764efefae670274e56112354430afe0f4
3
+ size 1515440