osanseviero committed on
Commit 796dea5
1 Parent(s): 4d9c899

Update spaCy pipeline

README.md CHANGED
@@ -14,47 +14,47 @@ model-index:
  metrics:
  - name: NER Precision
  type: precision
- value: 0.898632744
+ value: 0.898708838
  - name: NER Recall
  type: recall
- value: 0.8985877404
+ value: 0.8993389423
  - name: NER F Score
  type: f_score
- value: 0.8986102416
+ value: 0.8990237797
  - task:
  name: POS
  type: token-classification
  metrics:
  - name: POS Accuracy
  type: accuracy
- value: 0.9779597496
+ value: 0.9780627094
  - task:
  name: SENTER
  type: token-classification
  metrics:
  - name: SENTER Precision
  type: precision
- value: 0.9533882851
+ value: 0.9568827385
  - name: SENTER Recall
  type: recall
- value: 0.8621940761
+ value: 0.8666798601
  - name: SENTER F Score
  type: f_score
- value: 0.9055009007
+ value: 0.9095503479
  - task:
  name: UNLABELED_DEPENDENCIES
  type: token-classification
  metrics:
  - name: Unlabeled Dependencies Accuracy
  type: accuracy
- value: 0.9529693229
+ value: 0.9527056513
  - task:
  name: LABELED_DEPENDENCIES
  type: token-classification
  metrics:
  - name: Labeled Dependencies Accuracy
  type: accuracy
- value: 0.9529693229
+ value: 0.9527056513
  ---
  ### Details: https://spacy.io/models/en#en_core_web_trf

@@ -63,8 +63,8 @@ English transformer pipeline (roberta-base). Components: transformer, tagger, pa
  | Feature | Description |
  | --- | --- |
  | **Name** | `en_core_web_trf` |
- | **Version** | `3.1.0` |
- | **spaCy** | `>=3.1.0,<3.2.0` |
+ | **Version** | `3.2.0` |
+ | **spaCy** | `>=3.2.0,<3.3.0` |
  | **Default Pipeline** | `transformer`, `tagger`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
  | **Components** | `transformer`, `tagger`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
@@ -91,12 +91,15 @@ English transformer pipeline (roberta-base). Components: transformer, tagger, pa
  | Type | Score |
  | --- | --- |
  | `TOKEN_ACC` | 99.93 |
- | `TAG_ACC` | 97.80 |
- | `DEP_UAS` | 95.30 |
- | `DEP_LAS` | 93.94 |
- | `ENTS_P` | 89.86 |
- | `ENTS_R` | 89.86 |
- | `ENTS_F` | 89.86 |
- | `SENTS_P` | 95.34 |
- | `SENTS_R` | 86.22 |
- | `SENTS_F` | 90.55 |
+ | `TOKEN_P` | 99.57 |
+ | `TOKEN_R` | 99.58 |
+ | `TOKEN_F` | 99.57 |
+ | `TAG_ACC` | 97.81 |
+ | `SENTS_P` | 95.69 |
+ | `SENTS_R` | 86.67 |
+ | `SENTS_F` | 90.96 |
+ | `DEP_UAS` | 95.27 |
+ | `DEP_LAS` | 93.93 |
+ | `ENTS_P` | 89.87 |
+ | `ENTS_R` | 89.93 |
+ | `ENTS_F` | 89.90 |
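The metrics table above reflects the 3.2.0 release of `en_core_web_trf`. As a quick sanity check after updating, here is a minimal sketch (assuming the wheel from this repo, or the package installed via `python -m spacy download en_core_web_trf`, is available in the current environment) of loading the pipeline and confirming the version and components listed above:

```python
import spacy

# Load the packaged pipeline; assumes en_core_web_trf 3.2.0 is installed,
# e.g. from the wheel in this repository or via `python -m spacy download en_core_web_trf`.
nlp = spacy.load("en_core_web_trf")

print(nlp.meta["version"])  # expected: 3.2.0
print(nlp.pipe_names)       # transformer, tagger, parser, attribute_ruler, lemmatizer, ner

doc = nlp("Apple is looking at buying U.K. startup for $1 billion.")
print([(ent.text, ent.label_) for ent in doc.ents])
```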
accuracy.json CHANGED
@@ -1,327 +1,330 @@
1
  {
2
  "token_acc": 0.9993053983,
3
- "tag_acc": 0.9779597496,
4
- "dep_uas": 0.9529693229,
5
- "dep_las": 0.9394097615,
6
- "ents_p": 0.898632744,
7
- "ents_r": 0.8985877404,
8
- "ents_f": 0.8986102416,
9
- "sents_p": 0.9533882851,
10
- "sents_r": 0.8621940761,
11
- "sents_f": 0.9055009007,
12
- "speed": 5467.6411211076,
13
  "dep_las_per_type": {
14
  "prep": {
15
- "p": 0.9227131724,
16
- "r": 0.9254790362,
17
- "f": 0.9240940347
18
  },
19
  "det": {
20
- "p": 0.9901676798,
21
- "r": 0.9895620974,
22
- "f": 0.989864796
23
  },
24
  "pobj": {
25
- "p": 0.9827126617,
26
- "r": 0.9844492441,
27
- "f": 0.9835801864
28
  },
29
  "nsubj": {
30
- "p": 0.9797975371,
31
- "r": 0.9795399781,
32
- "f": 0.9796687407
33
  },
34
  "aux": {
35
- "p": 0.9890501202,
36
- "r": 0.9890501202,
37
- "f": 0.9890501202
38
  },
39
  "advmod": {
40
- "p": 0.8975934029,
41
- "r": 0.8974423692,
42
- "f": 0.8975178797
43
  },
44
  "relcl": {
45
- "p": 0.873105393,
46
- "r": 0.898766328,
47
- "f": 0.8857500447
48
  },
49
  "root": {
50
- "p": 0.9679445053,
51
- "r": 0.8744640148,
52
- "f": 0.9188327442
53
  },
54
  "xcomp": {
55
- "p": 0.940690151,
56
- "r": 0.9393395549,
57
- "f": 0.9400143678
58
  },
59
  "amod": {
60
- "p": 0.942555534,
61
- "r": 0.9429219307,
62
- "f": 0.9427386967
63
  },
64
  "compound": {
65
- "p": 0.952575817,
66
- "r": 0.9463689018,
67
- "f": 0.9494622154
68
  },
69
  "poss": {
70
- "p": 0.986746988,
71
- "r": 0.9891304348,
72
- "f": 0.9879372738
73
  },
74
  "ccomp": {
75
- "p": 0.8579088472,
76
- "r": 0.9124236253,
77
- "f": 0.8843268851
78
  },
79
  "attr": {
80
- "p": 0.9473251029,
81
- "r": 0.9680403701,
82
- "f": 0.9575707155
83
  },
84
  "case": {
85
- "p": 0.9880834161,
86
- "r": 0.995995996,
87
- "f": 0.9920239282
88
  },
89
  "mark": {
90
- "p": 0.9546054368,
91
- "r": 0.958399576,
92
- "f": 0.9564987439
93
  },
94
  "intj": {
95
- "p": 0.6154299176,
96
- "r": 0.7655677656,
97
- "f": 0.6823375775
98
  },
99
  "advcl": {
100
- "p": 0.8175954592,
101
- "r": 0.7980357593,
102
- "f": 0.8076972091
103
  },
104
  "cc": {
105
- "p": 0.8981217309,
106
- "r": 0.9036000478,
107
- "f": 0.9008525607
108
  },
109
  "neg": {
110
- "p": 0.957,
111
- "r": 0.9603612644,
112
- "f": 0.958677686
113
  },
114
  "conj": {
115
- "p": 0.8658565642,
116
- "r": 0.9149043303,
117
- "f": 0.8897049822
118
  },
119
  "nsubjpass": {
120
- "p": 0.958161157,
121
- "r": 0.9512820513,
122
- "f": 0.9547092126
123
  },
124
  "auxpass": {
125
- "p": 0.9672727273,
126
- "r": 0.969476082,
127
- "f": 0.9683731513
128
  },
129
  "dobj": {
130
- "p": 0.9737199457,
131
- "r": 0.9713921428,
132
- "f": 0.9725546513
133
  },
134
  "nummod": {
135
- "p": 0.9584077571,
136
- "r": 0.9484848485,
137
- "f": 0.9534204848
138
  },
139
  "npadvmod": {
140
- "p": 0.85505659,
141
- "r": 0.8319715808,
142
- "f": 0.8433561397
143
  },
144
  "prt": {
145
- "p": 0.8902546093,
146
- "r": 0.9086021505,
147
- "f": 0.8993348115
148
  },
149
  "pcomp": {
150
- "p": 0.9301833568,
151
- "r": 0.9236694678,
152
- "f": 0.9269149684
153
  },
154
  "expl": {
155
- "p": 0.9935897436,
156
  "r": 0.9957173448,
157
- "f": 0.9946524064
158
  },
159
  "acl": {
160
- "p": 0.8569069895,
161
- "r": 0.8494271686,
162
- "f": 0.8531506849
163
  },
164
  "agent": {
165
- "p": 0.9471830986,
166
- "r": 0.9641577061,
167
- "f": 0.9555950266
168
  },
169
  "dative": {
170
- "p": 0.8220720721,
171
- "r": 0.8371559633,
172
- "f": 0.8295454545
173
  },
174
  "acomp": {
175
- "p": 0.9371040724,
176
- "r": 0.9392290249,
177
- "f": 0.9381653454
178
  },
179
  "dep": {
180
- "p": 0.4218241042,
181
- "r": 0.4204545455,
182
- "f": 0.4211382114
183
  },
184
  "csubj": {
185
- "p": 0.865497076,
186
- "r": 0.875739645,
187
- "f": 0.8705882353
188
  },
189
  "quantmod": {
190
- "p": 0.8902847571,
191
- "r": 0.863525589,
192
- "f": 0.8767010309
193
  },
194
  "nmod": {
195
- "p": 0.8395759717,
196
- "r": 0.7239488117,
197
- "f": 0.777486911
198
  },
199
  "appos": {
200
- "p": 0.8256564787,
201
- "r": 0.8321041215,
202
- "f": 0.8288677615
203
  },
204
  "predet": {
205
- "p": 0.8537549407,
206
- "r": 0.9270386266,
207
- "f": 0.8888888889
208
  },
209
  "preconj": {
210
- "p": 0.6489361702,
211
- "r": 0.7093023256,
212
- "f": 0.6777777778
213
  },
214
  "oprd": {
215
- "p": 0.8913738019,
216
- "r": 0.8328358209,
217
- "f": 0.8611111111
218
  },
219
  "parataxis": {
220
- "p": 0.5842911877,
221
- "r": 0.6616052061,
222
- "f": 0.6205493388
223
  },
224
  "meta": {
225
- "p": 0.1808510638,
226
  "r": 0.6538461538,
227
- "f": 0.2833333333
228
  },
229
  "csubjpass": {
230
- "p": 1.0,
231
  "r": 0.8333333333,
232
- "f": 0.9090909091
233
  }
234
  },
235
  "ents_per_type": {
236
  "DATE": {
237
- "p": 0.8967681205,
238
- "r": 0.9073015873,
239
- "f": 0.9020041029
240
  },
241
  "GPE": {
242
- "p": 0.9559695174,
243
- "r": 0.9447698745,
244
- "f": 0.9503367003
245
  },
246
  "ORDINAL": {
247
- "p": 0.803030303,
248
  "r": 0.8229813665,
249
- "f": 0.8128834356
250
  },
251
  "ORG": {
252
- "p": 0.9044176707,
253
- "r": 0.8955461294,
254
- "f": 0.8999600373
255
  },
256
  "FAC": {
257
- "p": 0.544973545,
258
- "r": 0.7923076923,
259
- "f": 0.6457680251
260
  },
261
  "QUANTITY": {
262
- "p": 0.7828571429,
263
- "r": 0.7527472527,
264
- "f": 0.7675070028
265
  },
266
  "LOC": {
267
- "p": 0.8250825083,
268
- "r": 0.7961783439,
269
- "f": 0.8103727715
270
  },
271
  "CARDINAL": {
272
- "p": 0.8595769683,
273
- "r": 0.8697978597,
274
- "f": 0.8646572104
275
  },
276
  "PERSON": {
277
- "p": 0.9422647528,
278
- "r": 0.9640992167,
279
- "f": 0.9530569447
280
  },
281
  "NORP": {
282
- "p": 0.9249800479,
283
- "r": 0.9272,
284
- "f": 0.9260886936
285
  },
286
  "LAW": {
287
- "p": 0.5789473684,
288
- "r": 0.6875,
289
- "f": 0.6285714286
290
  },
291
  "PRODUCT": {
292
- "p": 0.6686746988,
293
  "r": 0.5260663507,
294
- "f": 0.5888594164
295
  },
296
  "TIME": {
297
- "p": 0.7287671233,
298
  "r": 0.7777777778,
299
- "f": 0.7524752475
300
  },
301
  "EVENT": {
302
- "p": 0.7443609023,
303
- "r": 0.5689655172,
304
- "f": 0.6449511401
305
  },
306
  "WORK_OF_ART": {
307
- "p": 0.5721925134,
308
- "r": 0.5515463918,
309
- "f": 0.56167979
310
- },
311
- "MONEY": {
312
- "p": 0.9120481928,
313
- "r": 0.893742621,
314
- "f": 0.9028026237
315
  },
316
  "PERCENT": {
317
- "p": 0.9164037855,
318
- "r": 0.8897396631,
319
- "f": 0.9028749029
320
  },
321
  "LANGUAGE": {
322
  "p": 1.0,
323
- "r": 0.75,
324
- "f": 0.8571428571
325
  }
326
- }
 
327
  }
 
1
  {
2
  "token_acc": 0.9993053983,
3
+ "token_p": 0.9956742163,
4
+ "token_r": 0.9957505887,
5
+ "token_f": 0.9957124011,
6
+ "tag_acc": 0.9780627094,
7
+ "sents_p": 0.9568827385,
8
+ "sents_r": 0.8666798601,
9
+ "sents_f": 0.9095503479,
10
+ "dep_uas": 0.9527056513,
11
+ "dep_las": 0.9393298266,
 
12
  "dep_las_per_type": {
13
  "prep": {
14
+ "p": 0.9229604964,
15
+ "r": 0.9255169797,
16
+ "f": 0.9242369702
17
  },
18
  "det": {
19
+ "p": 0.9898046572,
20
+ "r": 0.9896028704,
21
+ "f": 0.9897037535
22
  },
23
  "pobj": {
24
+ "p": 0.9832869081,
25
+ "r": 0.9842136265,
26
+ "f": 0.9837500491
27
  },
28
  "nsubj": {
29
+ "p": 0.9803139249,
30
+ "r": 0.9795837897,
31
+ "f": 0.9799487213
32
  },
33
  "aux": {
34
+ "p": 0.989571263,
35
+ "r": 0.9883379329,
36
+ "f": 0.9889542134
37
  },
38
  "advmod": {
39
+ "p": 0.8962161255,
40
+ "r": 0.8987043581,
41
+ "f": 0.8974585171
42
  },
43
  "relcl": {
44
+ "p": 0.8798724309,
45
+ "r": 0.9009433962,
46
+ "f": 0.8902832556
47
  },
48
  "root": {
49
+ "p": 0.9664043142,
50
+ "r": 0.8747938518,
51
+ "f": 0.9183200028
52
  },
53
  "xcomp": {
54
+ "p": 0.9438444924,
55
+ "r": 0.9411342426,
56
+ "f": 0.9424874191
57
  },
58
  "amod": {
59
+ "p": 0.9413514387,
60
+ "r": 0.943181082,
61
+ "f": 0.9422653722
62
  },
63
  "compound": {
64
+ "p": 0.951964576,
65
+ "r": 0.9458676765,
66
+ "f": 0.9489063329
67
  },
68
  "poss": {
69
+ "p": 0.986728333,
70
+ "r": 0.9877214171,
71
+ "f": 0.9872246253
72
  },
73
  "ccomp": {
74
+ "p": 0.8604651163,
75
+ "r": 0.9118126273,
76
+ "f": 0.8853950361
77
  },
78
  "attr": {
79
+ "p": 0.9478225144,
80
+ "r": 0.9701429773,
81
+ "f": 0.9588528678
82
  },
83
  "case": {
84
+ "p": 0.9890547264,
85
+ "r": 0.994994995,
86
+ "f": 0.9920159681
87
  },
88
  "mark": {
89
+ "p": 0.9492105263,
90
+ "r": 0.9557498675,
91
+ "f": 0.9524689728
92
  },
93
  "intj": {
94
+ "p": 0.6237388724,
95
+ "r": 0.76996337,
96
+ "f": 0.6891803279
97
  },
98
  "advcl": {
99
+ "p": 0.8195836546,
100
+ "r": 0.803072274,
101
+ "f": 0.8112439583
102
  },
103
  "cc": {
104
+ "p": 0.8986197049,
105
+ "r": 0.9032412391,
106
+ "f": 0.9009245452
107
  },
108
  "neg": {
109
+ "p": 0.9574148297,
110
+ "r": 0.958855996,
111
+ "f": 0.9581348709
112
  },
113
  "conj": {
114
+ "p": 0.8712776177,
115
+ "r": 0.9133937563,
116
+ "f": 0.8918387414
117
  },
118
  "nsubjpass": {
119
+ "p": 0.9616182573,
120
+ "r": 0.9507692308,
121
+ "f": 0.9561629706
122
  },
123
  "auxpass": {
124
+ "p": 0.9676979072,
125
+ "r": 0.9690205011,
126
+ "f": 0.9683587526
127
  },
128
  "dobj": {
129
+ "p": 0.9742177522,
130
+ "r": 0.9725874572,
131
+ "f": 0.9734019221
132
  },
133
  "nummod": {
134
+ "p": 0.9582800102,
135
+ "r": 0.9454545455,
136
+ "f": 0.9518240753
137
  },
138
  "npadvmod": {
139
+ "p": 0.8582474227,
140
+ "r": 0.8280639432,
141
+ "f": 0.8428855541
142
  },
143
  "prt": {
144
+ "p": 0.896,
145
+ "r": 0.9032258065,
146
+ "f": 0.8995983936
147
  },
148
  "pcomp": {
149
+ "p": 0.9316901408,
150
+ "r": 0.9264705882,
151
+ "f": 0.9290730337
152
  },
153
  "expl": {
154
+ "p": 0.9893617021,
155
  "r": 0.9957173448,
156
+ "f": 0.992529349
157
  },
158
  "acl": {
159
+ "p": 0.8476138829,
160
+ "r": 0.852700491,
161
+ "f": 0.8501495785
162
  },
163
  "agent": {
164
+ "p": 0.9559859155,
165
+ "r": 0.9731182796,
166
+ "f": 0.9644760213
167
  },
168
  "dative": {
169
+ "p": 0.8236607143,
170
+ "r": 0.8463302752,
171
+ "f": 0.834841629
172
  },
173
  "acomp": {
174
+ "p": 0.9352355072,
175
+ "r": 0.9365079365,
176
+ "f": 0.9358712894
177
  },
178
  "dep": {
179
+ "p": 0.3949044586,
180
+ "r": 0.4025974026,
181
+ "f": 0.3987138264
182
  },
183
  "csubj": {
184
+ "p": 0.8928571429,
185
+ "r": 0.8875739645,
186
+ "f": 0.8902077151
187
  },
188
  "quantmod": {
189
+ "p": 0.8867449664,
190
+ "r": 0.8586515028,
191
+ "f": 0.872472142
192
  },
193
  "nmod": {
194
+ "p": 0.82904795,
195
+ "r": 0.7269957343,
196
+ "f": 0.7746753247
197
  },
198
  "appos": {
199
+ "p": 0.8099701747,
200
+ "r": 0.8247288503,
201
+ "f": 0.8172828891
202
  },
203
  "predet": {
204
+ "p": 0.8508064516,
205
+ "r": 0.9055793991,
206
+ "f": 0.8773388773
207
  },
208
  "preconj": {
209
+ "p": 0.7083333333,
210
+ "r": 0.7906976744,
211
+ "f": 0.7472527473
212
  },
213
  "oprd": {
214
+ "p": 0.9022082019,
215
+ "r": 0.8537313433,
216
+ "f": 0.8773006135
217
  },
218
  "parataxis": {
219
+ "p": 0.5602189781,
220
+ "r": 0.6659436009,
221
+ "f": 0.6085232904
222
  },
223
  "meta": {
224
+ "p": 0.1683168317,
225
  "r": 0.6538461538,
226
+ "f": 0.2677165354
227
  },
228
  "csubjpass": {
229
+ "p": 0.8333333333,
230
  "r": 0.8333333333,
231
+ "f": 0.8333333333
232
  }
233
  },
234
+ "ents_p": 0.898708838,
235
+ "ents_r": 0.8993389423,
236
+ "ents_f": 0.8990237797,
237
  "ents_per_type": {
238
  "DATE": {
239
+ "p": 0.8928683632,
240
+ "r": 0.9022222222,
241
+ "f": 0.8975209222
242
  },
243
  "GPE": {
244
+ "p": 0.9575551783,
245
+ "r": 0.9439330544,
246
+ "f": 0.9506953224
247
  },
248
  "ORDINAL": {
249
+ "p": 0.8079268293,
250
  "r": 0.8229813665,
251
+ "f": 0.8153846154
252
  },
253
  "ORG": {
254
+ "p": 0.9032515991,
255
+ "r": 0.8984623542,
256
+ "f": 0.9008506114
257
  },
258
  "FAC": {
259
+ "p": 0.5567010309,
260
+ "r": 0.8307692308,
261
+ "f": 0.6666666667
262
  },
263
  "QUANTITY": {
264
+ "p": 0.8187134503,
265
+ "r": 0.7692307692,
266
+ "f": 0.7932011331
267
  },
268
  "LOC": {
269
+ "p": 0.8610169492,
270
+ "r": 0.8089171975,
271
+ "f": 0.8341543514
272
  },
273
  "CARDINAL": {
274
+ "p": 0.8530092593,
275
+ "r": 0.8763376932,
276
+ "f": 0.864516129
277
  },
278
  "PERSON": {
279
+ "p": 0.9436258808,
280
+ "r": 0.9614882507,
281
+ "f": 0.9524733269
282
  },
283
  "NORP": {
284
+ "p": 0.9211783439,
285
+ "r": 0.9256,
286
+ "f": 0.9233838787
287
  },
288
  "LAW": {
289
+ "p": 0.5324675325,
290
+ "r": 0.640625,
291
+ "f": 0.5815602837
292
  },
293
  "PRODUCT": {
294
+ "p": 0.649122807,
295
  "r": 0.5260663507,
296
+ "f": 0.5811518325
297
+ },
298
+ "MONEY": {
299
+ "p": 0.9157641396,
300
+ "r": 0.8984651712,
301
+ "f": 0.9070321812
302
  },
303
  "TIME": {
304
+ "p": 0.7514124294,
305
  "r": 0.7777777778,
306
+ "f": 0.7643678161
307
  },
308
  "EVENT": {
309
+ "p": 0.776119403,
310
+ "r": 0.5977011494,
311
+ "f": 0.6753246753
312
  },
313
  "WORK_OF_ART": {
314
+ "p": 0.5684210526,
315
+ "r": 0.5567010309,
316
+ "f": 0.5625
317
  },
318
  "PERCENT": {
319
+ "p": 0.9154929577,
320
+ "r": 0.8958652374,
321
+ "f": 0.9055727554
322
  },
323
  "LANGUAGE": {
324
  "p": 1.0,
325
+ "r": 0.71875,
326
+ "f": 0.8363636364
327
  }
328
+ },
329
+ "speed": 6163.7320781314
330
  }
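`accuracy.json` above holds the raw evaluation scores backing the README table. A small sketch (assuming a local checkout of this repository so the relative path resolves) of reading it and printing the headline numbers that changed in this commit:

```python
import json

# Path assumed relative to a checkout of this repository.
with open("accuracy.json", encoding="utf8") as f:
    scores = json.load(f)

# Headline metrics updated in this commit (stored as fractions, not percentages).
for key in ("token_acc", "tag_acc", "dep_uas", "dep_las", "ents_p", "ents_r", "ents_f", "speed"):
    print(f"{key}: {scores[key]}")

# Per-type breakdowns are nested dicts, e.g. the NER F-score for ORG entities.
print("ORG F:", scores["ents_per_type"]["ORG"]["f"])
```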
attribute_ruler/patterns CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
 
config.cfg CHANGED
@@ -1,10 +1,8 @@
1
  [paths]
2
- train = "corpus/en-core-web/train.spacy"
3
- dev = "corpus/en-core-web/dev.spacy"
4
  vectors = null
5
- raw = null
6
  init_tok2vec = null
7
- vocab_data = null
8
 
9
  [system]
10
  gpu_allocator = "pytorch"
@@ -24,6 +22,7 @@ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
24
 
25
  [components.attribute_ruler]
26
  factory = "attribute_ruler"
 
27
  validate = false
28
 
29
  [components.lemmatizer]
@@ -31,11 +30,13 @@ factory = "lemmatizer"
31
  mode = "rule"
32
  model = null
33
  overwrite = false
 
34
 
35
  [components.ner]
36
  factory = "ner"
37
  incorrect_spans_key = null
38
  moves = null
 
39
  update_with_oracle_cut_size = 100
40
 
41
  [components.ner.model]
@@ -58,6 +59,7 @@ factory = "parser"
58
  learn_tokens = false
59
  min_action_freq = 30
60
  moves = null
 
61
  update_with_oracle_cut_size = 100
62
 
63
  [components.parser.model]
@@ -77,6 +79,8 @@ pooling = {"@layers":"reduce_mean.v1"}
77
 
78
  [components.tagger]
79
  factory = "tagger"
 
 
80
 
81
  [components.tagger.model]
82
  @architectures = "spacy.Tagger.v1"
@@ -94,42 +98,39 @@ max_batch_items = 4096
94
  set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
95
 
96
  [components.transformer.model]
97
- @architectures = "spacy-transformers.TransformerModel.v1"
98
  name = "roberta-base"
 
99
 
100
  [components.transformer.model.get_spans]
101
  @span_getters = "spacy-transformers.strided_spans.v1"
102
  window = 128
103
  stride = 96
104
 
 
 
105
  [components.transformer.model.tokenizer_config]
106
  use_fast = true
107
 
 
 
108
  [corpora]
109
 
110
  [corpora.dev]
111
  @readers = "spacy.Corpus.v1"
112
- limit = 0
113
- max_length = 0
114
- path = ${paths:dev}
115
  gold_preproc = false
 
 
116
  augmenter = null
117
 
118
  [corpora.train]
119
  @readers = "spacy.Corpus.v1"
120
- path = ${paths:train}
121
- max_length = 500
122
  gold_preproc = false
 
123
  limit = 0
124
-
125
- [corpora.train.augmenter]
126
- @augmenters = "spacy.orth_variants.v1"
127
- level = 0.2
128
- lower = 0.5
129
-
130
- [corpora.train.augmenter.orth_variants]
131
- @readers = "srsly.read_json.v1"
132
- path = "assets/orth_variants.json"
133
 
134
  [training]
135
  train_corpus = "corpora.train"
@@ -186,11 +187,12 @@ ents_f = 0.16
186
  ents_p = 0.0
187
  ents_r = 0.0
188
  ents_per_type = null
 
189
 
190
  [pretraining]
191
 
192
  [initialize]
193
- vocab_data = ${paths.vocab_data}
194
  vectors = ${paths.vectors}
195
  init_tok2vec = ${paths.init_tok2vec}
196
  before_init = null
 
1
  [paths]
2
+ train = null
3
+ dev = null
4
  vectors = null
 
5
  init_tok2vec = null
 
6
 
7
  [system]
8
  gpu_allocator = "pytorch"
 
22
 
23
  [components.attribute_ruler]
24
  factory = "attribute_ruler"
25
+ scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
26
  validate = false
27
 
28
  [components.lemmatizer]
 
30
  mode = "rule"
31
  model = null
32
  overwrite = false
33
+ scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
34
 
35
  [components.ner]
36
  factory = "ner"
37
  incorrect_spans_key = null
38
  moves = null
39
+ scorer = {"@scorers":"spacy.ner_scorer.v1"}
40
  update_with_oracle_cut_size = 100
41
 
42
  [components.ner.model]
 
59
  learn_tokens = false
60
  min_action_freq = 30
61
  moves = null
62
+ scorer = {"@scorers":"spacy.parser_scorer.v1"}
63
  update_with_oracle_cut_size = 100
64
 
65
  [components.parser.model]
 
79
 
80
  [components.tagger]
81
  factory = "tagger"
82
+ overwrite = false
83
+ scorer = {"@scorers":"spacy.tagger_scorer.v1"}
84
 
85
  [components.tagger.model]
86
  @architectures = "spacy.Tagger.v1"
 
98
  set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
99
 
100
  [components.transformer.model]
101
+ @architectures = "spacy-transformers.TransformerModel.v3"
102
  name = "roberta-base"
103
+ mixed_precision = false
104
 
105
  [components.transformer.model.get_spans]
106
  @span_getters = "spacy-transformers.strided_spans.v1"
107
  window = 128
108
  stride = 96
109
 
110
+ [components.transformer.model.grad_scaler_config]
111
+
112
  [components.transformer.model.tokenizer_config]
113
  use_fast = true
114
 
115
+ [components.transformer.model.transformer_config]
116
+
117
  [corpora]
118
 
119
  [corpora.dev]
120
  @readers = "spacy.Corpus.v1"
121
+ path = ${paths.dev}
 
 
122
  gold_preproc = false
123
+ max_length = 0
124
+ limit = 0
125
  augmenter = null
126
 
127
  [corpora.train]
128
  @readers = "spacy.Corpus.v1"
129
+ path = ${paths.train}
 
130
  gold_preproc = false
131
+ max_length = 0
132
  limit = 0
133
+ augmenter = null
134
 
135
  [training]
136
  train_corpus = "corpora.train"
 
187
  ents_p = 0.0
188
  ents_r = 0.0
189
  ents_per_type = null
190
+ speed = 0.0
191
 
192
  [pretraining]
193
 
194
  [initialize]
195
+ vocab_data = null
196
  vectors = ${paths.vectors}
197
  init_tok2vec = ${paths.init_tok2vec}
198
  before_init = null
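The config changes above replace the hard-coded corpus paths with `${paths.*}` references, add explicit per-component scorers, and move the transformer layer to `spacy-transformers.TransformerModel.v3` with a `mixed_precision` flag. A sketch (assuming spaCy >=3.2 is installed and `config.cfg` is read from a checkout or from the installed package directory) of inspecting those settings without loading the full pipeline:

```python
from spacy.util import load_config

# Load the packaged training config; ${paths.*} values stay uninterpolated by default.
config = load_config("config.cfg")  # path assumed; adjust to the installed package if needed

transformer_model = config["components"]["transformer"]["model"]
print(transformer_model["@architectures"])   # spacy-transformers.TransformerModel.v3
print(transformer_model["mixed_precision"])  # False

# Each trainable component now declares its scorer explicitly.
print(config["components"]["tagger"]["scorer"])
print(config["components"]["ner"]["scorer"])
```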
en_core_web_trf-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d34003f4dcd6c6bc04347e00ec1145337e2c606f579f5b35a34089c74f624761
3
- size 460239233
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6be2ccfc1c30edb5690f5e2baecf2a0cabd23529c0abb84c67aedcf0dac86a2
3
+ size 460235340
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"en",
3
  "name":"core_web_trf",
4
- "version":"3.1.0",
5
  "description":"English transformer pipeline (roberta-base). Components: transformer, tagger, parser, ner, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"MIT",
10
- "spacy_version":">=3.1.0,<3.2.0",
11
- "spacy_git_version":"caba63b74",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
@@ -165,330 +165,333 @@
165
  ],
166
  "performance":{
167
  "token_acc":0.9993053983,
168
- "tag_acc":0.9779597496,
169
- "dep_uas":0.9529693229,
170
- "dep_las":0.9394097615,
171
- "ents_p":0.898632744,
172
- "ents_r":0.8985877404,
173
- "ents_f":0.8986102416,
174
- "sents_p":0.9533882851,
175
- "sents_r":0.8621940761,
176
- "sents_f":0.9055009007,
177
- "speed":5467.6411211076,
178
  "dep_las_per_type":{
179
  "prep":{
180
- "p":0.9227131724,
181
- "r":0.9254790362,
182
- "f":0.9240940347
183
  },
184
  "det":{
185
- "p":0.9901676798,
186
- "r":0.9895620974,
187
- "f":0.989864796
188
  },
189
  "pobj":{
190
- "p":0.9827126617,
191
- "r":0.9844492441,
192
- "f":0.9835801864
193
  },
194
  "nsubj":{
195
- "p":0.9797975371,
196
- "r":0.9795399781,
197
- "f":0.9796687407
198
  },
199
  "aux":{
200
- "p":0.9890501202,
201
- "r":0.9890501202,
202
- "f":0.9890501202
203
  },
204
  "advmod":{
205
- "p":0.8975934029,
206
- "r":0.8974423692,
207
- "f":0.8975178797
208
  },
209
  "relcl":{
210
- "p":0.873105393,
211
- "r":0.898766328,
212
- "f":0.8857500447
213
  },
214
  "root":{
215
- "p":0.9679445053,
216
- "r":0.8744640148,
217
- "f":0.9188327442
218
  },
219
  "xcomp":{
220
- "p":0.940690151,
221
- "r":0.9393395549,
222
- "f":0.9400143678
223
  },
224
  "amod":{
225
- "p":0.942555534,
226
- "r":0.9429219307,
227
- "f":0.9427386967
228
  },
229
  "compound":{
230
- "p":0.952575817,
231
- "r":0.9463689018,
232
- "f":0.9494622154
233
  },
234
  "poss":{
235
- "p":0.986746988,
236
- "r":0.9891304348,
237
- "f":0.9879372738
238
  },
239
  "ccomp":{
240
- "p":0.8579088472,
241
- "r":0.9124236253,
242
- "f":0.8843268851
243
  },
244
  "attr":{
245
- "p":0.9473251029,
246
- "r":0.9680403701,
247
- "f":0.9575707155
248
  },
249
  "case":{
250
- "p":0.9880834161,
251
- "r":0.995995996,
252
- "f":0.9920239282
253
  },
254
  "mark":{
255
- "p":0.9546054368,
256
- "r":0.958399576,
257
- "f":0.9564987439
258
  },
259
  "intj":{
260
- "p":0.6154299176,
261
- "r":0.7655677656,
262
- "f":0.6823375775
263
  },
264
  "advcl":{
265
- "p":0.8175954592,
266
- "r":0.7980357593,
267
- "f":0.8076972091
268
  },
269
  "cc":{
270
- "p":0.8981217309,
271
- "r":0.9036000478,
272
- "f":0.9008525607
273
  },
274
  "neg":{
275
- "p":0.957,
276
- "r":0.9603612644,
277
- "f":0.958677686
278
  },
279
  "conj":{
280
- "p":0.8658565642,
281
- "r":0.9149043303,
282
- "f":0.8897049822
283
  },
284
  "nsubjpass":{
285
- "p":0.958161157,
286
- "r":0.9512820513,
287
- "f":0.9547092126
288
  },
289
  "auxpass":{
290
- "p":0.9672727273,
291
- "r":0.969476082,
292
- "f":0.9683731513
293
  },
294
  "dobj":{
295
- "p":0.9737199457,
296
- "r":0.9713921428,
297
- "f":0.9725546513
298
  },
299
  "nummod":{
300
- "p":0.9584077571,
301
- "r":0.9484848485,
302
- "f":0.9534204848
303
  },
304
  "npadvmod":{
305
- "p":0.85505659,
306
- "r":0.8319715808,
307
- "f":0.8433561397
308
  },
309
  "prt":{
310
- "p":0.8902546093,
311
- "r":0.9086021505,
312
- "f":0.8993348115
313
  },
314
  "pcomp":{
315
- "p":0.9301833568,
316
- "r":0.9236694678,
317
- "f":0.9269149684
318
  },
319
  "expl":{
320
- "p":0.9935897436,
321
  "r":0.9957173448,
322
- "f":0.9946524064
323
  },
324
  "acl":{
325
- "p":0.8569069895,
326
- "r":0.8494271686,
327
- "f":0.8531506849
328
  },
329
  "agent":{
330
- "p":0.9471830986,
331
- "r":0.9641577061,
332
- "f":0.9555950266
333
  },
334
  "dative":{
335
- "p":0.8220720721,
336
- "r":0.8371559633,
337
- "f":0.8295454545
338
  },
339
  "acomp":{
340
- "p":0.9371040724,
341
- "r":0.9392290249,
342
- "f":0.9381653454
343
  },
344
  "dep":{
345
- "p":0.4218241042,
346
- "r":0.4204545455,
347
- "f":0.4211382114
348
  },
349
  "csubj":{
350
- "p":0.865497076,
351
- "r":0.875739645,
352
- "f":0.8705882353
353
  },
354
  "quantmod":{
355
- "p":0.8902847571,
356
- "r":0.863525589,
357
- "f":0.8767010309
358
  },
359
  "nmod":{
360
- "p":0.8395759717,
361
- "r":0.7239488117,
362
- "f":0.777486911
363
  },
364
  "appos":{
365
- "p":0.8256564787,
366
- "r":0.8321041215,
367
- "f":0.8288677615
368
  },
369
  "predet":{
370
- "p":0.8537549407,
371
- "r":0.9270386266,
372
- "f":0.8888888889
373
  },
374
  "preconj":{
375
- "p":0.6489361702,
376
- "r":0.7093023256,
377
- "f":0.6777777778
378
  },
379
  "oprd":{
380
- "p":0.8913738019,
381
- "r":0.8328358209,
382
- "f":0.8611111111
383
  },
384
  "parataxis":{
385
- "p":0.5842911877,
386
- "r":0.6616052061,
387
- "f":0.6205493388
388
  },
389
  "meta":{
390
- "p":0.1808510638,
391
  "r":0.6538461538,
392
- "f":0.2833333333
393
  },
394
  "csubjpass":{
395
- "p":1.0,
396
  "r":0.8333333333,
397
- "f":0.9090909091
398
  }
399
  },
400
  "ents_per_type":{
401
  "DATE":{
402
- "p":0.8967681205,
403
- "r":0.9073015873,
404
- "f":0.9020041029
405
  },
406
  "GPE":{
407
- "p":0.9559695174,
408
- "r":0.9447698745,
409
- "f":0.9503367003
410
  },
411
  "ORDINAL":{
412
- "p":0.803030303,
413
  "r":0.8229813665,
414
- "f":0.8128834356
415
  },
416
  "ORG":{
417
- "p":0.9044176707,
418
- "r":0.8955461294,
419
- "f":0.8999600373
420
  },
421
  "FAC":{
422
- "p":0.544973545,
423
- "r":0.7923076923,
424
- "f":0.6457680251
425
  },
426
  "QUANTITY":{
427
- "p":0.7828571429,
428
- "r":0.7527472527,
429
- "f":0.7675070028
430
  },
431
  "LOC":{
432
- "p":0.8250825083,
433
- "r":0.7961783439,
434
- "f":0.8103727715
435
  },
436
  "CARDINAL":{
437
- "p":0.8595769683,
438
- "r":0.8697978597,
439
- "f":0.8646572104
440
  },
441
  "PERSON":{
442
- "p":0.9422647528,
443
- "r":0.9640992167,
444
- "f":0.9530569447
445
  },
446
  "NORP":{
447
- "p":0.9249800479,
448
- "r":0.9272,
449
- "f":0.9260886936
450
  },
451
  "LAW":{
452
- "p":0.5789473684,
453
- "r":0.6875,
454
- "f":0.6285714286
455
  },
456
  "PRODUCT":{
457
- "p":0.6686746988,
458
  "r":0.5260663507,
459
- "f":0.5888594164
460
  },
461
  "TIME":{
462
- "p":0.7287671233,
463
  "r":0.7777777778,
464
- "f":0.7524752475
465
  },
466
  "EVENT":{
467
- "p":0.7443609023,
468
- "r":0.5689655172,
469
- "f":0.6449511401
470
  },
471
  "WORK_OF_ART":{
472
- "p":0.5721925134,
473
- "r":0.5515463918,
474
- "f":0.56167979
475
- },
476
- "MONEY":{
477
- "p":0.9120481928,
478
- "r":0.893742621,
479
- "f":0.9028026237
480
  },
481
  "PERCENT":{
482
- "p":0.9164037855,
483
- "r":0.8897396631,
484
- "f":0.9028749029
485
  },
486
  "LANGUAGE":{
487
  "p":1.0,
488
- "r":0.75,
489
- "f":0.8571428571
490
  }
491
- }
 
492
  },
493
  "sources":[
494
  {
@@ -517,6 +520,6 @@
517
  }
518
  ],
519
  "requirements":[
520
- "spacy-transformers>=1.0.3,<1.1.0"
521
  ]
522
  }
 
1
  {
2
  "lang":"en",
3
  "name":"core_web_trf",
4
+ "version":"3.2.0",
5
  "description":"English transformer pipeline (roberta-base). Components: transformer, tagger, parser, ner, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"MIT",
10
+ "spacy_version":">=3.2.0,<3.3.0",
11
+ "spacy_git_version":"bb26550e2",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
 
165
  ],
166
  "performance":{
167
  "token_acc":0.9993053983,
168
+ "token_p":0.9956742163,
169
+ "token_r":0.9957505887,
170
+ "token_f":0.9957124011,
171
+ "tag_acc":0.9780627094,
172
+ "sents_p":0.9568827385,
173
+ "sents_r":0.8666798601,
174
+ "sents_f":0.9095503479,
175
+ "dep_uas":0.9527056513,
176
+ "dep_las":0.9393298266,
 
177
  "dep_las_per_type":{
178
  "prep":{
179
+ "p":0.9229604964,
180
+ "r":0.9255169797,
181
+ "f":0.9242369702
182
  },
183
  "det":{
184
+ "p":0.9898046572,
185
+ "r":0.9896028704,
186
+ "f":0.9897037535
187
  },
188
  "pobj":{
189
+ "p":0.9832869081,
190
+ "r":0.9842136265,
191
+ "f":0.9837500491
192
  },
193
  "nsubj":{
194
+ "p":0.9803139249,
195
+ "r":0.9795837897,
196
+ "f":0.9799487213
197
  },
198
  "aux":{
199
+ "p":0.989571263,
200
+ "r":0.9883379329,
201
+ "f":0.9889542134
202
  },
203
  "advmod":{
204
+ "p":0.8962161255,
205
+ "r":0.8987043581,
206
+ "f":0.8974585171
207
  },
208
  "relcl":{
209
+ "p":0.8798724309,
210
+ "r":0.9009433962,
211
+ "f":0.8902832556
212
  },
213
  "root":{
214
+ "p":0.9664043142,
215
+ "r":0.8747938518,
216
+ "f":0.9183200028
217
  },
218
  "xcomp":{
219
+ "p":0.9438444924,
220
+ "r":0.9411342426,
221
+ "f":0.9424874191
222
  },
223
  "amod":{
224
+ "p":0.9413514387,
225
+ "r":0.943181082,
226
+ "f":0.9422653722
227
  },
228
  "compound":{
229
+ "p":0.951964576,
230
+ "r":0.9458676765,
231
+ "f":0.9489063329
232
  },
233
  "poss":{
234
+ "p":0.986728333,
235
+ "r":0.9877214171,
236
+ "f":0.9872246253
237
  },
238
  "ccomp":{
239
+ "p":0.8604651163,
240
+ "r":0.9118126273,
241
+ "f":0.8853950361
242
  },
243
  "attr":{
244
+ "p":0.9478225144,
245
+ "r":0.9701429773,
246
+ "f":0.9588528678
247
  },
248
  "case":{
249
+ "p":0.9890547264,
250
+ "r":0.994994995,
251
+ "f":0.9920159681
252
  },
253
  "mark":{
254
+ "p":0.9492105263,
255
+ "r":0.9557498675,
256
+ "f":0.9524689728
257
  },
258
  "intj":{
259
+ "p":0.6237388724,
260
+ "r":0.76996337,
261
+ "f":0.6891803279
262
  },
263
  "advcl":{
264
+ "p":0.8195836546,
265
+ "r":0.803072274,
266
+ "f":0.8112439583
267
  },
268
  "cc":{
269
+ "p":0.8986197049,
270
+ "r":0.9032412391,
271
+ "f":0.9009245452
272
  },
273
  "neg":{
274
+ "p":0.9574148297,
275
+ "r":0.958855996,
276
+ "f":0.9581348709
277
  },
278
  "conj":{
279
+ "p":0.8712776177,
280
+ "r":0.9133937563,
281
+ "f":0.8918387414
282
  },
283
  "nsubjpass":{
284
+ "p":0.9616182573,
285
+ "r":0.9507692308,
286
+ "f":0.9561629706
287
  },
288
  "auxpass":{
289
+ "p":0.9676979072,
290
+ "r":0.9690205011,
291
+ "f":0.9683587526
292
  },
293
  "dobj":{
294
+ "p":0.9742177522,
295
+ "r":0.9725874572,
296
+ "f":0.9734019221
297
  },
298
  "nummod":{
299
+ "p":0.9582800102,
300
+ "r":0.9454545455,
301
+ "f":0.9518240753
302
  },
303
  "npadvmod":{
304
+ "p":0.8582474227,
305
+ "r":0.8280639432,
306
+ "f":0.8428855541
307
  },
308
  "prt":{
309
+ "p":0.896,
310
+ "r":0.9032258065,
311
+ "f":0.8995983936
312
  },
313
  "pcomp":{
314
+ "p":0.9316901408,
315
+ "r":0.9264705882,
316
+ "f":0.9290730337
317
  },
318
  "expl":{
319
+ "p":0.9893617021,
320
  "r":0.9957173448,
321
+ "f":0.992529349
322
  },
323
  "acl":{
324
+ "p":0.8476138829,
325
+ "r":0.852700491,
326
+ "f":0.8501495785
327
  },
328
  "agent":{
329
+ "p":0.9559859155,
330
+ "r":0.9731182796,
331
+ "f":0.9644760213
332
  },
333
  "dative":{
334
+ "p":0.8236607143,
335
+ "r":0.8463302752,
336
+ "f":0.834841629
337
  },
338
  "acomp":{
339
+ "p":0.9352355072,
340
+ "r":0.9365079365,
341
+ "f":0.9358712894
342
  },
343
  "dep":{
344
+ "p":0.3949044586,
345
+ "r":0.4025974026,
346
+ "f":0.3987138264
347
  },
348
  "csubj":{
349
+ "p":0.8928571429,
350
+ "r":0.8875739645,
351
+ "f":0.8902077151
352
  },
353
  "quantmod":{
354
+ "p":0.8867449664,
355
+ "r":0.8586515028,
356
+ "f":0.872472142
357
  },
358
  "nmod":{
359
+ "p":0.82904795,
360
+ "r":0.7269957343,
361
+ "f":0.7746753247
362
  },
363
  "appos":{
364
+ "p":0.8099701747,
365
+ "r":0.8247288503,
366
+ "f":0.8172828891
367
  },
368
  "predet":{
369
+ "p":0.8508064516,
370
+ "r":0.9055793991,
371
+ "f":0.8773388773
372
  },
373
  "preconj":{
374
+ "p":0.7083333333,
375
+ "r":0.7906976744,
376
+ "f":0.7472527473
377
  },
378
  "oprd":{
379
+ "p":0.9022082019,
380
+ "r":0.8537313433,
381
+ "f":0.8773006135
382
  },
383
  "parataxis":{
384
+ "p":0.5602189781,
385
+ "r":0.6659436009,
386
+ "f":0.6085232904
387
  },
388
  "meta":{
389
+ "p":0.1683168317,
390
  "r":0.6538461538,
391
+ "f":0.2677165354
392
  },
393
  "csubjpass":{
394
+ "p":0.8333333333,
395
  "r":0.8333333333,
396
+ "f":0.8333333333
397
  }
398
  },
399
+ "ents_p":0.898708838,
400
+ "ents_r":0.8993389423,
401
+ "ents_f":0.8990237797,
402
  "ents_per_type":{
403
  "DATE":{
404
+ "p":0.8928683632,
405
+ "r":0.9022222222,
406
+ "f":0.8975209222
407
  },
408
  "GPE":{
409
+ "p":0.9575551783,
410
+ "r":0.9439330544,
411
+ "f":0.9506953224
412
  },
413
  "ORDINAL":{
414
+ "p":0.8079268293,
415
  "r":0.8229813665,
416
+ "f":0.8153846154
417
  },
418
  "ORG":{
419
+ "p":0.9032515991,
420
+ "r":0.8984623542,
421
+ "f":0.9008506114
422
  },
423
  "FAC":{
424
+ "p":0.5567010309,
425
+ "r":0.8307692308,
426
+ "f":0.6666666667
427
  },
428
  "QUANTITY":{
429
+ "p":0.8187134503,
430
+ "r":0.7692307692,
431
+ "f":0.7932011331
432
  },
433
  "LOC":{
434
+ "p":0.8610169492,
435
+ "r":0.8089171975,
436
+ "f":0.8341543514
437
  },
438
  "CARDINAL":{
439
+ "p":0.8530092593,
440
+ "r":0.8763376932,
441
+ "f":0.864516129
442
  },
443
  "PERSON":{
444
+ "p":0.9436258808,
445
+ "r":0.9614882507,
446
+ "f":0.9524733269
447
  },
448
  "NORP":{
449
+ "p":0.9211783439,
450
+ "r":0.9256,
451
+ "f":0.9233838787
452
  },
453
  "LAW":{
454
+ "p":0.5324675325,
455
+ "r":0.640625,
456
+ "f":0.5815602837
457
  },
458
  "PRODUCT":{
459
+ "p":0.649122807,
460
  "r":0.5260663507,
461
+ "f":0.5811518325
462
+ },
463
+ "MONEY":{
464
+ "p":0.9157641396,
465
+ "r":0.8984651712,
466
+ "f":0.9070321812
467
  },
468
  "TIME":{
469
+ "p":0.7514124294,
470
  "r":0.7777777778,
471
+ "f":0.7643678161
472
  },
473
  "EVENT":{
474
+ "p":0.776119403,
475
+ "r":0.5977011494,
476
+ "f":0.6753246753
477
  },
478
  "WORK_OF_ART":{
479
+ "p":0.5684210526,
480
+ "r":0.5567010309,
481
+ "f":0.5625
482
  },
483
  "PERCENT":{
484
+ "p":0.9154929577,
485
+ "r":0.8958652374,
486
+ "f":0.9055727554
487
  },
488
  "LANGUAGE":{
489
  "p":1.0,
490
+ "r":0.71875,
491
+ "f":0.8363636364
492
  }
493
+ },
494
+ "speed":6163.7320781314
495
  },
496
  "sources":[
497
  {
 
520
  }
521
  ],
522
  "requirements":[
523
+ "spacy-transformers>=1.1.2,<1.2.0"
524
  ]
525
  }
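`meta.json` now pins `spacy_version` to `>=3.2.0,<3.3.0` and requires `spacy-transformers>=1.1.2,<1.2.0`, so environments still on spaCy 3.1.x need to upgrade before loading this release. A hedged sketch (assuming the `packaging` library, which spaCy itself depends on, and a local `meta.json`) of checking the installed spaCy against that pin:

```python
import json

import spacy
from packaging.specifiers import SpecifierSet
from packaging.version import Version

# Path assumed relative to a checkout of this repository.
with open("meta.json", encoding="utf8") as f:
    meta = json.load(f)

required = SpecifierSet(meta["spacy_version"])  # ">=3.2.0,<3.3.0" for this release
installed = Version(spacy.__version__)
print(f"spaCy {installed} compatible with {meta['spacy_version']}:", installed in required)

# Additional package pins shipped with the model, e.g. spacy-transformers>=1.1.2,<1.2.0.
print(meta["requirements"])
```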
ner/model CHANGED
Binary files a/ner/model and b/ner/model differ
 
parser/model CHANGED
Binary files a/parser/model and b/parser/model differ
 
tagger/cfg CHANGED
@@ -49,5 +49,6 @@
49
  "WRB",
50
  "XX",
51
  "``"
52
- ]
 
53
  }
 
49
  "WRB",
50
  "XX",
51
  "``"
52
+ ],
53
+ "overwrite":false
54
  }
tagger/model CHANGED
Binary files a/tagger/model and b/tagger/model differ
 
tokenizer CHANGED
The diff for this file is too large to render. See raw diff
 
transformer/{model/pytorch_model.bin → model} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1370895942196336d70d45a0e2610ca87ed84f68fa85b00c651e1bee1f40fa06
3
- size 498661169
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:523fefd9ecd5f2a76cb7f686222af0615942bc51d131339f5883a6190e7fdc72
3
+ size 501274094
transformer/model/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "_name_or_path": "/mnt/scratch/tmp/en_core_web_trf/dc04c7b2-9045-42b5-95f8-fa28249ae3b8/training/core/model-best/transformer/model",
3
- "architectures": [
4
- "RobertaForMaskedLM"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 0,
8
- "eos_token_id": 2,
9
- "gradient_checkpointing": false,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
12
- "hidden_size": 768,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 3072,
15
- "layer_norm_eps": 1e-05,
16
- "max_position_embeddings": 514,
17
- "model_type": "roberta",
18
- "num_attention_heads": 12,
19
- "num_hidden_layers": 12,
20
- "pad_token_id": 1,
21
- "position_embedding_type": "absolute",
22
- "transformers_version": "4.6.1",
23
- "type_vocab_size": 1,
24
- "use_cache": true,
25
- "vocab_size": 50265
26
- }
 
transformer/model/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
transformer/model/special_tokens_map.json DELETED
@@ -1 +0,0 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
 
 
transformer/model/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
transformer/model/tokenizer_config.json DELETED
@@ -1 +0,0 @@
1
- {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "/mnt/scratch/tmp/en_core_web_trf/dc04c7b2-9045-42b5-95f8-fa28249ae3b8/training/core/model-best/transformer/model"}
 
 
transformer/model/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d7d17d9d63314534996ec9f1264e258ff1632d65deb4de10203a5f70a37c57f
3
- size 1091369
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6fda43452971a53ac720650d08c74297656aec52627651521ce4fbceaabce87
3
+ size 1091378
vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
1
+ {
2
+ "mode":"default"
3
+ }