osanseviero HF staff committed on
Commit
f35dec9
1 Parent(s): 3dcb810

Update spaCy pipeline

Browse files
.gitattributes CHANGED
@@ -15,3 +15,8 @@
15
  *.pt filter=lfs diff=lfs merge=lfs -text
16
  *.pth filter=lfs diff=lfs merge=lfs -text
17
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
15
  *.pt filter=lfs diff=lfs merge=lfs -text
16
  *.pth filter=lfs diff=lfs merge=lfs -text
17
  *tfevents* filter=lfs diff=lfs merge=lfs -text
18
+ *.whl filter=lfs diff=lfs merge=lfs -text
19
+ *.npz filter=lfs diff=lfs merge=lfs -text
20
+ *strings.json filter=lfs diff=lfs merge=lfs -text
21
+ vectors filter=lfs diff=lfs merge=lfs -text
22
+ model filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright 2021 ExplosionAI GmbH
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
4
+ this software and associated documentation files (the "Software"), to deal in
5
+ the Software without restriction, including without limitation the rights to
6
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7
+ of the Software, and to permit persons to whom the Software is furnished to do
8
+ so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
LICENSES_SOURCES ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # OntoNotes 5
2
+
3
+ * Author: Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston
4
+ * URL: https://catalog.ldc.upenn.edu/LDC2013T19
5
+ * License: commercial (licensed by Explosion)
6
+
7
+ ```
8
+ ```
9
+
10
+
11
+
12
+
13
+ # ClearNLP Constituent-to-Dependency Conversion
14
+
15
+ * Author: Emory University
16
+ * URL: https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md
17
+ * License: Citation provided for reference, no code packaged with model
18
+
19
+ ```
20
+ ```
21
+
22
+
23
+
24
+
25
+ # WordNet 3.0
26
+
27
+ * Author: Princeton University
28
+ * URL: https://wordnet.princeton.edu/
29
+ * License: WordNet 3.0 License
30
+
31
+ ```
32
+ WordNet Release 3.0
33
+
34
+ This software and database is being provided to you, the LICENSEE, by
35
+ Princeton University under the following license. By obtaining, using
36
+ and/or copying this software and database, you agree that you have
37
+ read, understood, and will comply with these terms and conditions.:
38
+
39
+ Permission to use, copy, modify and distribute this software and
40
+ database and its documentation for any purpose and without fee or
41
+ royalty is hereby granted, provided that you agree to comply with
42
+ the following copyright notice and statements, including the disclaimer,
43
+ and that the same appear on ALL copies of the software, database and
44
+ documentation, including modifications that you make for internal
45
+ use or for distribution.
46
+
47
+ WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved.
48
+
49
+ THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON
50
+ UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
51
+ IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON
52
+ UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-
53
+ ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE
54
+ OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT
55
+ INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR
56
+ OTHER RIGHTS.
57
+
58
+ The name of Princeton University or Princeton may not be used in
59
+ advertising or publicity pertaining to distribution of the software
60
+ and/or database. Title to copyright in this software, database and
61
+ any associated documentation shall at all times remain with
62
+ Princeton University and LICENSEE agrees to preserve same.
+ ```
63
+
64
+
65
+
66
+
README.md ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - spacy
4
+ - token-classification
5
+ language:
6
+ - en
7
+ license: MIT
8
+ model-index:
9
+ - name: en_core_web_sm
10
+ results:
11
+ - task:
12
+ name: NER
13
+ type: token-classification
14
+ metrics:
15
+ - name: NER Precision
16
+ type: precision
17
+ value: 0.8424355924
18
+ - name: NER Recall
19
+ type: recall
20
+ value: 0.8335336538
21
+ - name: NER F Score
22
+ type: f_score
23
+ value: 0.8379609817
24
+ - task:
25
+ name: POS
26
+ type: token-classification
27
+ metrics:
28
+ - name: POS Accuracy
29
+ type: accuracy
30
+ value: 0.9720712187
31
+ - task:
32
+ name: SENTER
33
+ type: token-classification
34
+ metrics:
35
+ - name: SENTER Precision
36
+ type: precision
37
+ value: 0.9074955788
38
+ - name: SENTER Recall
39
+ type: recall
40
+ value: 0.8801372122
41
+ - name: SENTER F Score
42
+ type: f_score
43
+ value: 0.893607046
44
+ - task:
45
+ name: UNLABELED_DEPENDENCIES
46
+ type: token-classification
47
+ metrics:
48
+ - name: Unlabeled Dependencies Accuracy
49
+ type: accuracy
50
+ value: 0.9185392711
51
+ - task:
52
+ name: LABELED_DEPENDENCIES
53
+ type: token-classification
54
+ metrics:
55
+ - name: Labeled Dependencies Accuracy
56
+ type: accuracy
57
+ value: 0.9001546872
58
+ ---
59
+ ### Details: https://spacy.io/models/en#en_core_web_sm
60
+
61
+ English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.
62
+
63
+ | Feature | Description |
64
+ | --- | --- |
65
+ | **Name** | `en_core_web_sm` |
66
+ | **Version** | `3.1.0` |
67
+ | **spaCy** | `>=3.1.0,<3.2.0` |
68
+ | **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
69
+ | **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
70
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
71
+ | **Sources** | [OntoNotes 5](https://catalog.ldc.upenn.edu/LDC2013T19) (Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston)<br />[ClearNLP Constituent-to-Dependency Conversion](https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md) (Emory University)<br />[WordNet 3.0](https://wordnet.princeton.edu/) (Princeton University) |
72
+ | **License** | `MIT` |
73
+ | **Author** | [Explosion](https://explosion.ai) |
74
+
75
+ ### Label Scheme
76
+
77
+ <details>
78
+
79
+ <summary>View label scheme (114 labels for 4 components)</summary>
80
+
81
+ | Component | Labels |
82
+ | --- | --- |
83
+ | **`tagger`** | `$`, `''`, `,`, `-LRB-`, `-RRB-`, `.`, `:`, `ADD`, `AFX`, `CC`, `CD`, `DT`, `EX`, `FW`, `HYPH`, `IN`, `JJ`, `JJR`, `JJS`, `LS`, `MD`, `NFP`, `NN`, `NNP`, `NNPS`, `NNS`, `PDT`, `POS`, `PRP`, `PRP$`, `RB`, `RBR`, `RBS`, `RP`, `SYM`, `TO`, `UH`, `VB`, `VBD`, `VBG`, `VBN`, `VBP`, `VBZ`, `WDT`, `WP`, `WP$`, `WRB`, `XX`, ``` `` ``` |
84
+ | **`parser`** | `ROOT`, `acl`, `acomp`, `advcl`, `advmod`, `agent`, `amod`, `appos`, `attr`, `aux`, `auxpass`, `case`, `cc`, `ccomp`, `compound`, `conj`, `csubj`, `csubjpass`, `dative`, `dep`, `det`, `dobj`, `expl`, `intj`, `mark`, `meta`, `neg`, `nmod`, `npadvmod`, `nsubj`, `nsubjpass`, `nummod`, `oprd`, `parataxis`, `pcomp`, `pobj`, `poss`, `preconj`, `predet`, `prep`, `prt`, `punct`, `quantmod`, `relcl`, `xcomp` |
85
+ | **`senter`** | `I`, `S` |
86
+ | **`ner`** | `CARDINAL`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `TIME`, `WORK_OF_ART` |
87
+
88
+ </details>
89
+
90
+ ### Accuracy
91
+
92
+ | Type | Score |
93
+ | --- | --- |
94
+ | `TOKEN_ACC` | 99.93 |
95
+ | `TAG_ACC` | 97.21 |
96
+ | `DEP_UAS` | 91.85 |
97
+ | `DEP_LAS` | 90.02 |
98
+ | `ENTS_P` | 84.24 |
99
+ | `ENTS_R` | 83.35 |
100
+ | `ENTS_F` | 83.80 |
101
+ | `SENTS_P` | 90.75 |
102
+ | `SENTS_R` | 88.01 |
103
+ | `SENTS_F` | 89.36 |
accuracy.json ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "token_acc": 0.9993053983,
3
+ "tag_acc": 0.9720712187,
4
+ "dep_uas": 0.9185392711,
5
+ "dep_las": 0.9001546872,
6
+ "ents_p": 0.8424355924,
7
+ "ents_r": 0.8335336538,
8
+ "ents_f": 0.8379609817,
9
+ "sents_p": 0.9074955788,
10
+ "sents_r": 0.8801372122,
11
+ "sents_f": 0.893607046,
12
+ "speed": 10426.0619939972,
13
+ "dep_las_per_type": {
14
+ "prep": {
15
+ "p": 0.8554402587,
16
+ "r": 0.8633276418,
17
+ "f": 0.8593658527
18
+ },
19
+ "det": {
20
+ "p": 0.9772329247,
21
+ "r": 0.9783087336,
22
+ "f": 0.9777705332
23
+ },
24
+ "pobj": {
25
+ "p": 0.9603285837,
26
+ "r": 0.9686628706,
27
+ "f": 0.9644777228
28
+ },
29
+ "nsubj": {
30
+ "p": 0.9598718747,
31
+ "r": 0.945279299,
32
+ "f": 0.9525197007
33
+ },
34
+ "aux": {
35
+ "p": 0.9795737123,
36
+ "r": 0.9819282471,
37
+ "f": 0.9807495665
38
+ },
39
+ "advmod": {
40
+ "p": 0.8538267452,
41
+ "r": 0.8541140838,
42
+ "f": 0.8539703903
43
+ },
44
+ "relcl": {
45
+ "p": 0.764978602,
46
+ "r": 0.7783018868,
47
+ "f": 0.7715827338
48
+ },
49
+ "root": {
50
+ "p": 0.9199754818,
51
+ "r": 0.8910878026,
52
+ "f": 0.9053012533
53
+ },
54
+ "xcomp": {
55
+ "p": 0.8813440672,
56
+ "r": 0.903804738,
57
+ "f": 0.892433103
58
+ },
59
+ "amod": {
60
+ "p": 0.9171875,
61
+ "r": 0.9127308066,
62
+ "f": 0.9149537263
63
+ },
64
+ "compound": {
65
+ "p": 0.916057391,
66
+ "r": 0.9280463355,
67
+ "f": 0.9220128918
68
+ },
69
+ "poss": {
70
+ "p": 0.9744672296,
71
+ "r": 0.9756441224,
72
+ "f": 0.9750553209
73
+ },
74
+ "ccomp": {
75
+ "p": 0.7747747748,
76
+ "r": 0.8407331976,
77
+ "f": 0.8064075015
78
+ },
79
+ "attr": {
80
+ "p": 0.8989490703,
81
+ "r": 0.9352396972,
82
+ "f": 0.9167353669
83
+ },
84
+ "case": {
85
+ "p": 0.9801980198,
86
+ "r": 0.990990991,
87
+ "f": 0.9855649577
88
+ },
89
+ "mark": {
90
+ "p": 0.9015392781,
91
+ "r": 0.9001059883,
92
+ "f": 0.9008220631
93
+ },
94
+ "intj": {
95
+ "p": 0.6712856043,
96
+ "r": 0.6388278388,
97
+ "f": 0.6546546547
98
+ },
99
+ "advcl": {
100
+ "p": 0.6687116564,
101
+ "r": 0.6587761269,
102
+ "f": 0.6637067106
103
+ },
104
+ "cc": {
105
+ "p": 0.8426682692,
106
+ "r": 0.8385360603,
107
+ "f": 0.8405970865
108
+ },
109
+ "neg": {
110
+ "p": 0.9461346633,
111
+ "r": 0.9518314099,
112
+ "f": 0.9489744872
113
+ },
114
+ "conj": {
115
+ "p": 0.7715100422,
116
+ "r": 0.7833585096,
117
+ "f": 0.7773891318
118
+ },
119
+ "nsubjpass": {
120
+ "p": 0.9296875,
121
+ "r": 0.9153846154,
122
+ "f": 0.9224806202
123
+ },
124
+ "auxpass": {
125
+ "p": 0.9496883348,
126
+ "r": 0.9717539863,
127
+ "f": 0.9605944607
128
+ },
129
+ "dobj": {
130
+ "p": 0.919667805,
131
+ "r": 0.9442186628,
132
+ "f": 0.9317815437
133
+ },
134
+ "nummod": {
135
+ "p": 0.9341637011,
136
+ "r": 0.928030303,
137
+ "f": 0.9310869014
138
+ },
139
+ "npadvmod": {
140
+ "p": 0.7723823976,
141
+ "r": 0.723268206,
142
+ "f": 0.7470188956
143
+ },
144
+ "prt": {
145
+ "p": 0.8175842235,
146
+ "r": 0.8915770609,
147
+ "f": 0.852978997
148
+ },
149
+ "pcomp": {
150
+ "p": 0.890625,
151
+ "r": 0.8781512605,
152
+ "f": 0.8843441467
153
+ },
154
+ "expl": {
155
+ "p": 0.978858351,
156
+ "r": 0.9914346895,
157
+ "f": 0.985106383
158
+ },
159
+ "acl": {
160
+ "p": 0.7473745624,
161
+ "r": 0.6988543372,
162
+ "f": 0.7223005357
163
+ },
164
+ "agent": {
165
+ "p": 0.8937605396,
166
+ "r": 0.9498207885,
167
+ "f": 0.9209383145
168
+ },
169
+ "dative": {
170
+ "p": 0.7608695652,
171
+ "r": 0.7224770642,
172
+ "f": 0.7411764706
173
+ },
174
+ "acomp": {
175
+ "p": 0.91184573,
176
+ "r": 0.9006802721,
177
+ "f": 0.9062286105
178
+ },
179
+ "dep": {
180
+ "p": 0.3689655172,
181
+ "r": 0.1737012987,
182
+ "f": 0.2362030905
183
+ },
184
+ "csubj": {
185
+ "p": 0.6424242424,
186
+ "r": 0.6272189349,
187
+ "f": 0.6347305389
188
+ },
189
+ "quantmod": {
190
+ "p": 0.8624667258,
191
+ "r": 0.7896019496,
192
+ "f": 0.8244274809
193
+ },
194
+ "nmod": {
195
+ "p": 0.7359073359,
196
+ "r": 0.5807434491,
197
+ "f": 0.6491825613
198
+ },
199
+ "appos": {
200
+ "p": 0.7011441648,
201
+ "r": 0.6646420824,
202
+ "f": 0.6824053452
203
+ },
204
+ "predet": {
205
+ "p": 0.8514056225,
206
+ "r": 0.9098712446,
207
+ "f": 0.8796680498
208
+ },
209
+ "preconj": {
210
+ "p": 0.476635514,
211
+ "r": 0.5930232558,
212
+ "f": 0.5284974093
213
+ },
214
+ "oprd": {
215
+ "p": 0.8193979933,
216
+ "r": 0.7313432836,
217
+ "f": 0.7728706625
218
+ },
219
+ "parataxis": {
220
+ "p": 0.6336088154,
221
+ "r": 0.4989154013,
222
+ "f": 0.5582524272
223
+ },
224
+ "meta": {
225
+ "p": 0.7727272727,
226
+ "r": 0.3269230769,
227
+ "f": 0.4594594595
228
+ },
229
+ "csubjpass": {
230
+ "p": 0.5,
231
+ "r": 0.8333333333,
232
+ "f": 0.625
233
+ }
234
+ },
235
+ "ents_per_type": {
236
+ "DATE": {
237
+ "p": 0.8659955961,
238
+ "r": 0.873968254,
239
+ "f": 0.8699636593
240
+ },
241
+ "GPE": {
242
+ "p": 0.9142529395,
243
+ "r": 0.8892608089,
244
+ "f": 0.9015837104
245
+ },
246
+ "ORG": {
247
+ "p": 0.789707419,
248
+ "r": 0.8014316013,
249
+ "f": 0.7955263158
250
+ },
251
+ "FAC": {
252
+ "p": 0.3679245283,
253
+ "r": 0.3,
254
+ "f": 0.3305084746
255
+ },
256
+ "CARDINAL": {
257
+ "p": 0.8146892655,
258
+ "r": 0.8573127229,
259
+ "f": 0.8354577057
260
+ },
261
+ "PERSON": {
262
+ "p": 0.8424487259,
263
+ "r": 0.8847911227,
264
+ "f": 0.8631009233
265
+ },
266
+ "NORP": {
267
+ "p": 0.9078415521,
268
+ "r": 0.8984,
269
+ "f": 0.9030960997
270
+ },
271
+ "ORDINAL": {
272
+ "p": 0.7787114846,
273
+ "r": 0.8633540373,
274
+ "f": 0.8188512518
275
+ },
276
+ "LOC": {
277
+ "p": 0.713740458,
278
+ "r": 0.5955414013,
279
+ "f": 0.6493055556
280
+ },
281
+ "TIME": {
282
+ "p": 0.712962963,
283
+ "r": 0.6754385965,
284
+ "f": 0.6936936937
285
+ },
286
+ "PRODUCT": {
287
+ "p": 0.5510204082,
288
+ "r": 0.2559241706,
289
+ "f": 0.3495145631
290
+ },
291
+ "QUANTITY": {
292
+ "p": 0.842519685,
293
+ "r": 0.5879120879,
294
+ "f": 0.6925566343
295
+ },
296
+ "WORK_OF_ART": {
297
+ "p": 0.46875,
298
+ "r": 0.3092783505,
299
+ "f": 0.3726708075
300
+ },
301
+ "EVENT": {
302
+ "p": 0.595959596,
303
+ "r": 0.3390804598,
304
+ "f": 0.4322344322
305
+ },
306
+ "MONEY": {
307
+ "p": 0.9182692308,
308
+ "r": 0.9020070838,
309
+ "f": 0.9100655152
310
+ },
311
+ "LAW": {
312
+ "p": 0.5555555556,
313
+ "r": 0.46875,
314
+ "f": 0.5084745763
315
+ },
316
+ "PERCENT": {
317
+ "p": 0.92,
318
+ "r": 0.8805513017,
319
+ "f": 0.8998435055
320
+ },
321
+ "LANGUAGE": {
322
+ "p": 0.7692307692,
323
+ "r": 0.625,
324
+ "f": 0.6896551724
325
+ }
326
+ }
327
+ }
attribute_ruler/patterns ADDED
Binary file (16 kB). View file
 
config.cfg ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [paths]
2
+ train = "corpus/en-core-web/train.spacy"
3
+ dev = "corpus/en-core-web/dev.spacy"
4
+ vectors = null
5
+ raw = null
6
+ init_tok2vec = null
7
+ vocab_data = null
8
+
9
+ [system]
10
+ gpu_allocator = null
11
+ seed = 0
12
+
13
+ [nlp]
14
+ lang = "en"
15
+ pipeline = ["tok2vec","tagger","parser","senter","attribute_ruler","lemmatizer","ner"]
16
+ disabled = ["senter"]
17
+ before_creation = null
18
+ after_creation = null
19
+ after_pipeline_creation = null
20
+ batch_size = 256
21
+ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
22
+
23
+ [components]
24
+
25
+ [components.attribute_ruler]
26
+ factory = "attribute_ruler"
27
+ validate = false
28
+
29
+ [components.lemmatizer]
30
+ factory = "lemmatizer"
31
+ mode = "rule"
32
+ model = null
33
+ overwrite = false
34
+
35
+ [components.ner]
36
+ factory = "ner"
37
+ incorrect_spans_key = null
38
+ moves = null
39
+ update_with_oracle_cut_size = 100
40
+
41
+ [components.ner.model]
42
+ @architectures = "spacy.TransitionBasedParser.v2"
43
+ state_type = "ner"
44
+ extra_state_tokens = false
45
+ hidden_width = 64
46
+ maxout_pieces = 2
47
+ use_upper = true
48
+ nO = null
49
+
50
+ [components.ner.model.tok2vec]
51
+ @architectures = "spacy.Tok2Vec.v2"
52
+
53
+ [components.ner.model.tok2vec.embed]
54
+ @architectures = "spacy.MultiHashEmbed.v2"
55
+ width = 96
56
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
57
+ rows = [5000,2500,2500,2500]
58
+ include_static_vectors = false
59
+
60
+ [components.ner.model.tok2vec.encode]
61
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
62
+ width = 96
63
+ depth = 4
64
+ window_size = 1
65
+ maxout_pieces = 3
66
+
67
+ [components.parser]
68
+ factory = "parser"
69
+ learn_tokens = false
70
+ min_action_freq = 30
71
+ moves = null
72
+ update_with_oracle_cut_size = 100
73
+
74
+ [components.parser.model]
75
+ @architectures = "spacy.TransitionBasedParser.v2"
76
+ state_type = "parser"
77
+ extra_state_tokens = false
78
+ hidden_width = 64
79
+ maxout_pieces = 2
80
+ use_upper = true
81
+ nO = null
82
+
83
+ [components.parser.model.tok2vec]
84
+ @architectures = "spacy.Tok2VecListener.v1"
85
+ width = ${components.tok2vec.model.encode:width}
86
+ upstream = "tok2vec"
87
+
88
+ [components.senter]
89
+ factory = "senter"
90
+
91
+ [components.senter.model]
92
+ @architectures = "spacy.Tagger.v1"
93
+ nO = null
94
+
95
+ [components.senter.model.tok2vec]
96
+ @architectures = "spacy.Tok2Vec.v2"
97
+
98
+ [components.senter.model.tok2vec.embed]
99
+ @architectures = "spacy.MultiHashEmbed.v2"
100
+ width = 16
101
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
102
+ rows = [1000,500,500,500]
103
+ include_static_vectors = false
104
+
105
+ [components.senter.model.tok2vec.encode]
106
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
107
+ width = 16
108
+ depth = 2
109
+ window_size = 1
110
+ maxout_pieces = 2
111
+
112
+ [components.tagger]
113
+ factory = "tagger"
114
+
115
+ [components.tagger.model]
116
+ @architectures = "spacy.Tagger.v1"
117
+ nO = null
118
+
119
+ [components.tagger.model.tok2vec]
120
+ @architectures = "spacy.Tok2VecListener.v1"
121
+ width = ${components.tok2vec.model.encode:width}
122
+ upstream = "tok2vec"
123
+
124
+ [components.tok2vec]
125
+ factory = "tok2vec"
126
+
127
+ [components.tok2vec.model]
128
+ @architectures = "spacy.Tok2Vec.v2"
129
+
130
+ [components.tok2vec.model.embed]
131
+ @architectures = "spacy.MultiHashEmbed.v2"
132
+ width = ${components.tok2vec.model.encode:width}
133
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
134
+ rows = [5000,2500,2500,2500]
135
+ include_static_vectors = false
136
+
137
+ [components.tok2vec.model.encode]
138
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
139
+ width = 96
140
+ depth = 4
141
+ window_size = 1
142
+ maxout_pieces = 3
143
+
144
+ [corpora]
145
+
146
+ [corpora.dev]
147
+ @readers = "spacy.Corpus.v1"
148
+ limit = 0
149
+ max_length = 0
150
+ path = ${paths:dev}
151
+ gold_preproc = false
152
+ augmenter = null
153
+
154
+ [corpora.train]
155
+ @readers = "spacy.Corpus.v1"
156
+ path = ${paths:train}
157
+ max_length = 5000
158
+ gold_preproc = false
159
+ limit = 0
160
+
161
+ [corpora.train.augmenter]
162
+ @augmenters = "spacy.orth_variants.v1"
163
+ level = 0.2
164
+ lower = 0.5
165
+
166
+ [corpora.train.augmenter.orth_variants]
167
+ @readers = "srsly.read_json.v1"
168
+ path = "assets/orth_variants.json"
169
+
170
+ [training]
171
+ train_corpus = "corpora.train"
172
+ dev_corpus = "corpora.dev"
173
+ seed = ${system:seed}
174
+ gpu_allocator = ${system:gpu_allocator}
175
+ dropout = 0.1
176
+ accumulate_gradient = 1
177
+ patience = 5000
178
+ max_epochs = 0
179
+ max_steps = 0
180
+ eval_frequency = 1000
181
+ frozen_components = []
182
+ before_to_disk = null
183
+ annotating_components = []
184
+
185
+ [training.batcher]
186
+ @batchers = "spacy.batch_by_words.v1"
187
+ discard_oversize = false
188
+ tolerance = 0.2
189
+ get_length = null
190
+
191
+ [training.batcher.size]
192
+ @schedules = "compounding.v1"
193
+ start = 100
194
+ stop = 1000
195
+ compound = 1.001
196
+ t = 0.0
197
+
198
+ [training.logger]
199
+ @loggers = "spacy.WandbLogger.v1"
200
+ project_name = "spacy-v3.0.0a2"
201
+ remove_config_values = []
202
+
203
+ [training.optimizer]
204
+ @optimizers = "Adam.v1"
205
+ beta1 = 0.9
206
+ beta2 = 0.999
207
+ L2_is_weight_decay = true
208
+ L2 = 0.01
209
+ grad_clip = 1.0
210
+ use_averages = true
211
+ eps = 0.00000001
212
+ learn_rate = 0.001
213
+
214
+ [training.score_weights]
215
+ tag_acc = 0.16
216
+ dep_uas = 0.0
217
+ dep_las = 0.16
218
+ dep_las_per_type = null
219
+ sents_p = null
220
+ sents_r = null
221
+ sents_f = 0.02
222
+ lemma_acc = 0.33
223
+ ents_f = 0.33
224
+ ents_p = 0.0
225
+ ents_r = 0.0
226
+ ents_per_type = null
227
+
228
+ [pretraining]
229
+
230
+ [initialize]
231
+ vocab_data = ${paths.vocab_data}
232
+ vectors = ${paths.vectors}
233
+ init_tok2vec = ${paths.init_tok2vec}
234
+ before_init = null
235
+ after_init = null
236
+
237
+ [initialize.components]
238
+
239
+ [initialize.components.ner]
240
+
241
+ [initialize.components.ner.labels]
242
+ @readers = "spacy.read_labels.v1"
243
+ path = "corpus/labels/ner.json"
244
+ require = false
245
+
246
+ [initialize.components.parser]
247
+
248
+ [initialize.components.parser.labels]
249
+ @readers = "spacy.read_labels.v1"
250
+ path = "corpus/labels/parser.json"
251
+ require = false
252
+
253
+ [initialize.components.tagger]
254
+
255
+ [initialize.components.tagger.labels]
256
+ @readers = "spacy.read_labels.v1"
257
+ path = "corpus/labels/tagger.json"
258
+ require = false
259
+
260
+ [initialize.lookups]
261
+ @misc = "spacy.LookupsDataLoader.v1"
262
+ lang = ${nlp.lang}
263
+ tables = ["lexeme_norm"]
264
+
265
+ [initialize.tokenizer]
en_core_web_sm-any-py3-none-any.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82bc8de1e7fa6609f0ff481af519c18291d0204044580df0813461608ae2d00c
3
+ size 13622633
lemmatizer/lookups/lookups.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb64f40c0f8396d1762730c0ddf4dad2a52d138f5a389f71a1a1d088173b7737
3
+ size 972893
meta.json ADDED
@@ -0,0 +1,521 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lang":"en",
3
+ "name":"core_web_sm",
4
+ "version":"3.1.0",
5
+ "description":"English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.",
6
+ "author":"Explosion",
7
+ "email":"contact@explosion.ai",
8
+ "url":"https://explosion.ai",
9
+ "license":"MIT",
10
+ "spacy_version":">=3.1.0,<3.2.0",
11
+ "spacy_git_version":"caba63b74",
12
+ "vectors":{
13
+ "width":0,
14
+ "vectors":0,
15
+ "keys":0,
16
+ "name":null
17
+ },
18
+ "labels":{
19
+ "tok2vec":[
20
+
21
+ ],
22
+ "tagger":[
23
+ "$",
24
+ "''",
25
+ ",",
26
+ "-LRB-",
27
+ "-RRB-",
28
+ ".",
29
+ ":",
30
+ "ADD",
31
+ "AFX",
32
+ "CC",
33
+ "CD",
34
+ "DT",
35
+ "EX",
36
+ "FW",
37
+ "HYPH",
38
+ "IN",
39
+ "JJ",
40
+ "JJR",
41
+ "JJS",
42
+ "LS",
43
+ "MD",
44
+ "NFP",
45
+ "NN",
46
+ "NNP",
47
+ "NNPS",
48
+ "NNS",
49
+ "PDT",
50
+ "POS",
51
+ "PRP",
52
+ "PRP$",
53
+ "RB",
54
+ "RBR",
55
+ "RBS",
56
+ "RP",
57
+ "SYM",
58
+ "TO",
59
+ "UH",
60
+ "VB",
61
+ "VBD",
62
+ "VBG",
63
+ "VBN",
64
+ "VBP",
65
+ "VBZ",
66
+ "WDT",
67
+ "WP",
68
+ "WP$",
69
+ "WRB",
70
+ "XX",
71
+ "``"
72
+ ],
73
+ "parser":[
74
+ "ROOT",
75
+ "acl",
76
+ "acomp",
77
+ "advcl",
78
+ "advmod",
79
+ "agent",
80
+ "amod",
81
+ "appos",
82
+ "attr",
83
+ "aux",
84
+ "auxpass",
85
+ "case",
86
+ "cc",
87
+ "ccomp",
88
+ "compound",
89
+ "conj",
90
+ "csubj",
91
+ "csubjpass",
92
+ "dative",
93
+ "dep",
94
+ "det",
95
+ "dobj",
96
+ "expl",
97
+ "intj",
98
+ "mark",
99
+ "meta",
100
+ "neg",
101
+ "nmod",
102
+ "npadvmod",
103
+ "nsubj",
104
+ "nsubjpass",
105
+ "nummod",
106
+ "oprd",
107
+ "parataxis",
108
+ "pcomp",
109
+ "pobj",
110
+ "poss",
111
+ "preconj",
112
+ "predet",
113
+ "prep",
114
+ "prt",
115
+ "punct",
116
+ "quantmod",
117
+ "relcl",
118
+ "xcomp"
119
+ ],
120
+ "senter":[
121
+ "I",
122
+ "S"
123
+ ],
124
+ "attribute_ruler":[
125
+
126
+ ],
127
+ "lemmatizer":[
128
+
129
+ ],
130
+ "ner":[
131
+ "CARDINAL",
132
+ "DATE",
133
+ "EVENT",
134
+ "FAC",
135
+ "GPE",
136
+ "LANGUAGE",
137
+ "LAW",
138
+ "LOC",
139
+ "MONEY",
140
+ "NORP",
141
+ "ORDINAL",
142
+ "ORG",
143
+ "PERCENT",
144
+ "PERSON",
145
+ "PRODUCT",
146
+ "QUANTITY",
147
+ "TIME",
148
+ "WORK_OF_ART"
149
+ ]
150
+ },
151
+ "pipeline":[
152
+ "tok2vec",
153
+ "tagger",
154
+ "parser",
155
+ "attribute_ruler",
156
+ "lemmatizer",
157
+ "ner"
158
+ ],
159
+ "components":[
160
+ "tok2vec",
161
+ "tagger",
162
+ "parser",
163
+ "senter",
164
+ "attribute_ruler",
165
+ "lemmatizer",
166
+ "ner"
167
+ ],
168
+ "disabled":[
169
+ "senter"
170
+ ],
171
+ "performance":{
172
+ "token_acc":0.9993053983,
173
+ "tag_acc":0.9720712187,
174
+ "dep_uas":0.9185392711,
175
+ "dep_las":0.9001546872,
176
+ "ents_p":0.8424355924,
177
+ "ents_r":0.8335336538,
178
+ "ents_f":0.8379609817,
179
+ "sents_p":0.9074955788,
180
+ "sents_r":0.8801372122,
181
+ "sents_f":0.893607046,
182
+ "speed":10426.0619939972,
183
+ "dep_las_per_type":{
184
+ "prep":{
185
+ "p":0.8554402587,
186
+ "r":0.8633276418,
187
+ "f":0.8593658527
188
+ },
189
+ "det":{
190
+ "p":0.9772329247,
191
+ "r":0.9783087336,
192
+ "f":0.9777705332
193
+ },
194
+ "pobj":{
195
+ "p":0.9603285837,
196
+ "r":0.9686628706,
197
+ "f":0.9644777228
198
+ },
199
+ "nsubj":{
200
+ "p":0.9598718747,
201
+ "r":0.945279299,
202
+ "f":0.9525197007
203
+ },
204
+ "aux":{
205
+ "p":0.9795737123,
206
+ "r":0.9819282471,
207
+ "f":0.9807495665
208
+ },
209
+ "advmod":{
210
+ "p":0.8538267452,
211
+ "r":0.8541140838,
212
+ "f":0.8539703903
213
+ },
214
+ "relcl":{
215
+ "p":0.764978602,
216
+ "r":0.7783018868,
217
+ "f":0.7715827338
218
+ },
219
+ "root":{
220
+ "p":0.9199754818,
221
+ "r":0.8910878026,
222
+ "f":0.9053012533
223
+ },
224
+ "xcomp":{
225
+ "p":0.8813440672,
226
+ "r":0.903804738,
227
+ "f":0.892433103
228
+ },
229
+ "amod":{
230
+ "p":0.9171875,
231
+ "r":0.9127308066,
232
+ "f":0.9149537263
233
+ },
234
+ "compound":{
235
+ "p":0.916057391,
236
+ "r":0.9280463355,
237
+ "f":0.9220128918
238
+ },
239
+ "poss":{
240
+ "p":0.9744672296,
241
+ "r":0.9756441224,
242
+ "f":0.9750553209
243
+ },
244
+ "ccomp":{
245
+ "p":0.7747747748,
246
+ "r":0.8407331976,
247
+ "f":0.8064075015
248
+ },
249
+ "attr":{
250
+ "p":0.8989490703,
251
+ "r":0.9352396972,
252
+ "f":0.9167353669
253
+ },
254
+ "case":{
255
+ "p":0.9801980198,
256
+ "r":0.990990991,
257
+ "f":0.9855649577
258
+ },
259
+ "mark":{
260
+ "p":0.9015392781,
261
+ "r":0.9001059883,
262
+ "f":0.9008220631
263
+ },
264
+ "intj":{
265
+ "p":0.6712856043,
266
+ "r":0.6388278388,
267
+ "f":0.6546546547
268
+ },
269
+ "advcl":{
270
+ "p":0.6687116564,
271
+ "r":0.6587761269,
272
+ "f":0.6637067106
273
+ },
274
+ "cc":{
275
+ "p":0.8426682692,
276
+ "r":0.8385360603,
277
+ "f":0.8405970865
278
+ },
279
+ "neg":{
280
+ "p":0.9461346633,
281
+ "r":0.9518314099,
282
+ "f":0.9489744872
283
+ },
284
+ "conj":{
285
+ "p":0.7715100422,
286
+ "r":0.7833585096,
287
+ "f":0.7773891318
288
+ },
289
+ "nsubjpass":{
290
+ "p":0.9296875,
291
+ "r":0.9153846154,
292
+ "f":0.9224806202
293
+ },
294
+ "auxpass":{
295
+ "p":0.9496883348,
296
+ "r":0.9717539863,
297
+ "f":0.9605944607
298
+ },
299
+ "dobj":{
300
+ "p":0.919667805,
301
+ "r":0.9442186628,
302
+ "f":0.9317815437
303
+ },
304
+ "nummod":{
305
+ "p":0.9341637011,
306
+ "r":0.928030303,
307
+ "f":0.9310869014
308
+ },
309
+ "npadvmod":{
310
+ "p":0.7723823976,
311
+ "r":0.723268206,
312
+ "f":0.7470188956
313
+ },
314
+ "prt":{
315
+ "p":0.8175842235,
316
+ "r":0.8915770609,
317
+ "f":0.852978997
318
+ },
319
+ "pcomp":{
320
+ "p":0.890625,
321
+ "r":0.8781512605,
322
+ "f":0.8843441467
323
+ },
324
+ "expl":{
325
+ "p":0.978858351,
326
+ "r":0.9914346895,
327
+ "f":0.985106383
328
+ },
329
+ "acl":{
330
+ "p":0.7473745624,
331
+ "r":0.6988543372,
332
+ "f":0.7223005357
333
+ },
334
+ "agent":{
335
+ "p":0.8937605396,
336
+ "r":0.9498207885,
337
+ "f":0.9209383145
338
+ },
339
+ "dative":{
340
+ "p":0.7608695652,
341
+ "r":0.7224770642,
342
+ "f":0.7411764706
343
+ },
344
+ "acomp":{
345
+ "p":0.91184573,
346
+ "r":0.9006802721,
347
+ "f":0.9062286105
348
+ },
349
+ "dep":{
350
+ "p":0.3689655172,
351
+ "r":0.1737012987,
352
+ "f":0.2362030905
353
+ },
354
+ "csubj":{
355
+ "p":0.6424242424,
356
+ "r":0.6272189349,
357
+ "f":0.6347305389
358
+ },
359
+ "quantmod":{
360
+ "p":0.8624667258,
361
+ "r":0.7896019496,
362
+ "f":0.8244274809
363
+ },
364
+ "nmod":{
365
+ "p":0.7359073359,
366
+ "r":0.5807434491,
367
+ "f":0.6491825613
368
+ },
369
+ "appos":{
370
+ "p":0.7011441648,
371
+ "r":0.6646420824,
372
+ "f":0.6824053452
373
+ },
374
+ "predet":{
375
+ "p":0.8514056225,
376
+ "r":0.9098712446,
377
+ "f":0.8796680498
378
+ },
379
+ "preconj":{
380
+ "p":0.476635514,
381
+ "r":0.5930232558,
382
+ "f":0.5284974093
383
+ },
384
+ "oprd":{
385
+ "p":0.8193979933,
386
+ "r":0.7313432836,
387
+ "f":0.7728706625
388
+ },
389
+ "parataxis":{
390
+ "p":0.6336088154,
391
+ "r":0.4989154013,
392
+ "f":0.5582524272
393
+ },
394
+ "meta":{
395
+ "p":0.7727272727,
396
+ "r":0.3269230769,
397
+ "f":0.4594594595
398
+ },
399
+ "csubjpass":{
400
+ "p":0.5,
401
+ "r":0.8333333333,
402
+ "f":0.625
403
+ }
404
+ },
405
+ "ents_per_type":{
406
+ "DATE":{
407
+ "p":0.8659955961,
408
+ "r":0.873968254,
409
+ "f":0.8699636593
410
+ },
411
+ "GPE":{
412
+ "p":0.9142529395,
413
+ "r":0.8892608089,
414
+ "f":0.9015837104
415
+ },
416
+ "ORG":{
417
+ "p":0.789707419,
418
+ "r":0.8014316013,
419
+ "f":0.7955263158
420
+ },
421
+ "FAC":{
422
+ "p":0.3679245283,
423
+ "r":0.3,
424
+ "f":0.3305084746
425
+ },
426
+ "CARDINAL":{
427
+ "p":0.8146892655,
428
+ "r":0.8573127229,
429
+ "f":0.8354577057
430
+ },
431
+ "PERSON":{
432
+ "p":0.8424487259,
433
+ "r":0.8847911227,
434
+ "f":0.8631009233
435
+ },
436
+ "NORP":{
437
+ "p":0.9078415521,
438
+ "r":0.8984,
439
+ "f":0.9030960997
440
+ },
441
+ "ORDINAL":{
442
+ "p":0.7787114846,
443
+ "r":0.8633540373,
444
+ "f":0.8188512518
445
+ },
446
+ "LOC":{
447
+ "p":0.713740458,
448
+ "r":0.5955414013,
449
+ "f":0.6493055556
450
+ },
451
+ "TIME":{
452
+ "p":0.712962963,
453
+ "r":0.6754385965,
454
+ "f":0.6936936937
455
+ },
456
+ "PRODUCT":{
457
+ "p":0.5510204082,
458
+ "r":0.2559241706,
459
+ "f":0.3495145631
460
+ },
461
+ "QUANTITY":{
462
+ "p":0.842519685,
463
+ "r":0.5879120879,
464
+ "f":0.6925566343
465
+ },
466
+ "WORK_OF_ART":{
467
+ "p":0.46875,
468
+ "r":0.3092783505,
469
+ "f":0.3726708075
470
+ },
471
+ "EVENT":{
472
+ "p":0.595959596,
473
+ "r":0.3390804598,
474
+ "f":0.4322344322
475
+ },
476
+ "MONEY":{
477
+ "p":0.9182692308,
478
+ "r":0.9020070838,
479
+ "f":0.9100655152
480
+ },
481
+ "LAW":{
482
+ "p":0.5555555556,
483
+ "r":0.46875,
484
+ "f":0.5084745763
485
+ },
486
+ "PERCENT":{
487
+ "p":0.92,
488
+ "r":0.8805513017,
489
+ "f":0.8998435055
490
+ },
491
+ "LANGUAGE":{
492
+ "p":0.7692307692,
493
+ "r":0.625,
494
+ "f":0.6896551724
495
+ }
496
+ }
497
+ },
498
+ "sources":[
499
+ {
500
+ "name":"OntoNotes 5",
501
+ "url":"https://catalog.ldc.upenn.edu/LDC2013T19",
502
+ "license":"commercial (licensed by Explosion)",
503
+ "author":"Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston"
504
+ },
505
+ {
506
+ "name":"ClearNLP Constituent-to-Dependency Conversion",
507
+ "url":"https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md",
508
+ "license":"Citation provided for reference, no code packaged with model",
509
+ "author":"Emory University"
510
+ },
511
+ {
512
+ "name":"WordNet 3.0",
513
+ "url":"https://wordnet.princeton.edu/",
514
+ "author":"Princeton University",
515
+ "license":"WordNet 3.0 License"
516
+ }
517
+ ],
518
+ "requirements":[
519
+
520
+ ]
521
+ }
ner/cfg ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "moves":null,
3
+ "update_with_oracle_cut_size":100,
4
+ "multitasks":[
5
+
6
+ ],
7
+ "min_action_freq":1,
8
+ "learn_tokens":false,
9
+ "beam_width":1,
10
+ "beam_density":0.0,
11
+ "beam_update_prob":0.0,
12
+ "incorrect_spans_key":null
13
+ }
ner/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20b7d17b21c63fcef7ae4c3c3fb96dd7303d9c1af7679730c17526188b556952
3
+ size 6730601
ner/moves ADDED
@@ -0,0 +1 @@
 
 
1
+ ��moves�{"0":{},"1":{"ORG":56356,"DATE":40381,"PERSON":36475,"GPE":26716,"MONEY":15121,"CARDINAL":14096,"NORP":9638,"PERCENT":9182,"WORK_OF_ART":4475,"LOC":4047,"TIME":3670,"QUANTITY":3114,"FAC":3042,"EVENT":3015,"ORDINAL":2142,"PRODUCT":1782,"LAW":1620,"LANGUAGE":355},"2":{"ORG":56356,"DATE":40381,"PERSON":36475,"GPE":26716,"MONEY":15121,"CARDINAL":14096,"NORP":9638,"PERCENT":9182,"WORK_OF_ART":4475,"LOC":4047,"TIME":3670,"QUANTITY":3114,"FAC":3042,"EVENT":3015,"ORDINAL":2142,"PRODUCT":1782,"LAW":1620,"LANGUAGE":355},"3":{"ORG":56356,"DATE":40381,"PERSON":36475,"GPE":26716,"MONEY":15121,"CARDINAL":14096,"NORP":9638,"PERCENT":9182,"WORK_OF_ART":4475,"LOC":4047,"TIME":3670,"QUANTITY":3114,"FAC":3042,"EVENT":3015,"ORDINAL":2142,"PRODUCT":1782,"LAW":1620,"LANGUAGE":355},"4":{"ORG":56356,"DATE":40381,"PERSON":36475,"GPE":26716,"MONEY":15121,"CARDINAL":14096,"NORP":9638,"PERCENT":9182,"WORK_OF_ART":4475,"LOC":4047,"TIME":3670,"QUANTITY":3114,"FAC":3042,"EVENT":3015,"ORDINAL":2142,"PRODUCT":1782,"LAW":1620,"LANGUAGE":355,"":1},"5":{"":1}}�cfg��neg_key�
parser/cfg ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "moves":null,
3
+ "update_with_oracle_cut_size":100,
4
+ "multitasks":[
5
+
6
+ ],
7
+ "min_action_freq":30,
8
+ "learn_tokens":false,
9
+ "beam_width":1,
10
+ "beam_density":0.0,
11
+ "beam_update_prob":0.0,
12
+ "incorrect_spans_key":null
13
+ }
parser/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86de5c9df77f4e1b7f943a9bbe03597f247958f1c9d6cb4c2319028b855e4b8f
3
+ size 319909
parser/moves ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ��moves�
2
+ {"0":{"":995932},"1":{"":989662},"2":{"det":172430,"nsubj":165679,"compound":116803,"amod":106128,"aux":87078,"punct":65505,"advmod":62711,"poss":36427,"mark":27913,"nummod":22583,"auxpass":15597,"prep":13989,"nsubjpass":13867,"neg":12358,"cc":10694,"nmod":9572,"advcl":9063,"npadvmod":8135,"quantmod":7071,"intj":6557,"ccomp":5899,"dobj":3427,"expl":3360,"dep":3191,"predet":1945,"parataxis":1826,"csubj":1431,"preconj":620,"pobj||prep":615,"attr":578,"meta":448,"advmod||conj":367,"dobj||xcomp":352,"acomp":284,"nsubj||ccomp":224,"dative":206,"advmod||xcomp":149,"dobj||ccomp":70,"csubjpass":64,"dobj||conj":62,"prep||conj":51,"acl":48,"prep||nsubj":41,"prep||dobj":36,"xcomp":34,"advmod||ccomp":32,"oprd":31},"3":{"punct":183437,"pobj":182256,"prep":173845,"dobj":89650,"conj":59689,"cc":51858,"ccomp":30404,"advmod":22820,"xcomp":21045,"relcl":20968,"advcl":19833,"attr":17739,"acomp":16824,"appos":14963,"case":13361,"acl":12091,"pcomp":10345,"npadvmod":9702,"prt":8179,"agent":3884,"dative":3867,"nsubj":3465,"intj":2898,"neg":2871,"amod":2843,"nummod":2510,"oprd":2304,"dep":1518,"parataxis":1261,"quantmod":317,"nmod":296,"acl||dobj":202,"prep||dobj":190,"prep||nsubj":162,"acl||nsubj":159,"appos||nsubj":145,"relcl||dobj":134,"relcl||nsubj":111,"aux":103,"expl":96,"meta":93,"appos||dobj":86,"preconj":71,"csubj":65,"prep||nsubjpass":55,"prep||advmod":54,"prep||acomp":53,"det":51,"nsubjpass":45,"acl||nsubjpass":42,"relcl||pobj":41,"mark":40,"auxpass":39,"prep||pobj":36,"relcl||nsubjpass":32,"appos||nsubjpass":31},"4":{"ROOT":110979}}�cfg��neg_key�
senter/cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+
3
+ }
senter/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b08f7b005a60e60a5ef7bfccf7688a43b827589b25bb00870a579b3b4fb3e448
3
+ size 190395
tagger/cfg ADDED
@@ -0,0 +1,53 @@