osanseviero HF staff committed on
Commit
7bfda68
•
1 Parent(s): e570d5e

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -63,8 +63,8 @@ Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter,
63
  | Feature | Description |
64
  | --- | --- |
65
  | **Name** | `zh_core_web_sm` |
66
- | **Version** | `3.1.0` |
67
- | **spaCy** | `>=3.1.0,<3.2.0` |
68
  | **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `ner` |
69
  | **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `ner` |
70
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
@@ -92,12 +92,15 @@ Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter,
92
  | Type | Score |
93
  | --- | --- |
94
  | `TOKEN_ACC` | 97.88 |
 
 
 
95
  | `TAG_ACC` | 89.57 |
 
 
 
96
  | `DEP_UAS` | 69.65 |
97
  | `DEP_LAS` | 64.26 |
98
  | `ENTS_P` | 72.25 |
99
  | `ENTS_R` | 65.32 |
100
- | `ENTS_F` | 68.61 |
101
- | `SENTS_P` | 78.18 |
102
- | `SENTS_R` | 73.11 |
103
- | `SENTS_F` | 75.56 |
 
63
  | Feature | Description |
64
  | --- | --- |
65
  | **Name** | `zh_core_web_sm` |
66
+ | **Version** | `3.2.0` |
67
+ | **spaCy** | `>=3.2.0,<3.3.0` |
68
  | **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `ner` |
69
  | **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `ner` |
70
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 
92
  | Type | Score |
93
  | --- | --- |
94
  | `TOKEN_ACC` | 97.88 |
95
+ | `TOKEN_P` | 94.58 |
96
+ | `TOKEN_R` | 91.36 |
97
+ | `TOKEN_F` | 92.94 |
98
  | `TAG_ACC` | 89.57 |
99
+ | `SENTS_P` | 78.18 |
100
+ | `SENTS_R` | 73.11 |
101
+ | `SENTS_F` | 75.56 |
102
  | `DEP_UAS` | 69.65 |
103
  | `DEP_LAS` | 64.26 |
104
  | `ENTS_P` | 72.25 |
105
  | `ENTS_R` | 65.32 |
106
+ | `ENTS_F` | 68.61 |
 
 
 
accuracy.json CHANGED
@@ -1,15 +1,14 @@
1
  {
2
  "token_acc": 0.9788303388,
 
 
 
3
  "tag_acc": 0.8957464158,
4
- "dep_uas": 0.6965379684,
5
- "dep_las": 0.6426392548,
6
- "ents_p": 0.7224990884,
7
- "ents_r": 0.6531868132,
8
- "ents_f": 0.6860968431,
9
  "sents_p": 0.7817728729,
10
  "sents_r": 0.7311469952,
11
  "sents_f": 0.7556129032,
12
- "speed": 10175.5709293766,
 
13
  "dep_las_per_type": {
14
  "dep": {
15
  "p": 0.4702473498,
@@ -237,6 +236,9 @@
237
  "f": 0.8941176471
238
  }
239
  },
 
 
 
240
  "ents_per_type": {
241
  "DATE": {
242
  "p": 0.75,
@@ -278,26 +280,26 @@
278
  "r": 0.5110887097,
279
  "f": 0.5439914163
280
  },
281
- "LOC": {
282
- "p": 0.5319148936,
283
- "r": 0.3360215054,
284
- "f": 0.4118616145
285
- },
286
  "NORP": {
287
  "p": 0.6774193548,
288
  "r": 0.4411764706,
289
  "f": 0.534351145
290
  },
291
- "WORK_OF_ART": {
292
- "p": 0.4520547945,
293
- "r": 0.22,
294
- "f": 0.2959641256
295
  },
296
  "TIME": {
297
  "p": 0.7438423645,
298
  "r": 0.7330097087,
299
  "f": 0.7383863081
300
  },
 
 
 
 
 
301
  "MONEY": {
302
  "p": 0.9292035398,
303
  "r": 0.7777777778,
@@ -328,5 +330,6 @@
328
  "r": 0.5555555556,
329
  "f": 0.5263157895
330
  }
331
- }
 
332
  }
 
1
  {
2
  "token_acc": 0.9788303388,
3
+ "token_p": 0.9458325855,
4
+ "token_r": 0.9136060443,
5
+ "token_f": 0.9294400505,
6
  "tag_acc": 0.8957464158,
 
 
 
 
 
7
  "sents_p": 0.7817728729,
8
  "sents_r": 0.7311469952,
9
  "sents_f": 0.7556129032,
10
+ "dep_uas": 0.6965379684,
11
+ "dep_las": 0.6426392548,
12
  "dep_las_per_type": {
13
  "dep": {
14
  "p": 0.4702473498,
 
236
  "f": 0.8941176471
237
  }
238
  },
239
+ "ents_p": 0.7224990884,
240
+ "ents_r": 0.6531868132,
241
+ "ents_f": 0.6860968431,
242
  "ents_per_type": {
243
  "DATE": {
244
  "p": 0.75,
 
280
  "r": 0.5110887097,
281
  "f": 0.5439914163
282
  },
 
 
 
 
 
283
  "NORP": {
284
  "p": 0.6774193548,
285
  "r": 0.4411764706,
286
  "f": 0.534351145
287
  },
288
+ "LOC": {
289
+ "p": 0.5319148936,
290
+ "r": 0.3360215054,
291
+ "f": 0.4118616145
292
  },
293
  "TIME": {
294
  "p": 0.7438423645,
295
  "r": 0.7330097087,
296
  "f": 0.7383863081
297
  },
298
+ "WORK_OF_ART": {
299
+ "p": 0.4520547945,
300
+ "r": 0.22,
301
+ "f": 0.2959641256
302
+ },
303
  "MONEY": {
304
  "p": 0.9292035398,
305
  "r": 0.7777777778,
 
330
  "r": 0.5555555556,
331
  "f": 0.5263157895
332
  }
333
+ },
334
+ "speed": 6703.9223469178
335
  }
attribute_ruler/patterns CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
 
config.cfg CHANGED
@@ -1,10 +1,8 @@
1
  [paths]
2
- train = "corpus/zh-core-news/train.spacy"
3
- dev = "corpus/zh-core-news/dev.spacy"
4
  vectors = null
5
- raw = null
6
  init_tok2vec = null
7
- vocab_data = null
8
 
9
  [system]
10
  gpu_allocator = null
@@ -27,12 +25,14 @@ segmenter = "pkuseg"
27
 
28
  [components.attribute_ruler]
29
  factory = "attribute_ruler"
 
30
  validate = false
31
 
32
  [components.ner]
33
  factory = "ner"
34
  incorrect_spans_key = null
35
  moves = null
 
36
  update_with_oracle_cut_size = 100
37
 
38
  [components.ner.model]
@@ -66,6 +66,7 @@ factory = "parser"
66
  learn_tokens = false
67
  min_action_freq = 30
68
  moves = null
 
69
  update_with_oracle_cut_size = 100
70
 
71
  [components.parser.model]
@@ -84,6 +85,8 @@ upstream = "tok2vec"
84
 
85
  [components.senter]
86
  factory = "senter"
 
 
87
 
88
  [components.senter.model]
89
  @architectures = "spacy.Tagger.v1"
@@ -108,6 +111,8 @@ maxout_pieces = 2
108
 
109
  [components.tagger]
110
  factory = "tagger"
 
 
111
 
112
  [components.tagger.model]
113
  @architectures = "spacy.Tagger.v1"
@@ -142,17 +147,17 @@ maxout_pieces = 3
142
 
143
  [corpora.dev]
144
  @readers = "spacy.Corpus.v1"
145
- limit = 0
146
- max_length = 0
147
- path = ${paths:dev}
148
  gold_preproc = false
 
 
149
  augmenter = null
150
 
151
  [corpora.train]
152
  @readers = "spacy.Corpus.v1"
153
- path = ${paths:train}
154
- max_length = 5000
155
  gold_preproc = false
 
156
  limit = 0
157
  augmenter = null
158
 
@@ -185,9 +190,8 @@ compound = 1.001
185
  t = 0.0
186
 
187
  [training.logger]
188
- @loggers = "spacy.WandbLogger.v1"
189
- project_name = "spacy-v3.0.0a2"
190
- remove_config_values = []
191
 
192
  [training.optimizer]
193
  @optimizers = "Adam.v1"
@@ -201,22 +205,23 @@ eps = 0.00000001
201
  learn_rate = 0.001
202
 
203
  [training.score_weights]
204
- tag_acc = 0.24
205
  dep_uas = 0.0
206
- dep_las = 0.24
207
  dep_las_per_type = null
208
  sents_p = null
209
  sents_r = null
210
- sents_f = 0.03
211
- ents_f = 0.5
212
  ents_p = 0.0
213
  ents_r = 0.0
214
  ents_per_type = null
 
215
 
216
  [pretraining]
217
 
218
  [initialize]
219
- vocab_data = ${paths.vocab_data}
220
  vectors = ${paths.vectors}
221
  init_tok2vec = ${paths.init_tok2vec}
222
  before_init = null
 
1
  [paths]
2
+ train = null
3
+ dev = null
4
  vectors = null
 
5
  init_tok2vec = null
 
6
 
7
  [system]
8
  gpu_allocator = null
 
25
 
26
  [components.attribute_ruler]
27
  factory = "attribute_ruler"
28
+ scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
29
  validate = false
30
 
31
  [components.ner]
32
  factory = "ner"
33
  incorrect_spans_key = null
34
  moves = null
35
+ scorer = {"@scorers":"spacy.ner_scorer.v1"}
36
  update_with_oracle_cut_size = 100
37
 
38
  [components.ner.model]
 
66
  learn_tokens = false
67
  min_action_freq = 30
68
  moves = null
69
+ scorer = {"@scorers":"spacy.parser_scorer.v1"}
70
  update_with_oracle_cut_size = 100
71
 
72
  [components.parser.model]
 
85
 
86
  [components.senter]
87
  factory = "senter"
88
+ overwrite = false
89
+ scorer = {"@scorers":"spacy.senter_scorer.v1"}
90
 
91
  [components.senter.model]
92
  @architectures = "spacy.Tagger.v1"
 
111
 
112
  [components.tagger]
113
  factory = "tagger"
114
+ overwrite = false
115
+ scorer = {"@scorers":"spacy.tagger_scorer.v1"}
116
 
117
  [components.tagger.model]
118
  @architectures = "spacy.Tagger.v1"
 
147
 
148
  [corpora.dev]
149
  @readers = "spacy.Corpus.v1"
150
+ path = ${paths.dev}
 
 
151
  gold_preproc = false
152
+ max_length = 0
153
+ limit = 0
154
  augmenter = null
155
 
156
  [corpora.train]
157
  @readers = "spacy.Corpus.v1"
158
+ path = ${paths.train}
 
159
  gold_preproc = false
160
+ max_length = 0
161
  limit = 0
162
  augmenter = null
163
 
 
190
  t = 0.0
191
 
192
  [training.logger]
193
+ @loggers = "spacy.ConsoleLogger.v1"
194
+ progress_bar = false
 
195
 
196
  [training.optimizer]
197
  @optimizers = "Adam.v1"
 
205
  learn_rate = 0.001
206
 
207
  [training.score_weights]
208
+ tag_acc = 0.32
209
  dep_uas = 0.0
210
+ dep_las = 0.32
211
  dep_las_per_type = null
212
  sents_p = null
213
  sents_r = null
214
+ sents_f = 0.04
215
+ ents_f = 0.32
216
  ents_p = 0.0
217
  ents_r = 0.0
218
  ents_per_type = null
219
+ speed = 0.0
220
 
221
  [pretraining]
222
 
223
  [initialize]
224
+ vocab_data = null
225
  vectors = ${paths.vectors}
226
  init_tok2vec = ${paths.init_tok2vec}
227
  before_init = null
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"zh",
3
  "name":"core_web_sm",
4
- "version":"3.1.0",
5
  "description":"Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"MIT",
10
- "spacy_version":">=3.1.0,<3.2.0",
11
- "spacy_git_version":"caba63b74",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
@@ -152,16 +152,15 @@
152
  ],
153
  "performance":{
154
  "token_acc":0.9788303388,
 
 
 
155
  "tag_acc":0.8957464158,
156
- "dep_uas":0.6965379684,
157
- "dep_las":0.6426392548,
158
- "ents_p":0.7224990884,
159
- "ents_r":0.6531868132,
160
- "ents_f":0.6860968431,
161
  "sents_p":0.7817728729,
162
  "sents_r":0.7311469952,
163
  "sents_f":0.7556129032,
164
- "speed":10175.5709293766,
 
165
  "dep_las_per_type":{
166
  "dep":{
167
  "p":0.4702473498,
@@ -389,6 +388,9 @@
389
  "f":0.8941176471
390
  }
391
  },
 
 
 
392
  "ents_per_type":{
393
  "DATE":{
394
  "p":0.75,
@@ -430,26 +432,26 @@
430
  "r":0.5110887097,
431
  "f":0.5439914163
432
  },
433
- "LOC":{
434
- "p":0.5319148936,
435
- "r":0.3360215054,
436
- "f":0.4118616145
437
- },
438
  "NORP":{
439
  "p":0.6774193548,
440
  "r":0.4411764706,
441
  "f":0.534351145
442
  },
443
- "WORK_OF_ART":{
444
- "p":0.4520547945,
445
- "r":0.22,
446
- "f":0.2959641256
447
  },
448
  "TIME":{
449
  "p":0.7438423645,
450
  "r":0.7330097087,
451
  "f":0.7383863081
452
  },
 
 
 
 
 
453
  "MONEY":{
454
  "p":0.9292035398,
455
  "r":0.7777777778,
@@ -480,7 +482,8 @@
480
  "r":0.5555555556,
481
  "f":0.5263157895
482
  }
483
- }
 
484
  },
485
  "sources":[
486
  {
 
1
  {
2
  "lang":"zh",
3
  "name":"core_web_sm",
4
+ "version":"3.2.0",
5
  "description":"Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"MIT",
10
+ "spacy_version":">=3.2.0,<3.3.0",
11
+ "spacy_git_version":"bb26550e2",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
 
152
  ],
153
  "performance":{
154
  "token_acc":0.9788303388,
155
+ "token_p":0.9458325855,
156
+ "token_r":0.9136060443,
157
+ "token_f":0.9294400505,
158
  "tag_acc":0.8957464158,
 
 
 
 
 
159
  "sents_p":0.7817728729,
160
  "sents_r":0.7311469952,
161
  "sents_f":0.7556129032,
162
+ "dep_uas":0.6965379684,
163
+ "dep_las":0.6426392548,
164
  "dep_las_per_type":{
165
  "dep":{
166
  "p":0.4702473498,
 
388
  "f":0.8941176471
389
  }
390
  },
391
+ "ents_p":0.7224990884,
392
+ "ents_r":0.6531868132,
393
+ "ents_f":0.6860968431,
394
  "ents_per_type":{
395
  "DATE":{
396
  "p":0.75,
 
432
  "r":0.5110887097,
433
  "f":0.5439914163
434
  },
 
 
 
 
 
435
  "NORP":{
436
  "p":0.6774193548,
437
  "r":0.4411764706,
438
  "f":0.534351145
439
  },
440
+ "LOC":{
441
+ "p":0.5319148936,
442
+ "r":0.3360215054,
443
+ "f":0.4118616145
444
  },
445
  "TIME":{
446
  "p":0.7438423645,
447
  "r":0.7330097087,
448
  "f":0.7383863081
449
  },
450
+ "WORK_OF_ART":{
451
+ "p":0.4520547945,
452
+ "r":0.22,
453
+ "f":0.2959641256
454
+ },
455
  "MONEY":{
456
  "p":0.9292035398,
457
  "r":0.7777777778,
 
482
  "r":0.5555555556,
483
  "f":0.5263157895
484
  }
485
+ },
486
+ "speed":6703.9223469178
487
  },
488
  "sources":[
489
  {
ner/model CHANGED
Binary files a/ner/model and b/ner/model differ
 
parser/model CHANGED
Binary files a/parser/model and b/parser/model differ
 
senter/cfg CHANGED
@@ -1,3 +1,3 @@
1
  {
2
-
3
  }
 
1
  {
2
+ "overwrite":false
3
  }
senter/model CHANGED
Binary files a/senter/model and b/senter/model differ
 
tagger/cfg CHANGED
@@ -36,5 +36,6 @@
36
  "VE",
37
  "VV",
38
  "X"
39
- ]
 
40
  }
 
36
  "VE",
37
  "VV",
38
  "X"
39
+ ],
40
+ "overwrite":false
41
  }
tagger/model CHANGED
Binary files a/tagger/model and b/tagger/model differ
 
tok2vec/model CHANGED
Binary files a/tok2vec/model and b/tok2vec/model differ
 
vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "mode":"default"
3
+ }
zh_core_web_sm-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dad1f66fb6b3981c4986c7203332910a369eb42295ba9bc7ca36a3928ea73fe9
3
- size 49466044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0629b5fe5fc8979fa895be4956363e7721e040908b0dd4c9ed469ad7309dd5cf
3
+ size 49466491