osanseviero
committed on
Commit
•
7bfda68
1
Parent(s):
e570d5e
Update spaCy pipeline
Browse files
- README.md +9 -6
- accuracy.json +19 -16
- attribute_ruler/patterns +0 -0
- config.cfg +22 -17
- meta.json +22 -19
- ner/model +0 -0
- parser/model +0 -0
- senter/cfg +1 -1
- senter/model +0 -0
- tagger/cfg +2 -1
- tagger/model +0 -0
- tok2vec/model +0 -0
- vocab/vectors.cfg +3 -0
- zh_core_web_sm-any-py3-none-any.whl +2 -2
README.md
CHANGED
@@ -63,8 +63,8 @@ Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter,
|
|
63 |
| Feature | Description |
|
64 |
| --- | --- |
|
65 |
| **Name** | `zh_core_web_sm` |
|
66 |
-
| **Version** | `3.
|
67 |
-
| **spaCy** | `>=3.
|
68 |
| **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `ner` |
|
69 |
| **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `ner` |
|
70 |
| **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
|
@@ -92,12 +92,15 @@ Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter,
|
|
92 |
| Type | Score |
|
93 |
| --- | --- |
|
94 |
| `TOKEN_ACC` | 97.88 |
|
|
|
|
|
|
|
95 |
| `TAG_ACC` | 89.57 |
|
|
|
|
|
|
|
96 |
| `DEP_UAS` | 69.65 |
|
97 |
| `DEP_LAS` | 64.26 |
|
98 |
| `ENTS_P` | 72.25 |
|
99 |
| `ENTS_R` | 65.32 |
|
100 |
-
| `ENTS_F` | 68.61 |
|
101 |
-
| `SENTS_P` | 78.18 |
|
102 |
-
| `SENTS_R` | 73.11 |
|
103 |
-
| `SENTS_F` | 75.56 |
|
|
|
63 |
| Feature | Description |
|
64 |
| --- | --- |
|
65 |
| **Name** | `zh_core_web_sm` |
|
66 |
+
| **Version** | `3.2.0` |
|
67 |
+
| **spaCy** | `>=3.2.0,<3.3.0` |
|
68 |
| **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `ner` |
|
69 |
| **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `ner` |
|
70 |
| **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
|
|
|
92 |
| Type | Score |
|
93 |
| --- | --- |
|
94 |
| `TOKEN_ACC` | 97.88 |
|
95 |
+
| `TOKEN_P` | 94.58 |
|
96 |
+
| `TOKEN_R` | 91.36 |
|
97 |
+
| `TOKEN_F` | 92.94 |
|
98 |
| `TAG_ACC` | 89.57 |
|
99 |
+
| `SENTS_P` | 78.18 |
|
100 |
+
| `SENTS_R` | 73.11 |
|
101 |
+
| `SENTS_F` | 75.56 |
|
102 |
| `DEP_UAS` | 69.65 |
|
103 |
| `DEP_LAS` | 64.26 |
|
104 |
| `ENTS_P` | 72.25 |
|
105 |
| `ENTS_R` | 65.32 |
|
106 |
+
| `ENTS_F` | 68.61 |
|
|
|
|
|
|
accuracy.json
CHANGED
@@ -1,15 +1,14 @@
|
|
1 |
{
|
2 |
"token_acc": 0.9788303388,
|
|
|
|
|
|
|
3 |
"tag_acc": 0.8957464158,
|
4 |
-
"dep_uas": 0.6965379684,
|
5 |
-
"dep_las": 0.6426392548,
|
6 |
-
"ents_p": 0.7224990884,
|
7 |
-
"ents_r": 0.6531868132,
|
8 |
-
"ents_f": 0.6860968431,
|
9 |
"sents_p": 0.7817728729,
|
10 |
"sents_r": 0.7311469952,
|
11 |
"sents_f": 0.7556129032,
|
12 |
-
"
|
|
|
13 |
"dep_las_per_type": {
|
14 |
"dep": {
|
15 |
"p": 0.4702473498,
|
@@ -237,6 +236,9 @@
|
|
237 |
"f": 0.8941176471
|
238 |
}
|
239 |
},
|
|
|
|
|
|
|
240 |
"ents_per_type": {
|
241 |
"DATE": {
|
242 |
"p": 0.75,
|
@@ -278,26 +280,26 @@
|
|
278 |
"r": 0.5110887097,
|
279 |
"f": 0.5439914163
|
280 |
},
|
281 |
-
"LOC": {
|
282 |
-
"p": 0.5319148936,
|
283 |
-
"r": 0.3360215054,
|
284 |
-
"f": 0.4118616145
|
285 |
-
},
|
286 |
"NORP": {
|
287 |
"p": 0.6774193548,
|
288 |
"r": 0.4411764706,
|
289 |
"f": 0.534351145
|
290 |
},
|
291 |
-
"
|
292 |
-
"p": 0.
|
293 |
-
"r": 0.
|
294 |
-
"f": 0.
|
295 |
},
|
296 |
"TIME": {
|
297 |
"p": 0.7438423645,
|
298 |
"r": 0.7330097087,
|
299 |
"f": 0.7383863081
|
300 |
},
|
|
|
|
|
|
|
|
|
|
|
301 |
"MONEY": {
|
302 |
"p": 0.9292035398,
|
303 |
"r": 0.7777777778,
|
@@ -328,5 +330,6 @@
|
|
328 |
"r": 0.5555555556,
|
329 |
"f": 0.5263157895
|
330 |
}
|
331 |
-
}
|
|
|
332 |
}
|
|
|
1 |
{
|
2 |
"token_acc": 0.9788303388,
|
3 |
+
"token_p": 0.9458325855,
|
4 |
+
"token_r": 0.9136060443,
|
5 |
+
"token_f": 0.9294400505,
|
6 |
"tag_acc": 0.8957464158,
|
|
|
|
|
|
|
|
|
|
|
7 |
"sents_p": 0.7817728729,
|
8 |
"sents_r": 0.7311469952,
|
9 |
"sents_f": 0.7556129032,
|
10 |
+
"dep_uas": 0.6965379684,
|
11 |
+
"dep_las": 0.6426392548,
|
12 |
"dep_las_per_type": {
|
13 |
"dep": {
|
14 |
"p": 0.4702473498,
|
|
|
236 |
"f": 0.8941176471
|
237 |
}
|
238 |
},
|
239 |
+
"ents_p": 0.7224990884,
|
240 |
+
"ents_r": 0.6531868132,
|
241 |
+
"ents_f": 0.6860968431,
|
242 |
"ents_per_type": {
|
243 |
"DATE": {
|
244 |
"p": 0.75,
|
|
|
280 |
"r": 0.5110887097,
|
281 |
"f": 0.5439914163
|
282 |
},
|
|
|
|
|
|
|
|
|
|
|
283 |
"NORP": {
|
284 |
"p": 0.6774193548,
|
285 |
"r": 0.4411764706,
|
286 |
"f": 0.534351145
|
287 |
},
|
288 |
+
"LOC": {
|
289 |
+
"p": 0.5319148936,
|
290 |
+
"r": 0.3360215054,
|
291 |
+
"f": 0.4118616145
|
292 |
},
|
293 |
"TIME": {
|
294 |
"p": 0.7438423645,
|
295 |
"r": 0.7330097087,
|
296 |
"f": 0.7383863081
|
297 |
},
|
298 |
+
"WORK_OF_ART": {
|
299 |
+
"p": 0.4520547945,
|
300 |
+
"r": 0.22,
|
301 |
+
"f": 0.2959641256
|
302 |
+
},
|
303 |
"MONEY": {
|
304 |
"p": 0.9292035398,
|
305 |
"r": 0.7777777778,
|
|
|
330 |
"r": 0.5555555556,
|
331 |
"f": 0.5263157895
|
332 |
}
|
333 |
+
},
|
334 |
+
"speed": 6703.9223469178
|
335 |
}
|
attribute_ruler/patterns
CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
|
|
config.cfg
CHANGED
@@ -1,10 +1,8 @@
|
|
1 |
[paths]
|
2 |
-
train =
|
3 |
-
dev =
|
4 |
vectors = null
|
5 |
-
raw = null
|
6 |
init_tok2vec = null
|
7 |
-
vocab_data = null
|
8 |
|
9 |
[system]
|
10 |
gpu_allocator = null
|
@@ -27,12 +25,14 @@ segmenter = "pkuseg"
|
|
27 |
|
28 |
[components.attribute_ruler]
|
29 |
factory = "attribute_ruler"
|
|
|
30 |
validate = false
|
31 |
|
32 |
[components.ner]
|
33 |
factory = "ner"
|
34 |
incorrect_spans_key = null
|
35 |
moves = null
|
|
|
36 |
update_with_oracle_cut_size = 100
|
37 |
|
38 |
[components.ner.model]
|
@@ -66,6 +66,7 @@ factory = "parser"
|
|
66 |
learn_tokens = false
|
67 |
min_action_freq = 30
|
68 |
moves = null
|
|
|
69 |
update_with_oracle_cut_size = 100
|
70 |
|
71 |
[components.parser.model]
|
@@ -84,6 +85,8 @@ upstream = "tok2vec"
|
|
84 |
|
85 |
[components.senter]
|
86 |
factory = "senter"
|
|
|
|
|
87 |
|
88 |
[components.senter.model]
|
89 |
@architectures = "spacy.Tagger.v1"
|
@@ -108,6 +111,8 @@ maxout_pieces = 2
|
|
108 |
|
109 |
[components.tagger]
|
110 |
factory = "tagger"
|
|
|
|
|
111 |
|
112 |
[components.tagger.model]
|
113 |
@architectures = "spacy.Tagger.v1"
|
@@ -142,17 +147,17 @@ maxout_pieces = 3
|
|
142 |
|
143 |
[corpora.dev]
|
144 |
@readers = "spacy.Corpus.v1"
|
145 |
-
|
146 |
-
max_length = 0
|
147 |
-
path = ${paths:dev}
|
148 |
gold_preproc = false
|
|
|
|
|
149 |
augmenter = null
|
150 |
|
151 |
[corpora.train]
|
152 |
@readers = "spacy.Corpus.v1"
|
153 |
-
path = ${paths
|
154 |
-
max_length = 5000
|
155 |
gold_preproc = false
|
|
|
156 |
limit = 0
|
157 |
augmenter = null
|
158 |
|
@@ -185,9 +190,8 @@ compound = 1.001
|
|
185 |
t = 0.0
|
186 |
|
187 |
[training.logger]
|
188 |
-
@loggers = "spacy.
|
189 |
-
|
190 |
-
remove_config_values = []
|
191 |
|
192 |
[training.optimizer]
|
193 |
@optimizers = "Adam.v1"
|
@@ -201,22 +205,23 @@ eps = 0.00000001
|
|
201 |
learn_rate = 0.001
|
202 |
|
203 |
[training.score_weights]
|
204 |
-
tag_acc = 0.
|
205 |
dep_uas = 0.0
|
206 |
-
dep_las = 0.
|
207 |
dep_las_per_type = null
|
208 |
sents_p = null
|
209 |
sents_r = null
|
210 |
-
sents_f = 0.
|
211 |
-
ents_f = 0.
|
212 |
ents_p = 0.0
|
213 |
ents_r = 0.0
|
214 |
ents_per_type = null
|
|
|
215 |
|
216 |
[pretraining]
|
217 |
|
218 |
[initialize]
|
219 |
-
vocab_data =
|
220 |
vectors = ${paths.vectors}
|
221 |
init_tok2vec = ${paths.init_tok2vec}
|
222 |
before_init = null
|
|
|
1 |
[paths]
|
2 |
+
train = null
|
3 |
+
dev = null
|
4 |
vectors = null
|
|
|
5 |
init_tok2vec = null
|
|
|
6 |
|
7 |
[system]
|
8 |
gpu_allocator = null
|
|
|
25 |
|
26 |
[components.attribute_ruler]
|
27 |
factory = "attribute_ruler"
|
28 |
+
scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
|
29 |
validate = false
|
30 |
|
31 |
[components.ner]
|
32 |
factory = "ner"
|
33 |
incorrect_spans_key = null
|
34 |
moves = null
|
35 |
+
scorer = {"@scorers":"spacy.ner_scorer.v1"}
|
36 |
update_with_oracle_cut_size = 100
|
37 |
|
38 |
[components.ner.model]
|
|
|
66 |
learn_tokens = false
|
67 |
min_action_freq = 30
|
68 |
moves = null
|
69 |
+
scorer = {"@scorers":"spacy.parser_scorer.v1"}
|
70 |
update_with_oracle_cut_size = 100
|
71 |
|
72 |
[components.parser.model]
|
|
|
85 |
|
86 |
[components.senter]
|
87 |
factory = "senter"
|
88 |
+
overwrite = false
|
89 |
+
scorer = {"@scorers":"spacy.senter_scorer.v1"}
|
90 |
|
91 |
[components.senter.model]
|
92 |
@architectures = "spacy.Tagger.v1"
|
|
|
111 |
|
112 |
[components.tagger]
|
113 |
factory = "tagger"
|
114 |
+
overwrite = false
|
115 |
+
scorer = {"@scorers":"spacy.tagger_scorer.v1"}
|
116 |
|
117 |
[components.tagger.model]
|
118 |
@architectures = "spacy.Tagger.v1"
|
|
|
147 |
|
148 |
[corpora.dev]
|
149 |
@readers = "spacy.Corpus.v1"
|
150 |
+
path = ${paths.dev}
|
|
|
|
|
151 |
gold_preproc = false
|
152 |
+
max_length = 0
|
153 |
+
limit = 0
|
154 |
augmenter = null
|
155 |
|
156 |
[corpora.train]
|
157 |
@readers = "spacy.Corpus.v1"
|
158 |
+
path = ${paths.train}
|
|
|
159 |
gold_preproc = false
|
160 |
+
max_length = 0
|
161 |
limit = 0
|
162 |
augmenter = null
|
163 |
|
|
|
190 |
t = 0.0
|
191 |
|
192 |
[training.logger]
|
193 |
+
@loggers = "spacy.ConsoleLogger.v1"
|
194 |
+
progress_bar = false
|
|
|
195 |
|
196 |
[training.optimizer]
|
197 |
@optimizers = "Adam.v1"
|
|
|
205 |
learn_rate = 0.001
|
206 |
|
207 |
[training.score_weights]
|
208 |
+
tag_acc = 0.32
|
209 |
dep_uas = 0.0
|
210 |
+
dep_las = 0.32
|
211 |
dep_las_per_type = null
|
212 |
sents_p = null
|
213 |
sents_r = null
|
214 |
+
sents_f = 0.04
|
215 |
+
ents_f = 0.32
|
216 |
ents_p = 0.0
|
217 |
ents_r = 0.0
|
218 |
ents_per_type = null
|
219 |
+
speed = 0.0
|
220 |
|
221 |
[pretraining]
|
222 |
|
223 |
[initialize]
|
224 |
+
vocab_data = null
|
225 |
vectors = ${paths.vectors}
|
226 |
init_tok2vec = ${paths.init_tok2vec}
|
227 |
before_init = null
|
meta.json
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
{
|
2 |
"lang":"zh",
|
3 |
"name":"core_web_sm",
|
4 |
-
"version":"3.
|
5 |
"description":"Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.",
|
6 |
"author":"Explosion",
|
7 |
"email":"contact@explosion.ai",
|
8 |
"url":"https://explosion.ai",
|
9 |
"license":"MIT",
|
10 |
-
"spacy_version":">=3.
|
11 |
-
"spacy_git_version":"
|
12 |
"vectors":{
|
13 |
"width":0,
|
14 |
"vectors":0,
|
@@ -152,16 +152,15 @@
|
|
152 |
],
|
153 |
"performance":{
|
154 |
"token_acc":0.9788303388,
|
|
|
|
|
|
|
155 |
"tag_acc":0.8957464158,
|
156 |
-
"dep_uas":0.6965379684,
|
157 |
-
"dep_las":0.6426392548,
|
158 |
-
"ents_p":0.7224990884,
|
159 |
-
"ents_r":0.6531868132,
|
160 |
-
"ents_f":0.6860968431,
|
161 |
"sents_p":0.7817728729,
|
162 |
"sents_r":0.7311469952,
|
163 |
"sents_f":0.7556129032,
|
164 |
-
"
|
|
|
165 |
"dep_las_per_type":{
|
166 |
"dep":{
|
167 |
"p":0.4702473498,
|
@@ -389,6 +388,9 @@
|
|
389 |
"f":0.8941176471
|
390 |
}
|
391 |
},
|
|
|
|
|
|
|
392 |
"ents_per_type":{
|
393 |
"DATE":{
|
394 |
"p":0.75,
|
@@ -430,26 +432,26 @@
|
|
430 |
"r":0.5110887097,
|
431 |
"f":0.5439914163
|
432 |
},
|
433 |
-
"LOC":{
|
434 |
-
"p":0.5319148936,
|
435 |
-
"r":0.3360215054,
|
436 |
-
"f":0.4118616145
|
437 |
-
},
|
438 |
"NORP":{
|
439 |
"p":0.6774193548,
|
440 |
"r":0.4411764706,
|
441 |
"f":0.534351145
|
442 |
},
|
443 |
-
"
|
444 |
-
"p":0.
|
445 |
-
"r":0.
|
446 |
-
"f":0.
|
447 |
},
|
448 |
"TIME":{
|
449 |
"p":0.7438423645,
|
450 |
"r":0.7330097087,
|
451 |
"f":0.7383863081
|
452 |
},
|
|
|
|
|
|
|
|
|
|
|
453 |
"MONEY":{
|
454 |
"p":0.9292035398,
|
455 |
"r":0.7777777778,
|
@@ -480,7 +482,8 @@
|
|
480 |
"r":0.5555555556,
|
481 |
"f":0.5263157895
|
482 |
}
|
483 |
-
}
|
|
|
484 |
},
|
485 |
"sources":[
|
486 |
{
|
|
|
1 |
{
|
2 |
"lang":"zh",
|
3 |
"name":"core_web_sm",
|
4 |
+
"version":"3.2.0",
|
5 |
"description":"Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.",
|
6 |
"author":"Explosion",
|
7 |
"email":"contact@explosion.ai",
|
8 |
"url":"https://explosion.ai",
|
9 |
"license":"MIT",
|
10 |
+
"spacy_version":">=3.2.0,<3.3.0",
|
11 |
+
"spacy_git_version":"bb26550e2",
|
12 |
"vectors":{
|
13 |
"width":0,
|
14 |
"vectors":0,
|
|
|
152 |
],
|
153 |
"performance":{
|
154 |
"token_acc":0.9788303388,
|
155 |
+
"token_p":0.9458325855,
|
156 |
+
"token_r":0.9136060443,
|
157 |
+
"token_f":0.9294400505,
|
158 |
"tag_acc":0.8957464158,
|
|
|
|
|
|
|
|
|
|
|
159 |
"sents_p":0.7817728729,
|
160 |
"sents_r":0.7311469952,
|
161 |
"sents_f":0.7556129032,
|
162 |
+
"dep_uas":0.6965379684,
|
163 |
+
"dep_las":0.6426392548,
|
164 |
"dep_las_per_type":{
|
165 |
"dep":{
|
166 |
"p":0.4702473498,
|
|
|
388 |
"f":0.8941176471
|
389 |
}
|
390 |
},
|
391 |
+
"ents_p":0.7224990884,
|
392 |
+
"ents_r":0.6531868132,
|
393 |
+
"ents_f":0.6860968431,
|
394 |
"ents_per_type":{
|
395 |
"DATE":{
|
396 |
"p":0.75,
|
|
|
432 |
"r":0.5110887097,
|
433 |
"f":0.5439914163
|
434 |
},
|
|
|
|
|
|
|
|
|
|
|
435 |
"NORP":{
|
436 |
"p":0.6774193548,
|
437 |
"r":0.4411764706,
|
438 |
"f":0.534351145
|
439 |
},
|
440 |
+
"LOC":{
|
441 |
+
"p":0.5319148936,
|
442 |
+
"r":0.3360215054,
|
443 |
+
"f":0.4118616145
|
444 |
},
|
445 |
"TIME":{
|
446 |
"p":0.7438423645,
|
447 |
"r":0.7330097087,
|
448 |
"f":0.7383863081
|
449 |
},
|
450 |
+
"WORK_OF_ART":{
|
451 |
+
"p":0.4520547945,
|
452 |
+
"r":0.22,
|
453 |
+
"f":0.2959641256
|
454 |
+
},
|
455 |
"MONEY":{
|
456 |
"p":0.9292035398,
|
457 |
"r":0.7777777778,
|
|
|
482 |
"r":0.5555555556,
|
483 |
"f":0.5263157895
|
484 |
}
|
485 |
+
},
|
486 |
+
"speed":6703.9223469178
|
487 |
},
|
488 |
"sources":[
|
489 |
{
|
ner/model
CHANGED
Binary files a/ner/model and b/ner/model differ
|
|
parser/model
CHANGED
Binary files a/parser/model and b/parser/model differ
|
|
senter/cfg
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
{
|
2 |
-
|
3 |
}
|
|
|
1 |
{
|
2 |
+
"overwrite":false
|
3 |
}
|
senter/model
CHANGED
Binary files a/senter/model and b/senter/model differ
|
|
tagger/cfg
CHANGED
@@ -36,5 +36,6 @@
|
|
36 |
"VE",
|
37 |
"VV",
|
38 |
"X"
|
39 |
-
]
|
|
|
40 |
}
|
|
|
36 |
"VE",
|
37 |
"VV",
|
38 |
"X"
|
39 |
+
],
|
40 |
+
"overwrite":false
|
41 |
}
|
tagger/model
CHANGED
Binary files a/tagger/model and b/tagger/model differ
|
|
tok2vec/model
CHANGED
Binary files a/tok2vec/model and b/tok2vec/model differ
|
|
vocab/vectors.cfg
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"mode":"default"
|
3 |
+
}
|
zh_core_web_sm-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0629b5fe5fc8979fa895be4956363e7721e040908b0dd4c9ed469ad7309dd5cf
|
3 |
+
size 49466491
|