adrianeboyd
committed on
Commit
•
500cb7d
1
Parent(s):
02cc4ae
Update spaCy pipeline
Browse files
- README.md +2 -2
- accuracy.json +6 -6
- config.cfg +2 -0
- fi_core_news_lg-any-py3-none-any.whl +2 -2
- meta.json +9 -9
- morphologizer/cfg +1 -0
- tagger/cfg +1 -0
README.md
CHANGED
@@ -78,8 +78,8 @@ Finnish pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer,
|
|
78 |
| Feature | Description |
|
79 |
| --- | --- |
|
80 |
| **Name** | `fi_core_news_lg` |
|
81 |
-
| **Version** | `3.
|
82 |
-
| **spaCy** | `>=3.
|
83 |
| **Default Pipeline** | `tok2vec`, `tagger`, `morphologizer`, `parser`, `lemmatizer`, `attribute_ruler`, `ner` |
|
84 |
| **Components** | `tok2vec`, `tagger`, `morphologizer`, `parser`, `lemmatizer`, `senter`, `attribute_ruler`, `ner` |
|
85 |
| **Vectors** | floret (200000, 300) |
|
|
|
78 |
| Feature | Description |
|
79 |
| --- | --- |
|
80 |
| **Name** | `fi_core_news_lg` |
|
81 |
+
| **Version** | `3.6.0` |
|
82 |
+
| **spaCy** | `>=3.6.0,<3.7.0` |
|
83 |
| **Default Pipeline** | `tok2vec`, `tagger`, `morphologizer`, `parser`, `lemmatizer`, `attribute_ruler`, `ner` |
|
84 |
| **Components** | `tok2vec`, `tagger`, `morphologizer`, `parser`, `lemmatizer`, `senter`, `attribute_ruler`, `ner` |
|
85 |
| **Vectors** | floret (200000, 300) |
|
accuracy.json
CHANGED
@@ -378,11 +378,6 @@
|
|
378 |
"r": 0.8258196721,
|
379 |
"f": 0.83436853
|
380 |
},
|
381 |
-
"WORK_OF_ART": {
|
382 |
-
"p": 0.6363636364,
|
383 |
-
"r": 0.375,
|
384 |
-
"f": 0.4719101124
|
385 |
-
},
|
386 |
"CARDINAL": {
|
387 |
"p": 0.9365079365,
|
388 |
"r": 0.9407744875,
|
@@ -393,6 +388,11 @@
|
|
393 |
"r": 0.7875288684,
|
394 |
"f": 0.7830080367
|
395 |
},
|
|
|
|
|
|
|
|
|
|
|
396 |
"PRODUCT": {
|
397 |
"p": 0.7095435685,
|
398 |
"r": 0.6951219512,
|
@@ -454,5 +454,5 @@
|
|
454 |
"f": 0.8275862069
|
455 |
}
|
456 |
},
|
457 |
-
"speed":
|
458 |
}
|
|
|
378 |
"r": 0.8258196721,
|
379 |
"f": 0.83436853
|
380 |
},
|
|
|
|
|
|
|
|
|
|
|
381 |
"CARDINAL": {
|
382 |
"p": 0.9365079365,
|
383 |
"r": 0.9407744875,
|
|
|
388 |
"r": 0.7875288684,
|
389 |
"f": 0.7830080367
|
390 |
},
|
391 |
+
"WORK_OF_ART": {
|
392 |
+
"p": 0.6363636364,
|
393 |
+
"r": 0.375,
|
394 |
+
"f": 0.4719101124
|
395 |
+
},
|
396 |
"PRODUCT": {
|
397 |
"p": 0.7095435685,
|
398 |
"r": 0.6951219512,
|
|
|
454 |
"f": 0.8275862069
|
455 |
}
|
456 |
},
|
457 |
+
"speed": 7690.3240081011
|
458 |
}
|
config.cfg
CHANGED
@@ -46,6 +46,7 @@ upstream = "tok2vec"
|
|
46 |
[components.morphologizer]
|
47 |
factory = "morphologizer"
|
48 |
extend = false
|
|
|
49 |
overwrite = true
|
50 |
scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
|
51 |
|
@@ -143,6 +144,7 @@ maxout_pieces = 2
|
|
143 |
|
144 |
[components.tagger]
|
145 |
factory = "tagger"
|
|
|
146 |
neg_prefix = "!"
|
147 |
overwrite = false
|
148 |
scorer = {"@scorers":"spacy.tagger_scorer.v1"}
|
|
|
46 |
[components.morphologizer]
|
47 |
factory = "morphologizer"
|
48 |
extend = false
|
49 |
+
label_smoothing = 0.0
|
50 |
overwrite = true
|
51 |
scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
|
52 |
|
|
|
144 |
|
145 |
[components.tagger]
|
146 |
factory = "tagger"
|
147 |
+
label_smoothing = 0.0
|
148 |
neg_prefix = "!"
|
149 |
overwrite = false
|
150 |
scorer = {"@scorers":"spacy.tagger_scorer.v1"}
|
fi_core_news_lg-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f9ea84fc160d85ce185f664b099d49b909d7cf620af52a436a0290a920f7e85
|
3 |
+
size 230768256
|
meta.json
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
{
|
2 |
"lang":"fi",
|
3 |
"name":"core_news_lg",
|
4 |
-
"version":"3.
|
5 |
"description":"Finnish pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.",
|
6 |
"author":"Explosion",
|
7 |
"email":"contact@explosion.ai",
|
8 |
"url":"https://explosion.ai",
|
9 |
"license":"CC BY-SA 4.0",
|
10 |
-
"spacy_version":">=3.
|
11 |
-
"spacy_git_version":"
|
12 |
"vectors":{
|
13 |
"width":300,
|
14 |
"vectors":200000,
|
@@ -2578,11 +2578,6 @@
|
|
2578 |
"r":0.8258196721,
|
2579 |
"f":0.83436853
|
2580 |
},
|
2581 |
-
"WORK_OF_ART":{
|
2582 |
-
"p":0.6363636364,
|
2583 |
-
"r":0.375,
|
2584 |
-
"f":0.4719101124
|
2585 |
-
},
|
2586 |
"CARDINAL":{
|
2587 |
"p":0.9365079365,
|
2588 |
"r":0.9407744875,
|
@@ -2593,6 +2588,11 @@
|
|
2593 |
"r":0.7875288684,
|
2594 |
"f":0.7830080367
|
2595 |
},
|
|
|
|
|
|
|
|
|
|
|
2596 |
"PRODUCT":{
|
2597 |
"p":0.7095435685,
|
2598 |
"r":0.6951219512,
|
@@ -2654,7 +2654,7 @@
|
|
2654 |
"f":0.8275862069
|
2655 |
}
|
2656 |
},
|
2657 |
-
"speed":
|
2658 |
},
|
2659 |
"sources":[
|
2660 |
{
|
|
|
1 |
{
|
2 |
"lang":"fi",
|
3 |
"name":"core_news_lg",
|
4 |
+
"version":"3.6.0",
|
5 |
"description":"Finnish pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.",
|
6 |
"author":"Explosion",
|
7 |
"email":"contact@explosion.ai",
|
8 |
"url":"https://explosion.ai",
|
9 |
"license":"CC BY-SA 4.0",
|
10 |
+
"spacy_version":">=3.6.0,<3.7.0",
|
11 |
+
"spacy_git_version":"cb4fdc83e",
|
12 |
"vectors":{
|
13 |
"width":300,
|
14 |
"vectors":200000,
|
|
|
2578 |
"r":0.8258196721,
|
2579 |
"f":0.83436853
|
2580 |
},
|
|
|
|
|
|
|
|
|
|
|
2581 |
"CARDINAL":{
|
2582 |
"p":0.9365079365,
|
2583 |
"r":0.9407744875,
|
|
|
2588 |
"r":0.7875288684,
|
2589 |
"f":0.7830080367
|
2590 |
},
|
2591 |
+
"WORK_OF_ART":{
|
2592 |
+
"p":0.6363636364,
|
2593 |
+
"r":0.375,
|
2594 |
+
"f":0.4719101124
|
2595 |
+
},
|
2596 |
"PRODUCT":{
|
2597 |
"p":0.7095435685,
|
2598 |
"r":0.6951219512,
|
|
|
2654 |
"f":0.8275862069
|
2655 |
}
|
2656 |
},
|
2657 |
+
"speed":7690.3240081011
|
2658 |
},
|
2659 |
"sources":[
|
2660 |
{
|
morphologizer/cfg
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
{
|
2 |
"extend":false,
|
|
|
3 |
"labels_morph":{
|
4 |
"Case=Nom|Number=Sing|POS=NOUN":"Case=Nom|Number=Sing",
|
5 |
"NumType=Ord|POS=ADJ":"NumType=Ord",
|
|
|
1 |
{
|
2 |
"extend":false,
|
3 |
+
"label_smoothing":0.0,
|
4 |
"labels_morph":{
|
5 |
"Case=Nom|Number=Sing|POS=NOUN":"Case=Nom|Number=Sing",
|
6 |
"NumType=Ord|POS=ADJ":"NumType=Ord",
|
tagger/cfg
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"labels":[
|
3 |
"A",
|
4 |
"Adj",
|
|
|
1 |
{
|
2 |
+
"label_smoothing":0.0,
|
3 |
"labels":[
|
4 |
"A",
|
5 |
"Adj",
|