Update spaCy pipeline
Browse files- .gitattributes +4 -0
- config.cfg +31 -2
- da_spacy_sentiment-any-py3-none-any.whl +0 -0
- meta.json +28 -22
- textcat/model +2 -2
- tok2vec/cfg +3 -0
- tok2vec/model +3 -0
- vocab/key2row +3 -1
- vocab/strings.json +0 -0
- vocab/vectors +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
textcat/model filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
textcat/model filter=lfs diff=lfs merge=lfs -text
|
36 |
+
da_spacy_sentiment-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
|
37 |
+
tok2vec/model filter=lfs diff=lfs merge=lfs -text
|
38 |
+
vocab/key2row filter=lfs diff=lfs merge=lfs -text
|
39 |
+
vocab/vectors filter=lfs diff=lfs merge=lfs -text
|
config.cfg
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
[paths]
|
2 |
train = "./corpus/train.sentiment.spacy"
|
3 |
dev = "./corpus/dev.sentiment.spacy"
|
4 |
-
vectors =
|
5 |
init_tok2vec = null
|
6 |
|
7 |
[system]
|
@@ -10,7 +10,7 @@ seed = 0
|
|
10 |
|
11 |
[nlp]
|
12 |
lang = "da"
|
13 |
-
pipeline = ["textcat"]
|
14 |
batch_size = 1000
|
15 |
disabled = []
|
16 |
before_creation = null
|
@@ -26,12 +26,41 @@ scorer = {"@scorers":"spacy.textcat_scorer.v2"}
|
|
26 |
threshold = 0.0
|
27 |
|
28 |
[components.textcat.model]
|
|
|
|
|
|
|
|
|
29 |
@architectures = "spacy.TextCatBOW.v2"
|
30 |
exclusive_classes = true
|
31 |
ngram_size = 1
|
32 |
no_output_layer = false
|
33 |
nO = null
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
[corpora]
|
36 |
|
37 |
[corpora.dev]
|
|
|
1 |
[paths]
|
2 |
train = "./corpus/train.sentiment.spacy"
|
3 |
dev = "./corpus/dev.sentiment.spacy"
|
4 |
+
vectors = "da_core_news_md"
|
5 |
init_tok2vec = null
|
6 |
|
7 |
[system]
|
|
|
10 |
|
11 |
[nlp]
|
12 |
lang = "da"
|
13 |
+
pipeline = ["tok2vec","textcat"]
|
14 |
batch_size = 1000
|
15 |
disabled = []
|
16 |
before_creation = null
|
|
|
26 |
threshold = 0.0
|
27 |
|
28 |
[components.textcat.model]
|
29 |
+
@architectures = "spacy.TextCatEnsemble.v2"
|
30 |
+
nO = null
|
31 |
+
|
32 |
+
[components.textcat.model.linear_model]
|
33 |
@architectures = "spacy.TextCatBOW.v2"
|
34 |
exclusive_classes = true
|
35 |
ngram_size = 1
|
36 |
no_output_layer = false
|
37 |
nO = null
|
38 |
|
39 |
+
[components.textcat.model.tok2vec]
|
40 |
+
@architectures = "spacy.Tok2VecListener.v1"
|
41 |
+
width = ${components.tok2vec.model.encode.width}
|
42 |
+
upstream = "*"
|
43 |
+
|
44 |
+
[components.tok2vec]
|
45 |
+
factory = "tok2vec"
|
46 |
+
|
47 |
+
[components.tok2vec.model]
|
48 |
+
@architectures = "spacy.Tok2Vec.v2"
|
49 |
+
|
50 |
+
[components.tok2vec.model.embed]
|
51 |
+
@architectures = "spacy.MultiHashEmbed.v2"
|
52 |
+
width = ${components.tok2vec.model.encode.width}
|
53 |
+
attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
|
54 |
+
rows = [5000,1000,2500,2500]
|
55 |
+
include_static_vectors = true
|
56 |
+
|
57 |
+
[components.tok2vec.model.encode]
|
58 |
+
@architectures = "spacy.MaxoutWindowEncoder.v2"
|
59 |
+
width = 256
|
60 |
+
depth = 8
|
61 |
+
window_size = 1
|
62 |
+
maxout_pieces = 3
|
63 |
+
|
64 |
[corpora]
|
65 |
|
66 |
[corpora.dev]
|
da_spacy_sentiment-any-py3-none-any.whl
CHANGED
Binary files a/da_spacy_sentiment-any-py3-none-any.whl and b/da_spacy_sentiment-any-py3-none-any.whl differ
|
|
meta.json
CHANGED
@@ -10,12 +10,15 @@
|
|
10 |
"spacy_version":">=3.5.1,<3.6.0",
|
11 |
"spacy_git_version":"Unknown",
|
12 |
"vectors":{
|
13 |
-
"width":
|
14 |
-
"vectors":
|
15 |
-
"keys":
|
16 |
-
"name":
|
17 |
},
|
18 |
"labels":{
|
|
|
|
|
|
|
19 |
"textcat":[
|
20 |
"neutral",
|
21 |
"negative",
|
@@ -23,42 +26,45 @@
|
|
23 |
]
|
24 |
},
|
25 |
"pipeline":[
|
|
|
26 |
"textcat"
|
27 |
],
|
28 |
"components":[
|
|
|
29 |
"textcat"
|
30 |
],
|
31 |
"disabled":[
|
32 |
|
33 |
],
|
34 |
"performance":{
|
35 |
-
"cats_score":0.
|
36 |
"cats_score_desc":"macro F",
|
37 |
-
"cats_micro_p":0.
|
38 |
-
"cats_micro_r":0.
|
39 |
-
"cats_micro_f":0.
|
40 |
-
"cats_macro_p":0.
|
41 |
-
"cats_macro_r":0.
|
42 |
-
"cats_macro_f":0.
|
43 |
-
"cats_macro_auc":0.
|
44 |
"cats_f_per_type":{
|
45 |
"neutral":{
|
46 |
-
"p":0.
|
47 |
-
"r":0.
|
48 |
-
"f":0.
|
49 |
},
|
50 |
"negative":{
|
51 |
-
"p":0.
|
52 |
-
"r":0.
|
53 |
-
"f":0.
|
54 |
},
|
55 |
"positive":{
|
56 |
-
"p":0.
|
57 |
-
"r":0.
|
58 |
-
"f":0.
|
59 |
}
|
60 |
},
|
61 |
-
"
|
|
|
62 |
},
|
63 |
"requirements":[
|
64 |
|
|
|
10 |
"spacy_version":">=3.5.1,<3.6.0",
|
11 |
"spacy_git_version":"Unknown",
|
12 |
"vectors":{
|
13 |
+
"width":300,
|
14 |
+
"vectors":20000,
|
15 |
+
"keys":500000,
|
16 |
+
"name":"da_vectors"
|
17 |
},
|
18 |
"labels":{
|
19 |
+
"tok2vec":[
|
20 |
+
|
21 |
+
],
|
22 |
"textcat":[
|
23 |
"neutral",
|
24 |
"negative",
|
|
|
26 |
]
|
27 |
},
|
28 |
"pipeline":[
|
29 |
+
"tok2vec",
|
30 |
"textcat"
|
31 |
],
|
32 |
"components":[
|
33 |
+
"tok2vec",
|
34 |
"textcat"
|
35 |
],
|
36 |
"disabled":[
|
37 |
|
38 |
],
|
39 |
"performance":{
|
40 |
+
"cats_score":0.4935672515,
|
41 |
"cats_score_desc":"macro F",
|
42 |
+
"cats_micro_p":0.6904761905,
|
43 |
+
"cats_micro_r":0.6904761905,
|
44 |
+
"cats_micro_f":0.6904761905,
|
45 |
+
"cats_macro_p":0.4864024864,
|
46 |
+
"cats_macro_r":0.502577943,
|
47 |
+
"cats_macro_f":0.4935672515,
|
48 |
+
"cats_macro_auc":0.6640492205,
|
49 |
"cats_f_per_type":{
|
50 |
"neutral":{
|
51 |
+
"p":0.8181818182,
|
52 |
+
"r":0.792746114,
|
53 |
+
"f":0.8052631579
|
54 |
},
|
55 |
"negative":{
|
56 |
+
"p":0.3076923077,
|
57 |
+
"r":0.3636363636,
|
58 |
+
"f":0.3333333333
|
59 |
},
|
60 |
"positive":{
|
61 |
+
"p":0.3333333333,
|
62 |
+
"r":0.3513513514,
|
63 |
+
"f":0.3421052632
|
64 |
}
|
65 |
},
|
66 |
+
"tok2vec_loss":51.8325935973,
|
67 |
+
"textcat_loss":16.7191163995
|
68 |
},
|
69 |
"requirements":[
|
70 |
|
textcat/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:751028d05294d492933266f6e518ed982b7051a96b4f63820ddf58d7d196d0c1
|
3 |
+
size 3944012
|
tok2vec/cfg
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
|
3 |
+
}
|
tok2vec/model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e4285eced09bf237b2cf427339a9b2dc379b0963bba52b6256d458e076eb0dc
|
3 |
+
size 34434008
|
vocab/key2row
CHANGED
@@ -1 +1,3 @@
|
|
1 |
-
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29b9f9abc60252769aa9a64072337af1f81f0f15d568dd09c8bdad03789ce961
|
3 |
+
size 5996984
|
vocab/strings.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
vocab/vectors
CHANGED
Binary files a/vocab/vectors and b/vocab/vectors differ
|
|