mirfan899 committed on
Commit
e25080a
1 Parent(s): cd99158

Update spaCy pipeline

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  textcat/model filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  textcat/model filter=lfs diff=lfs merge=lfs -text
36
+ da_spacy_sentiment-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
37
+ tok2vec/model filter=lfs diff=lfs merge=lfs -text
38
+ vocab/key2row filter=lfs diff=lfs merge=lfs -text
39
+ vocab/vectors filter=lfs diff=lfs merge=lfs -text
config.cfg CHANGED
@@ -1,7 +1,7 @@
1
  [paths]
2
  train = "./corpus/train.sentiment.spacy"
3
  dev = "./corpus/dev.sentiment.spacy"
4
- vectors = null
5
  init_tok2vec = null
6
 
7
  [system]
@@ -10,7 +10,7 @@ seed = 0
10
 
11
  [nlp]
12
  lang = "da"
13
- pipeline = ["textcat"]
14
  batch_size = 1000
15
  disabled = []
16
  before_creation = null
@@ -26,12 +26,41 @@ scorer = {"@scorers":"spacy.textcat_scorer.v2"}
26
  threshold = 0.0
27
 
28
  [components.textcat.model]
 
 
 
 
29
  @architectures = "spacy.TextCatBOW.v2"
30
  exclusive_classes = true
31
  ngram_size = 1
32
  no_output_layer = false
33
  nO = null
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  [corpora]
36
 
37
  [corpora.dev]
 
1
  [paths]
2
  train = "./corpus/train.sentiment.spacy"
3
  dev = "./corpus/dev.sentiment.spacy"
4
+ vectors = "da_core_news_md"
5
  init_tok2vec = null
6
 
7
  [system]
 
10
 
11
  [nlp]
12
  lang = "da"
13
+ pipeline = ["tok2vec","textcat"]
14
  batch_size = 1000
15
  disabled = []
16
  before_creation = null
 
26
  threshold = 0.0
27
 
28
  [components.textcat.model]
29
+ @architectures = "spacy.TextCatEnsemble.v2"
30
+ nO = null
31
+
32
+ [components.textcat.model.linear_model]
33
  @architectures = "spacy.TextCatBOW.v2"
34
  exclusive_classes = true
35
  ngram_size = 1
36
  no_output_layer = false
37
  nO = null
38
 
39
+ [components.textcat.model.tok2vec]
40
+ @architectures = "spacy.Tok2VecListener.v1"
41
+ width = ${components.tok2vec.model.encode.width}
42
+ upstream = "*"
43
+
44
+ [components.tok2vec]
45
+ factory = "tok2vec"
46
+
47
+ [components.tok2vec.model]
48
+ @architectures = "spacy.Tok2Vec.v2"
49
+
50
+ [components.tok2vec.model.embed]
51
+ @architectures = "spacy.MultiHashEmbed.v2"
52
+ width = ${components.tok2vec.model.encode.width}
53
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
54
+ rows = [5000,1000,2500,2500]
55
+ include_static_vectors = true
56
+
57
+ [components.tok2vec.model.encode]
58
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
59
+ width = 256
60
+ depth = 8
61
+ window_size = 1
62
+ maxout_pieces = 3
63
+
64
  [corpora]
65
 
66
  [corpora.dev]
da_spacy_sentiment-any-py3-none-any.whl CHANGED
Binary files a/da_spacy_sentiment-any-py3-none-any.whl and b/da_spacy_sentiment-any-py3-none-any.whl differ
 
meta.json CHANGED
@@ -10,12 +10,15 @@
10
  "spacy_version":">=3.5.1,<3.6.0",
11
  "spacy_git_version":"Unknown",
12
  "vectors":{
13
- "width":0,
14
- "vectors":0,
15
- "keys":0,
16
- "name":null
17
  },
18
  "labels":{
 
 
 
19
  "textcat":[
20
  "neutral",
21
  "negative",
@@ -23,42 +26,45 @@
23
  ]
24
  },
25
  "pipeline":[
 
26
  "textcat"
27
  ],
28
  "components":[
 
29
  "textcat"
30
  ],
31
  "disabled":[
32
 
33
  ],
34
  "performance":{
35
- "cats_score":0.4644417144,
36
  "cats_score_desc":"macro F",
37
- "cats_micro_p":0.7301587302,
38
- "cats_micro_r":0.7301587302,
39
- "cats_micro_f":0.7301587302,
40
- "cats_macro_p":0.5166357238,
41
- "cats_macro_r":0.4431664354,
42
- "cats_macro_f":0.4644417144,
43
- "cats_macro_auc":0.6167244706,
44
  "cats_f_per_type":{
45
  "neutral":{
46
- "p":0.7990654206,
47
- "r":0.8860103627,
48
- "f":0.8402948403
49
  },
50
  "negative":{
51
- "p":0.4545454545,
52
- "r":0.2272727273,
53
- "f":0.303030303
54
  },
55
  "positive":{
56
- "p":0.2962962963,
57
- "r":0.2162162162,
58
- "f":0.25
59
  }
60
  },
61
- "textcat_loss":7.6585407546
 
62
  },
63
  "requirements":[
64
 
 
10
  "spacy_version":">=3.5.1,<3.6.0",
11
  "spacy_git_version":"Unknown",
12
  "vectors":{
13
+ "width":300,
14
+ "vectors":20000,
15
+ "keys":500000,
16
+ "name":"da_vectors"
17
  },
18
  "labels":{
19
+ "tok2vec":[
20
+
21
+ ],
22
  "textcat":[
23
  "neutral",
24
  "negative",
 
26
  ]
27
  },
28
  "pipeline":[
29
+ "tok2vec",
30
  "textcat"
31
  ],
32
  "components":[
33
+ "tok2vec",
34
  "textcat"
35
  ],
36
  "disabled":[
37
 
38
  ],
39
  "performance":{
40
+ "cats_score":0.4935672515,
41
  "cats_score_desc":"macro F",
42
+ "cats_micro_p":0.6904761905,
43
+ "cats_micro_r":0.6904761905,
44
+ "cats_micro_f":0.6904761905,
45
+ "cats_macro_p":0.4864024864,
46
+ "cats_macro_r":0.502577943,
47
+ "cats_macro_f":0.4935672515,
48
+ "cats_macro_auc":0.6640492205,
49
  "cats_f_per_type":{
50
  "neutral":{
51
+ "p":0.8181818182,
52
+ "r":0.792746114,
53
+ "f":0.8052631579
54
  },
55
  "negative":{
56
+ "p":0.3076923077,
57
+ "r":0.3636363636,
58
+ "f":0.3333333333
59
  },
60
  "positive":{
61
+ "p":0.3333333333,
62
+ "r":0.3513513514,
63
+ "f":0.3421052632
64
  }
65
  },
66
+ "tok2vec_loss":51.8325935973,
67
+ "textcat_loss":16.7191163995
68
  },
69
  "requirements":[
70
 
textcat/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f757344345aaa840842e42feb51d88d09743dbd845898f861c7269037720bfae
3
- size 3146475
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:751028d05294d492933266f6e518ed982b7051a96b4f63820ddf58d7d196d0c1
3
+ size 3944012
tok2vec/cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+
3
+ }
tok2vec/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e4285eced09bf237b2cf427339a9b2dc379b0963bba52b6256d458e076eb0dc
3
+ size 34434008
vocab/key2row CHANGED
@@ -1 +1,3 @@
1
-
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29b9f9abc60252769aa9a64072337af1f81f0f15d568dd09c8bdad03789ce961
3
+ size 5996984
vocab/strings.json CHANGED
The diff for this file is too large to render. See raw diff
 
vocab/vectors CHANGED
Binary files a/vocab/vectors and b/vocab/vectors differ