RaThorat committed on
Commit
2034e52
·
1 Parent(s): 48b16d1

Update spaCy pipeline

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ textcat/model filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - spacy
4
+ - text-classification
5
+ language:
6
+ - nl
7
+ model-index:
8
+ - name: nl_meningen
9
+ results: []
10
+ ---
11
+ | Feature | Description |
12
+ | --- | --- |
13
+ | **Name** | `nl_meningen` |
14
+ | **Version** | `0.0.0` |
15
+ | **spaCy** | `>=3.4.3,<3.5.0` |
16
+ | **Default Pipeline** | `textcat` |
17
+ | **Components** | `textcat` |
18
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
19
+ | **Sources** | n/a |
20
+ | **License** | n/a |
21
+ | **Author** | n/a |
22
+
23
+ ### Label Scheme
24
+
25
+ <details>
26
+
27
+ <summary>View label scheme (2 labels for 1 component)</summary>
28
+
29
+ | Component | Labels |
30
+ | --- | --- |
31
+ | **`textcat`** | `Positief`, `Negatief` |
32
+
33
+ </details>
34
+
35
+ ### Accuracy
36
+
37
+ | Type | Score |
38
+ | --- | --- |
39
+ | `CATS_SCORE` | 96.54 |
40
+ | `CATS_MICRO_P` | 96.77 |
41
+ | `CATS_MICRO_R` | 96.77 |
42
+ | `CATS_MICRO_F` | 96.77 |
43
+ | `CATS_MACRO_P` | 95.83 |
44
+ | `CATS_MACRO_R` | 97.50 |
45
+ | `CATS_MACRO_F` | 96.54 |
46
+ | `CATS_MACRO_AUC` | 98.18 |
47
+ | `CATS_MACRO_AUC_PER_TYPE` | 0.00 |
48
+ | `TEXTCAT_LOSS` | 8.31 |
config.cfg ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [paths]
2
+ train = "/home/gebruiker/anaconda3/envs/corpus/train.spacy"
3
+ dev = "/home/gebruiker/anaconda3/envs/corpus/dev.spacy"
4
+ vectors = null
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ gpu_allocator = null
9
+ seed = 0
10
+
11
+ [nlp]
12
+ lang = "nl"
13
+ pipeline = ["textcat"]
14
+ batch_size = 1000
15
+ disabled = []
16
+ before_creation = null
17
+ after_creation = null
18
+ after_pipeline_creation = null
19
+ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+
21
+ [components]
22
+
23
+ [components.textcat]
24
+ factory = "textcat"
25
+ scorer = {"@scorers":"spacy.textcat_scorer.v1"}
26
+ threshold = 0.5
27
+
28
+ [components.textcat.model]
29
+ @architectures = "spacy.TextCatBOW.v2"
30
+ exclusive_classes = true
31
+ ngram_size = 1
32
+ no_output_layer = false
33
+ nO = null
34
+
35
+ [corpora]
36
+
37
+ [corpora.dev]
38
+ @readers = "spacy.Corpus.v1"
39
+ path = ${paths.dev}
40
+ max_length = 0
41
+ gold_preproc = false
42
+ limit = 0
43
+ augmenter = null
44
+
45
+ [corpora.train]
46
+ @readers = "spacy.Corpus.v1"
47
+ path = ${paths.train}
48
+ max_length = 0
49
+ gold_preproc = false
50
+ limit = 0
51
+ augmenter = null
52
+
53
+ [training]
54
+ dev_corpus = "corpora.dev"
55
+ train_corpus = "corpora.train"
56
+ seed = ${system.seed}
57
+ gpu_allocator = ${system.gpu_allocator}
58
+ dropout = 0.1
59
+ accumulate_gradient = 1
60
+ patience = 1600
61
+ max_epochs = 0
62
+ max_steps = 20000
63
+ eval_frequency = 200
64
+ frozen_components = []
65
+ annotating_components = []
66
+ before_to_disk = null
67
+
68
+ [training.batcher]
69
+ @batchers = "spacy.batch_by_words.v1"
70
+ discard_oversize = false
71
+ tolerance = 0.2
72
+ get_length = null
73
+
74
+ [training.batcher.size]
75
+ @schedules = "compounding.v1"
76
+ start = 100
77
+ stop = 1000
78
+ compound = 1.001
79
+ t = 0.0
80
+
81
+ [training.logger]
82
+ @loggers = "spacy.ConsoleLogger.v1"
83
+ progress_bar = false
84
+
85
+ [training.optimizer]
86
+ @optimizers = "Adam.v1"
87
+ beta1 = 0.9
88
+ beta2 = 0.999
89
+ L2_is_weight_decay = true
90
+ L2 = 0.01
91
+ grad_clip = 1.0
92
+ use_averages = false
93
+ eps = 0.00000001
94
+ learn_rate = 0.001
95
+
96
+ [training.score_weights]
97
+ cats_score = 1.0
98
+ cats_score_desc = null
99
+ cats_micro_p = null
100
+ cats_micro_r = null
101
+ cats_micro_f = null
102
+ cats_macro_p = null
103
+ cats_macro_r = null
104
+ cats_macro_f = null
105
+ cats_macro_auc = null
106
+ cats_f_per_type = null
107
+ cats_macro_auc_per_type = null
108
+
109
+ [pretraining]
110
+
111
+ [initialize]
112
+ vectors = ${paths.vectors}
113
+ init_tok2vec = ${paths.init_tok2vec}
114
+ vocab_data = null
115
+ lookups = null
116
+ before_init = null
117
+ after_init = null
118
+
119
+ [initialize.components]
120
+
121
+ [initialize.components.textcat]
122
+
123
+ [initialize.components.textcat.labels]
124
+ @readers = "spacy.read_labels.v1"
125
+ path = "/home/gebruiker/anaconda3/envs/corpus/labels/textcat.json"
126
+ require = false
127
+
128
+ [initialize.tokenizer]
meta.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lang":"nl",
3
+ "name":"meningen",
4
+ "version":"0.0.0",
5
+ "description":"",
6
+ "author":"",
7
+ "email":"",
8
+ "url":"",
9
+ "license":"",
10
+ "spacy_version":">=3.4.3,<3.5.0",
11
+ "spacy_git_version":"Unknown",
12
+ "vectors":{
13
+ "width":0,
14
+ "vectors":0,
15
+ "keys":0,
16
+ "name":null
17
+ },
18
+ "labels":{
19
+ "textcat":[
20
+ "Positief",
21
+ "Negatief"
22
+ ]
23
+ },
24
+ "pipeline":[
25
+ "textcat"
26
+ ],
27
+ "components":[
28
+ "textcat"
29
+ ],
30
+ "disabled":[
31
+
32
+ ],
33
+ "performance":{
34
+ "cats_score":0.9654403567,
35
+ "cats_score_desc":"macro F",
36
+ "cats_micro_p":0.9677419355,
37
+ "cats_micro_r":0.9677419355,
38
+ "cats_micro_f":0.9677419355,
39
+ "cats_macro_p":0.9583333333,
40
+ "cats_macro_r":0.975,
41
+ "cats_macro_f":0.9654403567,
42
+ "cats_macro_auc":0.9818181818,
43
+ "cats_f_per_type":{
44
+ "Positief":{
45
+ "p":1.0,
46
+ "r":0.95,
47
+ "f":0.9743589744
48
+ },
49
+ "Negatief":{
50
+ "p":0.9166666667,
51
+ "r":1.0,
52
+ "f":0.9565217391
53
+ }
54
+ },
55
+ "cats_macro_auc_per_type":0.0,
56
+ "textcat_loss":0.083148308
57
+ },
58
+ "requirements":[
59
+
60
+ ]
61
+ }
nl_meningen-any-py3-none-any.whl ADDED
Binary file (96.2 kB). View file
 
textcat/cfg ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "labels":[
3
+ "Positief",
4
+ "Negatief"
5
+ ],
6
+ "threshold":0.5,
7
+ "positive_label":null
8
+ }
textcat/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43bd59f069d64ac38dc33589fc5b775b6a63425e283e7ec3e10b6e55651f3186
3
+ size 2097880
tokenizer ADDED
The diff for this file is too large to render. See raw diff
 
vocab/key2row ADDED
@@ -0,0 +1 @@
 
 
1
+
vocab/lookups.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
3
+ size 1
vocab/strings.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab/vectors ADDED
Binary file (128 Bytes). View file
 
vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "mode":"default"
3
+ }