scfengv committed on
Commit
074114b
1 Parent(s): 71bac5c

Update spaCy pipeline

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ textcat_multilabel/model filter=lfs diff=lfs merge=lfs -text
37
+ transformer/model filter=lfs diff=lfs merge=lfs -text
38
+ zh_Overall_Layer_Classifier-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - spacy
4
+ - text-classification
5
+ language:
6
+ - zh
7
+ model-index:
8
+ - name: zh_Overall_Layer_Classifier
9
+ results: []
10
+ ---
11
+ | Feature | Description |
12
+ | --- | --- |
13
+ | **Name** | `zh_Overall_Layer_Classifier` |
14
+ | **Version** | `0.0.0` |
15
+ | **spaCy** | `>=3.6.1,<3.7.0` |
16
+ | **Default Pipeline** | `transformer`, `textcat_multilabel` |
17
+ | **Components** | `transformer`, `textcat_multilabel` |
18
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
19
+ | **Sources** | n/a |
20
+ | **License** | n/a |
21
+ | **Author** | n/a |
22
+
23
+ ### Label Scheme
24
+
25
+ <details>
26
+
27
+ <summary>View label scheme (4 labels for 1 component)</summary>
28
+
29
+ | Component | Labels |
30
+ | --- | --- |
31
+ | **`textcat_multilabel`** | `比賽`, `加油`, `轉播`, `閒聊` |
32
+
33
+ </details>
34
+
35
+ ### Accuracy
36
+
37
+ | Type | Score |
38
+ | --- | --- |
39
+ | `CATS_SCORE` | 99.55 |
40
+ | `CATS_MICRO_P` | 97.83 |
41
+ | `CATS_MICRO_R` | 96.92 |
42
+ | `CATS_MICRO_F` | 97.37 |
43
+ | `CATS_MACRO_P` | 97.85 |
44
+ | `CATS_MACRO_R` | 96.77 |
45
+ | `CATS_MACRO_F` | 97.28 |
46
+ | `CATS_MACRO_AUC` | 99.55 |
47
+ | `TRANSFORMER_LOSS` | 40.22 |
48
+ | `TEXTCAT_MULTILABEL_LOSS` | 9.31 |
config.cfg ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [paths]
2
+ train = "./data/train.spacy"
3
+ dev = "./data/valid.spacy"
4
+ vectors = null
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ gpu_allocator = "pytorch"
9
+ seed = 0
10
+
11
+ [nlp]
12
+ lang = "zh"
13
+ pipeline = ["transformer","textcat_multilabel"]
14
+ batch_size = 128
15
+ disabled = []
16
+ before_creation = null
17
+ after_creation = null
18
+ after_pipeline_creation = null
19
+
20
+ [nlp.tokenizer]
21
+ @tokenizers = "spacy.zh.ChineseTokenizer"
22
+ segmenter = "char"
23
+
24
+ [components]
25
+
26
+ [components.textcat_multilabel]
27
+ factory = "textcat_multilabel"
28
+ scorer = {"@scorers":"spacy.textcat_multilabel_scorer.v2"}
29
+ threshold = 0.5
30
+
31
+ [components.textcat_multilabel.model]
32
+ @architectures = "spacy.TextCatEnsemble.v2"
33
+ nO = null
34
+
35
+ [components.textcat_multilabel.model.linear_model]
36
+ @architectures = "spacy.TextCatBOW.v2"
37
+ exclusive_classes = false
38
+ ngram_size = 1
39
+ no_output_layer = false
40
+ nO = null
41
+
42
+ [components.textcat_multilabel.model.tok2vec]
43
+ @architectures = "spacy-transformers.TransformerListener.v1"
44
+ grad_factor = 1.0
45
+ pooling = {"@layers":"reduce_mean.v1"}
46
+ upstream = "*"
47
+
48
+ [components.transformer]
49
+ factory = "transformer"
50
+ max_batch_items = 4096
51
+ set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
52
+
53
+ [components.transformer.model]
54
+ @architectures = "spacy-transformers.TransformerModel.v3"
55
+ name = "bert-base-chinese"
56
+ mixed_precision = false
57
+
58
+ [components.transformer.model.get_spans]
59
+ @span_getters = "spacy-transformers.strided_spans.v1"
60
+ window = 128
61
+ stride = 96
62
+
63
+ [components.transformer.model.grad_scaler_config]
64
+
65
+ [components.transformer.model.tokenizer_config]
66
+ use_fast = true
67
+
68
+ [components.transformer.model.transformer_config]
69
+
70
+ [corpora]
71
+
72
+ [corpora.dev]
73
+ @readers = "spacy.Corpus.v1"
74
+ path = ${paths.dev}
75
+ max_length = 0
76
+ gold_preproc = false
77
+ limit = 0
78
+ augmenter = null
79
+
80
+ [corpora.train]
81
+ @readers = "spacy.Corpus.v1"
82
+ path = ${paths.train}
83
+ max_length = 0
84
+ gold_preproc = false
85
+ limit = 0
86
+ augmenter = null
87
+
88
+ [training]
89
+ accumulate_gradient = 3
90
+ dev_corpus = "corpora.dev"
91
+ train_corpus = "corpora.train"
92
+ seed = ${system.seed}
93
+ gpu_allocator = ${system.gpu_allocator}
94
+ dropout = 0.1
95
+ patience = 1600
96
+ max_epochs = 0
97
+ max_steps = 20000
98
+ eval_frequency = 200
99
+ frozen_components = []
100
+ annotating_components = []
101
+ before_to_disk = null
102
+ before_update = null
103
+
104
+ [training.batcher]
105
+ @batchers = "spacy.batch_by_padded.v1"
106
+ discard_oversize = true
107
+ size = 2000
108
+ buffer = 256
109
+ get_length = null
110
+
111
+ [training.logger]
112
+ @loggers = "spacy.ConsoleLogger.v1"
113
+ progress_bar = false
114
+
115
+ [training.optimizer]
116
+ @optimizers = "Adam.v1"
117
+ beta1 = 0.9
118
+ beta2 = 0.999
119
+ L2_is_weight_decay = true
120
+ L2 = 0.01
121
+ grad_clip = 1.0
122
+ use_averages = false
123
+ eps = 0.00000001
124
+
125
+ [training.optimizer.learn_rate]
126
+ @schedules = "warmup_linear.v1"
127
+ warmup_steps = 250
128
+ total_steps = 20000
129
+ initial_rate = 0.00005
130
+
131
+ [training.score_weights]
132
+ cats_score = 1.0
133
+ cats_score_desc = null
134
+ cats_micro_p = null
135
+ cats_micro_r = null
136
+ cats_micro_f = null
137
+ cats_macro_p = null
138
+ cats_macro_r = null
139
+ cats_macro_f = null
140
+ cats_macro_auc = null
141
+ cats_f_per_type = null
142
+
143
+ [pretraining]
144
+
145
+ [initialize]
146
+ vectors = ${paths.vectors}
147
+ init_tok2vec = ${paths.init_tok2vec}
148
+ vocab_data = null
149
+ lookups = null
150
+ before_init = null
151
+ after_init = null
152
+
153
+ [initialize.components]
154
+
155
+ [initialize.tokenizer]
156
+ pkuseg_model = null
157
+ pkuseg_user_dict = "default"
meta.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lang":"zh",
3
+ "name":"Overall_Layer_Classifier",
4
+ "version":"0.0.0",
5
+ "description":"",
6
+ "author":"",
7
+ "email":"",
8
+ "url":"",
9
+ "license":"",
10
+ "spacy_version":">=3.6.1,<3.7.0",
11
+ "spacy_git_version":"458bc5f45",
12
+ "vectors":{
13
+ "width":0,
14
+ "vectors":0,
15
+ "keys":0,
16
+ "name":null
17
+ },
18
+ "labels":{
19
+ "transformer":[
20
+
21
+ ],
22
+ "textcat_multilabel":[
23
+ "\u6bd4\u8cfd",
24
+ "\u52a0\u6cb9",
25
+ "\u8f49\u64ad",
26
+ "\u9592\u804a"
27
+ ]
28
+ },
29
+ "pipeline":[
30
+ "transformer",
31
+ "textcat_multilabel"
32
+ ],
33
+ "components":[
34
+ "transformer",
35
+ "textcat_multilabel"
36
+ ],
37
+ "disabled":[
38
+
39
+ ],
40
+ "performance":{
41
+ "cats_score":0.9955167645,
42
+ "cats_score_desc":"macro AUC",
43
+ "cats_micro_p":0.978286011,
44
+ "cats_micro_r":0.9691818458,
45
+ "cats_micro_f":0.973712648,
46
+ "cats_macro_p":0.9784797905,
47
+ "cats_macro_r":0.9677062023,
48
+ "cats_macro_f":0.9728313633,
49
+ "cats_macro_auc":0.9955167645,
50
+ "cats_f_per_type":{
51
+ "\u6bd4\u8cfd":{
52
+ "p":0.9677419355,
53
+ "r":0.9914762328,
54
+ "f":0.9794653235
55
+ },
56
+ "\u52a0\u6cb9":{
57
+ "p":0.9884738088,
58
+ "r":0.9394561815,
59
+ "f":0.9633418585
60
+ },
61
+ "\u8f49\u64ad":{
62
+ "p":0.9693795326,
63
+ "r":0.9820408163,
64
+ "f":0.9756690998
65
+ },
66
+ "\u9592\u804a":{
67
+ "p":0.9883238853,
68
+ "r":0.9578515785,
69
+ "f":0.9728491713
70
+ }
71
+ },
72
+ "transformer_loss":40.2160341321,
73
+ "textcat_multilabel_loss":9.3095552224
74
+ },
75
+ "requirements":[
76
+ "spacy-transformers>=1.2.5,<1.3.0"
77
+ ]
78
+ }
textcat_multilabel/cfg ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "labels":[
3
+ "\u6bd4\u8cfd",
4
+ "\u52a0\u6cb9",
5
+ "\u8f49\u64ad",
6
+ "\u9592\u804a"
7
+ ],
8
+ "threshold":0.5
9
+ }
textcat_multilabel/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65d77b22bcf400cf35703cb3e18322617a16f2c02ebfabc6cc6be4ce2e701385
3
+ size 11305871
tokenizer/cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "segmenter":"char"
3
+ }
transformer/cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "max_batch_items":4096
3
+ }
transformer/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50d8abca024a3ed0b9fe2b2c8f985ee6f5d19538f58066303cca6ba8c7066015
3
+ size 409704459
vocab/key2row ADDED
@@ -0,0 +1 @@
 
 
1
+
vocab/lookups.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
3
+ size 1
vocab/strings.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab/vectors ADDED
Binary file (128 Bytes). View file
 
vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "mode":"default"
3
+ }
zh_Overall_Layer_Classifier-any-py3-none-any.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf2d62746dfe12a7278c1423dbf3e6b0d0b63352485d10ecbc31c188faac5148
3
+ size 386871385