Zlovoblachko committed on
Commit
99b3e04
1 Parent(s): ec143d0

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -1,20 +1,20 @@
1
  ---
2
  tags:
3
  - spacy
 
4
  language:
5
  - en
6
  model-index:
7
  - name: en_pipeline
8
  results: []
9
- pipeline_tag: token-classification
10
  ---
11
  | Feature | Description |
12
  | --- | --- |
13
  | **Name** | `en_pipeline` |
14
  | **Version** | `0.0.0` |
15
- | **spaCy** | `>=3.4.4,<3.5.0` |
16
- | **Default Pipeline** | `transformer`, `spancat` |
17
- | **Components** | `transformer`, `spancat` |
18
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
19
  | **Sources** | n/a |
20
  | **License** | n/a |
@@ -24,11 +24,11 @@ pipeline_tag: token-classification
24
 
25
  <details>
26
 
27
- <summary>View label scheme (5 labels for 1 components)</summary>
28
 
29
  | Component | Labels |
30
  | --- | --- |
31
- | **`spancat`** | `Copying expression`, `Word form transmission`, `Transliteration`, `Synonyms`, `Tense semantics` |
32
 
33
  </details>
34
 
@@ -36,8 +36,13 @@ pipeline_tag: token-classification
36
 
37
  | Type | Score |
38
  | --- | --- |
39
- | `SPANS_SC_F` | 83.50 |
40
- | `SPANS_SC_P` | 91.77 |
41
- | `SPANS_SC_R` | 76.60 |
42
- | `TRANSFORMER_LOSS` | 3043.02 |
43
- | `SPANCAT_LOSS` | 132578.01 |
 
 
 
 
 
 
1
  ---
2
  tags:
3
  - spacy
4
+ - text-classification
5
  language:
6
  - en
7
  model-index:
8
  - name: en_pipeline
9
  results: []
 
10
  ---
11
  | Feature | Description |
12
  | --- | --- |
13
  | **Name** | `en_pipeline` |
14
  | **Version** | `0.0.0` |
15
+ | **spaCy** | `>=3.7.5,<3.8.0` |
16
+ | **Default Pipeline** | `transformer`, `textcat` |
17
+ | **Components** | `transformer`, `textcat` |
18
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
19
  | **Sources** | n/a |
20
  | **License** | n/a |
 
24
 
25
  <details>
26
 
27
+ <summary>View label scheme (2 labels for 1 component)</summary>
28
 
29
  | Component | Labels |
30
  | --- | --- |
31
+ | **`textcat`** | `0.0`, `1.0` |
32
 
33
  </details>
34
 
 
36
 
37
  | Type | Score |
38
  | --- | --- |
39
+ | `CATS_SCORE` | 92.75 |
40
+ | `CATS_MICRO_P` | 92.75 |
41
+ | `CATS_MICRO_R` | 92.75 |
42
+ | `CATS_MICRO_F` | 92.75 |
43
+ | `CATS_MACRO_P` | 92.74 |
44
+ | `CATS_MACRO_R` | 92.76 |
45
+ | `CATS_MACRO_F` | 92.75 |
46
+ | `CATS_MACRO_AUC` | 97.20 |
47
+ | `TRANSFORMER_LOSS` | 26.49 |
48
+ | `TEXTCAT_LOSS` | 4617.23 |
config.cfg CHANGED
@@ -1,6 +1,6 @@
1
  [paths]
2
- train = "/content/train_new.spacy"
3
- dev = "/content/dev_new.spacy"
4
  vectors = null
5
  init_tok2vec = null
6
 
@@ -10,45 +10,33 @@ seed = 0
10
 
11
  [nlp]
12
  lang = "en"
13
- pipeline = ["transformer","spancat"]
14
  batch_size = 128
15
  disabled = []
16
  before_creation = null
17
  after_creation = null
18
  after_pipeline_creation = null
19
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
 
20
 
21
  [components]
22
 
23
- [components.spancat]
24
- factory = "spancat"
25
- max_positive = null
26
- scorer = {"@scorers":"spacy.spancat_scorer.v1"}
27
- spans_key = "sc"
28
- threshold = 0.5
29
 
30
- [components.spancat.model]
31
- @architectures = "spacy.SpanCategorizer.v1"
32
-
33
- [components.spancat.model.reducer]
34
- @layers = "spacy.mean_max_reducer.v1"
35
- hidden_size = 128
36
-
37
- [components.spancat.model.scorer]
38
- @layers = "spacy.LinearLogistic.v1"
39
  nO = null
40
- nI = null
41
 
42
- [components.spancat.model.tok2vec]
43
  @architectures = "spacy-transformers.TransformerListener.v1"
44
  grad_factor = 1.0
45
  pooling = {"@layers":"reduce_mean.v1"}
46
  upstream = "*"
47
 
48
- [components.spancat.suggester]
49
- @misc = "spacy.ngram_suggester.v1"
50
- sizes = [1,2,3]
51
-
52
  [components.transformer]
53
  factory = "transformer"
54
  max_batch_items = 4096
@@ -97,12 +85,13 @@ seed = ${system.seed}
97
  gpu_allocator = ${system.gpu_allocator}
98
  dropout = 0.1
99
  patience = 1600
100
- max_epochs = 50
101
  max_steps = 20000
102
  eval_frequency = 200
103
  frozen_components = []
104
  annotating_components = []
105
  before_to_disk = null
 
106
 
107
  [training.batcher]
108
  @batchers = "spacy.batch_by_padded.v1"
@@ -112,13 +101,8 @@ buffer = 256
112
  get_length = null
113
 
114
  [training.logger]
115
- @loggers = "spacy.WandbLogger.v3"
116
- project_name = "2024_ouroboros"
117
- remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
118
- model_log_interval = 1000
119
- log_dataset_dir = null
120
- entity = null
121
- run_name = null
122
 
123
  [training.optimizer]
124
  @optimizers = "Adam.v1"
@@ -137,9 +121,16 @@ total_steps = 20000
137
  initial_rate = 0.00005
138
 
139
  [training.score_weights]
140
- spans_sc_f = 0.5
141
- spans_sc_p = 0.5
142
- spans_sc_r = 0.0
 
 
 
 
 
 
 
143
 
144
  [pretraining]
145
 
 
1
  [paths]
2
+ train = "/content/train.spacy"
3
+ dev = "/content/dev.spacy"
4
  vectors = null
5
  init_tok2vec = null
6
 
 
10
 
11
  [nlp]
12
  lang = "en"
13
+ pipeline = ["transformer","textcat"]
14
  batch_size = 128
15
  disabled = []
16
  before_creation = null
17
  after_creation = null
18
  after_pipeline_creation = null
19
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+ vectors = {"@vectors":"spacy.Vectors.v1"}
21
 
22
  [components]
23
 
24
+ [components.textcat]
25
+ factory = "textcat"
26
+ scorer = {"@scorers":"spacy.textcat_scorer.v2"}
27
+ threshold = 0.0
 
 
28
 
29
+ [components.textcat.model]
30
+ @architectures = "spacy.TextCatCNN.v2"
31
+ exclusive_classes = true
 
 
 
 
 
 
32
  nO = null
 
33
 
34
+ [components.textcat.model.tok2vec]
35
  @architectures = "spacy-transformers.TransformerListener.v1"
36
  grad_factor = 1.0
37
  pooling = {"@layers":"reduce_mean.v1"}
38
  upstream = "*"
39
 
 
 
 
 
40
  [components.transformer]
41
  factory = "transformer"
42
  max_batch_items = 4096
 
85
  gpu_allocator = ${system.gpu_allocator}
86
  dropout = 0.1
87
  patience = 1600
88
+ max_epochs = 0
89
  max_steps = 20000
90
  eval_frequency = 200
91
  frozen_components = []
92
  annotating_components = []
93
  before_to_disk = null
94
+ before_update = null
95
 
96
  [training.batcher]
97
  @batchers = "spacy.batch_by_padded.v1"
 
101
  get_length = null
102
 
103
  [training.logger]
104
+ @loggers = "spacy.ConsoleLogger.v1"
105
+ progress_bar = true
 
 
 
 
 
106
 
107
  [training.optimizer]
108
  @optimizers = "Adam.v1"
 
121
  initial_rate = 0.00005
122
 
123
  [training.score_weights]
124
+ cats_score = 1.0
125
+ cats_score_desc = null
126
+ cats_micro_p = null
127
+ cats_micro_r = null
128
+ cats_micro_f = null
129
+ cats_macro_p = null
130
+ cats_macro_r = null
131
+ cats_macro_f = null
132
+ cats_macro_auc = null
133
+ cats_f_per_type = null
134
 
135
  [pretraining]
136
 
en_pipeline-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3a3f343fe60e7963225577479bf831f3f26737b6e89c7d35c84b307c503edd4
3
- size 436354679
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc4b6d9fbc7f361daed941b6fe095316168d9b2e03e2f9052443da4667d93c7c
3
+ size 455652929
meta.json CHANGED
@@ -7,8 +7,8 @@
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
- "spacy_version":">=3.4.4,<3.5.0",
11
- "spacy_git_version":"Unknown",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
@@ -19,33 +19,48 @@
19
  "transformer":[
20
 
21
  ],
22
- "spancat":[
23
- "Copying expression",
24
- "Word form transmission",
25
- "Transliteration",
26
- "Synonyms",
27
- "Tense semantics"
28
  ]
29
  },
30
  "pipeline":[
31
  "transformer",
32
- "spancat"
33
  ],
34
  "components":[
35
  "transformer",
36
- "spancat"
37
  ],
38
  "disabled":[
39
 
40
  ],
41
  "performance":{
42
- "spans_sc_f":0.8350021768,
43
- "spans_sc_p":0.9177033493,
44
- "spans_sc_r":0.7659744409,
45
- "transformer_loss":30.4302067425,
46
- "spancat_loss":1325.7801238073
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  },
48
  "requirements":[
49
- "spacy-transformers>=1.2.1,<1.3.0"
50
  ]
51
  }
 
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
+ "spacy_version":">=3.7.5,<3.8.0",
11
+ "spacy_git_version":"a6d0fc360",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
 
19
  "transformer":[
20
 
21
  ],
22
+ "textcat":[
23
+ "0.0",
24
+ "1.0"
 
 
 
25
  ]
26
  },
27
  "pipeline":[
28
  "transformer",
29
+ "textcat"
30
  ],
31
  "components":[
32
  "transformer",
33
+ "textcat"
34
  ],
35
  "disabled":[
36
 
37
  ],
38
  "performance":{
39
+ "cats_score":0.9274664714,
40
+ "cats_score_desc":"macro F",
41
+ "cats_micro_p":0.9275,
42
+ "cats_micro_r":0.9275,
43
+ "cats_micro_f":0.9275,
44
+ "cats_macro_p":0.9274025193,
45
+ "cats_macro_r":0.927568036,
46
+ "cats_macro_f":0.9274664714,
47
+ "cats_macro_auc":0.9719873134,
48
+ "cats_f_per_type":{
49
+ "0.0":{
50
+ "p":0.9331366765,
51
+ "r":0.9249512671,
52
+ "f":0.9290259422
53
+ },
54
+ "1.0":{
55
+ "p":0.9216683622,
56
+ "r":0.9301848049,
57
+ "f":0.9259070005
58
+ }
59
+ },
60
+ "transformer_loss":0.2649244879,
61
+ "textcat_loss":46.1722830437
62
  },
63
  "requirements":[
64
+ "spacy-transformers>=1.3.5,<1.4.0"
65
  ]
66
  }
textcat/cfg ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "labels":[
3
+ "0.0",
4
+ "1.0"
5
+ ],
6
+ "threshold":0.0,
7
+ "positive_label":null
8
+ }
textcat/model ADDED
Binary file (7.07 kB). View file
 
tokenizer CHANGED
The diff for this file is too large to render. See raw diff
 
transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10c967cad2c0b922e1f14dcb5fb5fcb46f46ec79a33c8dd92e34a504fac1c5dc
3
- size 502031113
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f97d21817132eac87779f4acf8dacc5a89b07a27d89bd5d6ee5d78efedad0d9f
3
+ size 502025449
vocab/strings.json CHANGED
The diff for this file is too large to render. See raw diff