krotzz committed on
Commit
2195d80
1 Parent(s): bc6891a

Update spaCy pipeline

Browse files
Files changed (12) hide show
  1. README.md +11 -11
  2. config.cfg +146 -144
  3. en_pipeline-any-py3-none-any.whl +2 -2
  4. meta.json +28 -28
  5. ner/cfg +12 -12
  6. ner/model +0 -0
  7. ner/moves +1 -1
  8. transformer/model +1 -1
  9. vocab/key2row +2 -2
  10. vocab/strings.json +2 -2
  11. vocab/vectors +2 -2
  12. vocab/vectors.cfg +2 -2
README.md CHANGED
@@ -13,22 +13,22 @@ model-index:
13
  metrics:
14
  - name: NER Precision
15
  type: precision
16
- value: 0.9809885932
17
  - name: NER Recall
18
  type: recall
19
- value: 0.9662921348
20
  - name: NER F Score
21
  type: f_score
22
- value: 0.9735849057
23
  ---
24
  | Feature | Description |
25
  | --- | --- |
26
  | **Name** | `en_pipeline` |
27
  | **Version** | `0.0.0` |
28
  | **spaCy** | `>=3.7.5,<3.8.0` |
29
- | **Default Pipeline** | `tok2vec`, `ner` |
30
- | **Components** | `tok2vec`, `ner` |
31
- | **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
32
  | **Sources** | n/a |
33
  | **License** | n/a |
34
  | **Author** | [n/a]() |
@@ -49,8 +49,8 @@ model-index:
49
 
50
  | Type | Score |
51
  | --- | --- |
52
- | `ENTS_F` | 97.36 |
53
- | `ENTS_P` | 98.10 |
54
- | `ENTS_R` | 96.63 |
55
- | `TOK2VEC_LOSS` | 4351.47 |
56
- | `NER_LOSS` | 53052.01 |
 
13
  metrics:
14
  - name: NER Precision
15
  type: precision
16
+ value: 0.9877300613
17
  - name: NER Recall
18
  type: recall
19
+ value: 0.9962871287
20
  - name: NER F Score
21
  type: f_score
22
+ value: 0.9919901417
23
  ---
24
  | Feature | Description |
25
  | --- | --- |
26
  | **Name** | `en_pipeline` |
27
  | **Version** | `0.0.0` |
28
  | **spaCy** | `>=3.7.5,<3.8.0` |
29
+ | **Default Pipeline** | `transformer`, `ner` |
30
+ | **Components** | `transformer`, `ner` |
31
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
32
  | **Sources** | n/a |
33
  | **License** | n/a |
34
  | **Author** | [n/a]() |
 
49
 
50
  | Type | Score |
51
  | --- | --- |
52
+ | `ENTS_F` | 99.20 |
53
+ | `ENTS_P` | 98.77 |
54
+ | `ENTS_R` | 99.63 |
55
+ | `TRANSFORMER_LOSS` | 36683.04 |
56
+ | `NER_LOSS` | 35818.80 |
config.cfg CHANGED
@@ -1,145 +1,147 @@
1
- [paths]
2
- train = "sit/train3.spacy"
3
- dev = "sit/train3.spacy"
4
- vectors = "en_core_web_lg"
5
- init_tok2vec = null
6
-
7
- [system]
8
- gpu_allocator = null
9
- seed = 0
10
-
11
- [nlp]
12
- lang = "en"
13
- pipeline = ["tok2vec","ner"]
14
- batch_size = 1000
15
- disabled = []
16
- before_creation = null
17
- after_creation = null
18
- after_pipeline_creation = null
19
- tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
- vectors = {"@vectors":"spacy.Vectors.v1"}
21
-
22
- [components]
23
-
24
- [components.ner]
25
- factory = "ner"
26
- incorrect_spans_key = null
27
- moves = null
28
- scorer = {"@scorers":"spacy.ner_scorer.v1"}
29
- update_with_oracle_cut_size = 100
30
-
31
- [components.ner.model]
32
- @architectures = "spacy.TransitionBasedParser.v2"
33
- state_type = "ner"
34
- extra_state_tokens = false
35
- hidden_width = 64
36
- maxout_pieces = 2
37
- use_upper = true
38
- nO = null
39
-
40
- [components.ner.model.tok2vec]
41
- @architectures = "spacy.Tok2VecListener.v1"
42
- width = ${components.tok2vec.model.encode.width}
43
- upstream = "*"
44
-
45
- [components.tok2vec]
46
- factory = "tok2vec"
47
-
48
- [components.tok2vec.model]
49
- @architectures = "spacy.Tok2Vec.v2"
50
-
51
- [components.tok2vec.model.embed]
52
- @architectures = "spacy.MultiHashEmbed.v2"
53
- width = ${components.tok2vec.model.encode.width}
54
- attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
55
- rows = [5000,1000,2500,2500]
56
- include_static_vectors = true
57
-
58
- [components.tok2vec.model.encode]
59
- @architectures = "spacy.MaxoutWindowEncoder.v2"
60
- width = 256
61
- depth = 8
62
- window_size = 1
63
- maxout_pieces = 3
64
-
65
- [corpora]
66
-
67
- [corpora.dev]
68
- @readers = "spacy.Corpus.v1"
69
- path = ${paths.dev}
70
- max_length = 0
71
- gold_preproc = false
72
- limit = 0
73
- augmenter = null
74
-
75
- [corpora.train]
76
- @readers = "spacy.Corpus.v1"
77
- path = ${paths.train}
78
- max_length = 0
79
- gold_preproc = false
80
- limit = 0
81
- augmenter = null
82
-
83
- [training]
84
- dev_corpus = "corpora.dev"
85
- train_corpus = "corpora.train"
86
- seed = ${system.seed}
87
- gpu_allocator = ${system.gpu_allocator}
88
- dropout = 0.1
89
- accumulate_gradient = 1
90
- patience = 1600
91
- max_epochs = 0
92
- max_steps = 20000
93
- eval_frequency = 200
94
- frozen_components = []
95
- annotating_components = []
96
- before_to_disk = null
97
- before_update = null
98
-
99
- [training.batcher]
100
- @batchers = "spacy.batch_by_words.v1"
101
- discard_oversize = false
102
- tolerance = 0.2
103
- get_length = null
104
-
105
- [training.batcher.size]
106
- @schedules = "compounding.v1"
107
- start = 100
108
- stop = 1000
109
- compound = 1.001
110
- t = 0.0
111
-
112
- [training.logger]
113
- @loggers = "spacy.ConsoleLogger.v1"
114
- progress_bar = false
115
-
116
- [training.optimizer]
117
- @optimizers = "Adam.v1"
118
- beta1 = 0.9
119
- beta2 = 0.999
120
- L2_is_weight_decay = true
121
- L2 = 0.01
122
- grad_clip = 1.0
123
- use_averages = false
124
- eps = 0.00000001
125
- learn_rate = 0.001
126
-
127
- [training.score_weights]
128
- ents_f = 1.0
129
- ents_p = 0.0
130
- ents_r = 0.0
131
- ents_per_type = null
132
-
133
- [pretraining]
134
-
135
- [initialize]
136
- vectors = ${paths.vectors}
137
- init_tok2vec = ${paths.init_tok2vec}
138
- vocab_data = null
139
- lookups = null
140
- before_init = null
141
- after_init = null
142
-
143
- [initialize.components]
144
-
 
 
145
  [initialize.tokenizer]
 
1
+ [paths]
2
+ train = "/content/drive/MyDrive/secmodel/train3.spacy"
3
+ dev = "/content/drive/MyDrive/secmodel/train3.spacy"
4
+ vectors = null
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ gpu_allocator = "pytorch"
9
+ seed = 0
10
+
11
+ [nlp]
12
+ lang = "en"
13
+ pipeline = ["transformer","ner"]
14
+ batch_size = 128
15
+ disabled = []
16
+ before_creation = null
17
+ after_creation = null
18
+ after_pipeline_creation = null
19
+ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+ vectors = {"@vectors":"spacy.Vectors.v1"}
21
+
22
+ [components]
23
+
24
+ [components.ner]
25
+ factory = "ner"
26
+ incorrect_spans_key = null
27
+ moves = null
28
+ scorer = {"@scorers":"spacy.ner_scorer.v1"}
29
+ update_with_oracle_cut_size = 100
30
+
31
+ [components.ner.model]
32
+ @architectures = "spacy.TransitionBasedParser.v2"
33
+ state_type = "ner"
34
+ extra_state_tokens = false
35
+ hidden_width = 64
36
+ maxout_pieces = 2
37
+ use_upper = false
38
+ nO = null
39
+
40
+ [components.ner.model.tok2vec]
41
+ @architectures = "spacy-transformers.TransformerListener.v1"
42
+ grad_factor = 1.0
43
+ pooling = {"@layers":"reduce_mean.v1"}
44
+ upstream = "*"
45
+
46
+ [components.transformer]
47
+ factory = "transformer"
48
+ max_batch_items = 4096
49
+ set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
50
+
51
+ [components.transformer.model]
52
+ @architectures = "spacy-transformers.TransformerModel.v3"
53
+ name = "roberta-base"
54
+ mixed_precision = false
55
+
56
+ [components.transformer.model.get_spans]
57
+ @span_getters = "spacy-transformers.strided_spans.v1"
58
+ window = 128
59
+ stride = 96
60
+
61
+ [components.transformer.model.grad_scaler_config]
62
+
63
+ [components.transformer.model.tokenizer_config]
64
+ use_fast = true
65
+
66
+ [components.transformer.model.transformer_config]
67
+
68
+ [corpora]
69
+
70
+ [corpora.dev]
71
+ @readers = "spacy.Corpus.v1"
72
+ path = ${paths.dev}
73
+ max_length = 0
74
+ gold_preproc = false
75
+ limit = 0
76
+ augmenter = null
77
+
78
+ [corpora.train]
79
+ @readers = "spacy.Corpus.v1"
80
+ path = ${paths.train}
81
+ max_length = 0
82
+ gold_preproc = false
83
+ limit = 0
84
+ augmenter = null
85
+
86
+ [training]
87
+ accumulate_gradient = 3
88
+ dev_corpus = "corpora.dev"
89
+ train_corpus = "corpora.train"
90
+ seed = ${system.seed}
91
+ gpu_allocator = ${system.gpu_allocator}
92
+ dropout = 0.1
93
+ patience = 1600
94
+ max_epochs = 0
95
+ max_steps = 20000
96
+ eval_frequency = 200
97
+ frozen_components = []
98
+ annotating_components = []
99
+ before_to_disk = null
100
+ before_update = null
101
+
102
+ [training.batcher]
103
+ @batchers = "spacy.batch_by_padded.v1"
104
+ discard_oversize = true
105
+ size = 2000
106
+ buffer = 256
107
+ get_length = null
108
+
109
+ [training.logger]
110
+ @loggers = "spacy.ConsoleLogger.v1"
111
+ progress_bar = false
112
+
113
+ [training.optimizer]
114
+ @optimizers = "Adam.v1"
115
+ beta1 = 0.9
116
+ beta2 = 0.999
117
+ L2_is_weight_decay = true
118
+ L2 = 0.01
119
+ grad_clip = 1.0
120
+ use_averages = false
121
+ eps = 0.00000001
122
+
123
+ [training.optimizer.learn_rate]
124
+ @schedules = "warmup_linear.v1"
125
+ warmup_steps = 250
126
+ total_steps = 20000
127
+ initial_rate = 0.00005
128
+
129
+ [training.score_weights]
130
+ ents_f = 1.0
131
+ ents_p = 0.0
132
+ ents_r = 0.0
133
+ ents_per_type = null
134
+
135
+ [pretraining]
136
+
137
+ [initialize]
138
+ vectors = ${paths.vectors}
139
+ init_tok2vec = ${paths.init_tok2vec}
140
+ vocab_data = null
141
+ lookups = null
142
+ before_init = null
143
+ after_init = null
144
+
145
+ [initialize.components]
146
+
147
  [initialize.tokenizer]
en_pipeline-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80101df649014209b9cc7184f4d506c644567157f4fe7f60d4e5cb3d63435d63
3
- size 606693843
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13f2aa9ee41c224f5308d0df8762fdfed2cdc0459bb78234277e721b764001e3
3
+ size 425416363
meta.json CHANGED
@@ -10,13 +10,13 @@
10
  "spacy_version":">=3.7.5,<3.8.0",
11
  "spacy_git_version":"a6d0fc360",
12
  "vectors":{
13
- "width":300,
14
- "vectors":514157,
15
- "keys":514157,
16
- "name":"en_vectors"
17
  },
18
  "labels":{
19
- "tok2vec":[
20
 
21
  ],
22
  "ner":[
@@ -33,35 +33,35 @@
33
  ]
34
  },
35
  "pipeline":[
36
- "tok2vec",
37
  "ner"
38
  ],
39
  "components":[
40
- "tok2vec",
41
  "ner"
42
  ],
43
  "disabled":[
44
 
45
  ],
46
  "performance":{
47
- "ents_f":0.9735849057,
48
- "ents_p":0.9809885932,
49
- "ents_r":0.9662921348,
50
  "ents_per_type":{
51
  "SI UNIT ":{
52
- "p":0.9794238683,
53
- "r":0.9596774194,
54
- "f":0.9694501018
55
  },
56
  "TIME UNIT":{
57
- "p":0.9957081545,
58
  "r":1.0,
59
- "f":0.9978494624
60
  },
61
  "NUMBER":{
62
- "p":0.6666666667,
63
  "r":1.0,
64
- "f":0.8
65
  },
66
  "THOUSANDS OPERATOR":{
67
  "p":1.0,
@@ -70,8 +70,8 @@
70
  },
71
  "OPERATOR":{
72
  "p":1.0,
73
- "r":0.3333333333,
74
- "f":0.5
75
  },
76
  "FRACTION":{
77
  "p":1.0,
@@ -79,9 +79,9 @@
79
  "f":1.0
80
  },
81
  "CHEMICAL TERM":{
82
- "p":0.98,
83
- "r":0.9245283019,
84
- "f":0.9514563107
85
  },
86
  "DECIMAL":{
87
  "p":0.6666666667,
@@ -94,15 +94,15 @@
94
  "f":1.0
95
  },
96
  "RATIO":{
97
- "p":0.75,
98
- "r":0.75,
99
- "f":0.75
100
  }
101
  },
102
- "tok2vec_loss":43.5147486759,
103
- "ner_loss":530.5200964366
104
  },
105
  "requirements":[
106
-
107
  ]
108
  }
 
10
  "spacy_version":">=3.7.5,<3.8.0",
11
  "spacy_git_version":"a6d0fc360",
12
  "vectors":{
13
+ "width":0,
14
+ "vectors":0,
15
+ "keys":0,
16
+ "name":null
17
  },
18
  "labels":{
19
+ "transformer":[
20
 
21
  ],
22
  "ner":[
 
33
  ]
34
  },
35
  "pipeline":[
36
+ "transformer",
37
  "ner"
38
  ],
39
  "components":[
40
+ "transformer",
41
  "ner"
42
  ],
43
  "disabled":[
44
 
45
  ],
46
  "performance":{
47
+ "ents_f":0.9919901417,
48
+ "ents_p":0.9877300613,
49
+ "ents_r":0.9962871287,
50
  "ents_per_type":{
51
  "SI UNIT ":{
52
+ "p":0.9841584158,
53
+ "r":0.994,
54
+ "f":0.9890547264
55
  },
56
  "TIME UNIT":{
57
+ "p":1.0,
58
  "r":1.0,
59
+ "f":1.0
60
  },
61
  "NUMBER":{
62
+ "p":1.0,
63
  "r":1.0,
64
+ "f":1.0
65
  },
66
  "THOUSANDS OPERATOR":{
67
  "p":1.0,
 
70
  },
71
  "OPERATOR":{
72
  "p":1.0,
73
+ "r":1.0,
74
+ "f":1.0
75
  },
76
  "FRACTION":{
77
  "p":1.0,
 
79
  "f":1.0
80
  },
81
  "CHEMICAL TERM":{
82
+ "p":0.9814814815,
83
+ "r":1.0,
84
+ "f":0.9906542056
85
  },
86
  "DECIMAL":{
87
  "p":0.6666666667,
 
94
  "f":1.0
95
  },
96
  "RATIO":{
97
+ "p":1.0,
98
+ "r":1.0,
99
+ "f":1.0
100
  }
101
  },
102
+ "transformer_loss":366.8304179548,
103
+ "ner_loss":358.1879557502
104
  },
105
  "requirements":[
106
+ "spacy-transformers>=1.3.5,<1.4.0"
107
  ]
108
  }
ner/cfg CHANGED
@@ -1,13 +1,13 @@
1
- {
2
- "moves":null,
3
- "update_with_oracle_cut_size":100,
4
- "multitasks":[
5
-
6
- ],
7
- "min_action_freq":1,
8
- "learn_tokens":false,
9
- "beam_width":1,
10
- "beam_density":0.0,
11
- "beam_update_prob":0.0,
12
- "incorrect_spans_key":null
13
  }
 
1
+ {
2
+ "moves":null,
3
+ "update_with_oracle_cut_size":100,
4
+ "multitasks":[
5
+
6
+ ],
7
+ "min_action_freq":1,
8
+ "learn_tokens":false,
9
+ "beam_width":1,
10
+ "beam_density":0.0,
11
+ "beam_update_prob":0.0,
12
+ "incorrect_spans_key":null
13
  }
ner/model CHANGED
Binary files a/ner/model and b/ner/model differ
 
ner/moves CHANGED
@@ -1 +1 @@
1
- ��moves��{"0":{},"1":{"SI UNIT ":950,"TIME UNIT":447,"CHEMICAL TERM":72,"LEADING ZERO":6,"RATIO":5,"OPERATOR":3,"FRACTION":3,"NUMBER":2,"DECIMAL":2,"THOUSANDS OPERATOR":1},"2":{"SI UNIT ":950,"TIME UNIT":447,"CHEMICAL TERM":72,"LEADING ZERO":6,"RATIO":5,"OPERATOR":3,"FRACTION":3,"NUMBER":2,"DECIMAL":2,"THOUSANDS OPERATOR":1},"3":{"SI UNIT ":950,"TIME UNIT":447,"CHEMICAL TERM":72,"LEADING ZERO":6,"RATIO":5,"OPERATOR":3,"FRACTION":3,"NUMBER":2,"DECIMAL":2,"THOUSANDS OPERATOR":1},"4":{"SI UNIT ":950,"TIME UNIT":447,"CHEMICAL TERM":72,"LEADING ZERO":6,"RATIO":5,"OPERATOR":3,"FRACTION":3,"NUMBER":2,"DECIMAL":2,"THOUSANDS OPERATOR":1,"":1},"5":{"":1}}�cfg��neg_key�
 
1
+ ��moves��{"0":{},"1":{"SI UNIT ":1069,"TIME UNIT":447,"CHEMICAL TERM":72,"LEADING ZERO":6,"RATIO":5,"OPERATOR":5,"NUMBER":3,"FRACTION":3,"DECIMAL":2,"THOUSANDS OPERATOR":1},"2":{"SI UNIT ":1069,"TIME UNIT":447,"CHEMICAL TERM":72,"LEADING ZERO":6,"RATIO":5,"OPERATOR":5,"NUMBER":3,"FRACTION":3,"DECIMAL":2,"THOUSANDS OPERATOR":1},"3":{"SI UNIT ":1069,"TIME UNIT":447,"CHEMICAL TERM":72,"LEADING ZERO":6,"RATIO":5,"OPERATOR":5,"NUMBER":3,"FRACTION":3,"DECIMAL":2,"THOUSANDS OPERATOR":1},"4":{"SI UNIT ":1069,"TIME UNIT":447,"CHEMICAL TERM":72,"LEADING ZERO":6,"RATIO":5,"OPERATOR":5,"NUMBER":3,"FRACTION":3,"DECIMAL":2,"THOUSANDS OPERATOR":1,"":1},"5":{"":1}}�cfg��neg_key�
transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14fbfcca9a1590031ade4a3d9af5afd49d7eb76f5a5f7ce9268c1242ca758d8e
3
  size 502026969
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bae57e99109ce4380620a2852aefd45e791c60743e55b3cd7c47296ea55dae5f
3
  size 502026969
vocab/key2row CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31566ae010da3d399eb1d930ae142757afd2601034a4be3bdb00d18881c8c06a
3
- size 7066303
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
3
+ size 1
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57cf1e27e8783cbb030e434d33c3b2f1d4db5a9e85646f57f6f6952d3f629b84
3
- size 11147861
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:282296599e3b7dbbc1cdb0dcb849aff92eb335406e2e31493bd5ffced559408d
3
+ size 25769
vocab/vectors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:234dcf234bfdf01775ae6182715d55eaacfcde8555b189f25440b56d3c39fd5d
3
- size 616988528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14772b683e726436d5948ad3fff2b43d036ef2ebbe3458aafed6004e05a40706
3
+ size 128
vocab/vectors.cfg CHANGED
@@ -1,3 +1,3 @@
1
- {
2
- "mode":"default"
3
  }
 
1
+ {
2
+ "mode":"default"
3
  }