es_core_news_lg / config.cfg
# Path placeholders. All four are null in this file. They are consumed through
# ${paths.*} interpolation by [corpora.dev], [corpora.train] and [initialize],
# so real locations have to be supplied from outside this file.
1 [paths]
2 train = null
3 dev = null
4 vectors = null
5 init_tok2vec = null
6
# System-wide settings. Both values are re-used by [training] through
# ${system:gpu_allocator} and ${system:seed} rather than being repeated there.
7 [system]
8 gpu_allocator = null
9 seed = 0
10
# Top-level pipeline definition: language code, component order, callbacks,
# batch size and the tokenizer factory.
11 [nlp]
12 lang = "es"
# Seven components are named here; each one has a matching
# [components.<name>] section further down in this file.
13 pipeline = ["tok2vec","morphologizer","parser","senter","attribute_ruler","lemmatizer","ner"]
# "senter" is listed in both `pipeline` and `disabled`.
# NOTE(review): presumably this ships senter with the pipeline but leaves it
# switched off by default — confirm against the spaCy docs for `disabled`.
14 disabled = ["senter"]
# No creation callbacks are configured (all three hooks are null).
15 before_creation = null
16 after_creation = null
17 after_pipeline_creation = null
18 batch_size = 256
19 tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
# Parent section for the per-component settings. It has no keys of its own;
# every component is configured in a [components.<name>] sub-section.
21 [components]
22
# Attribute ruler: has no model sub-section in this file, only a factory,
# a scorer and the `validate` flag.
23 [components.attribute_ruler]
24 factory = "attribute_ruler"
25 scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
26 validate = false
27
# Lemmatizer configured in "rule" mode with no model (model = null).
# NOTE(review): `overwrite = false` presumably means lemmas that are already
# set are kept — confirm against the lemmatizer documentation.
28 [components.lemmatizer]
29 factory = "lemmatizer"
30 mode = "rule"
31 model = null
32 overwrite = false
33 scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
34
# Morphologizer component. Its scores feed pos_acc / morph_acc in
# [training.score_weights] (presumably — the metric names are the only link
# visible in this file).
35 [components.morphologizer]
36 factory = "morphologizer"
37 extend = false
38 overwrite = true
39 scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
40
# Tagger architecture; nO is left null here.
41 [components.morphologizer.model]
42 @architectures = "spacy.Tagger.v1"
43 nO = null
44
# This component does not define its own embedding layers. It uses a
# Tok2VecListener pointed at the component named "tok2vec", and its width is
# interpolated from [components.tok2vec.model.encode] (96 in this file) so the
# two cannot drift apart.
45 [components.morphologizer.model.tok2vec]
46 @architectures = "spacy.Tok2VecListener.v1"
47 width = ${components.tok2vec.model.encode:width}
48 upstream = "tok2vec"
49
# Named-entity recognizer.
50 [components.ner]
51 factory = "ner"
52 incorrect_spans_key = null
53 moves = null
54 scorer = {"@scorers":"spacy.ner_scorer.v1"}
55 update_with_oracle_cut_size = 100
56
# Same TransitionBasedParser.v2 architecture and hyperparameters as
# [components.parser.model]; only state_type differs ("ner" here).
57 [components.ner.model]
58 @architectures = "spacy.TransitionBasedParser.v2"
59 state_type = "ner"
60 extra_state_tokens = false
61 hidden_width = 64
62 maxout_pieces = 2
63 use_upper = true
64 nO = null
65
# Unlike morphologizer and parser, ner does NOT listen to the shared
# "tok2vec" component: it declares its own Tok2Vec with embed/encode layers.
66 [components.ner.model.tok2vec]
67 @architectures = "spacy.Tok2Vec.v2"
68
# width is a literal 96 here (not interpolated) and must be kept equal to the
# encode width below by hand. `attrs` and `rows` both have five entries,
# one row count per attribute.
69 [components.ner.model.tok2vec.embed]
70 @architectures = "spacy.MultiHashEmbed.v2"
71 width = 96
72 attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
73 rows = [5000,2500,2500,2500,100]
74 include_static_vectors = true
75
# Same encoder settings as [components.tok2vec.model.encode].
76 [components.ner.model.tok2vec.encode]
77 @architectures = "spacy.MaxoutWindowEncoder.v2"
78 width = 96
79 depth = 4
80 window_size = 1
81 maxout_pieces = 3
82
# Dependency parser.
83 [components.parser]
84 factory = "parser"
85 learn_tokens = false
86 min_action_freq = 30
87 moves = null
88 scorer = {"@scorers":"spacy.parser_scorer.v1"}
89 update_with_oracle_cut_size = 100
90
# Same TransitionBasedParser.v2 architecture and hyperparameters as
# [components.ner.model]; only state_type differs ("parser" here).
91 [components.parser.model]
92 @architectures = "spacy.TransitionBasedParser.v2"
93 state_type = "parser"
94 extra_state_tokens = false
95 hidden_width = 64
96 maxout_pieces = 2
97 use_upper = true
98 nO = null
99
# Listener on the shared "tok2vec" component; width is interpolated from
# [components.tok2vec.model.encode] (96 in this file).
100 [components.parser.model.tok2vec]
101 @architectures = "spacy.Tok2VecListener.v1"
102 width = ${components.tok2vec.model.encode:width}
103 upstream = "tok2vec"
104
# Sentence segmenter. Note that [nlp] lists "senter" under `disabled`.
105 [components.senter]
106 factory = "senter"
107 overwrite = false
108 scorer = {"@scorers":"spacy.senter_scorer.v1"}
109
# Uses the same Tagger.v1 architecture as the morphologizer.
110 [components.senter.model]
111 @architectures = "spacy.Tagger.v1"
112 nO = null
113
# senter has its own Tok2Vec rather than a listener on the shared "tok2vec"
# component, and it is much smaller: width 16 / depth 2 versus 96 / 4.
114 [components.senter.model.tok2vec]
115 @architectures = "spacy.Tok2Vec.v2"
116
# width is a literal and must match the encode width below by hand.
# `attrs` and `rows` both have five entries, one row count per attribute.
117 [components.senter.model.tok2vec.embed]
118 @architectures = "spacy.MultiHashEmbed.v2"
119 width = 16
120 attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
121 rows = [1000,500,500,500,50]
122 include_static_vectors = true
123
124 [components.senter.model.tok2vec.encode]
125 @architectures = "spacy.MaxoutWindowEncoder.v2"
126 width = 16
127 depth = 2
128 window_size = 1
129 maxout_pieces = 2
130
# Shared token-to-vector component. The morphologizer and parser sections
# reference it by name through Tok2VecListener (`upstream = "tok2vec"`).
131 [components.tok2vec]
132 factory = "tok2vec"
133
134 [components.tok2vec.model]
135 @architectures = "spacy.Tok2Vec.v2"
136
# The embed width is interpolated from the encode section below, so the
# literal 96 in [components.tok2vec.model.encode] is the single place to
# change it. `attrs` and `rows` both have five entries.
137 [components.tok2vec.model.embed]
138 @architectures = "spacy.MultiHashEmbed.v2"
139 width = ${components.tok2vec.model.encode:width}
140 attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
141 rows = [5000,2500,2500,2500,100]
142 include_static_vectors = true
143
# `width` here is also what the morphologizer and parser listeners
# interpolate for their own `width`.
144 [components.tok2vec.model.encode]
145 @architectures = "spacy.MaxoutWindowEncoder.v2"
146 width = 96
147 depth = 4
148 window_size = 1
149 maxout_pieces = 3
150
# Corpus readers. [training] refers to these two sections by dotted name
# ("corpora.train" / "corpora.dev").
151 [corpora]
152
# Development corpus. The path comes from [paths] dev, which is null in this
# file. Settings are otherwise identical to [corpora.train].
# NOTE(review): max_length = 0 and limit = 0 presumably mean "no limit" —
# confirm against the spacy.Corpus.v1 reader documentation.
153 [corpora.dev]
154 @readers = "spacy.Corpus.v1"
155 path = ${paths.dev}
156 gold_preproc = false
157 max_length = 0
158 limit = 0
159 augmenter = null
160
# Training corpus. The path comes from [paths] train, which is null in this
# file. No augmenter is configured.
161 [corpora.train]
162 @readers = "spacy.Corpus.v1"
163 path = ${paths.train}
164 gold_preproc = false
165 max_length = 0
166 limit = 0
167 augmenter = null
168
# Training loop settings. The batcher, logger, optimizer and score weights
# live in [training.*] sub-sections.
169 [training]
# Dotted references to the [corpora.train] and [corpora.dev] sections.
170 train_corpus = "corpora.train"
171 dev_corpus = "corpora.dev"
# Both values are interpolated from [system] (0 and null in this file).
172 seed = ${system:seed}
173 gpu_allocator = ${system:gpu_allocator}
174 dropout = 0.1
175 accumulate_gradient = 1
# NOTE(review): with max_epochs = 0 and max_steps = 0, `patience` looks like
# the only stopping criterion set here; 0 presumably means "unlimited" —
# confirm against the spaCy training documentation.
176 patience = 5000
177 max_epochs = 0
178 max_steps = 0
179 eval_frequency = 1000
# No components are frozen and none are marked as annotating.
180 frozen_components = []
181 before_to_disk = null
182 annotating_components = []
183
# Batching strategy. The batch size is not a literal: it is produced by the
# schedule declared in [training.batcher.size].
184 [training.batcher]
185 @batchers = "spacy.batch_by_words.v1"
186 discard_oversize = false
187 tolerance = 0.2
188 get_length = null
189
# NOTE(review): presumably the size starts at `start` and is multiplied by
# `compound` each step until it reaches `stop` — confirm against the
# compounding.v1 schedule documentation.
190 [training.batcher.size]
191 @schedules = "compounding.v1"
192 start = 100
193 stop = 1000
194 compound = 1.001
195 t = 0.0
196
# Console logger with the progress bar switched off.
197 [training.logger]
198 @loggers = "spacy.ConsoleLogger.v1"
199 progress_bar = false
200
# Adam optimizer with a fixed learning rate (a literal, not a schedule).
201 [training.optimizer]
202 @optimizers = "Adam.v1"
203 beta1 = 0.9
204 beta2 = 0.999
# L2 = 0.01 is flagged as weight decay by L2_is_weight_decay = true.
205 L2_is_weight_decay = true
206 L2 = 0.01
207 grad_clip = 1.0
208 use_averages = true
# Written out in plain decimal; the value is 1e-8.
209 eps = 0.00000001
210 learn_rate = 0.001
211
# Weights used to combine the per-component metrics. The numeric weights in
# this section sum to 1.0 (0.08 + 0.08 + 0.16 + 0.02 + 0.5 + 0.16), with
# lemma_acc alone carrying half of the total.
# NOTE(review): null entries are presumably excluded from the combined score,
# and 0.0 entries contribute nothing to it — confirm against the spaCy
# documentation for score_weights.
212 [training.score_weights]
213 pos_acc = 0.08
214 morph_acc = 0.08
215 morph_per_feat = null
216 dep_uas = 0.0
217 dep_las = 0.16
218 dep_las_per_type = null
219 sents_p = null
220 sents_r = null
221 sents_f = 0.02
222 lemma_acc = 0.5
223 ents_f = 0.16
224 ents_p = 0.0
225 ents_r = 0.0
226 ents_per_type = null
227 speed = 0.0
228
# Empty section: no pretraining settings are defined in this file.
229 [pretraining]
230
# Initialization settings. `vectors` and `init_tok2vec` are interpolated from
# [paths], where both are null in this file. No init callbacks are set.
231 [initialize]
232 vocab_data = null
233 vectors = ${paths.vectors}
234 init_tok2vec = ${paths.init_tok2vec}
235 before_init = null
236 after_init = null
237
# Per-component initialization data. Only morphologizer, ner and parser have
# entries; each one reads a label file from a relative corpus/labels/ path
# with `require = false`.
# NOTE(review): `require = false` presumably makes a missing label file
# non-fatal — confirm against the spacy.read_labels.v1 documentation.
238 [initialize.components]
239
240 [initialize.components.morphologizer]
241
242 [initialize.components.morphologizer.labels]
243 @readers = "spacy.read_labels.v1"
244 path = "corpus/labels/morphologizer.json"
245 require = false
246
247 [initialize.components.ner]
248
249 [initialize.components.ner.labels]
250 @readers = "spacy.read_labels.v1"
251 path = "corpus/labels/ner.json"
252 require = false
253
254 [initialize.components.parser]
255
256 [initialize.components.parser.labels]
257 @readers = "spacy.read_labels.v1"
258 path = "corpus/labels/parser.json"
259 require = false
260
# Lookups loader. `lang` is interpolated from [nlp] lang ("es" in this file);
# the list of tables to load is empty.
261 [initialize.lookups]
262 @misc = "spacy.LookupsDataLoader.v1"
263 lang = ${nlp.lang}
264 tables = []
265
266 [initialize.tokenizer]