dlicari committed
Commit
a1e3769
1 Parent(s): 4781faf

Upload ner_it_legalbert.cfg

Files changed (1):
  1. ner_it_legalbert.cfg +146 -0
ner_it_legalbert.cfg ADDED (new file, 146 lines):
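# spaCy v3 training config: Italian NER on top of a shared transformer.
# [paths] points at the train/dev corpora in spaCy's binary .spacy format.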
[paths]
train = "train_data.spacy"
dev = "dev_data.spacy"
vectors = null
init_tok2vec = null

[system]
gpu_allocator = "pytorch"
seed = 0

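# Pipeline definition: the transformer runs first and the NER component
# reuses its output via a listener (see [components.ner.model.tok2vec]).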
[nlp]
lang = "it"
pipeline = ["transformer","ner"]
batch_size = 128
disabled = []
before_creation = null
after_creation = null
after_pipeline_creation = null
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}

[components]

[components.ner]
factory = "ner"
incorrect_spans_key = null
moves = null
scorer = {"@scorers":"spacy.ner_scorer.v1"}
update_with_oracle_cut_size = 100

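# Transition-based NER head. use_upper = false skips the extra hidden
# layer, as is typical when a transformer supplies the token vectors.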
[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "ner"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = false
nO = null

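# The NER model does not embed tokens itself: it listens to the shared
# transformer component and mean-pools wordpiece vectors per token.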
[components.ner.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
pooling = {"@layers":"reduce_mean.v1"}
upstream = "*"

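# Shared transformer component wrapping a Hugging Face model.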
[components.transformer]
factory = "transformer"
max_batch_items = 4096
set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}

[components.transformer.model]
@architectures = "spacy-transformers.TransformerModel.v3"
name = "dbmdz/bert-base-italian-xxl-cased"
mixed_precision = false
# tokenizer_config = {"use_fast": true}

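# Long documents are split into strided spans of 128 tokens with a
# stride of 96, i.e. 32 tokens of overlap between adjacent windows.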
[components.transformer.model.get_spans]
@span_getters = "spacy-transformers.strided_spans.v1"
window = 128
stride = 96

[components.transformer.model.grad_scaler_config]

[components.transformer.model.tokenizer_config]
# use_fast = true

[components.transformer.model.transformer_config]

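# Corpus readers for the .spacy files declared in [paths].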
[corpora]

[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null

[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null

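# Train for at most 3000 steps (max_epochs = 0 means no epoch limit),
# evaluating on the dev set every 200 steps and stopping early after
# 600 steps without improvement.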
[training]
accumulate_gradient = 3
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"
seed = ${system.seed}
gpu_allocator = ${system.gpu_allocator}
dropout = 0.1
patience = 600
max_epochs = 0
max_steps = 3000
eval_frequency = 200
frozen_components = []
annotating_components = []
before_to_disk = null

[training.batcher]
@batchers = "spacy.batch_by_padded.v1"
discard_oversize = true
size = 2000
buffer = 256
get_length = null

[training.logger]
@loggers = "spacy.ConsoleLogger.v1"
progress_bar = false

[training.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = false
eps = 0.00000001

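# Learning rate: linear warmup for 25 steps up to 5e-5, then linear
# decay over the remaining steps.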
[training.optimizer.learn_rate]
@schedules = "warmup_linear.v1"
warmup_steps = 25
total_steps = 3000
initial_rate = 0.00005

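# Model selection uses entity F-score only; precision and recall are
# reported but not weighted.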
[training.score_weights]
ents_f = 1.0
ents_p = 0
ents_r = 0
ents_per_type = null

[pretraining]

[initialize]
vectors = ${paths.vectors}
init_tok2vec = ${paths.init_tok2vec}
vocab_data = null
lookups = null
before_init = null
after_init = null

[initialize.components]

[initialize.tokenizer]
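
For reference, a minimal sketch of how a config like this is typically consumed with spaCy v3 and spacy-transformers installed. The output directory, GPU id, and sample sentence are illustrative assumptions, not part of the upload; train_data.spacy and dev_data.spacy must exist as declared in [paths]:

# Train the pipeline from this config (equivalent CLI:
#   python -m spacy train ner_it_legalbert.cfg --output ./output --gpu-id 0)
from spacy.cli.train import train

train("ner_it_legalbert.cfg", "./output", use_gpu=0)  # use_gpu=-1 for CPU

# Load the best checkpoint and run the trained NER component.
import spacy

nlp = spacy.load("./output/model-best")
doc = nlp("La Corte di Cassazione ha rigettato il ricorso.")  # sample text
for ent in doc.ents:
    print(ent.text, ent.label_)  # entity labels depend on the training data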