adrianeboyd committed
Commit 7af7c51
1 Parent(s): 3960083

Add de_udv25_germanhdt_trf-0.0.1

.gitattributes CHANGED
@@ -25,3 +25,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zstandard filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.whl filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *strings.json filter=lfs diff=lfs merge=lfs -text
+ vectors filter=lfs diff=lfs merge=lfs -text
+ model filter=lfs diff=lfs merge=lfs -text
+ trees filter=lfs diff=lfs merge=lfs -text
+ meta.json filter=lfs diff=lfs merge=lfs -text
+ README.md filter=lfs diff=lfs merge=lfs -text
LICENSE.txt ADDED
@@ -0,0 +1,5 @@
+ The annotation of the treebank is licensed under the Creative Commons License Attribution-ShareAlike 4.0 International.
+ The text can be distributed for academic use.
+
+ The complete license text is available at:
+ http://creativecommons.org/licenses/by-sa/4.0/legalcode
README.md ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fded53a302f9efe6ea053ab6878e126f95ad4cc2b3b59031f01e3c95e4e3e0ed
+ size 599615
config.cfg ADDED
@@ -0,0 +1,254 @@
+ [paths]
+ train = "corpus/UD_German-HDT/train.spacy"
+ dev = "corpus/UD_German-HDT/dev.spacy"
+ vectors = null
+ init_tok2vec = null
+ tokenizer_source = "training/UD_German-HDT/tokenizer/model-best"
+ transformer_source = "training/UD_German-HDT/transformer/model-best"
+
+ [system]
+ gpu_allocator = "pytorch"
+ seed = 0
+
+ [nlp]
+ lang = "de"
+ pipeline = ["experimental_char_ner_tokenizer","transformer","senter","tagger","morphologizer","parser","experimental_edit_tree_lemmatizer"]
+ batch_size = 64
+ disabled = ["senter"]
+ before_creation = null
+ after_creation = null
+ after_pipeline_creation = null
+ tokenizer = {"@tokenizers":"spacy-experimental.char_pretokenizer.v1"}
+
+ [components]
+
+ [components.experimental_char_ner_tokenizer]
+ factory = "experimental_char_ner_tokenizer"
+ scorer = {"@scorers":"spacy-experimental.tokenizer_scorer.v1"}
+
+ [components.experimental_char_ner_tokenizer.model]
+ @architectures = "spacy.TransitionBasedParser.v2"
+ state_type = "ner"
+ extra_state_tokens = false
+ hidden_width = 64
+ maxout_pieces = 2
+ use_upper = true
+ nO = null
+
+ [components.experimental_char_ner_tokenizer.model.tok2vec]
+ @architectures = "spacy.Tok2Vec.v2"
+
+ [components.experimental_char_ner_tokenizer.model.tok2vec.embed]
+ @architectures = "spacy.MultiHashEmbed.v2"
+ width = 128
+ attrs = ["ORTH","LOWER","IS_DIGIT","IS_ALPHA","IS_SPACE","IS_PUNCT"]
+ rows = [1000,500,50,50,50,50]
+ include_static_vectors = false
+
+ [components.experimental_char_ner_tokenizer.model.tok2vec.encode]
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
+ width = 128
+ depth = 4
+ window_size = 4
+ maxout_pieces = 2
+
+ [components.experimental_edit_tree_lemmatizer]
+ factory = "experimental_edit_tree_lemmatizer"
+ backoff = "orth"
+ min_tree_freq = 1
+ overwrite = false
+ scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
+ top_k = 1
+
+ [components.experimental_edit_tree_lemmatizer.model]
+ @architectures = "spacy.Tagger.v1"
+ nO = null
+
+ [components.experimental_edit_tree_lemmatizer.model.tok2vec]
+ @architectures = "spacy-transformers.TransformerListener.v1"
+ grad_factor = 1.0
+ upstream = "transformer"
+ pooling = {"@layers":"reduce_mean.v1"}
+
+ [components.morphologizer]
+ factory = "morphologizer"
+ extend = false
+ overwrite = false
+ scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
+
+ [components.morphologizer.model]
+ @architectures = "spacy.Tagger.v1"
+ nO = null
+
+ [components.morphologizer.model.tok2vec]
+ @architectures = "spacy-transformers.TransformerListener.v1"
+ grad_factor = 1.0
+ upstream = "transformer"
+ pooling = {"@layers":"reduce_mean.v1"}
+
+ [components.parser]
+ factory = "parser"
+ learn_tokens = false
+ min_action_freq = 5
+ moves = null
+ scorer = {"@scorers":"spacy.parser_scorer.v1"}
+ update_with_oracle_cut_size = 100
+
+ [components.parser.model]
+ @architectures = "spacy.TransitionBasedParser.v2"
+ state_type = "parser"
+ extra_state_tokens = false
+ hidden_width = 64
+ maxout_pieces = 3
+ use_upper = false
+ nO = null
+
+ [components.parser.model.tok2vec]
+ @architectures = "spacy-transformers.TransformerListener.v1"
+ grad_factor = 1.0
+ upstream = "transformer"
+ pooling = {"@layers":"reduce_mean.v1"}
+
+ [components.senter]
+ factory = "senter"
+ overwrite = false
+ scorer = {"@scorers":"spacy.senter_scorer.v1"}
+
+ [components.senter.model]
+ @architectures = "spacy.Tagger.v1"
+ nO = null
+
+ [components.senter.model.tok2vec]
+ @architectures = "spacy-transformers.TransformerListener.v1"
+ grad_factor = 1.0
+ upstream = "transformer"
+ pooling = {"@layers":"reduce_mean.v1"}
+
+ [components.tagger]
+ factory = "tagger"
+ neg_prefix = "!"
+ overwrite = false
+ scorer = {"@scorers":"spacy.tagger_scorer.v1"}
+
+ [components.tagger.model]
+ @architectures = "spacy.Tagger.v1"
+ nO = null
+
+ [components.tagger.model.tok2vec]
+ @architectures = "spacy-transformers.TransformerListener.v1"
+ grad_factor = 1.0
+ upstream = "transformer"
+ pooling = {"@layers":"reduce_mean.v1"}
+
+ [components.transformer]
+ factory = "transformer"
+ max_batch_items = 4096
+ set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
+
+ [components.transformer.model]
+ @architectures = "spacy-transformers.TransformerModel.v3"
+ name = "xlm-roberta-base"
+ mixed_precision = true
+
+ [components.transformer.model.get_spans]
+ @span_getters = "spacy-transformers.strided_spans.v1"
+ window = 128
+ stride = 96
+
+ [components.transformer.model.grad_scaler_config]
+
+ [components.transformer.model.tokenizer_config]
+ use_fast = true
+
+ [components.transformer.model.transformer_config]
+
+ [corpora]
+
+ [corpora.dev]
+ @readers = "spacy.Corpus.v1"
+ path = ${paths.dev}
+ max_length = 0
+ gold_preproc = false
+ limit = 0
+ augmenter = null
+
+ [corpora.train]
+ @readers = "spacy.Corpus.v1"
+ path = ${paths.train}
+ max_length = 0
+ gold_preproc = false
+ limit = 0
+ augmenter = null
+
+ [training]
+ train_corpus = "corpora.train"
+ dev_corpus = "corpora.dev"
+ seed = ${system:seed}
+ gpu_allocator = ${system:gpu_allocator}
+ dropout = 0.1
+ accumulate_gradient = 3
+ patience = 5000
+ max_epochs = 0
+ max_steps = 20000
+ eval_frequency = 200
+ frozen_components = []
+ before_to_disk = null
+ annotating_components = []
+
+ [training.batcher]
+ @batchers = "spacy.batch_by_padded.v1"
+ discard_oversize = true
+ get_length = null
+ size = 2000
+ buffer = 256
+
+ [training.logger]
+ @loggers = "spacy.ConsoleLogger.v1"
+ progress_bar = false
+
+ [training.optimizer]
+ @optimizers = "Adam.v1"
+ beta1 = 0.9
+ beta2 = 0.999
+ L2_is_weight_decay = true
+ L2 = 0.01
+ grad_clip = 1.0
+ use_averages = true
+ eps = 0.00000001
+
+ [training.optimizer.learn_rate]
+ @schedules = "warmup_linear.v1"
+ warmup_steps = 250
+ total_steps = 20000
+ initial_rate = 0.00005
+
+ [training.score_weights]
+ token_f = 0.0
+ token_p = null
+ token_r = null
+ token_acc = null
+ sents_f = 0.05
+ sents_p = 0.0
+ sents_r = 0.0
+ tag_acc = 0.11
+ pos_acc = 0.05
+ morph_acc = 0.05
+ morph_per_feat = null
+ dep_uas = 0.11
+ dep_las = 0.11
+ dep_las_per_type = null
+ lemma_acc = 0.52
+
+ [pretraining]
+
+ [initialize]
+ vectors = ${paths.vectors}
+ init_tok2vec = ${paths.init_tok2vec}
+ vocab_data = null
+ lookups = null
+ before_init = null
+ after_init = null
+
+ [initialize.components]
+
+ [initialize.tokenizer]
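
The config above wires every statistical component except the character-based tokenizer to a single shared xlm-roberta-base transformer via TransformerListener, so one transformer forward pass feeds the senter, tagger, morphologizer, parser and lemmatizer heads. A minimal sketch of inspecting this config with spaCy v3.x (assuming spacy-transformers and spacy-experimental are installed; the CLI command in the comment would additionally need the UD_German-HDT corpora referenced under [paths], which are not part of this repository):

    # Minimal sketch, assuming spaCy v3.x with spacy-transformers and
    # spacy-experimental installed.
    import spacy

    config = spacy.util.load_config("config.cfg")
    print(config["nlp"]["pipeline"])
    # ['experimental_char_ner_tokenizer', 'transformer', 'senter', 'tagger',
    #  'morphologizer', 'parser', 'experimental_edit_tree_lemmatizer']
    print(config["components"]["transformer"]["model"]["name"])  # xlm-roberta-base

    # Retraining would go through the CLI, roughly:
    #   python -m spacy train config.cfg --output training/UD_German-HDT --gpu-id 0
    # which requires the corpus/UD_German-HDT/*.spacy files listed under [paths].
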
de_udv25_germanhdt_trf-any-py3-none-any.whl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9b3959ba572529d23c2f53627303faef494ec42101d41c0748c64732df810c0c
+ size 1051413368
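
The .whl file packages the trained pipeline for installation with pip. A minimal usage sketch; it assumes the wheel installs an importable package named de_udv25_germanhdt_trf (matching the commit title) and that spaCy v3.x with spacy-transformers and spacy-experimental is available:

    # Install first, e.g.:
    #   pip install de_udv25_germanhdt_trf-any-py3-none-any.whl
    import spacy

    # Assumed package/pipeline name, taken from the commit title.
    nlp = spacy.load("de_udv25_germanhdt_trf")
    doc = nlp("Der Hund läuft über die Straße.")  # illustrative sentence
    print([token.text for token in doc])
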
experimental_char_ner_tokenizer/cfg ADDED
@@ -0,0 +1,13 @@
+ {
+ "moves":null,
+ "update_with_oracle_cut_size":100,
+ "multitasks":[
+
+ ],
+ "min_action_freq":1,
+ "learn_tokens":false,
+ "beam_width":1,
+ "beam_density":0.0,
+ "beam_update_prob":0.0,
+ "incorrect_spans_key":null
+ }
experimental_char_ner_tokenizer/model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0bb148537f8b3cfc4e73087fa23648f26bd2bc6a8dc13a3724b581823b119180
+ size 6922248
experimental_char_ner_tokenizer/moves ADDED
@@ -0,0 +1 @@
+ ��moves�t{"0":{},"1":{"TOKEN":15600401},"2":{"TOKEN":15600401},"3":{"TOKEN":15600401},"4":{"TOKEN":15600401,"":1},"5":{"":1}}�cfg��neg_key�
experimental_edit_tree_lemmatizer/cfg ADDED
The diff for this file is too large to render. See raw diff
 
experimental_edit_tree_lemmatizer/model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c130c62e2e3f19ba992eb6a4004999a5e8dcfef8e963529e8cec8264890cb1c3
+ size 191017127
experimental_edit_tree_lemmatizer/trees ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a372c84df492f47982a7d0ac09a10a64548ac5692591ca2dd9dce71985d99970
+ size 8762542
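
The trees file stores the edit trees that the experimental_edit_tree_lemmatizer chooses from; per config.cfg it backs off to the token's orth form when no tree applies. A short sketch of reading the predicted lemmas, reusing the hypothetical `nlp` pipeline loaded above:

    doc = nlp("Die Kinder haben die Bücher gelesen.")  # illustrative sentence
    # token.lemma_ is filled by the edit tree lemmatizer.
    print([(token.text, token.lemma_) for token in doc])
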
meta.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8a7a6caac345424083bc713fe995e4a8d69c4fe419a91073aaefb88fcd5f1037
+ size 854809
morphologizer/cfg ADDED
The diff for this file is too large to render. See raw diff
 
morphologizer/model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:669274a861bd35e0732da88b0119307c82d1f09db306c251d809054526825faa
+ size 1969241
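
The morphologizer predicts UD morphological features (case, gender, number, and so on) along with the coarse UD POS tags. A short sketch of inspecting them, reusing the hypothetical `nlp` pipeline loaded above; the printed feature strings are illustrative, not verified output:

    doc = nlp("Die Katze schläft.")  # illustrative sentence
    for token in doc:
        # token.pos_ and token.morph are set by the morphologizer.
        print(token.text, token.pos_, token.morph)
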
parser/cfg ADDED
@@ -0,0 +1,13 @@
+ {
+ "moves":null,
+ "update_with_oracle_cut_size":100,
+ "multitasks":[
+
+ ],
+ "min_action_freq":5,
+ "learn_tokens":false,
+ "beam_width":1,
+ "beam_density":0.0,
+ "beam_update_prob":0.0,
+ "incorrect_spans_key":null
+ }
parser/model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:47170eb715e0a8ba9e7f8967cd19e77ed4617a71e4c45ba226dd2506e0779ab6
+ size 1066773
parser/moves ADDED
@@ -0,0 +1 @@
+ ��moves��{"0":{"":1716977},"1":{"":883617},"2":{"det":343370,"case":305958,"advmod":164962,"amod":153507,"nsubj":134547,"punct":128313,"obl":123673,"obj":77748,"cc":61331,"aux":49621,"nummod":48875,"mark":38584,"nsubj:pass":23516,"cop":12476,"aux:pass":12035,"nmod":11416,"xcomp":5169,"expl:pv":5020,"advcl":4779,"iobj":3450,"expl":1888,"ccomp":1709,"csubj":1645,"compound:prt":971,"acl":963,"parataxis":398,"obj||xcomp":259,"csubj:pass":161,"obl||xcomp":115,"advmod||xcomp":71,"nsubj||parataxis":69,"obl||parataxis":61,"ccomp||xcomp":39,"reparandum":31,"expl:pv||xcomp":26,"iobj||xcomp":22,"advcl||parataxis":22,"cc||parataxis":21,"obj||parataxis":13,"discourse":12,"ccomp||parataxis":11,"xcomp||parataxis":6,"vocative":6,"obj||advmod":6,"nsubj:pass||parataxis":6,"aux||parataxis":5,"advmod||parataxis":5,"dep":0},"3":{"punct":191477,"nmod":172013,"conj":75577,"obl":55413,"obj":46331,"nsubj":45204,"flat:name":45182,"advmod":44170,"appos":34868,"flat":28607,"aux":18800,"acl":15855,"compound:prt":15636,"aux:pass":14644,"parataxis":13085,"ccomp":12016,"xcomp":11653,"advcl":9592,"cop":8272,"expl:pv":3358,"case":3248,"csubj":2651,"acl||obj":2646,"mark":1606,"iobj":1519,"acl||obl":1308,"acl||nmod":1014,"nsubj:pass":823,"acl||nsubj":740,"expl":613,"csubj:pass":567,"xcomp||obj":504,"conj||nsubj":434,"appos||obj":410,"acl||nsubj:pass":382,"conj||obl":327,"conj||obj":322,"appos||nsubj":244,"ccomp||obj":191,"acl||conj":188,"appos||obl":182,"acl||appos":173,"xcomp||obl":159,"appos||nmod":109,"ccomp||nsubj":93,"advmod||xcomp":93,"conj||nsubj:pass":89,"appos||nsubj:pass":87,"obj||xcomp":78,"xcomp||nsubj":77,"conj||nmod":70,"ccomp||obl":67,"appos||appos":67,"det":64,"xcomp||nsubj:pass":56,"conj||conj":55,"obl||xcomp":52,"appos||conj":48,"conj||appos":37,"conj||advmod":31,"acl||iobj":30,"amod":28,"ccomp||xcomp":27,"ccomp||nsubj:pass":27,"expl:pv||xcomp":22,"xcomp||nmod":21,"ccomp||nmod":21,"mark||xcomp":16,"acl||xcomp":16,"cop||xcomp":14,"xcomp||xcomp":13,"ccomp||conj":13,"conj||advcl":12,"cc||conj":11,"punct||advmod":10,"xcomp||conj":9,"vocative":8,"iobj||xcomp":8,"conj||acl":8,"punct||nsubj":6,"appos||xcomp":6,"punct||nmod":5,"nummod":5,"conj||xcomp":5,"conj||iobj":5,"ccomp||iobj":5,"ccomp||advcl":5,"dep":0},"4":{"ROOT":153035}}�cfg��neg_key�
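
The moves file above serializes the parser's transition actions with their UD dependency labels (det, case, advmod, nsubj, …) and observed frequencies from UD_German-HDT. A short sketch of inspecting the labels and a parse, reusing the hypothetical `nlp` pipeline loaded above:

    parser = nlp.get_pipe("parser")
    print(parser.labels[:10])  # dependency labels, as serialized in the moves file

    doc = nlp("Der Hund läuft über die Straße.")  # illustrative sentence
    print([(token.text, token.dep_, token.head.text) for token in doc])
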
senter/cfg ADDED
@@ -0,0 +1,3 @@
+ {
+ "overwrite":false
+ }
senter/model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9ef8728f1cadb9e210e67422d9602f1d59c6753a4f8631068fe071cd51b21141
+ size 6740
tagger/cfg ADDED
@@ -0,0 +1,60 @@
+ {
+ "labels":[
+ "$(",
+ "$,",
+ "$.",
+ "ADJA",
+ "ADJD",
+ "ADV",
+ "APPO",
+ "APPR",
+ "APPRART",
+ "APZR",
+ "ART",
+ "CARD",
+ "FM",
+ "ITJ",
+ "KOKOM",
+ "KON",
+ "KOUI",
+ "KOUS",
+ "NE",
+ "NN",
+ "PDAT",
+ "PDS",
+ "PIAT",
+ "PIDAT",
+ "PIS",
+ "PPER",
+ "PPOSAT",
+ "PPOSS",
+ "PRELAT",
+ "PRELS",
+ "PRF",
+ "PROAV",
+ "PTKA",
+ "PTKANT",
+ "PTKNEG",
+ "PTKVZ",
+ "PTKZU",
+ "PWAT",
+ "PWAV",
+ "PWS",
+ "TRUNC",
+ "VAFIN",
+ "VAIMP",
+ "VAINF",
+ "VAPP",
+ "VMFIN",
+ "VMINF",
+ "VMPP",
+ "VVFIN",
+ "VVIMP",
+ "VVINF",
+ "VVIZU",
+ "VVPP",
+ "XY"
+ ],
+ "neg_prefix":"!",
+ "overwrite":false
+ }
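
The label list above is the fine-grained German STTS tagset used by the tagger, while the coarse UD POS tags are handled by the morphologizer. A short sketch of reading the tags, reusing the hypothetical `nlp` pipeline loaded above:

    tagger = nlp.get_pipe("tagger")
    print(tagger.labels)  # the STTS tags listed in the cfg above

    doc = nlp("Der Hund läuft über die Straße.")  # illustrative sentence
    print([(token.text, token.tag_) for token in doc])  # fine-grained STTS tags
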
tagger/model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3763ef49e18df089b81535ecafaf94f9bad55a38ff0e5de3cc098bcb4b6e3da8
+ size 166694
transformer/cfg ADDED
@@ -0,0 +1,3 @@
+ {
+ "max_batch_items":4096
+ }
transformer/model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f477d5caf61e61e6222796b231cd92c4bd823aa6734df4fff06ceb2f9cf9c287
+ size 1126406104
vocab/key2row ADDED
@@ -0,0 +1 @@
+
vocab/lookups.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
+ size 1
vocab/strings.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b008b4f1bc1f5cf91b9f974cc29dc6d32aea5315d24f6e47896e2f1aa494423
+ size 15307456
vocab/vectors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:14772b683e726436d5948ad3fff2b43d036ef2ebbe3458aafed6004e05a40706
+ size 128
vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
+ {
+ "mode":"default"
+ }