{
  "architectures": [
    "EncoderDecoderModel"
  ],
  "decoder": {
    "_name_or_path": "bert-base-german-cased",
    "add_cross_attention": true,
    "architectures": [
      "BertForMaskedLM"
    ],
    "attention_probs_dropout_prob": 0.1,
    "bad_words_ids": null,
    "bos_token_id": null,
    "chunk_size_feed_forward": 0,
    "decoder_start_token_id": null,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "early_stopping": false,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "gradient_checkpointing": false,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "hidden_size": 768,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_range": 0.02,
    "intermediate_size": 3072,
    "is_decoder": true,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "layer_norm_eps": 1e-12,
    "length_penalty": 1.0,
    "max_length": 20,
    "max_position_embeddings": 512,
    "min_length": 0,
    "model_type": "bert",
    "no_repeat_ngram_size": 0,
    "num_attention_heads": 12,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_hidden_layers": 12,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": 0,
    "position_embedding_type": "absolute",
    "prefix": null,
    "problem_type": null,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torchscript": false,
    "transformers_version": "4.7.0.dev0",
    "type_vocab_size": 2,
    "use_bfloat16": false,
    "use_cache": true,
    "vocab_size": 30000
  },
  "decoder_start_token_id": 3,
  "early_stopping": true,
  "encoder": {
    "_name_or_path": "bert-base-german-cased",
    "add_cross_attention": false,
    "architectures": [
      "BertForMaskedLM"
    ],
    "attention_probs_dropout_prob": 0.1,
    "bad_words_ids": null,
    "bos_token_id": null,
    "chunk_size_feed_forward": 0,
    "decoder_start_token_id": null,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "early_stopping": false,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "gradient_checkpointing": false,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "hidden_size": 768,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_range": 0.02,
    "intermediate_size": 3072,
    "is_decoder": false,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "layer_norm_eps": 1e-12,
    "length_penalty": 1.0,
    "max_length": 20,
    "max_position_embeddings": 512,
    "min_length": 0,
    "model_type": "bert",
    "no_repeat_ngram_size": 0,
    "num_attention_heads": 12,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_hidden_layers": 12,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": 0,
    "position_embedding_type": "absolute",
    "prefix": null,
    "problem_type": null,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torchscript": false,
    "transformers_version": "4.7.0.dev0",
    "type_vocab_size": 2,
    "use_bfloat16": false,
    "use_cache": true,
    "vocab_size": 30000
  },
  "eos_token_id": 4,
  "is_encoder_decoder": true,
  "length_penalty": 2.0,
  "max_length": 64,
  "min_length": 8,
  "model_type": "encoder-decoder",
  "no_repeat_ngram_size": 3,
  "num_beams": 4,
  "pad_token_id": 0,
  "tie_encoder_decoder": true,
  "transformers_version": null,
  "vocab_size": 30000
}