1 {
2 "_name_or_path": "/content/drive/MyDrive/bsc-r2r_shared_mlsum-es-4/checkpoint-2000",
3 "architectures": [
4 "EncoderDecoderModel"
5 ],
6 "decoder": {
7 "_name_or_path": "BSC-TeMU/roberta-base-bne",
8 "add_cross_attention": true,
9 "architectures": [
10 "RobertaForMaskedLM"
11 ],
12 "attention_probs_dropout_prob": 0.0,
13 "bad_words_ids": null,
14 "bos_token_id": 0,
15 "chunk_size_feed_forward": 0,
16 "classifier_dropout": null,
17 "decoder_start_token_id": null,
18 "diversity_penalty": 0.0,
19 "do_sample": false,
20 "early_stopping": false,
21 "encoder_no_repeat_ngram_size": 0,
22 "eos_token_id": 2,
23 "finetuning_task": null,
24 "forced_bos_token_id": null,
25 "forced_eos_token_id": null,
26 "gradient_checkpointing": false,
27 "hidden_act": "gelu",
28 "hidden_dropout_prob": 0.0,
29 "hidden_size": 768,
30 "id2label": {
31 "0": "LABEL_0",
32 "1": "LABEL_1"
33 },
34 "initializer_range": 0.02,
35 "intermediate_size": 3072,
36 "is_decoder": true,
37 "is_encoder_decoder": false,
38 "label2id": {
39 "LABEL_0": 0,
40 "LABEL_1": 1
41 },
42 "layer_norm_eps": 1e-05,
43 "length_penalty": 1.0,
44 "max_length": 20,
45 "max_position_embeddings": 514,
46 "min_length": 0,
47 "model_type": "roberta",
48 "no_repeat_ngram_size": 0,
49 "num_attention_heads": 12,
50 "num_beam_groups": 1,
51 "num_beams": 1,
52 "num_hidden_layers": 12,
53 "num_return_sequences": 1,
54 "output_attentions": false,
55 "output_hidden_states": false,
56 "output_scores": false,
57 "pad_token_id": 1,
58 "position_embedding_type": "absolute",
59 "prefix": null,
60 "problem_type": null,
61 "pruned_heads": {},
62 "remove_invalid_values": false,
63 "repetition_penalty": 1.0,
64 "return_dict": true,
65 "return_dict_in_generate": false,
66 "sep_token_id": null,
67 "task_specific_params": null,
68 "temperature": 1.0,
69 "tie_encoder_decoder": false,
70 "tie_word_embeddings": true,
71 "tokenizer_class": null,
72 "top_k": 50,
73 "top_p": 1.0,
74 "torch_dtype": null,
75 "torchscript": false,
76 "transformers_version": "4.10.0.dev0",
77 "type_vocab_size": 1,
78 "use_bfloat16": false,
79 "use_cache": true,
80 "vocab_size": 50262
81 },
82 "decoder_start_token_id": 0,
83 "early_stopping": true,
84 "encoder": {
85 "_name_or_path": "BSC-TeMU/roberta-base-bne",
86 "add_cross_attention": false,
87 "architectures": [
88 "RobertaForMaskedLM"
89 ],
90 "attention_probs_dropout_prob": 0.0,
91 "bad_words_ids": null,
92 "bos_token_id": 0,
93 "chunk_size_feed_forward": 0,
94 "classifier_dropout": null,
95 "decoder_start_token_id": null,
96 "diversity_penalty": 0.0,
97 "do_sample": false,
98 "early_stopping": false,
99 "encoder_no_repeat_ngram_size": 0,
100 "eos_token_id": 2,
101 "finetuning_task": null,
102 "forced_bos_token_id": null,
103 "forced_eos_token_id": null,
104 "gradient_checkpointing": false,
105 "hidden_act": "gelu",
106 "hidden_dropout_prob": 0.0,
107 "hidden_size": 768,
108 "id2label": {
109 "0": "LABEL_0",
110 "1": "LABEL_1"
111 },
112 "initializer_range": 0.02,
113 "intermediate_size": 3072,
114 "is_decoder": false,
115 "is_encoder_decoder": false,
116 "label2id": {
117 "LABEL_0": 0,
118 "LABEL_1": 1
119 },
120 "layer_norm_eps": 1e-05,
121 "length_penalty": 1.0,
122 "max_length": 20,
123 "max_position_embeddings": 514,
124 "min_length": 0,
125 "model_type": "roberta",
126 "no_repeat_ngram_size": 0,
127 "num_attention_heads": 12,
128 "num_beam_groups": 1,
129 "num_beams": 1,
130 "num_hidden_layers": 12,
131 "num_return_sequences": 1,
132 "output_attentions": false,
133 "output_hidden_states": false,
134 "output_scores": false,
135 "pad_token_id": 1,
136 "position_embedding_type": "absolute",
137 "prefix": null,
138 "problem_type": null,
139 "pruned_heads": {},
140 "remove_invalid_values": false,
141 "repetition_penalty": 1.0,
142 "return_dict": true,
143 "return_dict_in_generate": false,
144 "sep_token_id": null,
145 "task_specific_params": null,
146 "temperature": 1.0,
147 "tie_encoder_decoder": false,
148 "tie_word_embeddings": true,
149 "tokenizer_class": null,
150 "top_k": 50,
151 "top_p": 1.0,
152 "torch_dtype": null,
153 "torchscript": false,
154 "transformers_version": "4.10.0.dev0",
155 "type_vocab_size": 1,
156 "use_bfloat16": false,
157 "use_cache": true,
158 "vocab_size": 50262
159 },
160 "eos_token_id": 2,
161 "is_encoder_decoder": true,
162 "length_penalty": 2.0,
163 "max_length": 64,
164 "model_type": "encoder-decoder",
165 "no_repeat_ngram_size": 3,
166 "num_beams": 4,
167 "tie_encoder_decoder": true,
168 "torch_dtype": "float32",
169 "transformers_version": null,
170 "vocab_size": 50262
171 }
172