josh-oo committed on
Commit
4accedb
1 Parent(s): 3519726

mBART + fine-tuned dbmdz/german-gpt2

Browse files
Files changed (3) hide show
  1. config.json +10 -10
  2. generation_config.json +1 -1
  3. pytorch_model.bin +2 -2
config.json CHANGED
@@ -49,16 +49,16 @@
49
  "d_model": 1024,
50
  "decoder_attention_heads": 16,
51
  "decoder_config": {
52
- "_name_or_path": "benjamin/gerpt2",
53
  "activation_function": "gelu_new",
54
- "add_cross_attention": false,
55
  "architectures": [
56
  "GPT2LMHeadModel"
57
  ],
58
  "attn_pdrop": 0.1,
59
  "bad_words_ids": null,
60
  "begin_suppress_tokens": null,
61
- "bos_token_id": 0,
62
  "chunk_size_feed_forward": 0,
63
  "cross_attention_hidden_size": null,
64
  "decoder_start_token_id": null,
@@ -67,7 +67,7 @@
67
  "early_stopping": false,
68
  "embd_pdrop": 0.1,
69
  "encoder_no_repeat_ngram_size": 0,
70
- "eos_token_id": 0,
71
  "exponential_decay_length_penalty": null,
72
  "finetuning_task": null,
73
  "forced_bos_token_id": null,
@@ -78,7 +78,7 @@
78
  "1": "LABEL_1"
79
  },
80
  "initializer_range": 0.02,
81
- "is_decoder": false,
82
  "is_encoder_decoder": false,
83
  "label2id": {
84
  "LABEL_0": 0,
@@ -102,7 +102,7 @@
102
  "output_attentions": false,
103
  "output_hidden_states": false,
104
  "output_scores": false,
105
- "pad_token_id": 1,
106
  "prefix": null,
107
  "problem_type": null,
108
  "pruned_heads": {},
@@ -124,13 +124,13 @@
124
  "task_specific_params": {
125
  "text-generation": {
126
  "do_sample": true,
127
- "max_length": 100
128
  }
129
  },
130
  "temperature": 1.0,
131
  "tf_legacy_loss": false,
132
  "tie_encoder_decoder": false,
133
- "tie_word_embeddings": false,
134
  "tokenizer_class": null,
135
  "top_k": 50,
136
  "top_p": 1.0,
@@ -140,7 +140,7 @@
140
  "typical_p": 1.0,
141
  "use_bfloat16": false,
142
  "use_cache": true,
143
- "vocab_size": 50258
144
  },
145
  "decoder_ffn_dim": 4096,
146
  "decoder_layerdrop": 0.0,
@@ -150,7 +150,7 @@
150
  "encoder_ffn_dim": 4096,
151
  "encoder_layerdrop": 0.0,
152
  "encoder_layers": 12,
153
- "eos_token_id": 0,
154
  "forced_eos_token_id": 2,
155
  "from_mbart": false,
156
  "global_attention_indices": [
 
49
  "d_model": 1024,
50
  "decoder_attention_heads": 16,
51
  "decoder_config": {
52
+ "_name_or_path": "josh-oo/german-gpt2-easy",
53
  "activation_function": "gelu_new",
54
+ "add_cross_attention": true,
55
  "architectures": [
56
  "GPT2LMHeadModel"
57
  ],
58
  "attn_pdrop": 0.1,
59
  "bad_words_ids": null,
60
  "begin_suppress_tokens": null,
61
+ "bos_token_id": 50267,
62
  "chunk_size_feed_forward": 0,
63
  "cross_attention_hidden_size": null,
64
  "decoder_start_token_id": null,
 
67
  "early_stopping": false,
68
  "embd_pdrop": 0.1,
69
  "encoder_no_repeat_ngram_size": 0,
70
+ "eos_token_id": 50266,
71
  "exponential_decay_length_penalty": null,
72
  "finetuning_task": null,
73
  "forced_bos_token_id": null,
 
78
  "1": "LABEL_1"
79
  },
80
  "initializer_range": 0.02,
81
+ "is_decoder": true,
82
  "is_encoder_decoder": false,
83
  "label2id": {
84
  "LABEL_0": 0,
 
102
  "output_attentions": false,
103
  "output_hidden_states": false,
104
  "output_scores": false,
105
+ "pad_token_id": 50268,
106
  "prefix": null,
107
  "problem_type": null,
108
  "pruned_heads": {},
 
124
  "task_specific_params": {
125
  "text-generation": {
126
  "do_sample": true,
127
+ "max_length": 50
128
  }
129
  },
130
  "temperature": 1.0,
131
  "tf_legacy_loss": false,
132
  "tie_encoder_decoder": false,
133
+ "tie_word_embeddings": true,
134
  "tokenizer_class": null,
135
  "top_k": 50,
136
  "top_p": 1.0,
 
140
  "typical_p": 1.0,
141
  "use_bfloat16": false,
142
  "use_cache": true,
143
+ "vocab_size": 50269
144
  },
145
  "decoder_ffn_dim": 4096,
146
  "decoder_layerdrop": 0.0,
 
150
  "encoder_ffn_dim": 4096,
151
  "encoder_layerdrop": 0.0,
152
  "encoder_layers": 12,
153
+ "eos_token_id": 50266,
154
  "forced_eos_token_id": 2,
155
  "from_mbart": false,
156
  "global_attention_indices": [
generation_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
- "eos_token_id": 0,
5
  "forced_eos_token_id": 2,
6
  "max_length": 1024,
7
  "num_beams": 5,
 
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
+ "eos_token_id": 50266,
5
  "forced_eos_token_id": 2,
6
  "max_length": 1024,
7
  "num_beams": 5,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74434cbc4348f9491b766c9acc11ac4a55a46e00f6e61d486d77e41094a386c1
3
- size 1648941221
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c748af332639939d651577ce0c95204a617fab747d23854ddd6bd809b475685f
3
+ size 1649008805