ylacombe HF staff committed on
Commit
9092094
1 Parent(s): c023d90

Upload MusicgenMelodyForConditionalGeneration

Browse files
config.json CHANGED
@@ -105,7 +105,7 @@
105
  "add_cross_attention": false,
106
  "architectures": null,
107
  "attention_dropout": 0.0,
108
- "audio_channels": 1,
109
  "bad_words_ids": null,
110
  "begin_suppress_tokens": null,
111
  "bos_token_id": 2048,
@@ -145,7 +145,7 @@
145
  "num_attention_heads": 32,
146
  "num_beam_groups": 1,
147
  "num_beams": 1,
148
- "num_codebooks": 4,
149
  "num_hidden_layers": 48,
150
  "num_return_sequences": 1,
151
  "output_attentions": false,
 
105
  "add_cross_attention": false,
106
  "architectures": null,
107
  "attention_dropout": 0.0,
108
+ "audio_channels": 2,
109
  "bad_words_ids": null,
110
  "begin_suppress_tokens": null,
111
  "bos_token_id": 2048,
 
145
  "num_attention_heads": 32,
146
  "num_beam_groups": 1,
147
  "num_beams": 1,
148
+ "num_codebooks": 8,
149
  "num_hidden_layers": 48,
150
  "num_return_sequences": 1,
151
  "output_attentions": false,
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a983b7787526417b280dee6d70d28fbc825dd661b0dd50c4ae4391602d2c1f7
3
- size 4986994392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eda9af3032ccd59e8d198260e1bb75b3b5155ef23940f710aa3ca2648112132
3
+ size 4987010880
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff7f57ef40b093d1f69851ecde50f57eb89904aa2abd02912868447b73b212b8
3
- size 4966890568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1df2e518045a95c6ced5aaad33d34eea40346d7e158539595134dbff3b47e469
3
+ size 4966907208
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5aace972db765f0d5c9c7ba88119b1c1e7aae9fed43d226522f596af65c1ccf
3
- size 543379288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0621f095b1fb03d926437ce35af9510f50438b7d15b237aae25c7d632f6c362
3
+ size 677597536
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 10497173784
4
  },
5
  "weight_map": {
6
  "audio_enc_to_dec_proj.bias": "model-00003-of-00003.safetensors",
@@ -125,11 +125,19 @@
125
  "decoder.lm_heads.1.weight": "model-00003-of-00003.safetensors",
126
  "decoder.lm_heads.2.weight": "model-00003-of-00003.safetensors",
127
  "decoder.lm_heads.3.weight": "model-00003-of-00003.safetensors",
 
 
 
 
128
  "decoder.model.decoder.embed_positions.weights": "model-00001-of-00003.safetensors",
129
  "decoder.model.decoder.embed_tokens.0.weight": "model-00001-of-00003.safetensors",
130
  "decoder.model.decoder.embed_tokens.1.weight": "model-00001-of-00003.safetensors",
131
  "decoder.model.decoder.embed_tokens.2.weight": "model-00001-of-00003.safetensors",
132
  "decoder.model.decoder.embed_tokens.3.weight": "model-00001-of-00003.safetensors",
 
 
 
 
133
  "decoder.model.decoder.layer_norm.bias": "model-00003-of-00003.safetensors",
134
  "decoder.model.decoder.layer_norm.weight": "model-00003-of-00003.safetensors",
135
  "decoder.model.decoder.layers.0.fc1.weight": "model-00001-of-00003.safetensors",
@@ -263,9 +271,9 @@
263
  "decoder.model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
264
  "decoder.model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
265
  "decoder.model.decoder.layers.20.fc1.weight": "model-00001-of-00003.safetensors",
266
- "decoder.model.decoder.layers.20.fc2.weight": "model-00001-of-00003.safetensors",
267
- "decoder.model.decoder.layers.20.final_layer_norm.bias": "model-00001-of-00003.safetensors",
268
- "decoder.model.decoder.layers.20.final_layer_norm.weight": "model-00001-of-00003.safetensors",
269
  "decoder.model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
270
  "decoder.model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
271
  "decoder.model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
@@ -532,7 +540,7 @@
532
  "decoder.model.decoder.layers.44.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
533
  "decoder.model.decoder.layers.44.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
534
  "decoder.model.decoder.layers.44.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
535
- "decoder.model.decoder.layers.45.fc1.weight": "model-00002-of-00003.safetensors",
536
  "decoder.model.decoder.layers.45.fc2.weight": "model-00003-of-00003.safetensors",
537
  "decoder.model.decoder.layers.45.final_layer_norm.bias": "model-00003-of-00003.safetensors",
538
  "decoder.model.decoder.layers.45.final_layer_norm.weight": "model-00003-of-00003.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 10631424280
4
  },
5
  "weight_map": {
6
  "audio_enc_to_dec_proj.bias": "model-00003-of-00003.safetensors",
 
125
  "decoder.lm_heads.1.weight": "model-00003-of-00003.safetensors",
126
  "decoder.lm_heads.2.weight": "model-00003-of-00003.safetensors",
127
  "decoder.lm_heads.3.weight": "model-00003-of-00003.safetensors",
128
+ "decoder.lm_heads.4.weight": "model-00003-of-00003.safetensors",
129
+ "decoder.lm_heads.5.weight": "model-00003-of-00003.safetensors",
130
+ "decoder.lm_heads.6.weight": "model-00003-of-00003.safetensors",
131
+ "decoder.lm_heads.7.weight": "model-00003-of-00003.safetensors",
132
  "decoder.model.decoder.embed_positions.weights": "model-00001-of-00003.safetensors",
133
  "decoder.model.decoder.embed_tokens.0.weight": "model-00001-of-00003.safetensors",
134
  "decoder.model.decoder.embed_tokens.1.weight": "model-00001-of-00003.safetensors",
135
  "decoder.model.decoder.embed_tokens.2.weight": "model-00001-of-00003.safetensors",
136
  "decoder.model.decoder.embed_tokens.3.weight": "model-00001-of-00003.safetensors",
137
+ "decoder.model.decoder.embed_tokens.4.weight": "model-00001-of-00003.safetensors",
138
+ "decoder.model.decoder.embed_tokens.5.weight": "model-00001-of-00003.safetensors",
139
+ "decoder.model.decoder.embed_tokens.6.weight": "model-00001-of-00003.safetensors",
140
+ "decoder.model.decoder.embed_tokens.7.weight": "model-00001-of-00003.safetensors",
141
  "decoder.model.decoder.layer_norm.bias": "model-00003-of-00003.safetensors",
142
  "decoder.model.decoder.layer_norm.weight": "model-00003-of-00003.safetensors",
143
  "decoder.model.decoder.layers.0.fc1.weight": "model-00001-of-00003.safetensors",
 
271
  "decoder.model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
272
  "decoder.model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
273
  "decoder.model.decoder.layers.20.fc1.weight": "model-00001-of-00003.safetensors",
274
+ "decoder.model.decoder.layers.20.fc2.weight": "model-00002-of-00003.safetensors",
275
+ "decoder.model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00003.safetensors",
276
+ "decoder.model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00003.safetensors",
277
  "decoder.model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
278
  "decoder.model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
279
  "decoder.model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
 
540
  "decoder.model.decoder.layers.44.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
541
  "decoder.model.decoder.layers.44.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
542
  "decoder.model.decoder.layers.44.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
543
+ "decoder.model.decoder.layers.45.fc1.weight": "model-00003-of-00003.safetensors",
544
  "decoder.model.decoder.layers.45.fc2.weight": "model-00003-of-00003.safetensors",
545
  "decoder.model.decoder.layers.45.final_layer_norm.bias": "model-00003-of-00003.safetensors",
546
  "decoder.model.decoder.layers.45.final_layer_norm.weight": "model-00003-of-00003.safetensors",