Upload MusicgenMelodyForConditionalGeneration

#4
by ylacombe HF staff - opened
config.json CHANGED
@@ -292,5 +292,5 @@
292
  "vocab_size": 32128
293
  },
294
  "torch_dtype": "float32",
295
- "transformers_version": "4.38.0.dev0"
296
  }
 
292
  "vocab_size": 32128
293
  },
294
  "torch_dtype": "float32",
295
+ "transformers_version": "4.40.0.dev0"
296
  }
generation_config.json CHANGED
@@ -6,5 +6,5 @@
6
  "guidance_scale": 3.0,
7
  "max_length": 1500,
8
  "pad_token_id": 2048,
9
- "transformers_version": "4.38.0.dev0"
10
  }
 
6
  "guidance_scale": 3.0,
7
  "max_length": 1500,
8
  "pad_token_id": 2048,
9
+ "transformers_version": "4.40.0.dev0"
10
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e43bd318ddca719ccbb8562eec0da47fddc2ac5a428032a528414fc1587bea4
3
- size 4986895072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de1f894560846ddcaa2c8a2259229e135bcab586ec5002145869777ce0a15a5a
3
+ size 4987010864
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd20fc6e8fed30387d8322c84224986918fbd6d9e051d25e062f1ab21c324cd7
3
- size 4933335864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1df2e518045a95c6ced5aaad33d34eea40346d7e158539595134dbff3b47e469
3
+ size 4966907208
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7e25e0a8ed5d455a5b58152cbb3467a52170a2245c376995846b9f37dd1e58c
3
- size 610488552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0621f095b1fb03d926437ce35af9510f50438b7d15b237aae25c7d632f6c362
3
+ size 677597536
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 10530629912
4
  },
5
  "weight_map": {
6
  "audio_enc_to_dec_proj.bias": "model-00003-of-00003.safetensors",
@@ -8,7 +8,14 @@
8
  "audio_encoder.decoder.layers.0.conv.bias": "model-00001-of-00003.safetensors",
9
  "audio_encoder.decoder.layers.0.conv.weight_g": "model-00001-of-00003.safetensors",
10
  "audio_encoder.decoder.layers.0.conv.weight_v": "model-00001-of-00003.safetensors",
 
 
 
 
 
 
11
  "audio_encoder.decoder.layers.1.lstm.weight_ih_l0": "model-00001-of-00003.safetensors",
 
12
  "audio_encoder.decoder.layers.10.block.1.conv.bias": "model-00001-of-00003.safetensors",
13
  "audio_encoder.decoder.layers.10.block.1.conv.weight_g": "model-00001-of-00003.safetensors",
14
  "audio_encoder.decoder.layers.10.block.1.conv.weight_v": "model-00001-of-00003.safetensors",
@@ -66,7 +73,14 @@
66
  "audio_encoder.encoder.layers.12.conv.bias": "model-00001-of-00003.safetensors",
67
  "audio_encoder.encoder.layers.12.conv.weight_g": "model-00001-of-00003.safetensors",
68
  "audio_encoder.encoder.layers.12.conv.weight_v": "model-00001-of-00003.safetensors",
 
 
 
 
 
 
69
  "audio_encoder.encoder.layers.13.lstm.weight_ih_l0": "model-00001-of-00003.safetensors",
 
70
  "audio_encoder.encoder.layers.15.conv.bias": "model-00001-of-00003.safetensors",
71
  "audio_encoder.encoder.layers.15.conv.weight_g": "model-00001-of-00003.safetensors",
72
  "audio_encoder.encoder.layers.15.conv.weight_v": "model-00001-of-00003.safetensors",
@@ -257,9 +271,9 @@
257
  "decoder.model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
258
  "decoder.model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
259
  "decoder.model.decoder.layers.20.fc1.weight": "model-00001-of-00003.safetensors",
260
- "decoder.model.decoder.layers.20.fc2.weight": "model-00001-of-00003.safetensors",
261
- "decoder.model.decoder.layers.20.final_layer_norm.bias": "model-00001-of-00003.safetensors",
262
- "decoder.model.decoder.layers.20.final_layer_norm.weight": "model-00001-of-00003.safetensors",
263
  "decoder.model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
264
  "decoder.model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
265
  "decoder.model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
@@ -270,10 +284,10 @@
270
  "decoder.model.decoder.layers.21.fc2.weight": "model-00002-of-00003.safetensors",
271
  "decoder.model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00003.safetensors",
272
  "decoder.model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00003.safetensors",
273
- "decoder.model.decoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
274
  "decoder.model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00003.safetensors",
275
  "decoder.model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
276
- "decoder.model.decoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
277
  "decoder.model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
278
  "decoder.model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
279
  "decoder.model.decoder.layers.22.fc1.weight": "model-00002-of-00003.safetensors",
@@ -526,7 +540,7 @@
526
  "decoder.model.decoder.layers.44.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
527
  "decoder.model.decoder.layers.44.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
528
  "decoder.model.decoder.layers.44.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
529
- "decoder.model.decoder.layers.45.fc1.weight": "model-00002-of-00003.safetensors",
530
  "decoder.model.decoder.layers.45.fc2.weight": "model-00003-of-00003.safetensors",
531
  "decoder.model.decoder.layers.45.final_layer_norm.bias": "model-00003-of-00003.safetensors",
532
  "decoder.model.decoder.layers.45.final_layer_norm.weight": "model-00003-of-00003.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 10631424280
4
  },
5
  "weight_map": {
6
  "audio_enc_to_dec_proj.bias": "model-00003-of-00003.safetensors",
 
8
  "audio_encoder.decoder.layers.0.conv.bias": "model-00001-of-00003.safetensors",
9
  "audio_encoder.decoder.layers.0.conv.weight_g": "model-00001-of-00003.safetensors",
10
  "audio_encoder.decoder.layers.0.conv.weight_v": "model-00001-of-00003.safetensors",
11
+ "audio_encoder.decoder.layers.1.lstm.bias_hh_l0": "model-00001-of-00003.safetensors",
12
+ "audio_encoder.decoder.layers.1.lstm.bias_hh_l1": "model-00001-of-00003.safetensors",
13
+ "audio_encoder.decoder.layers.1.lstm.bias_ih_l0": "model-00001-of-00003.safetensors",
14
+ "audio_encoder.decoder.layers.1.lstm.bias_ih_l1": "model-00001-of-00003.safetensors",
15
+ "audio_encoder.decoder.layers.1.lstm.weight_hh_l0": "model-00001-of-00003.safetensors",
16
+ "audio_encoder.decoder.layers.1.lstm.weight_hh_l1": "model-00001-of-00003.safetensors",
17
  "audio_encoder.decoder.layers.1.lstm.weight_ih_l0": "model-00001-of-00003.safetensors",
18
+ "audio_encoder.decoder.layers.1.lstm.weight_ih_l1": "model-00001-of-00003.safetensors",
19
  "audio_encoder.decoder.layers.10.block.1.conv.bias": "model-00001-of-00003.safetensors",
20
  "audio_encoder.decoder.layers.10.block.1.conv.weight_g": "model-00001-of-00003.safetensors",
21
  "audio_encoder.decoder.layers.10.block.1.conv.weight_v": "model-00001-of-00003.safetensors",
 
73
  "audio_encoder.encoder.layers.12.conv.bias": "model-00001-of-00003.safetensors",
74
  "audio_encoder.encoder.layers.12.conv.weight_g": "model-00001-of-00003.safetensors",
75
  "audio_encoder.encoder.layers.12.conv.weight_v": "model-00001-of-00003.safetensors",
76
+ "audio_encoder.encoder.layers.13.lstm.bias_hh_l0": "model-00001-of-00003.safetensors",
77
+ "audio_encoder.encoder.layers.13.lstm.bias_hh_l1": "model-00001-of-00003.safetensors",
78
+ "audio_encoder.encoder.layers.13.lstm.bias_ih_l0": "model-00001-of-00003.safetensors",
79
+ "audio_encoder.encoder.layers.13.lstm.bias_ih_l1": "model-00001-of-00003.safetensors",
80
+ "audio_encoder.encoder.layers.13.lstm.weight_hh_l0": "model-00001-of-00003.safetensors",
81
+ "audio_encoder.encoder.layers.13.lstm.weight_hh_l1": "model-00001-of-00003.safetensors",
82
  "audio_encoder.encoder.layers.13.lstm.weight_ih_l0": "model-00001-of-00003.safetensors",
83
+ "audio_encoder.encoder.layers.13.lstm.weight_ih_l1": "model-00001-of-00003.safetensors",
84
  "audio_encoder.encoder.layers.15.conv.bias": "model-00001-of-00003.safetensors",
85
  "audio_encoder.encoder.layers.15.conv.weight_g": "model-00001-of-00003.safetensors",
86
  "audio_encoder.encoder.layers.15.conv.weight_v": "model-00001-of-00003.safetensors",
 
271
  "decoder.model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
272
  "decoder.model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
273
  "decoder.model.decoder.layers.20.fc1.weight": "model-00001-of-00003.safetensors",
274
+ "decoder.model.decoder.layers.20.fc2.weight": "model-00002-of-00003.safetensors",
275
+ "decoder.model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00003.safetensors",
276
+ "decoder.model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00003.safetensors",
277
  "decoder.model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
278
  "decoder.model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
279
  "decoder.model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
 
284
  "decoder.model.decoder.layers.21.fc2.weight": "model-00002-of-00003.safetensors",
285
  "decoder.model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00003.safetensors",
286
  "decoder.model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00003.safetensors",
287
+ "decoder.model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
288
  "decoder.model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00003.safetensors",
289
  "decoder.model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
290
+ "decoder.model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
291
  "decoder.model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
292
  "decoder.model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
293
  "decoder.model.decoder.layers.22.fc1.weight": "model-00002-of-00003.safetensors",
 
540
  "decoder.model.decoder.layers.44.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
541
  "decoder.model.decoder.layers.44.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
542
  "decoder.model.decoder.layers.44.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
543
+ "decoder.model.decoder.layers.45.fc1.weight": "model-00003-of-00003.safetensors",
544
  "decoder.model.decoder.layers.45.fc2.weight": "model-00003-of-00003.safetensors",
545
  "decoder.model.decoder.layers.45.final_layer_norm.bias": "model-00003-of-00003.safetensors",
546
  "decoder.model.decoder.layers.45.final_layer_norm.weight": "model-00003-of-00003.safetensors",