ylacombe committed on
Commit
6eb8539
1 Parent(s): baa22af

Upload MusicgenMelodyForConditionalGeneration

Browse files
config.json CHANGED
@@ -292,5 +292,5 @@
292
  "vocab_size": 32128
293
  },
294
  "torch_dtype": "float32",
295
- "transformers_version": "4.38.0.dev0"
296
  }
 
292
  "vocab_size": 32128
293
  },
294
  "torch_dtype": "float32",
295
+ "transformers_version": "4.40.0.dev0"
296
  }
generation_config.json CHANGED
@@ -6,5 +6,5 @@
6
  "guidance_scale": 3.0,
7
  "max_length": 1500,
8
  "pad_token_id": 2048,
9
- "transformers_version": "4.38.0.dev0"
10
  }
 
6
  "guidance_scale": 3.0,
7
  "max_length": 1500,
8
  "pad_token_id": 2048,
9
+ "transformers_version": "4.40.0.dev0"
10
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:803d16553a24106a5de3cefe5876f31647b6d28ae1107e8e61a71f38957040d5
3
- size 4978745456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3deb583fd977128b24f59c86d6706f01b35b4ab8553e8355cca6969a9b7440b4
3
+ size 4966269512
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7b35f065d8992304194601dbf3cfd32621194aa0fd111b84acd4fff9efe9105
3
- size 1150108320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01268b3a5f9819026a9ee6b76df35574e7dd521de3e6431c7fad16b44ce5a9c0
3
+ size 1263380400
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 6128765208
4
  },
5
  "weight_map": {
6
  "audio_enc_to_dec_proj.bias": "model-00002-of-00002.safetensors",
@@ -8,7 +8,14 @@
8
  "audio_encoder.decoder.layers.0.conv.bias": "model-00001-of-00002.safetensors",
9
  "audio_encoder.decoder.layers.0.conv.weight_g": "model-00001-of-00002.safetensors",
10
  "audio_encoder.decoder.layers.0.conv.weight_v": "model-00001-of-00002.safetensors",
 
 
 
 
 
 
11
  "audio_encoder.decoder.layers.1.lstm.weight_ih_l0": "model-00001-of-00002.safetensors",
 
12
  "audio_encoder.decoder.layers.10.block.1.conv.bias": "model-00001-of-00002.safetensors",
13
  "audio_encoder.decoder.layers.10.block.1.conv.weight_g": "model-00001-of-00002.safetensors",
14
  "audio_encoder.decoder.layers.10.block.1.conv.weight_v": "model-00001-of-00002.safetensors",
@@ -66,7 +73,14 @@
66
  "audio_encoder.encoder.layers.12.conv.bias": "model-00001-of-00002.safetensors",
67
  "audio_encoder.encoder.layers.12.conv.weight_g": "model-00001-of-00002.safetensors",
68
  "audio_encoder.encoder.layers.12.conv.weight_v": "model-00001-of-00002.safetensors",
 
 
 
 
 
 
69
  "audio_encoder.encoder.layers.13.lstm.weight_ih_l0": "model-00001-of-00002.safetensors",
 
70
  "audio_encoder.encoder.layers.15.conv.bias": "model-00001-of-00002.safetensors",
71
  "audio_encoder.encoder.layers.15.conv.weight_g": "model-00001-of-00002.safetensors",
72
  "audio_encoder.encoder.layers.15.conv.weight_v": "model-00001-of-00002.safetensors",
@@ -428,10 +442,10 @@
428
  "decoder.model.decoder.layers.36.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
429
  "decoder.model.decoder.layers.36.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors",
430
  "decoder.model.decoder.layers.36.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors",
431
- "decoder.model.decoder.layers.37.fc1.weight": "model-00001-of-00002.safetensors",
432
- "decoder.model.decoder.layers.37.fc2.weight": "model-00001-of-00002.safetensors",
433
- "decoder.model.decoder.layers.37.final_layer_norm.bias": "model-00001-of-00002.safetensors",
434
- "decoder.model.decoder.layers.37.final_layer_norm.weight": "model-00001-of-00002.safetensors",
435
  "decoder.model.decoder.layers.37.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
436
  "decoder.model.decoder.layers.37.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
437
  "decoder.model.decoder.layers.37.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
@@ -442,12 +456,12 @@
442
  "decoder.model.decoder.layers.38.fc2.weight": "model-00002-of-00002.safetensors",
443
  "decoder.model.decoder.layers.38.final_layer_norm.bias": "model-00002-of-00002.safetensors",
444
  "decoder.model.decoder.layers.38.final_layer_norm.weight": "model-00002-of-00002.safetensors",
445
- "decoder.model.decoder.layers.38.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
446
- "decoder.model.decoder.layers.38.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
447
- "decoder.model.decoder.layers.38.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
448
- "decoder.model.decoder.layers.38.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
449
- "decoder.model.decoder.layers.38.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors",
450
- "decoder.model.decoder.layers.38.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors",
451
  "decoder.model.decoder.layers.39.fc1.weight": "model-00002-of-00002.safetensors",
452
  "decoder.model.decoder.layers.39.fc2.weight": "model-00002-of-00002.safetensors",
453
  "decoder.model.decoder.layers.39.final_layer_norm.bias": "model-00002-of-00002.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 6229559576
4
  },
5
  "weight_map": {
6
  "audio_enc_to_dec_proj.bias": "model-00002-of-00002.safetensors",
 
8
  "audio_encoder.decoder.layers.0.conv.bias": "model-00001-of-00002.safetensors",
9
  "audio_encoder.decoder.layers.0.conv.weight_g": "model-00001-of-00002.safetensors",
10
  "audio_encoder.decoder.layers.0.conv.weight_v": "model-00001-of-00002.safetensors",
11
+ "audio_encoder.decoder.layers.1.lstm.bias_hh_l0": "model-00001-of-00002.safetensors",
12
+ "audio_encoder.decoder.layers.1.lstm.bias_hh_l1": "model-00001-of-00002.safetensors",
13
+ "audio_encoder.decoder.layers.1.lstm.bias_ih_l0": "model-00001-of-00002.safetensors",
14
+ "audio_encoder.decoder.layers.1.lstm.bias_ih_l1": "model-00001-of-00002.safetensors",
15
+ "audio_encoder.decoder.layers.1.lstm.weight_hh_l0": "model-00001-of-00002.safetensors",
16
+ "audio_encoder.decoder.layers.1.lstm.weight_hh_l1": "model-00001-of-00002.safetensors",
17
  "audio_encoder.decoder.layers.1.lstm.weight_ih_l0": "model-00001-of-00002.safetensors",
18
+ "audio_encoder.decoder.layers.1.lstm.weight_ih_l1": "model-00001-of-00002.safetensors",
19
  "audio_encoder.decoder.layers.10.block.1.conv.bias": "model-00001-of-00002.safetensors",
20
  "audio_encoder.decoder.layers.10.block.1.conv.weight_g": "model-00001-of-00002.safetensors",
21
  "audio_encoder.decoder.layers.10.block.1.conv.weight_v": "model-00001-of-00002.safetensors",
 
73
  "audio_encoder.encoder.layers.12.conv.bias": "model-00001-of-00002.safetensors",
74
  "audio_encoder.encoder.layers.12.conv.weight_g": "model-00001-of-00002.safetensors",
75
  "audio_encoder.encoder.layers.12.conv.weight_v": "model-00001-of-00002.safetensors",
76
+ "audio_encoder.encoder.layers.13.lstm.bias_hh_l0": "model-00001-of-00002.safetensors",
77
+ "audio_encoder.encoder.layers.13.lstm.bias_hh_l1": "model-00001-of-00002.safetensors",
78
+ "audio_encoder.encoder.layers.13.lstm.bias_ih_l0": "model-00001-of-00002.safetensors",
79
+ "audio_encoder.encoder.layers.13.lstm.bias_ih_l1": "model-00001-of-00002.safetensors",
80
+ "audio_encoder.encoder.layers.13.lstm.weight_hh_l0": "model-00001-of-00002.safetensors",
81
+ "audio_encoder.encoder.layers.13.lstm.weight_hh_l1": "model-00001-of-00002.safetensors",
82
  "audio_encoder.encoder.layers.13.lstm.weight_ih_l0": "model-00001-of-00002.safetensors",
83
+ "audio_encoder.encoder.layers.13.lstm.weight_ih_l1": "model-00001-of-00002.safetensors",
84
  "audio_encoder.encoder.layers.15.conv.bias": "model-00001-of-00002.safetensors",
85
  "audio_encoder.encoder.layers.15.conv.weight_g": "model-00001-of-00002.safetensors",
86
  "audio_encoder.encoder.layers.15.conv.weight_v": "model-00001-of-00002.safetensors",
 
442
  "decoder.model.decoder.layers.36.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
443
  "decoder.model.decoder.layers.36.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors",
444
  "decoder.model.decoder.layers.36.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors",
445
+ "decoder.model.decoder.layers.37.fc1.weight": "model-00002-of-00002.safetensors",
446
+ "decoder.model.decoder.layers.37.fc2.weight": "model-00002-of-00002.safetensors",
447
+ "decoder.model.decoder.layers.37.final_layer_norm.bias": "model-00002-of-00002.safetensors",
448
+ "decoder.model.decoder.layers.37.final_layer_norm.weight": "model-00002-of-00002.safetensors",
449
  "decoder.model.decoder.layers.37.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
450
  "decoder.model.decoder.layers.37.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
451
  "decoder.model.decoder.layers.37.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
456
  "decoder.model.decoder.layers.38.fc2.weight": "model-00002-of-00002.safetensors",
457
  "decoder.model.decoder.layers.38.final_layer_norm.bias": "model-00002-of-00002.safetensors",
458
  "decoder.model.decoder.layers.38.final_layer_norm.weight": "model-00002-of-00002.safetensors",
459
+ "decoder.model.decoder.layers.38.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
460
+ "decoder.model.decoder.layers.38.self_attn.out_proj.weight": "model-00002-of-00002.safetensors",
461
+ "decoder.model.decoder.layers.38.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
462
+ "decoder.model.decoder.layers.38.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
463
+ "decoder.model.decoder.layers.38.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors",
464
+ "decoder.model.decoder.layers.38.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors",
465
  "decoder.model.decoder.layers.39.fc1.weight": "model-00002-of-00002.safetensors",
466
  "decoder.model.decoder.layers.39.fc2.weight": "model-00002-of-00002.safetensors",
467
  "decoder.model.decoder.layers.39.final_layer_norm.bias": "model-00002-of-00002.safetensors",