{
  "_commit_hash": null,
  "_name_or_path": "jukebox-5b-lyrics",
  "architectures": [
    "JukeboxModel"
  ],
  "hop_fraction": [
    0.125,
    0.5,
    0.5
  ],
  "init_std": 0.2,
  "max_duration": 600.0,
  "max_nb_genres": 5,
  "metadata_conditioning": true,
  "min_duration": 23.8,
  "model_type": "jukebox",
  "nb_priors": 3,
  "prior_0": {
    "act_fn": "quick_gelu",
    "alignment_head": 2,
    "alignment_layer": 68,
    "attention_multiplier": 0.25,
    "attention_pattern": "large_separated_enc_dec_w_lyrics",
    "attn_dropout": 0,
    "attn_res_scale": false,
    "blocks": 64,
    "conv_res_scale": null,
    "emb_dropout": 0,
    "encoder_config": {
      "act_fn": "quick_gelu",
      "alignment_head": 2,
      "alignment_layer": 68,
      "attention_multiplier": 0.25,
      "attention_pattern": "raw_column_previous_row_attention",
      "attn_dropout": 0.0,
      "attn_res_scale": false,
      "blocks": 32,
      "conv_res_scale": null,
      "emb_dropout": 0.0,
      "encoder_config": null,
      "encoder_loss_fraction": 0.4,
      "hidden_size": 1280,
      "init_scale": 0.1,
      "is_encoder_decoder": false,
      "level": 0,
      "lyric_vocab_size": 80,
      "mask": true,
      "max_duration": 600,
      "max_nb_genres": 1,
      "merged_decoder": false,
      "metadata_conditioning": true,
      "metadata_dims": [
        604,
        7898
      ],
      "min_duration": 0,
      "mlp_multiplier": 1.0,
      "model_type": "jukebox_prior",
      "music_vocab_size": 2048,
      "n_ctx": 6144,
      "n_heads": 4,
      "nb_relevant_lyric_tokens": 384,
      "num_layers": 18,
      "res_conv_depth": 3,
      "res_conv_width": 128,
      "res_convolution_multiplier": 1,
      "res_dilation_cycle": null,
      "res_dilation_growth_rate": 1,
      "res_downs_t": [
        3,
        2,
        2
      ],
      "res_strides_t": [
        2,
        2,
        2
      ],
      "resid_dropout": 0.0,
      "sampling_rate": 44100,
      "spread": null,
      "timing_dims": 64,
      "zero_out": false
    },
    "encoder_loss_fraction": 0.4,
    "hidden_size": 4800,
    "init_scale": 0.2,
    "is_encoder_decoder": false,
    "level": 0,
    "lyric_vocab_size": 80,
    "mask": true,
    "max_duration": 600.0,
    "max_nb_genres": 5,
    "merged_decoder": true,
    "metadata_conditioning": true,
    "metadata_dims": [
      120,
      4111
    ],
    "min_duration": 23.8,
    "mlp_multiplier": 1.0,
    "model_type": "jukebox_prior",
    "music_vocab_size": 2048,
    "n_ctx": 8192,
    "n_heads": 8,
    "nb_relevant_lyric_tokens": 512,
    "num_layers": 79,
    "res_conv_depth": null,
    "res_conv_width": null,
    "res_convolution_multiplier": null,
    "res_dilation_cycle": null,
    "res_dilation_growth_rate": null,
    "res_downs_t": [
      3,
      2,
      2
    ],
    "res_strides_t": [
      2,
      2,
      2
    ],
    "resid_dropout": 0,
    "sampling_rate": 44100,
    "spread": null,
    "timing_dims": 128,
    "zero_out": false
  },
  "prior_1": {
    "act_fn": "quick_gelu",
    "alignment_head": null,
    "alignment_layer": null,
    "attention_multiplier": 0.25,
    "attention_pattern": "raw_column_previous_row_attention",
    "attn_dropout": 0,
    "attn_res_scale": false,
    "blocks": 64,
    "conv_res_scale": true,
    "emb_dropout": 0,
    "embed_dim": 2048,
    "encoder_config": null,
    "encoder_loss_fraction": 0.4,
    "hidden_size": 1920,
    "init_scale": 1,
    "is_encoder_decoder": false,
    "level": 1,
    "lyric_vocab_size": 80,
    "mask": true,
    "max_duration": 600.0,
    "max_nb_genres": 5,
    "merged_decoder": false,
    "metadata_conditioning": true,
    "metadata_dims": [
      120,
      4111
    ],
    "min_duration": 23.8,
    "mlp_multiplier": 1.0,
    "model_type": "jukebox_prior",
    "music_vocab_size": 2048,
    "n_ctx": 8192,
    "n_heads": 1,
    "nb_relevant_lyric_tokens": 0,
    "num_layers": 72,
    "res_conv_depth": 16,
    "res_conv_width": 1024,
    "res_convolution_multiplier": 1,
    "res_dilation_cycle": 8,
    "res_dilation_growth_rate": 3,
    "res_downs_t": [
      3,
      2,
      2
    ],
    "res_strides_t": [
      2,
      2,
      2
    ],
    "resid_dropout": 0,
    "sampling_rate": 44100,
    "spread": null,
    "timing_dims": 128,
    "zero_out": false
  },
  "prior_2": {
    "act_fn": "quick_gelu",
    "alignment_head": null,
    "alignment_layer": null,
    "attention_multiplier": 0.25,
    "attention_pattern": "raw_column_previous_row_attention",
    "attn_dropout": 0,
    "attn_res_scale": false,
    "blocks": 64,
    "conv_res_scale": false,
    "emb_dropout": 0,
    "embed_dim": 2048,
    "encoder_config": null,
    "encoder_loss_fraction": 0.4,
    "hidden_size": 1920,
    "init_scale": 1,
    "is_encoder_decoder": false,
    "level": 2,
    "lyric_vocab_size": 80,
    "mask": true,
    "max_duration": 600.0,
    "max_nb_genres": 5,
    "merged_decoder": false,
    "metadata_conditioning": true,
    "metadata_dims": [
      120,
      4111
    ],
    "min_duration": 23.8,
    "mlp_multiplier": 1.0,
    "model_type": "jukebox_prior",
    "music_vocab_size": 2048,
    "n_ctx": 8192,
    "n_heads": 1,
    "nb_relevant_lyric_tokens": 0,
    "num_layers": 72,
    "res_conv_depth": 16,
    "res_conv_width": 1024,
    "res_convolution_multiplier": 1,
    "res_dilation_cycle": 8,
    "res_dilation_growth_rate": 3,
    "res_downs_t": [
      3,
      2,
      2
    ],
    "res_strides_t": [
      2,
      2,
      2
    ],
    "resid_dropout": 0,
    "sampling_rate": 44100,
    "spread": null,
    "timing_dims": 128,
    "zero_out": false
  },
  "sampling_rate": 44100,
  "timing_dims": 128,
  "torch_dtype": "float32",
  "transformers_version": null,
  "vqvae_config": {
    "act_fn": "relu",
    "commit": 0.02,
    "conv_input_shape": 1,
    "conv_res_scale": false,
    "embed_dim": 64,
    "hop_fraction": [
      0.125,
      0.5,
      0.5
    ],
    "init_scale": 0.2,
    "levels": 3,
    "lmu": 0.99,
    "model_type": "jukebox_vqvae",
    "multipliers": [
      2,
      1,
      1
    ],
    "nb_discrete_codes": 2048,
    "res_conv_depth": 4,
    "res_conv_width": 32,
    "res_convolution_multiplier": 1,
    "res_dilation_cycle": null,
    "res_dilation_growth_rate": 3,
    "res_downs_t": [
      3,
      2,
      2
    ],
    "res_strides_t": [
      2,
      2,
      2
    ],
    "sample_length": 1058304,
    "transformers_version": "4.25.0.dev0",
    "zero_out": false
  }
}