{ "_name_or_path": "Audiogen/apolloxl-pretrain-030724", "ada_dim": 512, "architectures": [ "Apollo" ], "codec_config": "625e102f2cf534109a62d19b484d5259222fcaa7", "codec_mean": -0.09, "codec_name": "vae-100hz-32ch-030624", "codec_revision": "625e102f2cf534109a62d19b484d5259222fcaa7", "codec_std": 1.215, "conditionings": [ { "cond_net": "mlp", "dim": 4096, "name": "text", "pathway": "cross", "stem_noise_sigma": null }, { "cond_net": "mlp", "dim": 512, "name": "global_audio", "pathway": "ada", "stem_noise_sigma": null }, { "cond_net": "conv", "dim": 32, "name": "stem", "pathway": "embed", "stem_noise_sigma": 0.1 }, { "cond_net": "conv", "dim": 33, "name": "inpainting", "pathway": "embed", "stem_noise_sigma": null } ], "cross_layer_factor": 6, "cross_query_sharing": false, "cross_sequence_length": 128, "cross_share_query": false, "eps": 1e-08, "head_dim": 128, "hidden_dim": 3072, "input_dim": 32, "intermediate_ada_dim": 1280, "intermediate_dim": 8192, "model_type": "apollo", "num_heads": 24, "num_layers": 24, "patch_size": 2, "rope_base": 1000000, "rope_n_elem": 128, "sequence_length": 1152, "time_dim": 512, "torch_dtype": "bfloat16", "transformers_version": "4.42.4" }