{ "audio_encoder_config": { "n_mels": 80, "n_audio_ctx": 1500, "n_audio_state": 512, "n_audio_head": 8, "n_audio_layer": 6 }, "text_decoder_config": { "n_vocab": 51865, "n_text_ctx": 448, "n_text_state": 512, "n_text_head": 8, "n_text_layer": 6 } }