{ "audio_encoder_config": { "n_mels": 80, "n_audio_ctx": 1500, "n_audio_state": 384, "n_audio_head": 6, "n_audio_layer": 4 }, "text_decoder_config": { "n_vocab": 51864, "n_text_ctx": 448, "n_text_state": 384, "n_text_head": 6, "n_text_layer": 4 } }