{ "auto_map": { "AutoConfig": "configuration_meralion.MERaLiONConfig" }, "head_dim": 256, "hidden_size": 3584, "intermediate_size": 14336, "model_type": "meralion", "num_attention_heads": 16, "num_hidden_layers": 42, "num_key_value_heads": 8, "sliding_window": 4096, "speech_config": { "_name_or_path": "openai/whisper-large-v3", "apply_spec_augment": true, "architectures": [ "WhisperForConditionalGeneration" ], "begin_suppress_tokens": [ 220, 50257 ], "bos_token_id": 50257, "d_model": 1280, "decoder_attention_heads": 20, "decoder_ffn_dim": 5120, "decoder_layers": 32, "decoder_start_token_id": 50258, "encoder_attention_heads": 20, "encoder_ffn_dim": 5120, "encoder_layers": 32, "eos_token_id": 50257, "mask_time_length": 20, "max_length": 448, "model_type": "meralion_speech_encoder", "num_hidden_layers": 32, "num_mel_bins": 128, "torch_dtype": "bfloat16", "vocab_size": 51866 }, "speech_mlp_scale_factor": 15, "speech_token_index": 255999, "text_config": { "_name_or_path": "aisingapore/gemma2-9b-cpt-sea-lionv3-instruct", "architectures": [ "Gemma2ForCausalLM" ], "eos_token_id": 107, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 3584, "intermediate_size": 14336, "model_type": "meralion_text_decoder", "num_hidden_layers": 42, "num_key_value_heads": 8, "query_pre_attn_scalar": 256, "sliding_window_size": 4096, "torch_dtype": "bfloat16" }, "transformers_version": "4.44.2" }