MERaLiON
/

MERaLiON-AudioLLM-Whisper-SEA-LION

@@ -1,10 +1,7 @@
 {
-  "architectures": [
-    "MERaLiONForConditionalGeneration"
-  ],
   "auto_map": {
-    "AutoConfig": "configuration_meralion.MERaLiONConfig",
-    "AutoModelForSpeechSeq2Seq": "modeling_meralion.MERaLiONForConditionalGeneration"
   },
   "head_dim": 256,
   "hidden_size": 3584,
@@ -15,7 +12,8 @@
   "num_key_value_heads": 8,
   "sliding_window": 4096,
   "speech_config": {
-    "_name_or_path": "openai/whisper-large-v3",
     "apply_spec_augment": true,
     "architectures": [
       "WhisperForConditionalGeneration"
@@ -34,17 +32,122 @@
     "encoder_ffn_dim": 5120,
     "encoder_layers": 32,
     "eos_token_id": 50257,
     "mask_time_length": 20,
     "max_length": 448,
     "model_type": "meralion_speech_encoder",
     "num_hidden_layers": 32,
-    "num_mel_bins": 128,
-    "torch_dtype": "bfloat16",
-    "vocab_size": 51866
   },
   "speech_mlp_scale_factor": 15,
   "speech_token_index": 255999,
   "text_config": {
     "_name_or_path": "aisingapore/gemma2-9b-cpt-sea-lionv3-instruct",
     "architectures": [
       "Gemma2ForCausalLM"
@@ -60,6 +163,5 @@
     "sliding_window_size": 4096,
     "torch_dtype": "bfloat16"
   },
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.44.2"
 }

 {
+  "_attn_implementation_autoset": true,
   "auto_map": {
+    "AutoConfig": "configuration_meralion.MERaLiONConfig"
   },
   "head_dim": 256,
   "hidden_size": 3584,
   "num_key_value_heads": 8,
   "sliding_window": 4096,
   "speech_config": {
+    "_attn_implementation_autoset": true,
+    "_name_or_path": "openai/whisper-large-v2",
     "apply_spec_augment": true,
     "architectures": [
       "WhisperForConditionalGeneration"
     "encoder_ffn_dim": 5120,
     "encoder_layers": 32,
     "eos_token_id": 50257,
+    "forced_decoder_ids": [
+      [
+        1,
+        50259
+      ],
+      [
+        2,
+        50359
+      ],
+      [
+        3,
+        50363
+      ]
+    ],
     "mask_time_length": 20,
     "max_length": 448,
     "model_type": "meralion_speech_encoder",
     "num_hidden_layers": 32,
+    "num_mel_bins": 80,
+    "pad_token_id": 50257,
+    "suppress_tokens": [
+      1,
+      2,
+      7,
+      8,
+      9,
+      10,
+      14,
+      25,
+      26,
+      27,
+      28,
+      29,
+      31,
+      58,
+      59,
+      60,
+      61,
+      62,
+      63,
+      90,
+      91,
+      92,
+      93,
+      359,
+      503,
+      522,
+      542,
+      873,
+      893,
+      902,
+      918,
+      922,
+      931,
+      1350,
+      1853,
+      1982,
+      2460,
+      2627,
+      3246,
+      3253,
+      3268,
+      3536,
+      3846,
+      3961,
+      4183,
+      4667,
+      6585,
+      6647,
+      7273,
+      9061,
+      9383,
+      10428,
+      10929,
+      11938,
+      12033,
+      12331,
+      12562,
+      13793,
+      14157,
+      14635,
+      15265,
+      15618,
+      16553,
+      16604,
+      18362,
+      18956,
+      20075,
+      21675,
+      22520,
+      26130,
+      26161,
+      26435,
+      28279,
+      29464,
+      31650,
+      32302,
+      32470,
+      36865,
+      42863,
+      47425,
+      49870,
+      50254,
+      50258,
+      50358,
+      50359,
+      50360,
+      50361,
+      50362
+    ],
+    "torch_dtype": "bfloat16"
   },
   "speech_mlp_scale_factor": 15,
   "speech_token_index": 255999,
   "text_config": {
+    "_attn_implementation_autoset": true,
     "_name_or_path": "aisingapore/gemma2-9b-cpt-sea-lionv3-instruct",
     "architectures": [
       "Gemma2ForCausalLM"
     "sliding_window_size": 4096,
     "torch_dtype": "bfloat16"
   },
+  "transformers_version": "4.46.3"
 }