Yingxu He committed on
Commit
ee6fe1d
·
verified ·
1 Parent(s): aacf106

Upload processor

Browse files
preprocessor_config.json CHANGED
@@ -4,7 +4,7 @@
4
  },
5
  "chunk_length": 30,
6
  "feature_extractor_type": "WhisperFeatureExtractor",
7
- "feature_size": 128,
8
  "hop_length": 160,
9
  "n_fft": 400,
10
  "n_samples": 480000,
 
4
  },
5
  "chunk_length": 30,
6
  "feature_extractor_type": "WhisperFeatureExtractor",
7
+ "feature_size": 80,
8
  "hop_length": 160,
9
  "n_fft": 400,
10
  "n_samples": 480000,
processing_meralion.py CHANGED
@@ -46,7 +46,13 @@ class MERaLiONProcessor(ProcessorMixin):
46
  attributes = ["feature_extractor", "tokenizer"]
47
  feature_extractor_class = "WhisperFeatureExtractor"
48
  tokenizer_class = "GemmaTokenizer"
49
- valid_kwargs = ["fixed_speech_embeds_length", "speech_signature", "speech_token_index", "time_duration_limit", "do_normalize"]
 
 
 
 
 
 
50
 
51
  def __init__(
52
  self,
@@ -95,9 +101,9 @@ class MERaLiONProcessor(ProcessorMixin):
95
  audios: Union[np.ndarray, List[np.ndarray]] = None,
96
  padding: Union[bool, str, PaddingStrategy] = True,
97
  sampling_rate: Optional[int] = None,
98
- speech_signature = None,
99
- time_duration_limit = None,
100
- do_normalize = None,
101
  **kwargs,
102
  ) -> BatchFeature:
103
  """
@@ -125,6 +131,13 @@ class MERaLiONProcessor(ProcessorMixin):
125
  lengths).
126
  sampling_rate (`int`, defaults to 16000):
127
  The sampling rate at which the audio files should be digitized expressed in hertz (Hz).
 
 
 
 
 
 
 
128
  """
129
 
130
  if text is None:
 
46
  attributes = ["feature_extractor", "tokenizer"]
47
  feature_extractor_class = "WhisperFeatureExtractor"
48
  tokenizer_class = "GemmaTokenizer"
49
+ valid_kwargs = [
50
+ "fixed_speech_embeds_length",
51
+ "speech_signature",
52
+ "speech_token_index",
53
+ "time_duration_limit",
54
+ "do_normalize"
55
+ ]
56
 
57
  def __init__(
58
  self,
 
101
  audios: Union[np.ndarray, List[np.ndarray]] = None,
102
  padding: Union[bool, str, PaddingStrategy] = True,
103
  sampling_rate: Optional[int] = None,
104
+ speech_signature: Optional[str] = None,
105
+ time_duration_limit: Optional[int] = None,
106
+ do_normalize: Optional[bool] = None,
107
  **kwargs,
108
  ) -> BatchFeature:
109
  """
 
131
  lengths).
132
  sampling_rate (`int`, defaults to 16000):
133
  The sampling rate at which the audio files should be digitized expressed in hertz (Hz).
134
+ speech_signature (`str`, defaults to `<SpeechHere>`):
135
+ The special string marking the location of speech tokens.
136
+ time_duration_limit (`int`, defaults to -1):
137
+ The max input time duration in seconds.
138
+ do_normalize (`bool`, defaults to `True`):
139
+ Whether or not to zero-mean unit-variance normalize the input.
140
+ Normalizing can help to significantly improve the performance of the model.
141
  """
142
 
143
  if text is None:
tokenizer_config.json CHANGED
@@ -1987,7 +1987,7 @@
1987
  "special": false
1988
  },
1989
  "255999": {
1990
- "content": "<speech_token>",
1991
  "lstrip": false,
1992
  "normalized": false,
1993
  "rstrip": false,
@@ -1999,7 +1999,7 @@
1999
  "<end_of_turn>"
2000
  ],
2001
  "auto_map": {
2002
- "AutoProcessor": "processing_merlion.MERaLiONProcessor"
2003
  },
2004
  "bos_token": "<bos>",
2005
  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
 
1987
  "special": false
1988
  },
1989
  "255999": {
1990
+ "content": "<unused99>",
1991
  "lstrip": false,
1992
  "normalized": false,
1993
  "rstrip": false,
 
1999
  "<end_of_turn>"
2000
  ],
2001
  "auto_map": {
2002
+ "AutoProcessor": "processing_meralion.MERaLiONProcessor"
2003
  },
2004
  "bos_token": "<bos>",
2005
  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",