Yingxu He committed on
Commit
ee6fe1d
·
verified ·
1 Parent(s): aacf106

Upload processor

Browse files
preprocessor_config.json CHANGED
@@ -4,7 +4,7 @@
4
  },
5
  "chunk_length": 30,
6
  "feature_extractor_type": "WhisperFeatureExtractor",
7
- "feature_size": 128,
8
  "hop_length": 160,
9
  "n_fft": 400,
10
  "n_samples": 480000,
 
4
  },
5
  "chunk_length": 30,
6
  "feature_extractor_type": "WhisperFeatureExtractor",
7
+ "feature_size": 80,
8
  "hop_length": 160,
9
  "n_fft": 400,
10
  "n_samples": 480000,
processing_meralion.py CHANGED
@@ -46,7 +46,13 @@ class MERaLiONProcessor(ProcessorMixin):
46
  attributes = ["feature_extractor", "tokenizer"]
47
  feature_extractor_class = "WhisperFeatureExtractor"
48
  tokenizer_class = "GemmaTokenizer"
49
- valid_kwargs = ["fixed_speech_embeds_length", "speech_signature", "speech_token_index", "time_duration_limit", "do_normalize"]
 
 
 
 
 
 
50
 
51
  def __init__(
52
  self,
@@ -95,9 +101,9 @@ class MERaLiONProcessor(ProcessorMixin):
95
  audios: Union[np.ndarray, List[np.ndarray]] = None,
96
  padding: Union[bool, str, PaddingStrategy] = True,
97
  sampling_rate: Optional[int] = None,
98
- speech_signature = None,
99
- time_duration_limit = None,
100
- do_normalize = None,
101
  **kwargs,
102
  ) -> BatchFeature:
103
  """
@@ -125,6 +131,13 @@ class MERaLiONProcessor(ProcessorMixin):
125
  lengths).
126
  sampling_rate (`int`, defaults to 16000):
127
  The sampling rate at which the audio files should be digitized expressed in hertz (Hz).
 
 
 
 
 
 
 
128
  """
129
 
130
  if text is None:
 
46
  attributes = ["feature_extractor", "tokenizer"]
47
  feature_extractor_class = "WhisperFeatureExtractor"
48
  tokenizer_class = "GemmaTokenizer"
49
+ valid_kwargs = [
50
+ "fixed_speech_embeds_length",
51
+ "speech_signature",
52
+ "speech_token_index",
53
+ "time_duration_limit",
54
+ "do_normalize"
55
+ ]
56
 
57
  def __init__(
58
  self,
 
101
  audios: Union[np.ndarray, List[np.ndarray]] = None,
102
  padding: Union[bool, str, PaddingStrategy] = True,
103
  sampling_rate: Optional[int] = None,
104
+ speech_signature: Optional[str] = None,
105
+ time_duration_limit: Optional[int] = None,
106
+ do_normalize: Optional[bool] = None,
107
  **kwargs,
108
  ) -> BatchFeature:
109
  """
 
131
  lengths).
132
  sampling_rate (`int`, defaults to 16000):
133
  The sampling rate at which the audio files should be digitized expressed in hertz (Hz).
134
+ speech_signature (`str`, defaults to `<SpeechHere>`):
135
+ The special string marking the location of speech tokens.
136
+ time_duration_limit (`int`, defaults to -1):
137
+ The max input time duration in seconds.
138
+ do_normalize (`bool`, defaults to `True`):
139
+ Whether or not to zero-mean unit-variance normalize the input.
140
+ Normalizing can help to significantly improve the performance of the model.
141
  """
142
 
143
  if text is None:
tokenizer_config.json CHANGED
@@ -1987,7 +1987,7 @@
1987
  "special": false
1988
  },
1989
  "255999": {
1990
- "content": "<speech_token>",
1991
  "lstrip": false,
1992
  "normalized": false,
1993
  "rstrip": false,
@@ -1999,7 +1999,7 @@
1999
  "<end_of_turn>"
2000
  ],
2001
  "auto_map": {
2002
- "AutoProcessor": "processing_merlion.MERaLiONProcessor"
2003
  },
2004
  "bos_token": "<bos>",
2005
  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
 
1987
  "special": false
1988
  },
1989
  "255999": {
1990
+ "content": "<unused99>",
1991
  "lstrip": false,
1992
  "normalized": false,
1993
  "rstrip": false,
 
1999
  "<end_of_turn>"
2000
  ],
2001
  "auto_map": {
2002
+ "AutoProcessor": "processing_meralion.MERaLiONProcessor"
2003
  },
2004
  "bos_token": "<bos>",
2005
  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",