p-alonso committed on
Commit
c36f680
1 Parent(s): c6558f2

Upload feature extractor

Browse files
feature_extraction_maest.py CHANGED
@@ -99,12 +99,21 @@ class MAESTFeatureExtractor(SequenceFeatureExtractor):
99
  self.std = std
100
  self.return_attention_mask = return_attention_mask
101
 
102
- self.window = window_function(
 
 
 
 
 
 
 
 
 
103
  window_length=self.n_fft,
104
  name="hann",
105
- ).tolist()
106
 
107
- self.mel_fb = mel_filter_bank(
108
  num_frequency_bins=self.n_fft // 2 + 1,
109
  num_mel_filters=self.num_mel_bins,
110
  min_frequency=0,
@@ -112,24 +121,15 @@ class MAESTFeatureExtractor(SequenceFeatureExtractor):
112
  sampling_rate=self.sampling_rate,
113
  norm="slaney",
114
  mel_scale="slaney",
115
- ).tolist()
116
-
117
- def _extract_fbank_features(
118
- self,
119
- waveform: np.ndarray,
120
- max_length: int,
121
- ) -> np.ndarray:
122
- """
123
- Get mel-spectrogram features using audio_utils.
124
- """
125
 
126
  melspec = spectrogram(
127
  waveform,
128
- window=np.array(self.window),
129
  frame_length=self.n_fft,
130
  hop_length=self.hop_length,
131
  power=2,
132
- mel_filters=np.array(self.mel_fb),
133
  min_value=1e-30,
134
  mel_floor=1e-30,
135
  pad_mode="constant",
 
99
  self.std = std
100
  self.return_attention_mask = return_attention_mask
101
 
102
+ def _extract_fbank_features(
103
+ self,
104
+ waveform: np.ndarray,
105
+ max_length: int,
106
+ ) -> np.ndarray:
107
+ """
108
+ Get mel-spectrogram features using audio_utils.
109
+ """
110
+
111
+ window = window_function(
112
  window_length=self.n_fft,
113
  name="hann",
114
+ )
115
 
116
+ mel_fb = mel_filter_bank(
117
  num_frequency_bins=self.n_fft // 2 + 1,
118
  num_mel_filters=self.num_mel_bins,
119
  min_frequency=0,
 
121
  sampling_rate=self.sampling_rate,
122
  norm="slaney",
123
  mel_scale="slaney",
124
+ )
 
 
 
 
 
 
 
 
 
125
 
126
  melspec = spectrogram(
127
  waveform,
128
+ window=window,
129
  frame_length=self.n_fft,
130
  hop_length=self.hop_length,
131
  power=2,
132
+ mel_filters=mel_fb,
133
  min_value=1e-30,
134
  mel_floor=1e-30,
135
  pad_mode="constant",
preprocessor_config.json CHANGED
The diff for this file is too large to render. See raw diff