# Copyright 2022 The MT3 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Audio spectrogram functions."""

import dataclasses

from ddsp import spectral_ops
import tensorflow as tf

# Defaults for the spectrogram config.
DEFAULT_SAMPLE_RATE = 16000
DEFAULT_HOP_WIDTH = 128
DEFAULT_NUM_MEL_BINS = 512

# Fixed constants; add these to SpectrogramConfig before changing.
FFT_SIZE = 2048
MEL_LO_HZ = 20.0


@dataclasses.dataclass
class SpectrogramConfig:
    """Spectrogram configuration parameters.

    Declared as a dataclass so the annotated fields below become real
    constructor arguments, e.g. `SpectrogramConfig(hop_width=256)`.

    Attributes:
      sample_rate: Audio sample rate; defaults to DEFAULT_SAMPLE_RATE.
      hop_width: Used by `split_audio` as both frame length and frame step;
        defaults to DEFAULT_HOP_WIDTH.
      num_mel_bins: Number of mel bins requested from the spectrogram op;
        defaults to DEFAULT_NUM_MEL_BINS.
    """

    sample_rate: int = DEFAULT_SAMPLE_RATE
    hop_width: int = DEFAULT_HOP_WIDTH
    num_mel_bins: int = DEFAULT_NUM_MEL_BINS

    # NOTE(review): the two accessors below are kept as plain methods, as in
    # the visible source. If callers read them as attributes (no call
    # parentheses), they need `@property` -- confirm against call sites.

    def abbrev_str(self) -> str:
        """Return a compact tag listing only the non-default settings.

        Each field that differs from its module default contributes one
        piece: `sr<sample_rate>`, `hw<hop_width>`, `mb<num_mel_bins>`, in
        that order. A config made entirely of defaults yields ''.
        """
        parts = []
        if self.sample_rate != DEFAULT_SAMPLE_RATE:
            parts.append('sr%d' % self.sample_rate)
        if self.hop_width != DEFAULT_HOP_WIDTH:
            parts.append('hw%d' % self.hop_width)
        if self.num_mel_bins != DEFAULT_NUM_MEL_BINS:
            parts.append('mb%d' % self.num_mel_bins)
        return ''.join(parts)

    def frames_per_second(self) -> float:
        """Return `sample_rate / hop_width` (true division, so a float)."""
        return self.sample_rate / self.hop_width


def split_audio(samples, spectrogram_config):
    """Split audio into frames of `hop_width` samples each.

    Args:
      samples: Audio samples; passed unchanged to `tf.signal.frame`.
      spectrogram_config: Config object; only its `hop_width` is read.

    Returns:
      The output of `tf.signal.frame` for the given samples.
    """
    hop_width = spectrogram_config.hop_width
    # Frame length equals frame step, so consecutive frames neither overlap
    # nor skip samples. pad_end=True asks TF to pad the tail of the signal
    # instead of dropping a trailing partial frame.
    return tf.signal.frame(
        samples,
        frame_length=hop_width,
        frame_step=hop_width,
        pad_end=True)


def compute_spectrogram(samples, spectrogram_config):
    """Compute a mel spectrogram.

    Args:
      samples: Audio samples; passed unchanged to
        `spectral_ops.compute_logmel`.
      spectrogram_config: Config object supplying `hop_width`,
        `num_mel_bins` and `sample_rate`.

    Returns:
      The output of `spectral_ops.compute_logmel`, computed with the
      module-level FFT_SIZE and MEL_LO_HZ.
    """
    # compute_logmel takes an overlap fraction rather than a hop size, so
    # convert: the part of each FFT window not covered by the hop.
    overlap = 1 - (spectrogram_config.hop_width / FFT_SIZE)
    return spectral_ops.compute_logmel(
        samples,
        bins=spectrogram_config.num_mel_bins,
        lo_hz=MEL_LO_HZ,
        overlap=overlap,
        fft_size=FFT_SIZE,
        sample_rate=spectrogram_config.sample_rate)


def flatten_frames(frames):
    """Convert frames back into a flat array of samples.

    Args:
      frames: Tensor of frames; passed unchanged to `tf.reshape`.

    Returns:
      A 1-D tensor containing every element of `frames`.
    """
    # A target shape of [-1] collapses all dimensions into a single axis.
    return tf.reshape(frames, [-1])


def input_depth(spectrogram_config: 'SpectrogramConfig') -> int:
    """Return the config's `num_mel_bins`.

    Args:
      spectrogram_config: Config object; only `num_mel_bins` is read.

    Returns:
      The value of `spectrogram_config.num_mel_bins`.
    """
    return spectrogram_config.num_mel_bins