File size: 2,425 Bytes
7ee3434 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np
import torch
# ZERO = 1e-12
def gaussian_normalize_mel_channel(mel, mu, sigma):
    """Standardize a mel spectrogram per channel: ``(mel - mu) / sigma``.

    Args:
        mel: (n_mels, frame_len)
        mu: (n_mels,), per-channel mean value
        sigma: (n_mels,), per-channel standard deviation
    Return:
        Array like mel, shifted by ``mu`` and scaled by ``1 / sigma``.
    """
    # A trailing axis lets the (n_mels,) statistics broadcast over frame_len.
    mean_col = np.expand_dims(mu, axis=-1)
    std_col = np.expand_dims(sigma, axis=-1)
    centered = mel - mean_col
    return centered / std_col
def de_gaussian_normalize_mel_channel(mel, mu, sigma):
    """Undo per-channel standardization: ``sigma * mel + mu``.

    Args:
        mel: (n_mels, frame_len)
        mu: (n_mels,), per-channel mean value
        sigma: (n_mels,), per-channel standard deviation
    Return:
        Array like mel, rescaled by ``sigma`` and shifted by ``mu``.
    """
    # A trailing axis lets the (n_mels,) statistics broadcast over frame_len.
    mean_col = np.expand_dims(mu, axis=-1)
    std_col = np.expand_dims(sigma, axis=-1)
    rescaled = std_col * mel
    return rescaled + mean_col
def decompress(audio_compressed, bits):
    """Expand mu-law companded audio, with ``mu = 2**bits - 1``.

    Computes ``sign(y) / mu * ((1 + mu) ** |y| - 1)`` element-wise.

    Args:
        audio_compressed: companded audio values
        bits (int): bit depth that determines ``mu``
    Return:
        Expanded audio, same shape as the input.
    """
    mu = 2**bits - 1
    magnitude = np.abs(audio_compressed)
    expanded = (1 + mu) ** magnitude - 1
    return np.sign(audio_compressed) / mu * expanded
def compress(audio, bits):
    """Apply mu-law companding to audio, with ``mu = 2**bits - 1``.

    Computes ``sign(x) * log(1 + mu * |x|) / log(mu + 1)`` element-wise.

    Args:
        audio: audio values to compand
        bits (int): bit depth that determines ``mu``
    Return:
        Companded audio, same shape as the input.
    """
    mu = 2**bits - 1
    log_term = np.log(1 + mu * np.abs(audio))
    return np.sign(audio) * log_term / np.log(mu + 1)
def label_to_audio(quant, bits):
    """Map integer class labels back onto the ``[-1, 1]`` amplitude range.

    Label ``0`` maps to ``-1.0`` and label ``2**bits - 1`` maps to ``1.0``.

    Args:
        quant: class labels
        bits (int): data bits; there are ``2**bits`` classes
    Return:
        Audio values computed as ``2 * quant / (2**bits - 1) - 1``.
    """
    top_label = 2**bits - 1.0
    scaled = 2 * quant / top_label
    return scaled - 1.0
def audio_to_label(audio, bits):
    """Quantize normalized audio into integer class labels.

    The range ``[-1, 1]`` is split by ``2**bits`` evenly spaced bin edges and
    each sample is assigned the index of the last edge that is <= the sample.

    Args:
        audio (array-like): audio data, expected to lie in ``[-1, 1]``
        bits (int): data bits; there are ``2**bits`` classes
    Returns:
        array<int>: labels in ``[0, 2**bits - 1]``, same shape as ``audio``
    """
    classes = 2**bits
    # Evenly spaced bin edges from -1 to 1 (inclusive).
    bins = np.linspace(-1, 1, classes)
    # digitize yields 0 for samples below -1 and `classes` for samples >= 1.
    quant = np.digitize(audio, bins) - 1
    # Samples slightly below -1 would otherwise become label -1, which is not
    # a valid class (and silently wraps to the last class when used as an
    # index). Clamp so every label is usable.
    return np.clip(quant, 0, classes - 1)
def label_to_onehot(x, bits):
    """Convert integer class labels to a one-hot float tensor.

    Args:
        x: integer class labels (numpy array or torch tensor) of any shape,
            with values in ``[0, 2**bits - 1]``.
        bits (int): data bits; the number of classes is ``2**bits``.
    Returns:
        A ``torch.float32`` tensor of shape ``x.shape + (2**bits,)``. The
        classes axis is placed last.
    """
    classes = 2**bits
    labels = torch.as_tensor(x)
    output_shape = tuple(labels.shape) + (classes,)

    # Work on a flattened view so inputs of any rank are handled; a buffer
    # sized by x.shape[0] alone only matches the output shape for 1-D input.
    result = torch.zeros((labels.numel(), classes), dtype=torch.float32)
    flat = labels.reshape(-1).to(device=result.device, dtype=torch.long)

    # One vectorised assignment instead of a Python loop over every label.
    rows = torch.arange(flat.shape[0], device=result.device)
    result[rows, flat] = 1

    return torch.reshape(result, output_shape)
|