|
import torch |
|
from tqdm import tqdm |
|
from multiprocessing import Pool |
|
from mel_processing import spectrogram_torch, mel_spectrogram_torch |
|
from utils import load_wav_to_torch |
|
|
|
|
|
class AudioProcessor:
    """Compute spectrograms for audio files and cache them next to the audio.

    The constructor stores the STFT / mel parameters; ``process_audio`` uses
    them to call ``spectrogram_torch`` or ``mel_spectrogram_torch`` depending
    on ``use_mel_spec_posterior``.
    """

    def __init__(
        self,
        max_wav_value,
        use_mel_spec_posterior,
        filter_length,
        n_mel_channels,
        sampling_rate,
        hop_length,
        win_length,
        mel_fmin,
        mel_fmax,
    ):
        """Store the processing parameters unchanged on the instance.

        Args:
            max_wav_value: Divisor applied to the raw samples in
                ``process_audio``.
            use_mel_spec_posterior: When true, mel spectrograms are produced
                and cached as ``.mel.pt``; otherwise ``.spec.pt`` is used.
            filter_length: Forwarded to the spectrogram helpers.
            n_mel_channels: Forwarded to ``mel_spectrogram_torch`` only.
            sampling_rate: Forwarded to the spectrogram helpers.
            hop_length: Forwarded to the spectrogram helpers.
            win_length: Forwarded to the spectrogram helpers.
            mel_fmin: Forwarded to ``mel_spectrogram_torch`` only.
            mel_fmax: Forwarded to ``mel_spectrogram_torch`` only.
        """
        self.max_wav_value = max_wav_value
        self.use_mel_spec_posterior = use_mel_spec_posterior
        self.filter_length = filter_length
        self.n_mel_channels = n_mel_channels
        self.sampling_rate = sampling_rate
        self.hop_length = hop_length
        self.win_length = win_length
        self.mel_fmin = mel_fmin
        self.mel_fmax = mel_fmax

    def _spec_path(self, filename):
        """Return the cache path for *filename*'s spectrogram."""
        spec_filename = filename.replace(".wav", ".spec.pt")
        if self.use_mel_spec_posterior:
            spec_filename = spec_filename.replace(".spec.pt", ".mel.pt")
        if spec_filename == filename:
            # No ".wav" in the path, so the replacement was a no-op. Saving to
            # this path would overwrite the source audio; append the cache
            # suffix instead.
            suffix = ".mel.pt" if self.use_mel_spec_posterior else ".spec.pt"
            spec_filename = filename + suffix
        return spec_filename

    def _compute_spec(self, audio_norm):
        """Compute the (mel) spectrogram of *audio_norm* with the stored parameters."""
        if self.use_mel_spec_posterior:
            return mel_spectrogram_torch(
                audio_norm,
                self.filter_length,
                self.n_mel_channels,
                self.sampling_rate,
                self.hop_length,
                self.win_length,
                self.mel_fmin,
                self.mel_fmax,
                center=False,
            )
        return spectrogram_torch(
            audio_norm,
            self.filter_length,
            self.sampling_rate,
            self.hop_length,
            self.win_length,
            center=False,
        )

    def process_audio(self, filename):
        """Load *filename*, and load or compute-and-cache its spectrogram.

        Args:
            filename: Path to the audio file, passed to ``load_wav_to_torch``.

        Returns:
            A ``(spec, audio_norm)`` tuple. ``audio_norm`` is the loaded audio
            divided by ``max_wav_value`` with a new leading dimension added.
            ``spec`` is either the object loaded from the cache file or the
            freshly computed spectrogram with dimension 0 squeezed.
        """
        # NOTE(review): the sampling rate reported by the loader is not
        # compared against self.sampling_rate - confirm inputs are pre-resampled.
        audio, _file_sampling_rate = load_wav_to_torch(filename)
        audio_norm = (audio / self.max_wav_value).unsqueeze(0)

        spec_filename = self._spec_path(filename)
        try:
            spec = torch.load(spec_filename)
        except Exception:
            # Missing or unreadable cache: recompute and (re)write it.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still stop the run.
            spec = torch.squeeze(self._compute_spec(audio_norm), 0)
            torch.save(spec, spec_filename)
        return spec, audio_norm
|
|
|
|
|
|
|
# Module-level so worker processes can resolve it when the bound method
# ``processor.process_audio`` is sent to them.
processor = AudioProcessor(
    max_wav_value=32768.0,
    use_mel_spec_posterior=False,
    filter_length=2048,
    n_mel_channels=128,
    sampling_rate=44100,
    hop_length=512,
    win_length=2048,
    mel_fmin=0.0,
    # Was the string "null" (a JSON null copied verbatim); None is the
    # Python equivalent. Unused while use_mel_spec_posterior is False.
    mel_fmax=None,
)


def main():
    """Pre-compute spectrograms for every file listed in ``filelists/train.list``.

    Each non-blank line contributes the text before its first ``|`` as a file
    path. Paths are processed by a pool of 32 worker processes with a progress
    bar; the values returned by ``process_audio`` are discarded.
    """
    with open("filelists/train.list", "r", encoding="utf-8") as f:
        # Strip first so a line without "|" does not keep its trailing
        # newline, and skip blank lines instead of treating them as paths.
        filepaths = [line.strip().split("|")[0] for line in f if line.strip()]

    with Pool(processes=32) as pool:
        with tqdm(total=len(filepaths)) as pbar:
            for _ in pool.imap_unordered(processor.process_audio, filepaths):
                pbar.update()


# Guard the entry point: with the "spawn" start method each worker re-imports
# this module, and an unguarded Pool at import time would recurse.
if __name__ == "__main__":
    main()
|
|