#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Copyright 2019 Tomoki Hayashi
# MIT License (https://opensource.org/licenses/MIT)

"""Normalize feature files and dump them."""

import argparse
import logging
import os

import numpy as np
import yaml

from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

from parallel_wavegan.datasets import AudioMelDataset
from parallel_wavegan.datasets import AudioMelSCPDataset
from parallel_wavegan.datasets import MelDataset
from parallel_wavegan.datasets import MelSCPDataset
from parallel_wavegan.utils import read_hdf5
from parallel_wavegan.utils import write_hdf5


def main():
    """Run preprocessing process.

    Parse the command line, load the YAML configuration, restore the
    normalization statistics into a ``StandardScaler`` and write a normalized
    copy of every feature file (plus, unless ``--skip-wav-copy`` is given, the
    corresponding waveform) into ``--dumpdir``.

    Raises:
        ValueError: If neither or both of ``--rootdir`` and ``--feats-scp``
            are given, if ``--feats-scp`` is used without ``--wav-scp`` while
            waveforms are to be copied, or if ``config["format"]`` is neither
            ``"hdf5"`` nor ``"npy"``.

    """
    parser = argparse.ArgumentParser(
        description="Normalize dumped raw features (See detail in parallel_wavegan/bin/normalize.py)."
    )
    parser.add_argument(
        "--rootdir",
        default=None,
        type=str,
        help="directory including feature files to be normalized. "
        "you need to specify either *-scp or rootdir.",
    )
    parser.add_argument(
        "--wav-scp",
        default=None,
        type=str,
        help="kaldi-style wav.scp file. "
        "you need to specify either *-scp or rootdir.",
    )
    parser.add_argument(
        "--feats-scp",
        default=None,
        type=str,
        help="kaldi-style feats.scp file. "
        "you need to specify either *-scp or rootdir.",
    )
    parser.add_argument(
        "--segments",
        default=None,
        type=str,
        help="kaldi-style segments file.",
    )
    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump normalized feature files.",
    )
    parser.add_argument(
        "--stats",
        type=str,
        required=True,
        help="statistics file.",
    )
    parser.add_argument(
        "--skip-wav-copy",
        default=False,
        action="store_true",
        help="whether to skip the copy of wav files.",
    )
    parser.add_argument(
        "--config", type=str, required=True, help="yaml format configuration file."
    )
    parser.add_argument(
        "--verbose",
        type=int,
        default=1,
        help="logging level. higher is more logging. (default=1)",
    )
    args = parser.parse_args()

    # set logger (one basicConfig call; only the level depends on --verbose)
    log_format = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    if args.verbose > 1:
        log_level = logging.DEBUG
    elif args.verbose > 0:
        log_level = logging.INFO
    else:
        log_level = logging.WARN
    logging.basicConfig(level=log_level, format=log_format)
    if args.verbose <= 0:
        logging.warning("Skip DEBUG/INFO messages")

    # load config
    # NOTE(security): yaml.Loader is the full (unsafe) PyYAML loader and can
    # construct arbitrary Python objects. Only pass trusted config files here.
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.Loader)
    # command line arguments take precedence over values from the yaml file
    config.update(vars(args))

    # check arguments: exactly one of --rootdir / --feats-scp must be given
    if (args.feats_scp is not None and args.rootdir is not None) or (
        args.feats_scp is None and args.rootdir is None
    ):
        raise ValueError("Please specify either --rootdir or --feats-scp.")
    # In scp mode with wav copy enabled, wav_scp is forwarded to
    # AudioMelSCPDataset and the audio is unpacked from every item below, so
    # fail early with a clear message instead of forwarding None.
    if (
        args.rootdir is None
        and not args.skip_wav_copy
        and args.wav_scp is None
    ):
        raise ValueError(
            "Please specify --wav-scp together with --feats-scp, "
            "or use --skip-wav-copy."
        )

    # check directory existence (exist_ok avoids the check-then-create race)
    os.makedirs(args.dumpdir, exist_ok=True)

    # validate the feature format once; every later branch relies on it
    feat_format = config["format"]
    if feat_format not in ("hdf5", "npy"):
        raise ValueError("support only hdf5 or npy format.")

    copy_wav = not args.skip_wav_copy

    # get dataset
    if args.rootdir is not None:
        if feat_format == "hdf5":
            # wave and feats live in the same .h5 file under different keys
            audio_query, mel_query = "*.h5", "*.h5"

            def audio_load_fn(path):
                return read_hdf5(path, "wave")

            def mel_load_fn(path):
                return read_hdf5(path, "feats")

        else:  # "npy"
            audio_query, mel_query = "*-wave.npy", "*-feats.npy"
            audio_load_fn = np.load
            mel_load_fn = np.load

        if copy_wav:
            dataset = AudioMelDataset(
                root_dir=args.rootdir,
                audio_query=audio_query,
                mel_query=mel_query,
                audio_load_fn=audio_load_fn,
                mel_load_fn=mel_load_fn,
                return_utt_id=True,
            )
        else:
            dataset = MelDataset(
                root_dir=args.rootdir,
                mel_query=mel_query,
                mel_load_fn=mel_load_fn,
                return_utt_id=True,
            )
    else:
        if copy_wav:
            dataset = AudioMelSCPDataset(
                wav_scp=args.wav_scp,
                feats_scp=args.feats_scp,
                segments=args.segments,
                return_utt_id=True,
            )
        else:
            dataset = MelSCPDataset(
                feats_scp=args.feats_scp,
                return_utt_id=True,
            )
    logging.info(f"The number of files = {len(dataset)}.")

    # restore scaler
    scaler = StandardScaler()
    if feat_format == "hdf5":
        scaler.mean_ = read_hdf5(args.stats, "mean")
        scaler.scale_ = read_hdf5(args.stats, "scale")
    else:  # "npy"
        # load the statistics file once; index 0 is the mean, index 1 the scale
        stats = np.load(args.stats)
        scaler.mean_ = stats[0]
        scaler.scale_ = stats[1]
    # from version 0.23.0, this information is needed
    scaler.n_features_in_ = scaler.mean_.shape[0]

    # process each file
    for items in tqdm(dataset):
        if copy_wav:
            utt_id, audio, mel = items
        else:
            utt_id, mel = items

        # normalize
        mel = scaler.transform(mel)

        # save
        if feat_format == "hdf5":
            # feats and wave are written into the same .h5 file
            out_path = os.path.join(args.dumpdir, f"{utt_id}.h5")
            write_hdf5(out_path, "feats", mel.astype(np.float32))
            if copy_wav:
                write_hdf5(out_path, "wave", audio.astype(np.float32))
        else:  # "npy"
            np.save(
                os.path.join(args.dumpdir, f"{utt_id}-feats.npy"),
                mel.astype(np.float32),
                allow_pickle=False,
            )
            if copy_wav:
                np.save(
                    os.path.join(args.dumpdir, f"{utt_id}-wave.npy"),
                    audio.astype(np.float32),
                    allow_pickle=False,
                )


if __name__ == "__main__":
    main()