# NOTE: page residue captured together with this file (not part of the program):
# akhaliq3 / spaces demo / 2b7bf83 / raw / history blame / No virus / 6.66 kB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2019 Tomoki Hayashi
# MIT License (https://opensource.org/licenses/MIT)
"""Normalize feature files and dump them."""
import argparse
import logging
import os
import numpy as np
import yaml
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
from parallel_wavegan.datasets import AudioMelDataset
from parallel_wavegan.datasets import AudioMelSCPDataset
from parallel_wavegan.datasets import MelDataset
from parallel_wavegan.datasets import MelSCPDataset
from parallel_wavegan.utils import read_hdf5
from parallel_wavegan.utils import write_hdf5
def main():
    """Run preprocessing process.

    Parses the command line, loads the YAML configuration (its entries are
    overridden by the parsed command-line arguments), builds a dataset from
    either ``--rootdir`` or ``--feats-scp``, restores a ``StandardScaler``
    from the ``--stats`` file, and writes the normalized features into
    ``--dumpdir`` using the configured format. Unless ``--skip-wav-copy`` is
    given, the audio of each utterance is written next to its features.

    Raises:
        ValueError: If both or neither of ``--rootdir`` and ``--feats-scp``
            are specified, or if ``config["format"]`` is neither ``"hdf5"``
            nor ``"npy"``.

    """
    parser = argparse.ArgumentParser(
        description="Normalize dumped raw features (See detail in parallel_wavegan/bin/normalize.py)."
    )
    parser.add_argument(
        "--rootdir",
        default=None,
        type=str,
        help="directory including feature files to be normalized. "
        "you need to specify either *-scp or rootdir.",
    )
    parser.add_argument(
        "--wav-scp",
        default=None,
        type=str,
        help="kaldi-style wav.scp file. "
        "you need to specify either *-scp or rootdir.",
    )
    parser.add_argument(
        "--feats-scp",
        default=None,
        type=str,
        help="kaldi-style feats.scp file. "
        "you need to specify either *-scp or rootdir.",
    )
    parser.add_argument(
        "--segments",
        default=None,
        type=str,
        help="kaldi-style segments file.",
    )
    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump normalized feature files.",
    )
    parser.add_argument(
        "--stats",
        type=str,
        required=True,
        help="statistics file.",
    )
    parser.add_argument(
        "--skip-wav-copy",
        default=False,
        action="store_true",
        help="whether to skip the copy of wav files.",
    )
    parser.add_argument(
        "--config", type=str, required=True, help="yaml format configuration file."
    )
    parser.add_argument(
        "--verbose",
        type=int,
        default=1,
        help="logging level. higher is more logging. (default=1)",
    )
    args = parser.parse_args()

    # set logger: pick the level first so basicConfig is written only once
    if args.verbose > 1:
        log_level = logging.DEBUG
    elif args.verbose > 0:
        log_level = logging.INFO
    else:
        log_level = logging.WARN
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
    )
    if args.verbose <= 0:
        logging.warning("Skip DEBUG/INFO messages")

    # load config
    # NOTE(review): yaml.Loader can construct arbitrary Python objects, so only
    # trusted config files must be passed here. Consider yaml.safe_load if the
    # configs never rely on Python-specific tags.
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.Loader)
    # command-line arguments take precedence over entries of the config file
    config.update(vars(args))

    # check arguments: exactly one of --rootdir / --feats-scp must be given
    if (args.feats_scp is not None and args.rootdir is not None) or (
        args.feats_scp is None and args.rootdir is None
    ):
        raise ValueError("Please specify either --rootdir or --feats-scp.")

    # exist_ok avoids the check-then-create race when several jobs share dumpdir
    os.makedirs(args.dumpdir, exist_ok=True)

    # validate the format once instead of in every branch and loop iteration
    fmt = config["format"]
    if fmt not in ("hdf5", "npy"):
        raise ValueError("support only hdf5 or npy format.")

    copy_wav = not args.skip_wav_copy

    # get dataset
    if args.rootdir is not None:
        if fmt == "hdf5":
            audio_query, mel_query = "*.h5", "*.h5"
            audio_load_fn = lambda x: read_hdf5(x, "wave")  # NOQA
            mel_load_fn = lambda x: read_hdf5(x, "feats")  # NOQA
        else:
            audio_query, mel_query = "*-wave.npy", "*-feats.npy"
            audio_load_fn = np.load
            mel_load_fn = np.load
        if copy_wav:
            dataset = AudioMelDataset(
                root_dir=args.rootdir,
                audio_query=audio_query,
                mel_query=mel_query,
                audio_load_fn=audio_load_fn,
                mel_load_fn=mel_load_fn,
                return_utt_id=True,
            )
        else:
            dataset = MelDataset(
                root_dir=args.rootdir,
                mel_query=mel_query,
                mel_load_fn=mel_load_fn,
                return_utt_id=True,
            )
    else:
        if copy_wav:
            dataset = AudioMelSCPDataset(
                wav_scp=args.wav_scp,
                feats_scp=args.feats_scp,
                segments=args.segments,
                return_utt_id=True,
            )
        else:
            dataset = MelSCPDataset(
                feats_scp=args.feats_scp,
                return_utt_id=True,
            )
    logging.info(f"The number of files = {len(dataset)}.")

    # restore scaler
    scaler = StandardScaler()
    if fmt == "hdf5":
        scaler.mean_ = read_hdf5(args.stats, "mean")
        scaler.scale_ = read_hdf5(args.stats, "scale")
    else:
        # read the stats file once; index 0 is used as mean and index 1 as scale
        stats = np.load(args.stats)
        scaler.mean_ = stats[0]
        scaler.scale_ = stats[1]
    # from version 0.23.0, this information is needed
    scaler.n_features_in_ = scaler.mean_.shape[0]

    # process each file
    for items in tqdm(dataset):
        if copy_wav:
            utt_id, audio, mel = items
        else:
            utt_id, mel = items

        # normalize
        mel = scaler.transform(mel)

        # save
        if fmt == "hdf5":
            # features and audio of one utterance go into the same .h5 file
            h5_path = os.path.join(args.dumpdir, f"{utt_id}.h5")
            write_hdf5(h5_path, "feats", mel.astype(np.float32))
            if copy_wav:
                write_hdf5(h5_path, "wave", audio.astype(np.float32))
        else:
            np.save(
                os.path.join(args.dumpdir, f"{utt_id}-feats.npy"),
                mel.astype(np.float32),
                allow_pickle=False,
            )
            if copy_wav:
                np.save(
                    os.path.join(args.dumpdir, f"{utt_id}-wave.npy"),
                    audio.astype(np.float32),
                    allow_pickle=False,
                )
# Run the normalization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()