File size: 2,061 Bytes
93b9482 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import csv
from pathlib import Path
def main(args):
"""
`uid syn ref text`
"""
in_root = Path(args.generation_root).resolve()
ext = args.audio_format
with open(args.audio_manifest) as f, open(args.output_path, "w") as f_out:
reader = csv.DictReader(
f, delimiter="\t", quotechar=None, doublequote=False,
lineterminator="\n", quoting=csv.QUOTE_NONE
)
header = ["id", "syn", "ref", "text", "speaker"]
f_out.write("\t".join(header) + "\n")
for row in reader:
dir_name = f"{ext}_{args.sample_rate}hz_{args.vocoder}"
id_ = row["id"]
syn = (in_root / dir_name / f"{id_}.{ext}").as_posix()
ref = row["audio"]
if args.use_resynthesized_target:
ref = (in_root / f"{dir_name}_tgt" / f"{id_}.{ext}").as_posix()
sample = [id_, syn, ref, row["tgt_text"], row["speaker"]]
f_out.write("\t".join(sample) + "\n")
print(f"wrote evaluation file to {args.output_path}")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument(
"--generation-root", help="output directory for generate_waveform.py"
)
parser.add_argument(
"--audio-manifest",
help="used to determine the original utterance ID and text"
)
parser.add_argument(
"--output-path", help="path to output evaluation spec file"
)
parser.add_argument(
"--use-resynthesized-target", action="store_true",
help="use resynthesized reference instead of the original audio"
)
parser.add_argument("--vocoder", type=str, default="griffin_lim")
parser.add_argument("--sample-rate", type=int, default=22_050)
parser.add_argument("--audio-format", type=str, default="wav")
args = parser.parse_args()
main(args)
|