|
|
|
|
|
|
|
|
|
|
|
|
|
import csv |
|
from pathlib import Path |
|
|
|
|
|
def main(args):
    """Write a TSV evaluation spec pairing synthesized audio with references.

    Reads the tab-separated manifest at ``args.audio_manifest`` (columns used:
    ``id``, ``audio``, ``tgt_text``, ``speaker``) and writes one row per
    utterance to ``args.output_path`` with the columns::

        id  syn  ref  text  speaker

    ``syn`` is ``<generation_root>/<ext>_<sample_rate>hz_<vocoder>/<id>.<ext>``.
    ``ref`` is the manifest's ``audio`` value, or the matching file under the
    ``..._tgt`` sibling directory when ``args.use_resynthesized_target`` is set.

    Args:
        args: namespace providing ``generation_root``, ``audio_manifest``,
            ``output_path``, ``use_resynthesized_target``, ``vocoder``,
            ``sample_rate`` and ``audio_format``.

    Raises:
        KeyError: if a manifest row lacks one of the columns listed above.
    """
    in_root = Path(args.generation_root).resolve()
    ext = args.audio_format

    # The directory names depend only on the arguments, so build them once
    # instead of once per manifest row.
    dir_name = f"{ext}_{args.sample_rate}hz_{args.vocoder}"
    syn_dir = in_root / dir_name
    resynth_dir = in_root / f"{dir_name}_tgt"

    # Explicit UTF-8 keeps non-ASCII transcripts independent of the locale's
    # default encoding.
    with open(args.audio_manifest, encoding="utf-8") as f, \
            open(args.output_path, "w", encoding="utf-8") as f_out:
        reader = csv.DictReader(
            f, delimiter="\t", quotechar=None, doublequote=False,
            lineterminator="\n", quoting=csv.QUOTE_NONE
        )
        header = ["id", "syn", "ref", "text", "speaker"]
        f_out.write("\t".join(header) + "\n")
        for row in reader:
            id_ = row["id"]
            file_name = f"{id_}.{ext}"
            syn = (syn_dir / file_name).as_posix()
            ref = row["audio"]
            if args.use_resynthesized_target:
                ref = (resynth_dir / file_name).as_posix()
            sample = [id_, syn, ref, row["tgt_text"], row["speaker"]]
            f_out.write("\t".join(sample) + "\n")
    print(f"wrote evaluation file to {args.output_path}")
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    import argparse

    parser = argparse.ArgumentParser()
    # The three path options have no usable default: main() passes them
    # straight to Path()/open(), so a missing value would only surface as a
    # TypeError. Marking them required makes argparse report it instead.
    parser.add_argument(
        "--generation-root", type=str, required=True,
        help="output directory for generate_waveform.py"
    )
    parser.add_argument(
        "--audio-manifest", type=str, required=True,
        help="used to determine the original utterance ID and text"
    )
    parser.add_argument(
        "--output-path", type=str, required=True,
        help="path to output evaluation spec file"
    )
    parser.add_argument(
        "--use-resynthesized-target", action="store_true",
        help="use resynthesized reference instead of the original audio"
    )
    # These three values are combined into the name of the directory under
    # --generation-root that main() points the synthesized paths at.
    parser.add_argument("--vocoder", type=str, default="griffin_lim")
    parser.add_argument("--sample-rate", type=int, default=22_050)
    parser.add_argument("--audio-format", type=str, default="wav")
    args = parser.parse_args()

    main(args)
|
|