from functools import partial
from multiprocessing.pool import Pool
from pathlib import Path

import numpy as np
from tqdm import tqdm

from speaker_encoder import inference as encoder
| | |
| | |
| | |
| | |
| |
|
| |
|
def embed_utterance(fpaths, encoder_model_fpath):
    """Compute the speaker embedding for one utterance and write it to disk.

    Args:
        fpaths: a (wav_fpath, embed_fpath) pair — the preprocessed wav .npy to
            read, and the path the embedding .npy is written to.
        encoder_model_fpath: path to the encoder weights; loaded lazily so each
            Pool worker process initializes its own model copy on first call.
    """
    # Lazy per-process model load: workers spawned by Pool start with no
    # model in memory, so the first utterance in each worker triggers this.
    if not encoder.is_loaded():
        encoder.load_model(encoder_model_fpath)

    wav_path, out_path = fpaths
    audio = encoder.preprocess_wav(np.load(wav_path))
    embedding = encoder.embed_utterance(audio)
    np.save(out_path, embedding, allow_pickle=False)
| | |
| | |
def create_embeddings(outdir_root: Path, wav_dir: Path, encoder_model_fpath: Path, n_processes: int):
    """Compute speaker embeddings for every utterance listed in train.txt.

    Fixes: the original body referenced an undefined name ``synthesizer_root``
    (a guaranteed NameError) — it now consistently uses the ``outdir_root``
    parameter. The worker Pool is also closed via a context manager instead of
    being leaked.

    Args:
        outdir_root: dataset root; must contain ``audio/`` and ``train.txt``.
            An ``embeds/`` directory is created under it for the output.
        wav_dir: ignored — immediately overwritten with ``outdir_root/audio``
            (parameter kept for interface compatibility with existing callers).
        encoder_model_fpath: encoder weights path, forwarded to each worker.
        n_processes: number of worker processes for the Pool.
    """
    wav_dir = outdir_root.joinpath("audio")
    metadata_fpath = outdir_root.joinpath("train.txt")
    assert wav_dir.exists() and metadata_fpath.exists()
    embed_dir = outdir_root.joinpath("embeds")
    embed_dir.mkdir(exist_ok=True)

    # train.txt rows are pipe-delimited; field 0 is the wav filename and
    # field 2 the embedding filename (presumed from the indexing — confirm
    # against the synthesizer preprocessing that writes train.txt).
    with metadata_fpath.open("r") as metadata_file:
        metadata = [line.split("|") for line in metadata_file]
        fpaths = [(wav_dir.joinpath(m[0]), embed_dir.joinpath(m[2])) for m in metadata]

    # Each worker lazily loads the encoder model on its first utterance.
    func = partial(embed_utterance, encoder_model_fpath=encoder_model_fpath)
    with Pool(n_processes) as pool:
        # list() drains the lazy imap iterator so tqdm shows real progress.
        list(tqdm(pool.imap(func, fpaths), "Embedding", len(fpaths), unit="utterances"))