Spaces:
Runtime error
Runtime error
# Example script: evaluate TTS models with the objective metrics reported in
# the ZipVoice-Dialog paper.
#
# The script is organised in numbered stages; only stages N with
# stage <= N <= stop_stage are executed.

# Add project root to PYTHONPATH.
# Only append the previous value when it is non-empty: a trailing ':' would
# add an empty entry, which Python interprets as the current directory.
export PYTHONPATH="../../${PYTHONPATH:+:${PYTHONPATH}}"

# Strict mode, the script exits on:
#   -e 'error', -u 'undefined variable', -o pipefail 'error in pipeline'.
set -e
set -u
set -o pipefail

# First and last stage to run (inclusive).
stage=1
stop_stage=6

# Directory that receives the downloaded test sets and evaluation models.
download_dir=download/

# Uncomment this line to use HF mirror
# export HF_ENDPOINT=https://hf-mirror.com
# Stage 1: download the test-dialog archive from the Hugging Face dataset
# repository into ${download_dir} and unpack it there.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
  echo "Stage 1: Download test sets (test-dialog)"

  hf_repo=k2-fsa/TTS_eval_datasets
  mkdir -p "${download_dir}/"

  file=dialog_testset.tar.gz
  echo "Downloading ${file}..."
  # Positional arguments: repository id, then the single file to fetch.
  huggingface-cli download \
    --repo-type dataset \
    --local-dir "${download_dir}/" \
    "${hf_repo}" \
    "${file}"

  echo "Extracting ${file}..."
  tar -xzf "${download_dir}/${file}" -C "${download_dir}/"
fi
# Stage 2: download the models used by the evaluation stages into
# ${download_dir}/tts_eval_models.
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
  echo "Stage 2: Download all required evaluation models"

  # Define the repository id locally so this stage works when the script is
  # started at stage 2 (an unset hf_repo aborts under `set -u`) and does not
  # accidentally reuse a repository id assigned by another stage.
  # NOTE(review): repo id inferred from the tts_eval_models target directory;
  # confirm it is the intended Hugging Face model repository.
  hf_repo=k2-fsa/TTS_eval_models

  # `mkdir -p` also creates ${download_dir} itself when it is missing.
  mkdir -p "${download_dir}/tts_eval_models"

  huggingface-cli download \
    --local-dir "${download_dir}/tts_eval_models" \
    "${hf_repo}"
fi
# Stage 3: run the zipvoice_dialog model on both dialog test sets and write
# the generated output to results/<testset>.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
  echo "Stage 3: Inference with the pre-trained ZipVoice model from huggingface"

  for testset in test_dialog_en test_dialog_zh; do
    # Map the test-set name to its TSV list inside the extracted archive.
    case "${testset}" in
      test_dialog_en)
        test_tsv="${download_dir}/dialog_testset/en/test.tsv"
        ;;
      test_dialog_zh)
        test_tsv="${download_dir}/dialog_testset/zh/test.tsv"
        ;;
      *)
        echo "Error: unknown testset ${testset}" >&2
        exit 1
        ;;
    esac

    echo "Inference on testset ${testset}..."
    python3 -m zipvoice.bin.infer_zipvoice_dialog \
      --model-name zipvoice_dialog \
      --test-list "${test_tsv}" \
      --res-dir "results/${testset}"
  done
fi
# Stage 4: run the evaluation modules (cpsim, wer.dialog, wer.dialog --cpwer,
# utmos) on the English results in results/test_dialog_en.
if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
  echo "Stage 4: Evaluation on test-dialog-en"

  model_path="${download_dir}/tts_eval_models"
  wav_path=results/test_dialog_en
  test_tsv="${download_dir}/dialog_testset/en/test.tsv"

  python3 -m zipvoice.eval.speaker_similarity.cpsim \
    --wav-path "${wav_path}" \
    --test-list "${test_tsv}" \
    --model-dir "${model_path}"

  python3 -m zipvoice.eval.wer.dialog \
    --wav-path "${wav_path}" \
    --test-list "${test_tsv}" \
    --model-dir "${model_path}" \
    --lang en

  # cpWER mode: will only compute WER and cpWER
  # for speech less than 30s
  python3 -m zipvoice.eval.wer.dialog \
    --wav-path "${wav_path}" \
    --test-list "${test_tsv}" \
    --model-dir "${model_path}" \
    --lang en \
    --cpwer

  python3 -m zipvoice.eval.mos.utmos \
    --wav-path "${wav_path}" \
    --model-dir "${model_path}"
fi
# Stage 5: run the evaluation modules (cpsim, wer.dialog, utmos) on the
# Chinese results in results/test_dialog_zh.
if [ "${stage}" -le 5 ] && [ "${stop_stage}" -ge 5 ]; then
  echo "Stage 5: Evaluation on test-dialog-zh"

  model_path="${download_dir}/tts_eval_models"
  wav_path=results/test_dialog_zh
  test_tsv="${download_dir}/dialog_testset/zh/test.tsv"

  python3 -m zipvoice.eval.speaker_similarity.cpsim \
    --wav-path "${wav_path}" \
    --test-list "${test_tsv}" \
    --model-dir "${model_path}"

  python3 -m zipvoice.eval.wer.dialog \
    --wav-path "${wav_path}" \
    --test-list "${test_tsv}" \
    --model-dir "${model_path}" \
    --lang zh

  python3 -m zipvoice.eval.mos.utmos \
    --wav-path "${wav_path}" \
    --model-dir "${model_path}"
fi