diff --git a/ParallelWaveGAN/.github/FUNDING.yml b/ParallelWaveGAN/.github/FUNDING.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d780f47ca5115bd8d2ce1dff4657f6453fe82b12
--- /dev/null
+++ b/ParallelWaveGAN/.github/FUNDING.yml
@@ -0,0 +1 @@
+github: kan-bayashi
diff --git a/ParallelWaveGAN/.github/workflows/ci.yaml b/ParallelWaveGAN/.github/workflows/ci.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1a333bdc461bf698e32cad7d1a1cc43b9943c90f
--- /dev/null
+++ b/ParallelWaveGAN/.github/workflows/ci.yaml
@@ -0,0 +1,97 @@
+name: CI
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+  schedule:
+    - cron: 0 0 * * 1
+
+jobs:
+  linter_and_test:
+    runs-on: ubuntu-20.04
+    strategy:
+      max-parallel: 5
+      matrix:
+        python-version: [3.6]
+        # 1.6 fails on cpu: https://github.com/kan-bayashi/ParallelWaveGAN/issues/198
+        pytorch-version: [1.4, 1.5.1, 1.7.1, 1.8.1, 1.9]
+    steps:
+      - uses: actions/checkout@master
+      - uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+          architecture: 'x64'
+      - uses: actions/cache@v2
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.pytorch-version }}-pip-${{ hashFiles('**/setup.py') }}
+          restore-keys: |
+            ${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.pytorch-version }}-pip-
+      - name: Install dependencies
+        run: |
+          sudo apt-get install libsndfile-dev
+          # make python env
+          cd tools; make CUDA_VERSION="" PYTHON=python${{ matrix.python-version }} PYTORCH_VERSION=${{ matrix.pytorch-version }}
+          # install shell check
+          wget https://github.com/koalaman/shellcheck/releases/download/stable/shellcheck-stable.linux.x86_64.tar.xz
+          tar -xvf shellcheck-stable.linux.x86_64.tar.xz
+      - name: ShellCheck
+        run: |
+          export PATH=shellcheck-stable:$PATH
+          find egs -name "*.sh" | grep -v path.sh | while read line; do shellcheck -x --shell=bash -P $(dirname $line) ${line}; done
+      - name: Black & Flake8
+        run: |
+          source tools/venv/bin/activate
+          black --diff parallel_wavegan
+          flake8 parallel_wavegan
+          flake8 --extend-ignore=D test
+      - name: Pytest
+        run: |
+          source tools/venv/bin/activate
+          pytest test
+
+  integration:
+    runs-on: ubuntu-20.04
+    strategy:
+      max-parallel: 10
+      matrix:
+        python-version: [3.7]
+        pytorch-version: [1.9]
+        config:
+          - "parallel_wavegan.v1.debug.yaml"
+          - "melgan.v1.debug.yaml"
+          - "melgan.v3.debug.yaml"
+          - "multi_band_melgan.v1.debug.yaml"
+          - "parallel_wavegan.v1.debug.npy.yaml"
+          - "parallel_wavegan.v1.debug.diff_fs.yaml"
+          - "hifigan.v1.debug.yaml"
+          - "style_melgan.v1.debug.yaml"
+    steps:
+      - uses: actions/checkout@master
+      - uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+          architecture: 'x64'
+      - uses: actions/cache@v2
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.pytorch-version }}-pip-${{ hashFiles('**/setup.py') }}
+          restore-keys: |
+            ${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.pytorch-version }}-pip-
+      - name: Install dependencies
+        run: |
+          sudo apt-get install libsndfile-dev jq
+          # make python env
+          cd tools; make CUDA_VERSION="" PYTHON=python${{ matrix.python-version }} PYTORCH_VERSION=${{ matrix.pytorch-version }}
+      - name: Integration
+        run: |
+          cd egs/yesno/voc1 && ./run.sh --conf conf/${{ matrix.config }}
+      - uses: actions/upload-artifact@v1
+        if: failure()
+        with:
+          name: artifacts-${{ matrix.config }}
+          path: egs/yesno/voc1
diff --git a/ParallelWaveGAN/.gitignore b/ParallelWaveGAN/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..80f35e4c3ec6653bc3cde6b0c89b9c215730c844
--- /dev/null
+++ b/ParallelWaveGAN/.gitignore
@@ -0,0 +1,36 @@
+# general
+*~
+*.pyc
+\#*\#
+.\#*
+*DS_Store
+out.txt
+parallel_wavegan.egg-info/
+doc/_build
+slurm-*.out
+tmp*
+.eggs/
+.hypothesis/
+.idea
+.backup/
+.pytest_cache/
+__pycache__/
+.coverage*
+coverage.xml*
+.vscode*
+.nfs*
+.ipynb_checkpoints
+.d000*
+*.out
+*.err
+
+# recipe related
+egs/*/*/data
+egs/*/*/downloads
+egs/*/*/dump
+egs/*/*/exp
+egs/*/*/conf/tuning
+
+# tools related
+tools/venv/
+tools/apex/
diff --git a/ParallelWaveGAN/LICENSE b/ParallelWaveGAN/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..1ac590bf4864dbf3bf32f59709dc8ea87e8cfb02
--- /dev/null
+++ b/ParallelWaveGAN/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2020 Tomoki Hayashi
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/ParallelWaveGAN/egs/README.md b/ParallelWaveGAN/egs/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..197444c11e73febf0da40a5db49f9908dfe598f3
--- /dev/null
+++ b/ParallelWaveGAN/egs/README.md
@@ -0,0 +1,165 @@
+# Kaldi-style all-in-one recipes
+
+This repository provides [Kaldi](https://github.com/kaldi-asr/kaldi)-style recipes, in the same manner as [ESPnet](https://github.com/espnet/espnet).
+Currently, the following recipes are supported.
+
+- [LJSpeech](https://keithito.com/LJ-Speech-Dataset/): English female speaker
+- [JSUT](https://sites.google.com/site/shinnosuketakamichi/publication/jsut): Japanese female speaker
+- [JSSS](https://sites.google.com/site/shinnosuketakamichi/research-topics/jsss_corpus): Japanese female speaker
+- [CSMSC](https://www.data-baker.com/open_source.html): Mandarin female speaker
+- [CMU Arctic](http://www.festvox.org/cmu_arctic/): English speakers
+- [JNAS](http://research.nii.ac.jp/src/en/JNAS.html): Japanese multi-speaker
+- [VCTK](https://homepages.inf.ed.ac.uk/jyamagis/page3/page58/page58.html): English multi-speaker
+- [LibriTTS](https://arxiv.org/abs/1904.02882): English multi-speaker
+- [YesNo](https://arxiv.org/abs/1904.02882): English speaker (For debugging)
+
+
+## How to run the recipe
+
+```bash
+# Move to the recipe directory
+$ cd egs/ljspeech/voc1
+
+# Run the recipe from scratch
+$ ./run.sh
+
+# You can change the config via the command line
+$ ./run.sh --conf <your_customized_config>.yaml
+
+# You can select the stage to start and stop
+$ ./run.sh --stage 2 --stop_stage 2
+
+# If you want to specify the GPU
+$ CUDA_VISIBLE_DEVICES=1 ./run.sh --stage 2
+
+# If you want to resume training from a 10000-step checkpoint
+$ ./run.sh --stage 2 --resume <path>/<to>/checkpoint-10000steps.pkl
+```
+
+You can check the command line options in `run.sh`.
+
+The integration with job schedulers such as [slurm](https://slurm.schedmd.com/documentation.html) can be done via `cmd.sh` and `conf/slurm.conf`.
+If you want to use them, please check [this page](https://kaldi-asr.org/doc/queue.html).
+
+All of the hyperparameters are written in a single yaml-format configuration file.
+Please check [this example](https://github.com/kan-bayashi/ParallelWaveGAN/blob/master/egs/ljspeech/voc1/conf/parallel_wavegan.v1.yaml) in the ljspeech recipe.
+
+You can monitor the training progress via tensorboard.
+
+```bash
+$ tensorboard --logdir exp
+```
+
+![](https://user-images.githubusercontent.com/22779813/68100080-58bbc500-ff09-11e9-9945-c835186fd7c2.png)
+
+If you want to accelerate the training, you can try distributed multi-gpu training based on apex.
+You need apex for distributed training, so please make sure it is already installed.
+Then you can run distributed multi-gpu training via the following command:
+
+```bash
+# in the case of the number of gpus = 8
+$ CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" ./run.sh --stage 2 --n_gpus 8
+```
+
+In the case of distributed training, the batch size will be automatically multiplied by the number of gpus.
+Please be careful.
+
+## How to make the recipe for your own dataset
+
+Here, I will show how to make the recipe for your own dataset.
+
+1. Set up your dataset to have the following structure.
+
+    ```bash
+    # For single-speaker case
+    $ tree /path/to/database
+    /path/to/database
+    ├── utt_1.wav
+    ├── utt_2.wav
+    │   ...
+    └── utt_N.wav
+    # The directory can be nested, but each filename must be unique
+
+    # For multi-speaker case
+    $ tree /path/to/database
+    /path/to/database
+    ├── spk_1
+    │   ├── utt1.wav
+    ├── spk_2
+    │   ├── utt1.wav
+    │   ...
+    └── spk_N
+        ├── utt1.wav
+        ...
+    # The directory under each speaker can be nested, but each filename in each speaker directory must be unique
+    ```
+
+2. Copy the template directory.
+
+    ```bash
+    cd egs
+
+    # For single-speaker case
+    cp -r template_single_spk <new_recipe_name>
+
+    # For multi-speaker case
+    cp -r template_multi_spk <new_recipe_name>
+
+    # Move to your recipe directory
+    cd egs/<new_recipe_name>/voc1
+    ```
+
+3. Modify the options in `run.sh`.
+    What you need to change at least in `run.sh` is as follows:
+    - `db_root`: Root path of the database.
+    - `num_dev`: The number of utterances for the development set.
+    - `num_eval`: The number of utterances for the evaluation set.
+
+4. Modify the hyperparameters in `conf/parallel_wavegan.v1.yaml`.
+    What you need to change at least in the config is as follows:
+    - `sampling_rate`: If you specify a sampling rate lower than that of the original audio, the audio will be downsampled by sox.
+
+5. (Optional) Change the command backend in `cmd.sh`.
+    If you are not familiar with Kaldi and run in your local environment, you do not need to change it.
+    See https://kaldi-asr.org/doc/queue.html for more information.
+
+6. Run your recipe.
+
+    ```bash
+    # Run all stages from the first stage
+    ./run.sh
+
+    # If you want to specify the CUDA device
+    CUDA_VISIBLE_DEVICES=0 ./run.sh
+    ```
+
+If you want to try the other advanced models, please check the config files in `egs/ljspeech/voc1/conf`.
+
+## Run training using an ESPnet2-TTS recipe within 5 minutes
+
+Make sure you have already finished the ESPnet2-TTS recipe experiments (at least started the training).
+
+```bash
+cd egs
+
+# Please use the single-speaker template for both single- and multi-speaker cases
+cp -r template_single_spk <new_recipe_name>
+
+# Move to your recipe directory
+cd egs/<new_recipe_name>/voc1
+
+# Make symlinks to the data directories (better to use absolute paths)
+mkdir dump data
+ln -s /path/to/espnet/egs2/<recipe_name>/tts1/dump/raw dump/
+ln -s /path/to/espnet/egs2/<recipe_name>/tts1/dump/raw/tr_no_dev data/train_nodev
+ln -s /path/to/espnet/egs2/<recipe_name>/tts1/dump/raw/dev data/dev
+ln -s /path/to/espnet/egs2/<recipe_name>/tts1/dump/raw/eval1 data/eval
+
+# Edit the config to match the TTS model settings
+vim conf/parallel_wavegan.v1.yaml
+
+# Run from stage 1
+./run.sh --stage 1 --conf conf/parallel_wavegan.v1.yaml
+```
+
+That's it!
diff --git a/ParallelWaveGAN/egs/arctic/voc1/cmd.sh b/ParallelWaveGAN/egs/arctic/voc1/cmd.sh
new file mode 100644
index 0000000000000000000000000000000000000000..19f342102fc4f3389157c48f1196b16b68eb1cf1
--- /dev/null
+++ b/ParallelWaveGAN/egs/arctic/voc1/cmd.sh
@@ -0,0 +1,91 @@
+# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
+# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
+# e.g.
+# run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
+#
+# Options:
+#   --time