Yurii Paniv committed on
Commit
eb57397
1 Parent(s): 2b6da0d

Improve training steps

training/.gitignore CHANGED
@@ -1 +1,2 @@
-espnet
+espnet
+data
training/STEPS.md CHANGED
@@ -2,10 +2,9 @@ Setup env
 Link: https://espnet.github.io/espnet/installation.html
 
 0. `sudo apt-get install cmake sox libsndfile1-dev ffmpeg`
-1. `git clone https://github.com/espnet/espnet@cb06bb1a9e5e5355a02d8c1871a8ecfafd54754d`
-`conda create -p ./.venv python=3.8`
-`conda install -c anaconda cudatoolkit`
+1. `git clone --branch v.202209 https://github.com/espnet/espnet`
 2. `cd ./espnet/tools`
+./setup_anaconda.sh anaconda espnet 3.8
 3. `CONDA_TOOLS_DIR=$(dirname ${CONDA_EXE})/..`
 ./setup_anaconda.sh ${CONDA_TOOLS_DIR} espnet 3.8
 5. `make`
@@ -16,4 +15,19 @@ make
 # run training
 
 cd ../egs2/ljspeech/tts1
-./run.sh
+./run.sh
+
+./run.sh \
+  --stage 2 \
+  --use_sid true \
+  --fs 22050 \
+  --n_fft 1024 \
+  --n_shift 256 \
+  --win_length null \
+  --dumpdir dump/22k \
+  --expdir exp/22k \
+  --tts_task gan_tts \
+  --feats_extract linear_spectrogram \
+  --feats_normalize none \
+  --train_config ./conf/tuning/train_vits.yaml \
+  --inference_config ./conf/tuning/decode_vits.yaml
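For reference, the updated STEPS.md sequence can be collected into a single shell sketch. This is not part of the commit: it assumes conda is already installed with `CONDA_EXE` set, that the script is run from a scratch directory, and it adds `-y` to `apt-get` for non-interactive runs.

```bash
#!/usr/bin/env bash
# Sketch of the updated steps as one script (assumes conda is installed and CONDA_EXE is set).
set -euo pipefail

# 0. System dependencies
sudo apt-get install -y cmake sox libsndfile1-dev ffmpeg

# 1. Clone the pinned ESPnet release instead of a raw commit hash
git clone --branch v.202209 https://github.com/espnet/espnet

# 2.-5. Create the espnet conda environment and build the tools
cd ./espnet/tools
CONDA_TOOLS_DIR=$(dirname "${CONDA_EXE}")/..
./setup_anaconda.sh "${CONDA_TOOLS_DIR}" espnet 3.8   # or: ./setup_anaconda.sh anaconda espnet 3.8
make

# Run training from the LJSpeech TTS recipe with the VITS / GAN-TTS options above
cd ../egs2/ljspeech/tts1
./run.sh \
  --stage 2 \
  --use_sid true \
  --fs 22050 \
  --n_fft 1024 \
  --n_shift 256 \
  --win_length null \
  --dumpdir dump/22k \
  --expdir exp/22k \
  --tts_task gan_tts \
  --feats_extract linear_spectrogram \
  --feats_normalize none \
  --train_config ./conf/tuning/train_vits.yaml \
  --inference_config ./conf/tuning/decode_vits.yaml
```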
training/train_vits.yaml CHANGED
@@ -16,8 +16,8 @@ tts_conf:
     generator_type: vits_generator
     generator_params:
         hidden_channels: 192
-        spks: -1
-        global_channels: -1
+        spks: 128
+        global_channels: 256
         segment_size: 32
         text_encoder_attention_heads: 2
         text_encoder_ffn_expand: 4
@@ -159,16 +159,18 @@ generator_first: false # whether to start updating generator first
 # OTHER TRAINING SETTING #
 ##########################################################
 #num_iters_per_epoch: 1000 # number of iterations per epoch
-max_epoch: 1000 # number of epochs
+max_epoch: 30 # number of epochs
 accum_grad: 1 # gradient accumulation
-batch_bins: 9000000 # batch bins (feats_type=raw)
+batch_bins: 1900000 # batch bins (feats_type=raw)
 batch_type: numel # how to make batch
+#batch_type: sorted # how to make batchbatch_size: 1
 grad_clip: -1 # gradient clipping norm
 grad_noise: false # whether to use gradient noise injection
 sort_in_batch: descending # how to sort data in making batch
 sort_batch: descending # how to sort created batches
-num_workers: 12 # number of workers of data loader
+num_workers: 1 # number of workers of data loader
 use_amp: false # whether to use pytorch amp
+train_dtype: float32
 log_interval: 50 # log interval in iterations
 keep_nbest_models: 10 # number of models to keep
 num_att_plot: 3 # number of attention figures to be saved in every check
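The YAML changes appear to switch the generator to a multi-speaker setup (`spks` and `global_channels` changed from `-1` sentinels to concrete sizes, matching `--use_sid true` in the run command) and shrink the training budget (fewer epochs, smaller `batch_bins`, a single data-loader worker, explicit `float32`). A hedged convenience check, not part of the commit, assuming the edited file is copied to `conf/tuning/train_vits.yaml` inside the recipe directory:

```bash
# Confirm the multi-speaker and training-budget overrides are present before launching ./run.sh
grep -nE 'spks:|global_channels:|max_epoch:|batch_bins:|num_workers:|train_dtype:' \
  conf/tuning/train_vits.yaml
```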