language-and-voice-lab
/

talromur_1and2_style_melgan

Model card Files Files and versions Community

GunnarThor committed on Mar 22

Commit

c885237

•

1 Parent(s): aa1d68c

add model files

Browse files

Files changed (4) hide show

README.md +9 -0
checkpoint-1500000steps.pkl +3 -0
config.yml +167 -0
stats.h5 +3 -0

README.md CHANGED Viewed

@@ -1,3 +1,12 @@
 ---
 license: apache-2.0
 ---

 ---
 license: apache-2.0
 ---
+This model was trained by Gunnar Thor Örnólfsson in 2023.
+It uses a combination of the Talrómur 1 and 2 corpora, totalling 44 training speakers (a further 4 speakers were held back for evaluation).
+All 8 voices in Talrómur 1 were used.
+36 out of the 40 voices in Talrómur 2 were used.
+The model was trained for 1,500,000 steps using Tomoki Hayashi's implementation of Style-MelGAN: https://github.com/kan-bayashi/ParallelWaveGAN/

checkpoint-1500000steps.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0fb6b455b89a754472d208e2e33d8536ed426de6c597ae48f27ed91de98425a4
+size 114014636

config.yml ADDED Viewed

	@@ -0,0 +1,167 @@

+allow_cache: false
+batch_max_steps: 22528
+batch_size: 32
+config: conf/style_melgan.v1.yaml
+dev_dumpdir: dump/dev_1and2/norm
+dev_feats_scp: null
+dev_segments: null
+dev_wav_scp: null
+discriminator_adv_loss_params:
+  average_by_discriminators: false
+discriminator_grad_norm: -1
+discriminator_optimizer_params:
+  betas:
+  - 0.5
+  - 0.9
+  lr: 0.0002
+  weight_decay: 0.0
+discriminator_optimizer_type: Adam
+discriminator_params:
+  discriminator_params:
+    bias: true
+    channels: 16
+    downsample_scales:
+    - 4
+    - 4
+    - 4
+    - 1
+    kernel_sizes:
+    - 5
+    - 3
+    max_downsample_channels: 512
+    nonlinear_activation: LeakyReLU
+    nonlinear_activation_params:
+      negative_slope: 0.2
+    out_channels: 1
+  pqmf_params:
+  - - 1
+    - None
+    - None
+    - None
+  - - 2
+    - 62
+    - 0.267
+    - 9.0
+  - - 4
+    - 62
+    - 0.142
+    - 9.0
+  - - 8
+    - 62
+    - 0.07949
+    - 9.0
+  repeats: 4
+  use_weight_norm: true
+  window_sizes:
+  - 512
+  - 1024
+  - 2048
+  - 4096
+discriminator_scheduler_params:
+  gamma: 0.5
+  milestones:
+  - 200000
+  - 400000
+  - 600000
+  - 800000
+discriminator_scheduler_type: MultiStepLR
+discriminator_train_start_steps: 100000
+discriminator_type: StyleMelGANDiscriminator
+distributed: false
+eval_interval_steps: 1000
+fft_size: 1024
+fmax: 7600
+fmin: 80
+format: hdf5
+generator_adv_loss_params:
+  average_by_discriminators: false
+generator_grad_norm: -1
+generator_optimizer_params:
+  betas:
+  - 0.5
+  - 0.9
+  lr: 0.0001
+  weight_decay: 0.0
+generator_optimizer_type: Adam
+generator_params:
+  aux_channels: 80
+  bias: true
+  channels: 64
+  dilation: 2
+  gated_function: softmax
+  in_channels: 128
+  kernel_size: 9
+  noise_upsample_activation: LeakyReLU
+  noise_upsample_activation_params:
+    negative_slope: 0.2
+  noise_upsample_scales:
+  - 11
+  - 2
+  - 2
+  - 2
+  out_channels: 1
+  upsample_mode: nearest
+  upsample_scales:
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 1
+  use_weight_norm: true
+generator_scheduler_params:
+  gamma: 0.5
+  milestones:
+  - 100000
+  - 300000
+  - 500000
+  - 700000
+  - 900000
+generator_scheduler_type: MultiStepLR
+generator_type: StyleMelGANGenerator
+global_gain_scale: 1.0
+hop_size: 256
+lambda_adv: 1.0
+lambda_aux: 1.0
+log_interval_steps: 100
+num_mels: 80
+num_save_intermediate_results: 4
+num_workers: 2
+outdir: exp/train_1and2_style_melgan.v1
+pin_memory: true
+pretrain: ''
+rank: 0
+remove_short_samples: false
+resume: ''
+sampling_rate: 22050
+save_interval_steps: 50000
+stft_loss_params:
+  fft_sizes:
+  - 1024
+  - 2048
+  - 512
+  hop_sizes:
+  - 120
+  - 240
+  - 50
+  win_lengths:
+  - 600
+  - 1200
+  - 240
+  window: hann_window
+train_dumpdir: dump/train_1and2/norm
+train_feats_scp: null
+train_max_steps: 1500000
+train_segments: null
+train_wav_scp: null
+trim_frame_size: 1024
+trim_hop_size: 256
+trim_silence: false
+trim_threshold_in_db: 60
+verbose: 1
+version: 0.6.0
+win_length: null
+window: hann

stats.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:54f1528f410ec426951fc6e30ec4cc266805f8477599c6ad2a26f07e2797bb71
+size 4736