osanseviero HF staff committed on
Commit
fddf1eb
1 Parent(s): d49b2a3

add melgan model

Browse files
Files changed (2) hide show
  1. config.yaml +89 -0
  2. model.h5 +3 -0
config.yaml ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # This is the hyperparameter configuration file for MelGAN.
3
+ # Please make sure this is adjusted for the LJSpeech dataset. If you want to
4
+ # apply it to another dataset, you might need to carefully change some parameters.
5
+ # This configuration trains for 4000k iterations.
6
+
7
+ ###########################################################
8
+ # FEATURE EXTRACTION SETTING #
9
+ ###########################################################
10
+ sampling_rate: 22050 # Sampling rate of dataset.
11
+ hop_size: 256 # Hop size.
12
+ format: "npy"
13
+
14
+
15
+ ###########################################################
16
+ # GENERATOR NETWORK ARCHITECTURE SETTING #
17
+ ###########################################################
18
+ model_type: "melgan_generator"
19
+
20
+ melgan_generator_params:
21
+ out_channels: 1 # Number of output channels.
22
+ kernel_size: 7 # Kernel size of initial and final conv layers.
23
+ filters: 512 # Initial number of channels for conv layers.
24
+ upsample_scales: [8, 8, 2, 2] # List of upsampling scales.
25
+ stack_kernel_size: 3 # Kernel size of dilated conv layers in residual stack.
26
+ stacks: 3 # Number of stacks in a single residual stack module.
27
+ is_weight_norm: false # Use weight-norm or not.
28
+
29
+ ###########################################################
30
+ # DISCRIMINATOR NETWORK ARCHITECTURE SETTING #
31
+ ###########################################################
32
+ melgan_discriminator_params:
33
+ out_channels: 1 # Number of output channels.
34
+ scales: 3 # Number of multi-scales.
35
+ downsample_pooling: "AveragePooling1D" # Pooling type for the input downsampling.
36
+ downsample_pooling_params: # Parameters of the above pooling function.
37
+ pool_size: 4
38
+ strides: 2
39
+ kernel_sizes: [5, 3] # List of kernel sizes.
40
+ filters: 16 # Number of channels of the initial conv layer.
41
+ max_downsample_filters: 1024 # Maximum number of channels of downsampling layers.
42
+ downsample_scales: [4, 4, 4, 4] # List of downsampling scales.
43
+ nonlinear_activation: "LeakyReLU" # Nonlinear activation function.
44
+ nonlinear_activation_params: # Parameters of nonlinear activation function.
45
+ alpha: 0.2
46
+ is_weight_norm: false # Use weight-norm or not.
47
+
48
+ ###########################################################
49
+ # ADVERSARIAL LOSS SETTING #
50
+ ###########################################################
51
+ lambda_feat_match: 10.0
52
+
53
+ ###########################################################
54
+ # DATA LOADER SETTING #
55
+ ###########################################################
56
+ batch_size: 16 # Batch size for each GPU, assuming that gradient_accumulation_steps == 1.
57
+ batch_max_steps: 8192 # Length of each audio in batch for training. Make sure it is divisible by hop_size.
58
+ batch_max_steps_valid: 81920 # Length of each audio for validation. Make sure it is divisible by hop_size.
59
+ remove_short_samples: true # Whether to remove samples whose length is less than batch_max_steps.
60
+ allow_cache: true # Whether to allow caching in the dataset. If true, it requires CPU memory.
61
+ is_shuffle: true # Whether to shuffle the dataset after each epoch.
62
+
63
+ ###########################################################
64
+ # OPTIMIZER & SCHEDULER SETTING #
65
+ ###########################################################
66
+ generator_optimizer_params:
67
+ lr: 0.0001 # Generator's learning rate.
68
+ beta_1: 0.5
69
+ beta_2: 0.9
70
+
71
+ discriminator_optimizer_params:
72
+ lr: 0.0001 # Discriminator's learning rate.
73
+ beta_1: 0.5
74
+ beta_2: 0.9
75
+
76
+ gradient_accumulation_steps: 1
77
+ ###########################################################
78
+ # INTERVAL SETTING #
79
+ ###########################################################
80
+ train_max_steps: 4000000 # Number of training steps.
81
+ save_interval_steps: 3 # Interval steps to save checkpoint.
82
+ eval_interval_steps: 2 # Interval steps to evaluate the network.
83
+ log_interval_steps: 1 # Interval steps to record the training log.
84
+ discriminator_train_start_steps: 0 # Step at which to start training the discriminator.
85
+
86
+ ###########################################################
87
+ # OTHER SETTING #
88
+ ###########################################################
89
+ num_save_intermediate_results: 1 # Number of batches to be saved as intermediate results.
model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f44239f67a2b8c3169510efd49d3492a29d0a1d167ab9071b68910d85849346
3
+ size 17142440