File size: 3,860 Bytes
8219f7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# pytorch_lightning==1.8.6
# Fixed seed for this run.
# NOTE(review): presumably forwarded to Lightning's seed_everything by the CLI
# entry point — confirm against the training script.
seed_everything: 4444
# Trainer section. Most keys are left at null/false/0; the comments below
# call out the values that are set to something specific in this file.
trainer:
  # Logger: TensorBoardLogger writing under an absolute, machine-specific
  # save_dir with run name "lightning_logs" and no pinned version.
  logger:
    class_path: pytorch_lightning.loggers.TensorBoardLogger
    init_args:
      save_dir: /home/patriotyk/vocos/logs
      name: lightning_logs
      version: null
      log_graph: false
      default_hp_metric: true
      prefix: ''
      sub_dir: null
      # NOTE(review): the keys from logdir through comet_config look like
      # pass-through kwargs for the underlying summary writer rather than
      # logger options — confirm the installed logger accepts them.
      logdir: null
      comment: ''
      purge_step: null
      max_queue: 10
      flush_secs: 120
      filename_suffix: ''
      write_to_disk: true
      comet_config:
        disabled: true
  enable_checkpointing: true
  # Four callbacks are registered: LR monitor, model summary, checkpointing,
  # and a project-specific gradient-norm callback.
  callbacks:
  - class_path: pytorch_lightning.callbacks.LearningRateMonitor
    init_args:
      logging_interval: null
      log_momentum: false
  - class_path: pytorch_lightning.callbacks.ModelSummary
    init_args:
      max_depth: 2
  # Checkpointing monitors val_loss with mode "min", keeps the top 3 plus the
  # last checkpoint, and saves full state (save_weights_only is false).
  # dirpath is null, so the output location is not fixed in this file.
  - class_path: pytorch_lightning.callbacks.ModelCheckpoint
    init_args:
      dirpath: null
      filename: vocos_checkpoint_{epoch}_{step}_{val_loss:.4f}
      monitor: val_loss
      verbose: false
      save_last: true
      save_top_k: 3
      save_weights_only: false
      mode: min
      auto_insert_metric_name: true
      every_n_train_steps: null
      train_time_interval: null
      every_n_epochs: null
      save_on_train_epoch_end: null
  # Project-local callback from vocos.helpers; configured with no init_args.
  - class_path: vocos.helpers.GradNormCallback
  default_root_dir: null
  gradient_clip_val: null
  gradient_clip_algorithm: null
  num_nodes: 1
  num_processes: null
  # NOTE(review): devices is -1; presumably "use all available devices" —
  # confirm for the installed Lightning version.
  devices: -1
  gpus: null
  auto_select_gpus: false
  tpu_cores: null
  ipus: null
  enable_progress_bar: true
  overfit_batches: 0.0
  track_grad_norm: -1
  check_val_every_n_epoch: 1
  fast_dev_run: false
  accumulate_grad_batches: null
  # NOTE(review): no explicit training length is configured (max_epochs is
  # null, max_steps is -1, max_time is null). Check which default stopping
  # rule the installed Lightning version applies in that case.
  max_epochs: null
  min_epochs: null
  max_steps: -1
  min_steps: null
  max_time: null
  limit_train_batches: null
  # NOTE(review): integer value; presumably a batch count rather than a
  # fraction of the validation set — confirm.
  limit_val_batches: 100
  limit_test_batches: null
  limit_predict_batches: null
  val_check_interval: null
  log_every_n_steps: 100
  # Hardware/precision: GPU accelerator, DDP strategy, 32-bit precision.
  accelerator: gpu
  strategy: ddp
  sync_batchnorm: false
  precision: 32
  enable_model_summary: true
  num_sanity_val_steps: 2
  resume_from_checkpoint: null
  profiler: null
  benchmark: null
  deterministic: null
  reload_dataloaders_every_n_epochs: 0
  auto_lr_find: false
  replace_sampler_ddp: true
  detect_anomaly: false
  auto_scale_batch_size: false
  plugins: null
  amp_backend: native
  amp_level: null
  move_metrics_to_cpu: false
  multiple_trainloader_mode: max_size_cycle
  inference_mode: true
# Data section: project-local VocosDataModule with separate train and
# validation parameter sets. Both file lists are absolute, machine-specific
# paths and must be adjusted when running elsewhere.
data:
  class_path: vocos.dataset.VocosDataModule
  init_args:
    # Train and val share sampling_rate (44100), batch_size (20) and
    # num_workers (24); they differ only in filelist_path and num_samples.
    train_params:
      filelist_path: /home/patriotyk/tts_corpus_44100/train_vocos.txt
      sampling_rate: 44100
      num_samples: 32768
      batch_size: 20
      num_workers: 24
    val_params:
      filelist_path: /home/patriotyk/tts_corpus_44100/val_vocos.txt
      sampling_rate: 44100
      # Validation uses a larger num_samples (96768) than training (32768).
      # NOTE(review): presumably the per-item audio length in samples —
      # confirm against VocosDataModule.
      num_samples: 96768
      batch_size: 20
      num_workers: 24
# Model section: project-local VocosExp assembled from three components
# (feature_extractor, backbone, head) plus experiment-level hyperparameters.
model:
  class_path: vocos.experiment.VocosExp
  init_args:
    # Mel-spectrogram front end: 80 mel bins, n_fft 2048, hop_length 512 at a
    # 44100 sample rate, matching the sampling_rate used in the data section.
    feature_extractor:
      class_path: vocos.feature_extractors.MelSpectrogramFeatures
      init_args:
        sample_rate: 44100
        n_fft: 2048
        hop_length: 512
        n_mels: 80
        padding: same
        f_min: 0
        # NOTE(review): f_max is 8000 while sample_rate is 44100 (half of
        # which is 22050). Confirm this upper band limit is intentional.
        f_max: 8000
        norm: slaney
        mel_scale: slaney
    backbone:
      class_path: vocos.models.VocosBackbone
      init_args:
        # input_channels (80) equals the feature extractor's n_mels (80).
        input_channels: 80
        dim: 512
        intermediate_dim: 1536
        num_layers: 8
        layer_scale_init_value: null
        adanorm_num_embeddings: null
    head:
      class_path: vocos.heads.ISTFTHead
      init_args:
        # dim (512) equals the backbone dim; n_fft, hop_length and padding
        # repeat the feature extractor's values.
        dim: 512
        n_fft: 2048
        hop_length: 512
        padding: same
    sample_rate: 44100
    initial_learning_rate: 0.0003
    num_warmup_steps: 0
    mel_loss_coeff: 45.0
    mrd_loss_coeff: 1.0
    pretrain_mel_steps: 0
    decay_mel_coeff: false
    # All three evaluation switches are enabled.
    evaluate_utmos: true
    evaluate_pesq: true
    # NOTE(review): the key is spelled "periodicty". It presumably has to
    # match the parameter name in VocosExp — verify before renaming.
    evaluate_periodicty: true