# sound/configs/s1.yaml
# Alyosha11's picture
# Add files using upload-large-folder tool
# 9ba5132 verified
# Output location for this stage-1 ("s1") run. NOTE(review): how this relative
# path is resolved depends on the consumer, which is not visible here.
output_dir: "logs/s1"
# Training-run settings. The keys are nested under `train:` so the section
# header is a mapping instead of a null value with its settings spilled to the
# top level.
train:
  seed: 1234
  epochs: 15
  # NOTE(review): a second `batch_size: 8` appears among the data settings;
  # confirm which one the trainer actually reads.
  batch_size: 8
  save_every_n_epoch: 5
  precision: 32
  if_save_latest: true
  if_save_every_weights: true
  exp_name: "gpt_training"
  half_weights_save_dir: "weights/s1"
# Experiment-tracking settings (presumably Weights & Biases, going by the key
# name). The header stays at the top level, where it sits in the file.
# NOTE(review): confirm the trainer does not expect this section under `train:`.
wandb:
  project: "gpt-sovits-hindi"
  name: "stage1_training"
  entity: null
  # NOTE(review): assumed to belong to `wandb` because it directly follows the
  # wandb keys; its unit (steps vs. batches) is not visible here.
  log_interval: 100
# Optimizer / learning-rate settings, nested under `optimizer:` so the header
# is a mapping rather than a null value.
# NOTE(review): presumably the rate warms up from `lr_init` to `lr` over
# `warmup_steps`, then decays to `lr_end` over `decay_steps`; the scheduler
# itself is not visible here, so confirm.
optimizer:
  lr_init: 0.0001
  lr: 0.0004
  lr_end: 0.00001
  warmup_steps: 500
  decay_steps: 1000
# Dataset and feature-extraction settings, nested under `data:` so that
# `batch_size` and `pad_val` no longer collide with same-named keys elsewhere
# in the file at the top level.
data:
  training_files: "data8"
  # NOTE(review): `max_sec` and `max_frames` are both 60; their units and which
  # one the loader enforces are not visible here.
  max_sec: 60
  max_frames: 60
  # Spectrogram parameters: window and FFT length are both 2048, hop is 640.
  filter_length: 2048
  hop_length: 640
  win_length: 2048
  mel_channels: 128
  mel_fmin: 0.0
  # Explicit null; NOTE(review): presumably "no upper limit" — confirm.
  mel_fmax: null
  cleaned_text: true
  num_workers: 4
  # NOTE(review): the training settings also carry `batch_size: 8`; confirm
  # which one takes effect.
  batch_size: 8
  # NOTE(review): the model settings list `pad_val: 1024` as well, alongside
  # `EOS: 2047`; confirm the padding value is meant to differ from EOS.
  pad_val: 1024
# Data paths: the semantic-token TSV and the phoneme text file used for
# training. These keys sit at column 0 (top level) in this file.
# NOTE(review): confirm whether the loader reads them from the top level or
# expects them inside the `data` section.
train_semantic_path: "data8/semantic.tsv"
train_phoneme_path: "data8/phoneme.txt"
# Model hyperparameters, nested under `model:` so the header is a mapping and
# `pad_val` does not collide with the data section's key at the top level.
# NOTE(review): several keys look like overlapping spellings of one setting
# (`hidden_dim` / `embedding_dim` / `n_embd`, `n_layer` / `num_layers`, and the
# various dropout keys). Which of them the model reads is not visible here.
model:
  hidden_dim: 768
  embedding_dim: 768
  n_layer: 12
  head: 12
  n_embd: 768
  vocab_size: 2048
  block_size: 1000
  embd_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1
  semantic_dim: 1024
  # NOTE(review): differs from `n_layer: 12` above; confirm both are used.
  num_layers: 6
  ffn_hidden: 3072
  dropout: 0.1
  attention_dropout: 0.1
  hidden_dropout: 0.1
  max_text_positions: 2048
  max_mel_positions: 8000
  prenet_dim: 384
  postnet_dim: 384
  prenet_layers: 3
  postnet_layers: 3
  phoneme_vocab_size: 2048
  # EOS is the last index of a 2048-entry vocabulary (`vocab_size - 1`).
  EOS: 2047
  # NOTE(review): `pad_val` (1024) differs from `EOS` (2047); confirm the
  # padding id is intentionally separate from the end-of-sequence id.
  pad_val: 1024