train:
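  # Global training settings. With batch_size 8 and gradient_accumulation 4,
  # the effective batch size per optimizer step is 32 (assuming
  # gradient_accumulation counts accumulation steps). precision 16 likely
  # selects fp16 mixed-precision training; gradient_clip caps the gradient
  # norm at 1.0.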
  seed: 1234
  epochs: 300
  batch_size: 8
  gradient_accumulation: 4
  save_every_n_epoch: 1
  precision: 16
  gradient_clip: 1.0
optimizer:
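  # Learning-rate schedule. Presumably the rate warms up from lr_init toward
  # the peak lr over warmup_steps and then decays toward lr_end over
  # decay_steps; the exact curve depends on the scheduler implementation.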
  lr: 0.01
  lr_init: 0.00001
  lr_end: 0.0001
  warmup_steps: 2000
  decay_steps: 40000
data:
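  # Dataloader settings. max_sec presumably caps sample length in seconds and
  # max_eval_sample limits how many samples are held out for evaluation;
  # pad_val is the padding token id, kept equal to the model's EOS id.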
  max_eval_sample: 8
  max_sec: 54
  num_workers: 1
  pad_val: 1024  # same as EOS in model
model:
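  # Transformer hyperparameters: 12 layers (n_layer), 16 attention heads,
  # 512-dim embeddings/hidden states, and 2048-unit feed-forward layers.
  # vocab_size is 1025 so that index 1024 can serve as EOS; phonemes use a
  # separate 512-entry vocabulary.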
  vocab_size: 1025
  phoneme_vocab_size: 512
  embedding_dim: 512
  hidden_dim: 512
  head: 16
  linear_units: 2048
  n_layer: 12
  dropout: 0
  EOS: 1024
inference:
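  # Decoding settings: top_k restricts sampling to the 5 most probable
  # tokens at each step.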
  top_k: 5
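# A minimal sketch of loading this config with PyYAML (the file name below is
# hypothetical, not taken from this repo):
#   import yaml
#   with open("s1_config.yaml") as f:
#       cfg = yaml.safe_load(f)
#   print(cfg["train"]["batch_size"])  # -> 8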