defaults:
  - base
  - model@model.model: dual_ar_8_codebook_small
  - _self_

project: text2semantic_sft_medium_dual_ar
max_length: 4096
ckpt_path: results/text2semantic_pretrain_medium_dual_ar/checkpoints/step_000060000.ckpt
resume_weights_only: true

# Lightning Trainer
trainer:
  accumulate_grad_batches: 1
  gradient_clip_val: 1.0
  gradient_clip_algorithm: 'norm'
  max_steps: 10_000
  precision: bf16-true
  limit_val_batches: 10
  val_check_interval: 500

# Tokenizer Configuration
tokenizer:
  _target_: transformers.AutoTokenizer.from_pretrained
  pretrained_model_name_or_path: fishaudio/speech-lm-v1

# Dataset Configuration
train_dataset:
  _target_: fish_speech.datasets.text.AutoAugTextDataset
  use_data_server: false
  proto_files:
    - data/protos/sft/train_Genshin.protos
    - data/protos/sft/sft.protos
  tokenizer: ${tokenizer}
  max_length: ${max_length}
  num_codebooks: ${model.model.config.num_codebooks}
  use_speaker: false
  phones_prob: 0.5
  interactive_prob: 0.5

val_dataset:
  _target_: fish_speech.datasets.text.AutoAugTextDataset
  use_data_server: false
  proto_files:
    - data/protos/sft/val_Genshin.protos
  tokenizer: ${tokenizer}
  max_length: ${max_length}
  num_codebooks: ${model.model.config.num_codebooks}
  use_speaker: false
  phones_prob: 0.5
  interactive_prob: 0.5

data:
  _target_: fish_speech.datasets.text.TextDataModule
  train_dataset: ${train_dataset}
  val_dataset: ${val_dataset}
  num_workers: 4
  batch_size: 8
  tokenizer: ${tokenizer}
  max_length: ${max_length}

# Model Configuration
model:
  _target_: fish_speech.models.text2semantic.TextToSemantic
  model: {}

  optimizer:
    _target_: torch.optim.AdamW
    _partial_: true
    lr: 4e-5
    weight_decay: 0
    betas: [0.9, 0.95]
    eps: 1e-5

  lr_scheduler:
    _target_: torch.optim.lr_scheduler.LambdaLR
    _partial_: true
    lr_lambda:
      _target_: fish_speech.scheduler.get_cosine_schedule_with_warmup_lr_lambda
      _partial_: true
      num_warmup_steps: 100
      num_training_steps: ${trainer.max_steps}
      final_lr_ratio: 0

callbacks:
  model_checkpoint:
    every_n_train_steps: 1000
    save_top_k: 10
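
# Usage note (not part of the original config; the entrypoint below is an
# assumption based on the repo's standard Hydra setup, not confirmed by this
# file): Hydra selects a config by its file name without the .yaml suffix, e.g.
#
#   python fish_speech/train.py --config-name text2semantic_sft_medium_dual_ar
#
# Individual keys can then be overridden on the command line, for example:
#
#   python fish_speech/train.py --config-name text2semantic_sft_medium_dual_ar \
#       data.batch_size=4 model.optimizer.lr=2e-5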