Spaces:

fishaudio
/

fish-speech-1

Running on A10G

App Files Files Community

fish-speech-1/fish_speech/configs/text2semantic_sft.yaml

lengyue233

Init hf space integration

0a3525d verified 8 months ago

raw

history blame

2.15 kB

	defaults:
	- base
	- model@model.model: dual_ar_8_codebook_small
	- _self_

	project: text2semantic_sft_medium_dual_ar
	max_length: 4096
	ckpt_path: results/text2semantic_pretrain_medium_dual_ar/checkpoints/step_000060000.ckpt
	resume_weights_only: true

	# Lightning Trainer
	trainer:
	accumulate_grad_batches: 1
	gradient_clip_val: 1.0
	gradient_clip_algorithm: 'norm'
	max_steps: 10_000
	precision: bf16-true
	limit_val_batches: 10
	val_check_interval: 500

	# Dataset Configuration
	tokenizer:
	_target_: transformers.AutoTokenizer.from_pretrained
	pretrained_model_name_or_path: fishaudio/speech-lm-v1

	# Dataset Configuration
	train_dataset:
	_target_: fish_speech.datasets.text.AutoAugTextDataset
	use_data_server: false
	proto_files:
	- data/protos/sft/train_Genshin.protos
	- data/protos/sft/sft.protos
	tokenizer: ${tokenizer}
	max_length: ${max_length}
	num_codebooks: ${model.model.config.num_codebooks}
	use_speaker: false
	phones_prob: 0.5
	interactive_prob: 0.5

	val_dataset:
	_target_: fish_speech.datasets.text.AutoAugTextDataset
	use_data_server: false
	proto_files:
	- data/protos/sft/val_Genshin.protos
	tokenizer: ${tokenizer}
	max_length: ${max_length}
	num_codebooks: ${model.model.config.num_codebooks}
	use_speaker: false
	phones_prob: 0.5
	interactive_prob: 0.5

	data:
	_target_: fish_speech.datasets.text.TextDataModule
	train_dataset: ${train_dataset}
	val_dataset: ${val_dataset}
	num_workers: 4
	batch_size: 8
	tokenizer: ${tokenizer}
	max_length: ${max_length}

	# Model Configuration
	model:
	_target_: fish_speech.models.text2semantic.TextToSemantic
	model: {}

	optimizer:
	_target_: torch.optim.AdamW
	_partial_: true
	lr: 4e-5
	weight_decay: 0
	betas: [0.9, 0.95]
	eps: 1e-5

	lr_scheduler:
	_target_: torch.optim.lr_scheduler.LambdaLR
	_partial_: true
	lr_lambda:
	_target_: fish_speech.scheduler.get_cosine_schedule_with_warmup_lr_lambda
	_partial_: true
	num_warmup_steps: 100
	num_training_steps: ${trainer.max_steps}
	final_lr_ratio: 0

	callbacks:
	model_checkpoint:
	every_n_train_steps: 1000
	save_top_k: 10