# sanchit-gandhi's picture
# Saving train state of step 500
# 75a9512
#!/usr/bin/env bash
# Launch run_parler_tts_training.py with a fixed set of command-line flags.
# The flags are collected in an array (in the order they are passed) so that
# related options can be grouped and annotated; the script's exit status is
# that of the python process.
training_args=(
  # Checkpoint, audio feature extractor and tokenizers.
  --model_name_or_path parler-tts/parler_tts_mini_v0.1
  --feature_extractor_name parler-tts/dac_44khZ_8kbps
  --description_tokenizer_name parler-tts/parler_tts_mini_v0.1
  --prompt_tokenizer_name parler-tts/parler_tts_mini_v0.1

  # Experiment reporting and output-directory handling.
  --report_to wandb
  --overwrite_output_dir true

  # Training dataset.
  --train_dataset_name reach-vb/expresso-tagged-mistral-7b-instruct-v0.2
  --train_metadata_dataset_name reach-vb/expresso-tagged-mistral-7b-instruct-v0.2
  --train_dataset_config_name read
  --train_split_name train

  # Evaluation dataset. NOTE(review): same dataset, config and split
  # ("train") as the training data above -- confirm this is intended.
  --eval_dataset_name reach-vb/expresso-tagged-mistral-7b-instruct-v0.2
  --eval_metadata_dataset_name reach-vb/expresso-tagged-mistral-7b-instruct-v0.2
  --eval_dataset_config_name read
  --eval_split_name train
  --max_eval_samples 8
  --per_device_eval_batch_size 16

  # Dataset column names.
  --target_audio_column_name audio
  --description_column_name text_description
  --prompt_column_name text

  # Length limits and preprocessing parallelism.
  --max_duration_in_seconds 20
  --min_duration_in_seconds 2.0
  --max_text_length 400
  --preprocessing_num_workers 2

  # Training schedule and optimizer settings.
  --do_train true
  --num_train_epochs 10
  --gradient_accumulation_steps 4
  --gradient_checkpointing true
  --per_device_train_batch_size 32
  --learning_rate 8e-5
  --adam_beta1 0.9
  --adam_beta2 0.99
  --weight_decay 0.01
  --warmup_steps 250
  --logging_steps 2
  --freeze_text_encoder true
  --audio_encoder_per_device_batch_size 4
  --dtype bfloat16
  --seed 456

  # Output and cache locations (relative to the current working directory).
  --output_dir ./
  --temporary_save_to_disk ../audio_code_tmp/
  --save_to_disk ../tmp_dataset_audio/
  --dataloader_num_workers 4

  # Evaluation / generation switches and Hub upload.
  --do_eval
  --predict_with_generate
  --include_inputs_for_metrics
  --group_by_length true
  --push_to_hub
)

python run_parler_tts_training.py "${training_args[@]}"