#!/bin/sh
#
# Launches ./run_parler_tts_training.py through `accelerate launch` with a
# fixed set of command-line flags.
#
# Usage: run from the directory that contains run_parler_tts_training.py;
# the script path and the output/cache paths below are all relative.
#
# The argument list is accumulated in the positional parameters, one
# `set --` per group, so that each group can carry a comment and every flag
# and value reaches the training script as its own word. Any arguments
# given to this wrapper are discarded by the first `set --`.
#
# What each flag does is defined by run_parler_tts_training.py (not visible
# from here); the group comments only describe what is being passed.

# Model checkpoint, audio feature extractor and the two tokenizers.
set -- \
  --model_name_or_path "parler-tts/parler_tts_mini_v0.1" \
  --feature_extractor_name "parler-tts/dac_44khZ_8kbps" \
  --description_tokenizer_name "parler-tts/parler_tts_mini_v0.1" \
  --prompt_tokenizer_name "parler-tts/parler_tts_mini_v0.1"

# Experiment reporting target and output-directory overwrite switch.
set -- "$@" \
  --report_to "wandb" \
  --overwrite_output_dir true

# Training dataset, its metadata dataset, config and split.
set -- "$@" \
  --train_dataset_name "sanchit-gandhi/expresso-concatenated-half-normal" \
  --train_metadata_dataset_name "sanchit-gandhi/expresso-concatenated-half-normal-tags-mistral" \
  --train_dataset_config_name "read" \
  --train_split_name "train[:-35]"

# Evaluation dataset, its metadata dataset, config, split and sizes.
# NOTE(review): the eval split is the whole "train" split while training
# uses "train[:-35]", so the two appear to overlap - confirm this is
# intended (e.g. rather than "train[-35:]").
set -- "$@" \
  --eval_dataset_name "sanchit-gandhi/expresso-concatenated-half-normal" \
  --eval_metadata_dataset_name "sanchit-gandhi/expresso-concatenated-half-normal-tags-mistral" \
  --eval_dataset_config_name "read" \
  --eval_split_name "train" \
  --max_eval_samples 8 \
  --per_device_eval_batch_size 16

# Names of the dataset columns handed to the training script.
set -- "$@" \
  --target_audio_column_name "audio" \
  --description_column_name "text_description" \
  --prompt_column_name "text"

# Duration / text-length limits and preprocessing worker count.
set -- "$@" \
  --max_duration_in_seconds 30.0 \
  --min_duration_in_seconds 2.0 \
  --max_text_length 400 \
  --preprocessing_num_workers 2

# Training switches and optimisation hyper-parameters.
set -- "$@" \
  --do_train true \
  --num_train_epochs 8 \
  --gradient_accumulation_steps 8 \
  --gradient_checkpointing true \
  --per_device_train_batch_size 16 \
  --learning_rate 0.00008 \
  --adam_beta1 0.9 \
  --adam_beta2 0.99 \
  --weight_decay 0.01 \
  --lr_scheduler_type "cosine" \
  --warmup_steps 250 \
  --logging_steps 2 \
  --freeze_text_encoder true \
  --audio_encoder_per_device_batch_size 4 \
  --dtype "bfloat16" \
  --seed 456

# Output directory, on-disk locations passed for intermediate data, and
# dataloader worker count. All paths are relative to the working directory.
set -- "$@" \
  --output_dir "./" \
  --temporary_save_to_disk "../audio_code_tmp_concat/" \
  --save_to_disk "../tmp_dataset_audio_concat/" \
  --dataloader_num_workers 4

# Evaluation, checkpoint-saving and hub-upload options.
# --do_eval, --predict_with_generate, --include_inputs_for_metrics and
# --push_to_hub are passed as bare flags (no value), as in the original.
set -- "$@" \
  --do_eval \
  --predict_with_generate \
  --include_inputs_for_metrics \
  --save_strategy "steps" \
  --save_steps 72 \
  --evaluation_strategy "epoch" \
  --save_total_limit 5 \
  --group_by_length true \
  --push_to_hub

# Kept as the last command so that its exit status is the script's.
accelerate launch ./run_parler_tts_training.py "$@"