#!/usr/bin/env bash
#
# Launches ./run_flax_speech_recognition_seq2seq.py (resolved relative to the
# current working directory) with a fixed set of command-line flags.
#
# Usage: run this script from the directory containing
#        run_flax_speech_recognition_seq2seq.py.
#
# NOTE(review): the meaning of each flag is defined by the Python script, which
# is not part of this file. The section labels below are inferred from the flag
# names only -- confirm against that script's argument definitions.
# NOTE(review): --dataset_cache_dir is a machine-specific absolute path; adjust
# it when running on a different host.

set -euo pipefail

# The same identifier is used for the output directory and the W&B run name.
readonly run_name="flax-wav2vec2-2-bart-large-cv9-feature-encoder"

# Arguments are kept in an array (rather than backslash continuations) so that
# they can be commented and so a missing continuation cannot silently truncate
# the command. Order matches the original invocation.
args=(
  # Dataset / model selection
  --dataset_name=mozilla-foundation/common_voice_9_0
  --model_name_or_path=sanchit-gandhi/flax-wav2vec2-2-bart-large-scan
  --dataset_config_name=en
  --train_split_name=train
  --eval_split_name=validation
  --test_split_name=test
  --dataset_cache_dir=/home/sanchitgandhi/cache/huggingface/datasets
  --output_dir="./${run_name}"

  # Preprocessing and column names
  --preprocessing_num_workers=1
  --id_column_name=client_id
  --length_column_name=input_length
  --text_column_name=sentence
  --overwrite_output_dir

  # Batch sizes and step schedule
  --per_device_train_batch_size=8
  --per_device_eval_batch_size=4
  --logging_steps=25
  --max_steps=50000
  --eval_steps=10000
  --save_steps=10000
  --gradient_checkpointing

  # Input / target length limits
  --max_duration_in_seconds=20
  --max_target_length=128

  # Generation settings
  --generation_max_length=40
  --generation_num_beams=1
  --generation_length_penalty=1.2
  --final_generation_max_length=200
  --final_generation_num_beams=5

  # Optimisation
  --learning_rate=1e-4
  --warmup_steps=500
  --save_total_limit=1
  --freeze_feature_encoder=False

  # Run modes
  --predict_with_generate
  --do_lower_case
  --do_eval
  --do_train
  --do_predict

  # Hub upload and experiment tracking
  --push_to_hub
  --use_auth_token
  --wandb_project=commonvoice_9_0
  --wandb_name="${run_name}"
)

python ./run_flax_speech_recognition_seq2seq.py "${args[@]}"