# Whisper finetuning script for the NST dataset
# This is a test script for XLA on TPU

python xla_spawn.py --num_cores=4 run_whisper_finetuning.py \
    --model_name_or_path="openai/whisper-small" \
    --output_dir="../whisper-NST-TPU" \
    --overwrite_output_dir=True \
    --language="Norwegian" \
    --task="transcribe" \
    --dataset_name="NbAiLab/NST" \
    --dataset_config="no-close" \
    --do_train=True \
    --do_eval=True \
    --audio_column_name="audio" \
    --text_column_name="text" \
    --per_device_train_batch_size=16 \
    --per_device_eval_batch_size=16 \
    --learning_rate=2e-5 \
    --warmup_steps=500 \
    --max_steps=5000 \
    --gradient_checkpointing=True \
    --gradient_accumulation_steps=1 \
    --group_by_length=False \
    --evaluation_strategy="steps" \
    --save_steps=1000 \
    --eval_steps=1000 \
    --max_eval_samples=100 \
    --logging_steps=250 \
    --load_best_model_at_end=True \
    --metric_for_best_model="wer" \
    --greater_is_better=False \
    --report_to="tensorboard" \
    --predict_with_generate=True \
    --generation_max_length=225 \
    --print_training_arguments=True \
    --xla=True \
    --push_to_hub=True

# It is very likely that some of these parameters need to be added:
# tpu_name (:obj:`str`, `optional`):
#     The name of the TPU the process is running on.
# tpu_zone (:obj:`str`, `optional`):
#     The zone of the TPU the process is running on. If not specified, we will
#     attempt to automatically detect it from metadata.
# xla (:obj:`bool`, `optional`):
#     Whether to activate XLA compilation or not.
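
# A minimal sketch of how those flags could be appended, assuming the script's
# argument parser accepts them. Note the docstrings above come from
# TFTrainingArguments; the PyTorch/XLA script launched via xla_spawn.py may use
# different names, and the tpu_name/tpu_zone values here are placeholders.
# Left commented out so this test script still runs a single training job:
#
# python xla_spawn.py --num_cores=4 run_whisper_finetuning.py \
#     --model_name_or_path="openai/whisper-small" \
#     --output_dir="../whisper-NST-TPU" \
#     --tpu_name="whisper-tpu" \
#     --tpu_zone="europe-west4-a" \
#     --xla=True \
#     --push_to_hub=True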