#!/usr/bin/env bash
# source: https://github.com/kamfonas/whisper-fine-tuning-event/compare/master...minor-mods-by-farsipal
#
# Launches run_speech_recognition_seq2seq_streaming.py with a fixed set of
# command-line arguments (listed in the `args` array below).
#
# Usage:   bash <this-script>
# Needs:   `python` on PATH; run_speech_recognition_seq2seq_streaming.py in the
#          current working directory (it is referenced by a relative path).
# Returns: the exit status of the python process.
set -euo pipefail

# The arguments are kept in an array (rather than one backslash-continued
# command) so that each group can carry a comment and no value is ever
# re-split by the shell.
args=(
  # --- model ---
  --model_name_or_path 'openai/whisper-medium'
  --model_revision main
  --do_train True
  --do_eval True
  --use_auth_token False
  --freeze_feature_encoder False
  --freeze_encoder False
  --model_index_name 'Whisper Medium Danish (CV11 + FLEAURS)'

  # --- datasets ---
  # The comma-separated values below each have two entries, one per dataset
  # in --dataset_name, in the same order.
  --dataset_name 'mozilla-foundation/common_voice_11_0,google/fleurs'
  --dataset_config_name 'da,da_dk'
  --train_split_name 'train+validation,train+validation'
  # NOTE(review): '-' presumably means "no eval split for the second
  # dataset" -- confirm against the training script.
  --eval_split_name 'test,-'
  --text_column_name 'sentence,raw_transcription'
  --audio_column_name 'audio,audio'
  --streaming False
  --max_duration_in_seconds 30

  # --- text handling / task ---
  --do_lower_case False
  --do_remove_punctuation False
  --do_normalize_eval True
  --language danish
  --task transcribe
  --shuffle_buffer_size 500

  # --- output ---
  --output_dir './'
  --overwrite_output_dir True

  # --- optimisation ---
  --per_device_train_batch_size 32
  --gradient_accumulation_steps 1
  --learning_rate 8e-6
  --dropout 0.1
  --warmup_steps 500
  --max_steps 10000
  --eval_steps 1000
  --gradient_checkpointing True
  # A quoted '~' is not expanded by the shell, so the program would receive
  # the literal string "~/.cache"; pass the expanded home directory instead.
  --cache_dir "${HOME}/.cache"
  --fp16 True

  # --- evaluation, checkpointing, logging ---
  --evaluation_strategy steps
  --per_device_eval_batch_size 8
  --predict_with_generate True
  --generation_max_length 225
  --save_steps 1000
  --logging_steps 25
  --report_to tensorboard
  --load_best_model_at_end True
  --metric_for_best_model wer
  --greater_is_better False
  --push_to_hub
)

python run_speech_recognition_seq2seq_streaming.py "${args[@]}"