File size: 2,133 Bytes
5c23c43
 
497a54e
 
 
 
 
5c23c43
497a54e
 
 
 
 
 
 
 
 
 
 
 
 
 
5c23c43
497a54e
 
5c23c43
 
497a54e
 
 
 
60b3f40
497a54e
 
 
 
 
 
 
 
 
 
 
 
 
60b3f40
497a54e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Launch run_interleave.py with both training and evaluation enabled, starting
# from the 'emilios/whisper-medium-el' checkpoint (revision "main").
#
# Several options take comma-separated lists (dataset_name, dataset_config_name,
# train_split_name, eval_split_name, text_column_name, audio_column_name).
# Presumably the N-th entry of each list describes the N-th dataset, and the
# '-' in eval_split_name means "no eval split for that dataset" -- confirm
# against run_interleave.py.
#
# cache_dir is passed as "${HOME}/.cache" rather than '~/.cache': the shell
# does not expand a tilde inside quotes, so the quoted form would hand the
# literal string "~/.cache" to the Python process.
#
# NOTE(review): --resume_from_checkpoint is given the bare value 4000. If this
# is forwarded unchanged to a trainer that expects a checkpoint directory
# (e.g. "checkpoint-4000"), resuming may fail -- confirm how run_interleave.py
# interprets it, and how it interacts with --overwrite_output_dir True.
python run_interleave.py \
  --model_name_or_path 'emilios/whisper-medium-el' \
  --model_revision main \
  --do_train True \
  --do_eval True \
  --freeze_feature_encoder False \
  --freeze_encoder False \
  --model_index_name 'Whisper Medium El Greco' \
  --dataset_name 'mozilla-foundation/common_voice_11_0,google/fleurs' \
  --dataset_config_name 'el,el_gr' \
  --train_split_name 'train+validation,train+validation' \
  --eval_split_name 'test,-' \
  --text_column_name 'sentence,transcription' \
  --audio_column_name 'audio,audio' \
  --streaming False \
  --max_duration_in_seconds 30 \
  --do_lower_case False \
  --do_remove_punctuation False \
  --do_normalize_eval True \
  --language greek \
  --task transcribe \
  --shuffle_buffer_size 500 \
  --output_dir './' \
  --overwrite_output_dir True \
  --per_device_train_batch_size 32 \
  --gradient_accumulation_steps 1 \
  --per_device_eval_batch_size 16 \
  --learning_rate 1e-5 \
  --dropout 0.1 \
  --warmup_steps 500 \
  --max_steps 5000 \
  --resume_from_checkpoint="4000" \
  --eval_steps 1000 \
  --gradient_checkpointing True \
  --cache_dir "${HOME}/.cache" \
  --fp16 True \
  --evaluation_strategy steps \
  --predict_with_generate True \
  --generation_max_length 225 \
  --save_steps 1000 \
  --logging_steps 25 \
  --report_to tensorboard \
  --load_best_model_at_end True \
  --metric_for_best_model wer \
  --greater_is_better False \
  --push_to_hub True