File size: 1,296 Bytes
b58d89f
 
 
931943b
b58d89f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/bin/sh
#
# Runs src/run_speech_recognition_ctc_bnb.py with --do_train/--do_eval,
# starting from facebook/wav2vec2-xls-r-300m and using the "et" configuration
# of mozilla-foundation/common_voice_8_0.
#
# Outputs go to the current directory (--output_dir="./", overwriting what is
# there) and the run is reported to Weights & Biases and pushed to the Hub
# (--report_to=wandb, --push_to_hub, --use_auth_token), so credentials for
# both services must already be available to the process.
#
# The exit status of this script is the exit status of the python process.

# Environment consumed by the training process: the W&B project to log into
# and the single CUDA device (index 2) the process is allowed to see.
export WANDB_PROJECT="xls-r-estonian"
export CUDA_VISIBLE_DEVICES=2

# The command line is assembled in the positional parameters, which is the
# only list type POSIX sh offers. Arguments given to this script itself are
# not forwarded to python.
set -- \
  --dataset_name="mozilla-foundation/common_voice_8_0" \
  --model_name_or_path="facebook/wav2vec2-xls-r-300m" \
  --dataset_config_name="et" \
  --output_dir="./" \
  --overwrite_output_dir \
  --num_train_epochs=100 \
  --per_device_train_batch_size=72 \
  --per_device_eval_batch_size=72 \
  --gradient_accumulation_steps=2 \
  --learning_rate=3e-4 \
  --save_total_limit=1 \
  --warmup_steps=500 \
  --evaluation_strategy=steps \
  --text_column_name=sentence \
  --length_column_name=input_length \
  --save_steps=500 \
  --eval_steps=500 \
  --logging_steps=100 \
  --layerdrop=0.0 \
  --freeze_feature_encoder \
  --feat_proj_dropout=0.1

# Every character is single-quoted so the shell hands it over literally.
# In particular an unquoted ? is a pathname-expansion pattern and would be
# replaced by any single-character file names present in the working
# directory.
set -- "$@" --chars_to_ignore \
  ',' '?' '.' '!' '-' ';' ':' '"' '“' '%' '‘' '”' '�' '—' '’' '…' '–'

set -- "$@" \
  --gradient_checkpointing \
  --lr_scheduler_type=cosine \
  --fp16 \
  --group_by_length \
  --mask_time_prob=0.1 \
  --mask_time_length=10 \
  --report_to=wandb \
  --run_name="cosine+drop_proj+low_specaugment-300M+cv_8_0" \
  --do_train --do_eval \
  --use_auth_token --push_to_hub

python src/run_speech_recognition_ctc_bnb.py "$@"