# parameters to set
model_cfg:
  init_from_hub_path: openai/whisper-large-v2
  # lang: None
  # apply_spec_augment: True
  # mask_time_prob: 0.05
  # mask_feature_prob: 0.05
  # mask_time_length: 40
  # mask_feature_length: 30
  # mask_time_min_masks: 2
  # mask_feature_min_masks: 2
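  # The commented-out fields above are WhisperConfig's SpecAugment options.
  # A minimal sketch of how they could be forwarded at load time (an
  # assumption about the loader, not this repo's actual code):
  #
  #   from transformers import WhisperForConditionalGeneration
  #
  #   model = WhisperForConditionalGeneration.from_pretrained(
  #       "openai/whisper-large-v2",
  #       apply_spec_augment=True,   # mask encoder input features during training
  #       mask_time_prob=0.05,       # probability a time step starts a mask
  #       mask_time_length=40,       # frames per time mask
  #       mask_time_min_masks=2,     # floor on the number of time masks
  #       mask_feature_prob=0.05,    # same three knobs along the feature axis
  #       mask_feature_length=30,
  #       mask_feature_min_masks=2,
  #   )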
data_cfg:
  data_root: ~/corpora/
  train_manif: ~/corpora/data_manifests/ASR/PUBLIC_KIDS_TRAIN_v4_deduped.csv
  val_manif: # small private dataset of classroom speech; only affects training if load_best_model_at_end: True
  test_manif: # small private dataset of classroom speech; doesn't affect training
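  # The manifest schema isn't documented here, so no column names are assumed.
  # A sketch of loading a CSV manifest with HF datasets:
  #
  #   import os
  #   from datasets import load_dataset
  #
  #   manif = os.path.expanduser(
  #       "~/corpora/data_manifests/ASR/PUBLIC_KIDS_TRAIN_v4_deduped.csv")
  #   train = load_dataset("csv", data_files=manif)["train"]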
experiment_cfg:
  OUT_DIR: train/whisat/save/publicKS_LoRA_int8
  use_lora: True
  use_int8: True
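  # A hypothetical sketch of what use_lora and use_int8 likely toggle, using
  # the peft library; r, lora_alpha, and target_modules below are common
  # choices for Whisper attention layers, not values taken from this repo:
  #
  #   from transformers import WhisperForConditionalGeneration
  #   from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
  #
  #   model = WhisperForConditionalGeneration.from_pretrained(
  #       "openai/whisper-large-v2", load_in_8bit=True, device_map="auto")
  #   model = prepare_model_for_kbit_training(model)  # cast norms, enable input grads
  #   lora = LoraConfig(r=32, lora_alpha=64, lora_dropout=0.05,
  #                     target_modules=["q_proj", "v_proj"])
  #   model = get_peft_model(model, lora)
  #   model.print_trainable_parameters()  # only the LoRA adapters are trainable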
train_cfg:
  training_args:
    output_dir: !ref <experiment_cfg[OUT_DIR]>  # !ref resolves a reference to another key in this file
    per_device_train_batch_size: 32  # 64
    learning_rate: 0.0001  # 1e-5 originally; 1e-3 for LoRA
    warmup_steps: 50  # 500 originally; 50 for LoRA
    num_train_epochs: 1
    fp16: True
    evaluation_strategy: steps  # or "epoch"
    per_device_eval_batch_size: 4
    predict_with_generate: True
    generation_max_length: 112
    save_steps: 500
    eval_steps: 500
    eval_accumulation_steps: 2
    logging_steps: 25
    report_to:
      - tensorboard
    load_best_model_at_end: False
    metric_for_best_model: wer
    greater_is_better: False
    push_to_hub: False
    remove_unused_columns: False  # required because PeftModel's forward doesn't share the wrapped model's signature, so the Trainer would otherwise drop columns the model needs
    label_names:
      - labels
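# The training_args block mirrors transformers.Seq2SeqTrainingArguments. A
# sketch of how these values are presumably consumed (the trainer wiring is an
# assumption; model, datasets, collator, and processor are placeholders):
#
#   from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer
#
#   args = Seq2SeqTrainingArguments(
#       output_dir="train/whisat/save/publicKS_LoRA_int8",
#       per_device_train_batch_size=32,
#       learning_rate=1e-4,
#       warmup_steps=50,
#       num_train_epochs=1,
#       fp16=True,
#       evaluation_strategy="steps",
#       per_device_eval_batch_size=4,
#       predict_with_generate=True,
#       generation_max_length=112,
#       save_steps=500,
#       eval_steps=500,
#       eval_accumulation_steps=2,
#       logging_steps=25,
#       report_to=["tensorboard"],
#       load_best_model_at_end=False,
#       metric_for_best_model="wer",
#       greater_is_better=False,
#       push_to_hub=False,
#       remove_unused_columns=False,
#       label_names=["labels"],
#   )
#   trainer = Seq2SeqTrainer(model=model, args=args,
#                            train_dataset=train_ds, eval_dataset=val_ds,
#                            data_collator=collator, tokenizer=processor)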