# parameters to set
model_cfg:
  init_from_hub_path: openai/whisper-large-v2
  # lang: None
  # apply_spec_augment: True
  # mask_time_prob: 0.05
  # mask_feature_prob: 0.05
  # mask_time_length: 40
  # mask_feature_length: 30
  # mask_time_min_masks: 2
  # mask_feature_min_masks: 2
data_cfg:
  data_root: ~/corpora/
  train_manif: ~/corpora/data_manifests/ASR/PUBLIC_KIDS_TRAIN_v4_deduped.csv
  val_manif:  # small private dataset of classroom speech; only affects training if load_best_model_at_end: True
  test_manif:  # small private dataset of classroom speech; doesn't affect training
experiment_cfg:
  OUT_DIR: train/whisat/save/publicKS_LoRA_int8
  use_lora: True
  use_int8: True
train_cfg:
  training_args:
    output_dir: !ref
    per_device_train_batch_size: 32  # 64
    learning_rate: 0.0001  # 1e-5 orig, 1e-3 lora
    warmup_steps: 50  # 500 orig, 50 lora
    num_train_epochs: 1
    fp16: True  # True
    evaluation_strategy: steps  # or epoch
    per_device_eval_batch_size: 4
    predict_with_generate: True
    generation_max_length: 112
    save_steps: 500
    eval_steps: 500
    eval_accumulation_steps: 2
    logging_steps: 25
    report_to:
      - tensorboard
    load_best_model_at_end: False
    metric_for_best_model: wer
    greater_is_better: False
    push_to_hub: False
    remove_unused_columns: False  # required as the PeftModel forward doesn't have the signature of the wrapped model's forward
    label_names:
      - labels
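
# A minimal sketch (kept in comments) of how settings like these are typically consumed,
# assuming the Hugging Face transformers + peft + bitsandbytes stack implied by
# use_lora / use_int8. The LoRA hyperparameters below (r, lora_alpha, lora_dropout,
# target_modules) are illustrative assumptions, not values defined in this config.
#
#   from transformers import Seq2SeqTrainingArguments, WhisperForConditionalGeneration
#   from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training
#
#   # model_cfg.init_from_hub_path, loaded in 8-bit when experiment_cfg.use_int8 is True
#   model = WhisperForConditionalGeneration.from_pretrained(
#       "openai/whisper-large-v2", load_in_8bit=True)
#   model = prepare_model_for_int8_training(model)
#
#   # experiment_cfg.use_lora: wrap the base model with LoRA adapters
#   lora_cfg = LoraConfig(r=32, lora_alpha=64, lora_dropout=0.05,
#                         target_modules=["q_proj", "v_proj"], bias="none")
#   model = get_peft_model(model, lora_cfg)
#
#   # train_cfg.training_args maps directly onto Seq2SeqTrainingArguments
#   args = Seq2SeqTrainingArguments(
#       output_dir="train/whisat/save/publicKS_LoRA_int8",  # experiment_cfg.OUT_DIR
#       per_device_train_batch_size=32,
#       learning_rate=1e-4,
#       warmup_steps=50,
#       num_train_epochs=1,
#       fp16=True,
#       predict_with_generate=True,
#       remove_unused_columns=False,  # PEFT wrapper changes the forward signature
#       label_names=["labels"],
#   )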