drmeeseeks committed
Commit 5804643
1 Parent(s): 46accd2

Upload whisper_python_am_et.ipynb

Files changed (1):
  whisper_python_am_et.ipynb (+11 -10)
whisper_python_am_et.ipynb CHANGED
@@ -628,12 +628,13 @@
 },
 {
 "cell_type": "code",
-"execution_count": 14,
+"execution_count": 23,
 "metadata": {
 "id": "iN2MgL5gYgmB"
 },
 "outputs": [],
 "source": [
+"!rm run.sh\n",
 "!echo 'python run_speech_recognition_seq2seq_streaming.py \\\n",
 " --model_name_or_path=\"openai/whisper-small\" \\\n",
 " --dataset_name=\"google/fleurs\" \\\n",
@@ -642,7 +643,7 @@
 " --train_split_name=\"train+validation\" \\\n",
 " --eval_split_name=\"test\" \\\n",
 " --model_index_name=\"Whisper Small Amharic FLEURS\" \\\n",
-" --max_steps=\"5000\" \\\n",
+" --max_steps=\"1000\" \\\n",
 " --output_dir=\"./whisper-small-amet\" \\\n",
 " --per_device_train_batch_size=\"64\" \\\n",
 " --per_device_eval_batch_size=\"32\" \\\n",
@@ -651,9 +652,9 @@
 " --learning_rate=\"1e-5\" \\\n",
 " --warmup_steps=\"500\" \\\n",
 " --evaluation_strategy=\"steps\" \\\n",
-" --eval_steps=\"5000\" \\\n",
+" --eval_steps=\"10000\" \\\n",
 " --save_strategy=\"steps\" \\\n",
-" --save_steps=\"5000\" \\\n",
+" --save_steps=\"100\" \\\n",
 " --generation_max_length=\"225\" \\\n",
 " --length_column_name=\"input_length\" \\\n",
 " --max_duration_in_seconds=\"30\" \\\n",
@@ -662,22 +663,22 @@
 " --report_to=\"tensorboard\" \\\n",
 " --metric_for_best_model=\"wer\" \\\n",
 " --greater_is_better=\"False\" \\\n",
-" --load_best_model_at_end \\\n",
+" --load_best_model_at_end=\"False\" \\\n",
 " --gradient_checkpointing \\\n",
 " --fp16 \\\n",
 " --overwrite_output_dir \\\n",
 " --do_train \\\n",
-" --do_eval=False \\\n",
+" --do_eval=\"False\" \\\n",
 " --predict_with_generate \\\n",
-" --do_normalize_eval \\\n",
+" --do_normalize_eval=\"False\" \\\n",
 " --use_auth_token \\\n",
 " --no_streaming \\\n",
-" --push_to_hub=True' >> run.sh"
+" --push_to_hub=\"True\"' >> run.sh"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 13,
+"execution_count": 15,
 "metadata": {
 "colab": {
 "base_uri": "https://localhost:8080/"
@@ -709,7 +710,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"!python run_eval_whisper_streaming.py --model_id=\"openai/whisper-small\" --dataset=\"google/fleurs\" --config=\"am_et\" --device=0 --language=\"am\""
+"!python run_eval_whisper_streaming.py --model_id=\"openai/whisper-small\" --dataset=\"google/fleurs\" --config=\"am_et\" --batch_size=32 --max_eval_samples=64 --device=0 --language=\"am\""
 ]
 },
 {
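The last hunk adds --batch_size=32 and --max_eval_samples=64 to the evaluation command, capping the FLEURS am_et test run at 64 samples. Roughly, a capped streaming evaluation of this kind does the following; this is a hypothetical re-implementation for orientation, not run_eval_whisper_streaming.py itself (the "transcription" field name follows google/fleurs, and passing language/task through generate_kwargs assumes a reasonably recent transformers):

    import evaluate
    from datasets import load_dataset
    from transformers import pipeline

    asr = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-small",
        device=0,  # mirrors --device=0
    )
    # Stream the test split and stop after 64 samples (--max_eval_samples=64).
    ds = load_dataset("google/fleurs", "am_et", split="test", streaming=True).take(64)

    wer_metric = evaluate.load("wer")  # requires the jiwer package
    predictions, references = [], []
    for sample in ds:
        out = asr(
            sample["audio"],  # dict with "array" and "sampling_rate"
            generate_kwargs={"language": "am", "task": "transcribe"},
        )
        predictions.append(out["text"])
        references.append(sample["transcription"])
    print("WER (%):", 100 * wer_metric.compute(predictions=predictions, references=references))

For simplicity the sketch transcribes one sample at a time; the actual command feeds the pipeline batches of 32 via --batch_size=32.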