gigant committed on
Commit 840f0db
Parent: 8859789

update model card README.md
.ipynb_checkpoints/Untitled-checkpoint.ipynb DELETED
@@ -1,6 +0,0 @@
- {
- "cells": [],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 5
- }
.ipynb_checkpoints/fine-tune-whisper-streaming-checkpoint.ipynb CHANGED
@@ -722,10 +722,18 @@
  },
  {
  "cell_type": "code",
- "execution_count": 22,
  "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
  "metadata": {},
- "outputs": [],
  "source": [
  "from transformers import Seq2SeqTrainingArguments\n",
  "\n",
@@ -773,7 +781,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 23,
  "id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
  "metadata": {},
  "outputs": [],
@@ -802,7 +810,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 27,
  "id": "d546d7fe-0543-479a-b708-2ebabec19493",
  "metadata": {},
  "outputs": [
@@ -841,7 +849,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 28,
  "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
  "metadata": {},
  "outputs": [
@@ -887,34 +895,181 @@
  },
  {
  "cell_type": "code",
- "execution_count": 30,
  "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
  "metadata": {},
  "outputs": [
  {
- "ename": "AttributeError",
- "evalue": "/home/ubuntu/whisper-ft/bin/python: undefined symbol: cudaRuntimeGetVersion",
  "output_type": "error",
  "traceback": [
  "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[30], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
  "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1536\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1531\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_wrapped \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\n\u001b[1;32m 1533\u001b[0m inner_training_loop \u001b[38;5;241m=\u001b[39m find_executable_batch_size(\n\u001b[1;32m 1534\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_inner_training_loop, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_train_batch_size, args\u001b[38;5;241m.\u001b[39mauto_find_batch_size\n\u001b[1;32m 1535\u001b[0m )\n\u001b[0;32m-> 1536\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1537\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1538\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1539\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1540\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1541\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1614\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1612\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlr_scheduler \u001b[38;5;241m=\u001b[39m lr_scheduler\n\u001b[1;32m 1613\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m delay_optimizer_creation:\n\u001b[0;32m-> 1614\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_optimizer_and_scheduler\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_training_steps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_steps\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1616\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate \u001b[38;5;241m=\u001b[39m TrainerState()\n\u001b[1;32m 1617\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mis_hyper_param_search \u001b[38;5;241m=\u001b[39m trial \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1001\u001b[0m, in \u001b[0;36mTrainer.create_optimizer_and_scheduler\u001b[0;34m(self, num_training_steps)\u001b[0m\n\u001b[1;32m 993\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate_optimizer_and_scheduler\u001b[39m(\u001b[38;5;28mself\u001b[39m, num_training_steps: \u001b[38;5;28mint\u001b[39m):\n\u001b[1;32m 994\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 995\u001b[0m \u001b[38;5;124;03m Setup the optimizer and the learning rate scheduler.\u001b[39;00m\n\u001b[1;32m 996\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 999\u001b[0m \u001b[38;5;124;03m `create_scheduler`) in a subclass.\u001b[39;00m\n\u001b[1;32m 1000\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1001\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_optimizer\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1002\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m IS_SAGEMAKER_MP_POST_1_10 \u001b[38;5;129;01mand\u001b[39;00m smp\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mcfg\u001b[38;5;241m.\u001b[39mfp16:\n\u001b[1;32m 1003\u001b[0m \u001b[38;5;66;03m# If smp >= 1.10 and fp16 is enabled, we unwrap the optimizer\u001b[39;00m\n\u001b[1;32m 1004\u001b[0m optimizer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptimizer\u001b[38;5;241m.\u001b[39moptimizer\n",
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1032\u001b[0m, in \u001b[0;36mTrainer.create_optimizer\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1020\u001b[0m decay_parameters \u001b[38;5;241m=\u001b[39m [name \u001b[38;5;28;01mfor\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m decay_parameters \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbias\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m name]\n\u001b[1;32m 1021\u001b[0m optimizer_grouped_parameters \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 1022\u001b[0m {\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparams\u001b[39m\u001b[38;5;124m\"\u001b[39m: [p \u001b[38;5;28;01mfor\u001b[39;00m n, p \u001b[38;5;129;01min\u001b[39;00m opt_model\u001b[38;5;241m.\u001b[39mnamed_parameters() \u001b[38;5;28;01mif\u001b[39;00m n \u001b[38;5;129;01min\u001b[39;00m decay_parameters],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1029\u001b[0m },\n\u001b[1;32m 1030\u001b[0m ]\n\u001b[0;32m-> 1032\u001b[0m optimizer_cls, optimizer_kwargs \u001b[38;5;241m=\u001b[39m \u001b[43mTrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_optimizer_cls_and_kwargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1034\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msharded_ddp \u001b[38;5;241m==\u001b[39m ShardedDDPOption\u001b[38;5;241m.\u001b[39mSIMPLE:\n\u001b[1;32m 1035\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptimizer \u001b[38;5;241m=\u001b[39m OSS(\n\u001b[1;32m 1036\u001b[0m params\u001b[38;5;241m=\u001b[39moptimizer_grouped_parameters,\n\u001b[1;32m 1037\u001b[0m optim\u001b[38;5;241m=\u001b[39moptimizer_cls,\n\u001b[1;32m 1038\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptimizer_kwargs,\n\u001b[1;32m 1039\u001b[0m )\n",
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1112\u001b[0m, in \u001b[0;36mTrainer.get_optimizer_cls_and_kwargs\u001b[0;34m(args)\u001b[0m\n\u001b[1;32m 1110\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m args\u001b[38;5;241m.\u001b[39moptim \u001b[38;5;241m==\u001b[39m OptimizerNames\u001b[38;5;241m.\u001b[39mADAMW_BNB:\n\u001b[1;32m 1111\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1112\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mbitsandbytes\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01moptim\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Adam8bit\n\u001b[1;32m 1114\u001b[0m optimizer_cls \u001b[38;5;241m=\u001b[39m Adam8bit\n\u001b[1;32m 1115\u001b[0m optimizer_kwargs\u001b[38;5;241m.\u001b[39mupdate(adam_kwargs)\n",
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/__init__.py:6\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Copyright (c) Facebook, Inc. and its affiliates.\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# This source code is licensed under the MIT license found in the\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# LICENSE file in the root directory of this source tree.\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mautograd\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_functions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 7\u001b[0m MatmulLtState,\n\u001b[1;32m 8\u001b[0m bmm_cublas,\n\u001b[1;32m 9\u001b[0m matmul,\n\u001b[1;32m 10\u001b[0m matmul_cublas,\n\u001b[1;32m 11\u001b[0m mm_cublas,\n\u001b[1;32m 12\u001b[0m )\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcextension\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m COMPILED_WITH_CUDA\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mnn\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m modules\n",
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/autograd/_functions.py:5\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mwarnings\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mbitsandbytes\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctional\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mF\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdataclasses\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dataclass\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfunctools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m reduce \u001b[38;5;66;03m# Required in Python 3\u001b[39;00m\n",
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/functional.py:13\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Tuple\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Tensor\n\u001b[0;32m---> 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcextension\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m COMPILED_WITH_CUDA, lib\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfunctools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m reduce \u001b[38;5;66;03m# Required in Python 3\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# math.prod not compatible with python < 3.8\u001b[39;00m\n",
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/cextension.py:113\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_instance\u001b[38;5;241m.\u001b[39minitialize()\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_instance\n\u001b[0;32m--> 113\u001b[0m lib \u001b[38;5;241m=\u001b[39m \u001b[43mCUDASetup\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_instance\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mlib\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m lib \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39mis_available():\n",
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/cextension.py:109\u001b[0m, in \u001b[0;36mCUDASetup.get_instance\u001b[0;34m(cls)\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_instance \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 108\u001b[0m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_instance \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__new__\u001b[39m(\u001b[38;5;28mcls\u001b[39m)\n\u001b[0;32m--> 109\u001b[0m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_instance\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minitialize\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_instance\n",
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/cextension.py:59\u001b[0m, in \u001b[0;36mCUDASetup.initialize\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlib \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcuda_setup\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmain\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m evaluate_cuda_setup\n\u001b[0;32m---> 59\u001b[0m binary_name, cudart_path, cuda, cc, cuda_version_string \u001b[38;5;241m=\u001b[39m \u001b[43mevaluate_cuda_setup\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcudart_path \u001b[38;5;241m=\u001b[39m cudart_path\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcuda \u001b[38;5;241m=\u001b[39m cuda\n",
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/cuda_setup/main.py:125\u001b[0m, in \u001b[0;36mevaluate_cuda_setup\u001b[0;34m()\u001b[0m\n\u001b[1;32m 123\u001b[0m cuda \u001b[38;5;241m=\u001b[39m get_cuda_lib_handle()\n\u001b[1;32m 124\u001b[0m cc \u001b[38;5;241m=\u001b[39m get_compute_capability(cuda)\n\u001b[0;32m--> 125\u001b[0m cuda_version_string \u001b[38;5;241m=\u001b[39m \u001b[43mget_cuda_version\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcuda\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcudart_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m failure \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cudart_path \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/cuda_setup/main.py:45\u001b[0m, in \u001b[0;36mget_cuda_version\u001b[0;34m(cuda, cudart_path)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 44\u001b[0m version \u001b[38;5;241m=\u001b[39m ctypes\u001b[38;5;241m.\u001b[39mc_int()\n\u001b[0;32m---> 45\u001b[0m check_cuda_result(cuda, \u001b[43mcudart\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcudaRuntimeGetVersion\u001b[49m(ctypes\u001b[38;5;241m.\u001b[39mbyref(version)))\n\u001b[1;32m 46\u001b[0m version \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mint\u001b[39m(version\u001b[38;5;241m.\u001b[39mvalue)\n\u001b[1;32m 47\u001b[0m major \u001b[38;5;241m=\u001b[39m version\u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m\u001b[38;5;241m1000\u001b[39m\n",
- "File \u001b[0;32m/usr/lib/python3.8/ctypes/__init__.py:386\u001b[0m, in \u001b[0;36mCDLL.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name\u001b[38;5;241m.\u001b[39mstartswith(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m name\u001b[38;5;241m.\u001b[39mendswith(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m 385\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(name)\n\u001b[0;32m--> 386\u001b[0m func \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getitem__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 387\u001b[0m \u001b[38;5;28msetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, name, func)\n\u001b[1;32m 388\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func\n",
- "File \u001b[0;32m/usr/lib/python3.8/ctypes/__init__.py:391\u001b[0m, in \u001b[0;36mCDLL.__getitem__\u001b[0;34m(self, name_or_ordinal)\u001b[0m\n\u001b[1;32m 390\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, name_or_ordinal):\n\u001b[0;32m--> 391\u001b[0m func \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_FuncPtr\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname_or_ordinal\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 392\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(name_or_ordinal, \u001b[38;5;28mint\u001b[39m):\n\u001b[1;32m 393\u001b[0m func\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m=\u001b[39m name_or_ordinal\n",
- "\u001b[0;31mAttributeError\u001b[0m: /home/ubuntu/whisper-ft/bin/python: undefined symbol: cudaRuntimeGetVersion"
  ]
  }
  ],
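The deleted output above records the first training attempt (In[30]) dying before the first step: with the 8-bit Adam optimizer selected, Trainer.create_optimizer takes the OptimizerNames.ADAMW_BNB branch and imports bitsandbytes, whose CUDA probe cannot resolve cudaRuntimeGetVersion and raises the AttributeError. The rerun recorded further down gets past optimizer creation while warning about the stock (deprecated) transformers AdamW, which suggests the fix was simply dropping the bitsandbytes optimizer. A minimal hedged sketch, assuming the failing cell had set optim="adamw_bnb_8bit" (the cell itself is not shown in this diff):

from transformers import Seq2SeqTrainingArguments

# Sketch only, not the notebook's verbatim cell: fall back to the stock
# AdamW so that optimizer creation never imports bitsandbytes.
training_args = Seq2SeqTrainingArguments(
    output_dir="./",
    optim="adamw_hf",  # "adamw_bnb_8bit" is what triggers the failing import
)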
@@ -944,7 +1099,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
  "metadata": {},
  "outputs": [],
@@ -952,8 +1107,8 @@
  "kwargs = {\n",
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
- " \"language\": \"es\",\n",
- " \"model_name\": \"Whisper Small Es - Sanchit Gandhi\", # a 'pretty' name for your model\n",
  " \"finetuned_from\": \"openai/whisper-small\",\n",
  " \"tasks\": \"automatic-speech-recognition\",\n",
  " \"tags\": \"whisper-event\",\n",
@@ -973,7 +1128,21 @@
  "execution_count": null,
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
  "metadata": {},
- "outputs": [],
  "source": [
  "trainer.push_to_hub(**kwargs)"
  ]
 
  },
  {
  "cell_type": "code",
+ "execution_count": 31,
  "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
  "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "PyTorch: setting up devices\n"
+ ]
+ }
+ ],
  "source": [
  "from transformers import Seq2SeqTrainingArguments\n",
  "\n",
 
  },
  {
  "cell_type": "code",
+ "execution_count": 32,
  "id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
  "metadata": {},
  "outputs": [],
 
  },
  {
  "cell_type": "code",
+ "execution_count": 33,
  "id": "d546d7fe-0543-479a-b708-2ebabec19493",
  "metadata": {},
  "outputs": [
 
  },
  {
  "cell_type": "code",
+ "execution_count": 34,
  "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
  "metadata": {},
  "outputs": [
 
  },
  {
  "cell_type": "code",
+ "execution_count": 35,
  "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
  "metadata": {},
  "outputs": [
  {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/ubuntu/whisper-ft/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ " warnings.warn(\n",
+ "***** Running training *****\n",
+ " Num examples = 320000\n",
+ " Num Epochs = 9223372036854775807\n",
+ " Instantaneous batch size per device = 64\n",
+ " Total train batch size (w. parallel, distributed & accumulation) = 64\n",
+ " Gradient Accumulation steps = 1\n",
+ " Total optimization steps = 5000\n",
+ " Number of trainable parameters = 241734912\n",
+ "Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
+ "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:"
+ ]
+ },
+ {
+ "name": "stdin",
+ "output_type": "stream",
+ "text": [
+ " ········\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /home/ubuntu/.netrc\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "Tracking run with wandb version 0.13.6"
+ ],
+ "text/plain": [
+ "<IPython.core.display.HTML object>"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Run data is saved locally in <code>/home/ubuntu/whisper-small-ro/wandb/run-20221207_155111-30fb33sa</code>"
+ ],
+ "text/plain": [
+ "<IPython.core.display.HTML object>"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Syncing run <strong><a href=\"https://wandb.ai/gigant/huggingface/runs/30fb33sa\" target=\"_blank\">./</a></strong> to <a href=\"https://wandb.ai/gigant/huggingface\" target=\"_blank\">Weights & Biases</a> (<a href=\"https://wandb.me/run\" target=\"_blank\">docs</a>)<br/>"
+ ],
+ "text/plain": [
+ "<IPython.core.display.HTML object>"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Reading metadata...: 5187it [00:00, 30765.68it/s]\n",
+ "Reading metadata...: 3703it [00:00, 26251.44it/s]\n",
+ "Reading metadata...: 19267it [00:00, 32817.93it/s]\n",
+ "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
+ ]
982
+ },
983
+ {
984
+ "data": {
985
+ "text/html": [
986
+ "\n",
987
+ " <div>\n",
988
+ " \n",
989
+ " <progress value='1001' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
990
+ " [1001/5000 1:50:49 < 7:23:38, 0.15 it/s, Epoch 5.03/9223372036854775807]\n",
991
+ " </div>\n",
992
+ " <table border=\"1\" class=\"dataframe\">\n",
993
+ " <thead>\n",
994
+ " <tr style=\"text-align: left;\">\n",
995
+ " <th>Step</th>\n",
996
+ " <th>Training Loss</th>\n",
997
+ " <th>Validation Loss</th>\n",
998
+ " <th>Wer</th>\n",
999
+ " </tr>\n",
1000
+ " </thead>\n",
1001
+ " <tbody>\n",
1002
+ " <tr>\n",
1003
+ " <td>1000</td>\n",
1004
+ " <td>0.013700</td>\n",
1005
+ " <td>0.206843</td>\n",
1006
+ " <td>15.703064</td>\n",
1007
+ " </tr>\n",
1008
+ " </tbody>\n",
1009
+ "</table><p>"
1010
+ ],
1011
+ "text/plain": [
1012
+ "<IPython.core.display.HTML object>"
1013
+ ]
1014
+ },
1015
+ "metadata": {},
1016
+ "output_type": "display_data"
1017
+ },
1018
+ {
1019
+ "name": "stderr",
1020
+ "output_type": "stream",
1021
+ "text": [
1022
+ "Reading metadata...: 5187it [00:00, 20496.49it/s]\n",
1023
+ "Reading metadata...: 3703it [00:00, 22242.62it/s]\n",
1024
+ "Reading metadata...: 19267it [00:00, 40619.61it/s]\n",
1025
+ "Reading metadata...: 5187it [00:00, 26427.59it/s]\n",
1026
+ "Reading metadata...: 3703it [00:00, 24175.02it/s]\n",
1027
+ "Reading metadata...: 19267it [00:00, 37108.24it/s]\n",
1028
+ "Reading metadata...: 5187it [00:00, 23505.99it/s]\n",
1029
+ "Reading metadata...: 3703it [00:00, 24004.94it/s]\n",
1030
+ "Reading metadata...: 19267it [00:00, 48305.49it/s]\n",
1031
+ "Reading metadata...: 5187it [00:00, 61068.55it/s]\n",
1032
+ "Reading metadata...: 3703it [00:00, 62782.48it/s]\n",
1033
+ "Reading metadata...: 19267it [00:00, 83700.58it/s]\n",
1034
+ "Reading metadata...: 5187it [00:00, 68870.74it/s]\n",
1035
+ "Reading metadata...: 3703it [00:00, 50878.77it/s]\n",
1036
+ "Reading metadata...: 19267it [00:00, 83165.23it/s]\n",
1037
+ "***** Running Evaluation *****\n",
1038
+ " Num examples: Unknown\n",
1039
+ " Batch size = 8\n",
1040
+ "Reading metadata...: 3859it [00:00, 15916.98it/s]\n",
1041
+ "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
1042
+ "Saving model checkpoint to ./checkpoint-1000\n",
1043
+ "Configuration saved in ./checkpoint-1000/config.json\n",
1044
+ "Model weights saved in ./checkpoint-1000/pytorch_model.bin\n",
1045
+ "Feature extractor saved in ./checkpoint-1000/preprocessor_config.json\n",
1046
+ "tokenizer config file saved in ./checkpoint-1000/tokenizer_config.json\n",
1047
+ "Special tokens file saved in ./checkpoint-1000/special_tokens_map.json\n",
1048
+ "added tokens file saved in ./checkpoint-1000/added_tokens.json\n",
1049
+ "Feature extractor saved in ./preprocessor_config.json\n",
1050
+ "tokenizer config file saved in ./tokenizer_config.json\n",
1051
+ "Special tokens file saved in ./special_tokens_map.json\n",
1052
+ "added tokens file saved in ./added_tokens.json\n"
1053
+ ]
1054
+ },
1055
+ {
1056
+ "ename": "IsADirectoryError",
1057
+ "evalue": "[Errno 21] Is a directory: '/home/ubuntu/whisper-small-ro/./wandb/latest-run'",
1058
  "output_type": "error",
1059
  "traceback": [
1060
  "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1061
+ "\u001b[0;31mIsADirectoryError\u001b[0m Traceback (most recent call last)",
1062
+ "Cell \u001b[0;32mIn[35], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
  "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1536\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1531\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_wrapped \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\n\u001b[1;32m 1533\u001b[0m inner_training_loop \u001b[38;5;241m=\u001b[39m find_executable_batch_size(\n\u001b[1;32m 1534\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_inner_training_loop, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_train_batch_size, args\u001b[38;5;241m.\u001b[39mauto_find_batch_size\n\u001b[1;32m 1535\u001b[0m )\n\u001b[0;32m-> 1536\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1537\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1538\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1539\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1540\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1541\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1861\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1858\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mepoch \u001b[38;5;241m=\u001b[39m epoch \u001b[38;5;241m+\u001b[39m (step \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m/\u001b[39m steps_in_epoch\n\u001b[1;32m 1859\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_step_end(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n\u001b[0;32m-> 1861\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_maybe_log_save_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtr_loss\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mepoch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1862\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1863\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_substep_end(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n",
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:2128\u001b[0m, in \u001b[0;36mTrainer._maybe_log_save_evaluate\u001b[0;34m(self, tr_loss, model, trial, epoch, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2125\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_report_to_hp_search(trial, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step, metrics)\n\u001b[1;32m 2127\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol\u001b[38;5;241m.\u001b[39mshould_save:\n\u001b[0;32m-> 2128\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_save_checkpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetrics\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetrics\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2129\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_save(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n",
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:2272\u001b[0m, in \u001b[0;36mTrainer._save_checkpoint\u001b[0;34m(self, model, trial, metrics)\u001b[0m\n\u001b[1;32m 2269\u001b[0m torch\u001b[38;5;241m.\u001b[39msave(rng_states, os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(output_dir, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrng_state_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mprocess_index\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.pth\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[1;32m 2271\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mpush_to_hub:\n\u001b[0;32m-> 2272\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_push_from_checkpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[43moutput_dir\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2274\u001b[0m \u001b[38;5;66;03m# Maybe delete some older checkpoints.\u001b[39;00m\n\u001b[1;32m 2275\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mshould_save:\n",
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:3444\u001b[0m, in \u001b[0;36mTrainer._push_from_checkpoint\u001b[0;34m(self, checkpoint_folder)\u001b[0m\n\u001b[1;32m 3442\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 3443\u001b[0m commit_message \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTraining in progress, epoch \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mint\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mepoch)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 3444\u001b[0m _, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpush_in_progress \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrepo\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpush_to_hub\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3445\u001b[0m \u001b[43m \u001b[49m\u001b[43mcommit_message\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcommit_message\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblocking\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mauto_lfs_prune\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\n\u001b[1;32m 3446\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3447\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 3448\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mhub_strategy \u001b[38;5;241m==\u001b[39m HubStrategy\u001b[38;5;241m.\u001b[39mCHECKPOINT:\n\u001b[1;32m 3449\u001b[0m \u001b[38;5;66;03m# Move back the checkpoint to its place\u001b[39;00m\n",
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/huggingface_hub/repository.py:1430\u001b[0m, in \u001b[0;36mRepository.push_to_hub\u001b[0;34m(self, commit_message, blocking, clean_ok, auto_lfs_prune)\u001b[0m\n\u001b[1;32m 1428\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRepo currently clean. Ignoring push_to_hub\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1429\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1430\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgit_add\u001b[49m\u001b[43m(\u001b[49m\u001b[43mauto_lfs_track\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 1431\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgit_commit(commit_message)\n\u001b[1;32m 1432\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgit_push(\n\u001b[1;32m 1433\u001b[0m upstream\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morigin \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcurrent_branch\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1434\u001b[0m blocking\u001b[38;5;241m=\u001b[39mblocking,\n\u001b[1;32m 1435\u001b[0m auto_lfs_prune\u001b[38;5;241m=\u001b[39mauto_lfs_prune,\n\u001b[1;32m 1436\u001b[0m )\n",
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/huggingface_hub/repository.py:1113\u001b[0m, in \u001b[0;36mRepository.git_add\u001b[0;34m(self, pattern, auto_lfs_track)\u001b[0m\n\u001b[1;32m 1110\u001b[0m tracked_files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mauto_track_large_files(pattern)\n\u001b[1;32m 1112\u001b[0m \u001b[38;5;66;03m# Read the remaining files and track them if they're binary\u001b[39;00m\n\u001b[0;32m-> 1113\u001b[0m tracked_files\u001b[38;5;241m.\u001b[39mextend(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mauto_track_binary_files\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 1115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tracked_files:\n\u001b[1;32m 1116\u001b[0m logger\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m 1117\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAdding files tracked by Git LFS: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtracked_files\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m. This may take a\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1118\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m bit of time if the files are large.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1119\u001b[0m )\n",
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/huggingface_hub/repository.py:1001\u001b[0m, in \u001b[0;36mRepository.auto_track_binary_files\u001b[0;34m(self, pattern)\u001b[0m\n\u001b[1;32m 994\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m size_in_mb \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m10\u001b[39m:\n\u001b[1;32m 995\u001b[0m logger\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m 996\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mParsing a large file to check if binary or not. Tracking large\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 997\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m files using `repository.auto_track_large_files` is\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 998\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m recommended so as to not load the full file in memory.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 999\u001b[0m )\n\u001b[0;32m-> 1001\u001b[0m is_binary \u001b[38;5;241m=\u001b[39m \u001b[43mis_binary_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath_to_file\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1003\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_binary:\n\u001b[1;32m 1004\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlfs_track(filename)\n",
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/huggingface_hub/repository.py:235\u001b[0m, in \u001b[0;36mis_binary_file\u001b[0;34m(filename)\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;124;03mCheck if file is a binary file.\u001b[39;00m\n\u001b[1;32m 226\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[38;5;124;03m `bool`: `True` if the file passed is a binary file, `False` otherwise.\u001b[39;00m\n\u001b[1;32m 233\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 234\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 235\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 236\u001b[0m content \u001b[38;5;241m=\u001b[39m f\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;241m10\u001b[39m \u001b[38;5;241m*\u001b[39m (\u001b[38;5;241m1024\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m2\u001b[39m)) \u001b[38;5;66;03m# Read a maximum of 10MB\u001b[39;00m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;66;03m# Code sample taken from the following stack overflow thread\u001b[39;00m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;66;03m# https://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python/7392391#7392391\u001b[39;00m\n",
+ "\u001b[0;31mIsADirectoryError\u001b[0m: [Errno 21] Is a directory: '/home/ubuntu/whisper-small-ro/./wandb/latest-run'"
  ]
  }
  ],
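This second attempt (In[35]) trains to step 1000 and writes ./checkpoint-1000, but the automatic checkpoint push then crashes: per the traceback, Repository.git_add(auto_lfs_track=True) walks the repo and is_binary_file() tries to open wandb/latest-run, a symlink to a run directory that W&B created inside the model repo. A hedged workaround sketch (an assumption, not shown in this diff): git-ignore the wandb directory before training so it is never staged.

from pathlib import Path

# Sketch under the assumption that the model repo root is the notebook's
# working directory; "wandb/latest-run" points at a directory, which
# huggingface_hub's is_binary_file() cannot open as a regular file.
repo_root = Path("/home/ubuntu/whisper-small-ro")
gitignore = repo_root / ".gitignore"
existing = gitignore.read_text() if gitignore.exists() else ""
if "wandb/" not in existing:
    with gitignore.open("a") as f:
        f.write("wandb/\n")  # keep W&B run files out of every git_add/push

Training could then be resumed from the saved state with trainer.train(resume_from_checkpoint="./checkpoint-1000") rather than restarted from scratch.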
 
  },
  {
  "cell_type": "code",
+ "execution_count": 36,
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
  "metadata": {},
  "outputs": [],
 
  "kwargs = {\n",
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
+ " \"language\": \"ro\",\n",
+ " \"model_name\": \"Whisper Small Romanian\", # a 'pretty' name for your model\n",
  " \"finetuned_from\": \"openai/whisper-small\",\n",
  " \"tasks\": \"automatic-speech-recognition\",\n",
  " \"tags\": \"whisper-event\",\n",
 
  "execution_count": null,
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
  "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to ./\n",
+ "Configuration saved in ./config.json\n",
+ "Model weights saved in ./pytorch_model.bin\n",
+ "Feature extractor saved in ./preprocessor_config.json\n",
+ "tokenizer config file saved in ./tokenizer_config.json\n",
+ "Special tokens file saved in ./special_tokens_map.json\n",
+ "added tokens file saved in ./added_tokens.json\n"
+ ]
+ }
+ ],
  "source": [
  "trainer.push_to_hub(**kwargs)"
  ]
README.md ADDED
@@ -0,0 +1,62 @@
+ ---
+ language:
+ - ro
+ license: apache-2.0
+ tags:
+ - whisper-event
+ - generated_from_trainer
+ datasets:
+ - mozilla-foundation/common_voice_11_0
+ model-index:
+ - name: Whisper Small Romanian
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # Whisper Small Romanian
+
+ This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the Common Voice 11.0 dataset.
+ It achieves the following results on the evaluation set:
+ - eval_loss: 0.2068
+ - eval_wer: 15.7031
+ - eval_runtime: 1070.693
+ - eval_samples_per_second: 3.604
+ - eval_steps_per_second: 0.451
+ - epoch: 5.03
+ - step: 1000
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 1e-05
+ - train_batch_size: 64
+ - eval_batch_size: 8
+ - seed: 42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_steps: 500
+ - training_steps: 5000
+ - mixed_precision_training: Native AMP
+
+ ### Framework versions
+
+ - Transformers 4.26.0.dev0
+ - Pytorch 1.13.0+cu117
+ - Datasets 2.7.1.dev0
+ - Tokenizers 0.13.2
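The hyperparameter list in the card above maps one-to-one onto Seq2SeqTrainingArguments. A hedged reconstruction (the notebook's actual cell is only partially visible in this diff and likely sets further streaming and generation options):

from transformers import Seq2SeqTrainingArguments

# Sketch assembled from the card's "Training hyperparameters" section;
# output_dir is an assumption (the logs save to the repo root "./"),
# and fp16=True requires a CUDA device at initialization.
training_args = Seq2SeqTrainingArguments(
    output_dir="./",
    learning_rate=1e-5,
    per_device_train_batch_size=64,  # train_batch_size: 64
    per_device_eval_batch_size=8,    # eval_batch_size: 8
    seed=42,
    lr_scheduler_type="linear",
    warmup_steps=500,                # lr_scheduler_warmup_steps: 500
    max_steps=5000,                  # training_steps: 5000
    fp16=True,                       # mixed_precision_training: Native AMP
)

The Adam betas (0.9, 0.999) and epsilon 1e-08 listed in the card are the transformers defaults, so they need no explicit arguments.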
Untitled.ipynb DELETED
@@ -1,6 +0,0 @@
- {
- "cells": [],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 5
- }
fine-tune-whisper-streaming.ipynb CHANGED
@@ -1099,7 +1099,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
  "metadata": {},
  "outputs": [],
@@ -1107,8 +1107,8 @@
  "kwargs = {\n",
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
- " \"language\": \"es\",\n",
- " \"model_name\": \"Whisper Small Es - Sanchit Gandhi\", # a 'pretty' name for your model\n",
  " \"finetuned_from\": \"openai/whisper-small\",\n",
  " \"tasks\": \"automatic-speech-recognition\",\n",
  " \"tags\": \"whisper-event\",\n",
@@ -1128,7 +1128,21 @@
  "execution_count": null,
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
  "metadata": {},
- "outputs": [],
  "source": [
  "trainer.push_to_hub(**kwargs)"
  ]
 
  },
  {
  "cell_type": "code",
+ "execution_count": 36,
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
  "metadata": {},
  "outputs": [],
 
  "kwargs = {\n",
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
+ " \"language\": \"ro\",\n",
+ " \"model_name\": \"Whisper Small Romanian\", # a 'pretty' name for your model\n",
  " \"finetuned_from\": \"openai/whisper-small\",\n",
  " \"tasks\": \"automatic-speech-recognition\",\n",
  " \"tags\": \"whisper-event\",\n",
 
  "execution_count": null,
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
  "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to ./\n",
+ "Configuration saved in ./config.json\n",
+ "Model weights saved in ./pytorch_model.bin\n",
+ "Feature extractor saved in ./preprocessor_config.json\n",
+ "tokenizer config file saved in ./tokenizer_config.json\n",
+ "Special tokens file saved in ./special_tokens_map.json\n",
+ "added tokens file saved in ./added_tokens.json\n"
+ ]
+ }
+ ],
  "source": [
  "trainer.push_to_hub(**kwargs)"
  ]
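The final push succeeds here: trainer.push_to_hub(**kwargs) saves the weights to the repo root, generates the README.md added in this commit from the kwargs, and commits and pushes the result. Once the card is live, the model can be loaded back by repo id; a hedged usage sketch (the exact Hub id is not shown in this diff, so the one below is hypothetical — substitute the repository this commit belongs to):

from transformers import pipeline

# Assumption: "gigant/whisper-small-ro" is inferred from the local
# directory name in the logs and stands in for the actual repo id.
asr = pipeline(
    "automatic-speech-recognition",
    model="gigant/whisper-small-ro",
)
print(asr("audio.wav")["text"])  # transcribe a Romanian recording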