farsipal committed on
Commit
31b008e
·
1 Parent(s): 98282bb

Upload fine-tune-whisper-streaming-cf11-el.ipynb

Browse files
fine-tune-whisper-streaming-cf11-el.ipynb CHANGED
@@ -661,7 +661,8 @@
661
  ],
662
  "source": [
663
  "from datasets import IterableDatasetDict\n",
664
- "access_token = 'hf_xtu...use your access token here'\n",
 
665
  "raw_datasets = IterableDatasetDict()\n",
666
  "\n",
667
  "raw_datasets[\"train\"] = load_whole_dataset(\"mozilla-foundation/common_voice_11_0\", \"el\", split=\"train+validation\", use_auth_token=access_token) \n",
@@ -2014,27 +2015,6 @@
2014
  "We can label our checkpoint with the `whisper-event` tag on push by setting the appropriate key-word arguments (kwargs):"
2015
  ]
2016
  },
2017
- {
2018
- "cell_type": "code",
2019
- "execution_count": 45,
2020
- "id": "4128bee3",
2021
- "metadata": {},
2022
- "outputs": [
2023
- {
2024
- "data": {
2025
- "text/plain": [
2026
- "'hf_REDACTED'"
2027
- ]
2028
- },
2029
- "execution_count": 45,
2030
- "metadata": {},
2031
- "output_type": "execute_result"
2032
- }
2033
- ],
2034
- "source": [
2035
- "access_token"
2036
- ]
2037
- },
2038
  {
2039
  "cell_type": "code",
2040
  "execution_count": 50,
@@ -2085,44 +2065,7 @@
2085
  ],
2086
  "source": [
2087
  "from huggingface_hub import notebook_login\n",
2088
- "notebook_login()\n"
2089
- ]
2090
- },
2091
- {
2092
- "cell_type": "code",
2093
- "execution_count": 53,
2094
- "id": "8f26ce24",
2095
- "metadata": {},
2096
- "outputs": [
2097
- {
2098
- "name": "stderr",
2099
- "output_type": "stream",
2100
- "text": [
2101
- "/home/farsipal/miniconda3/envs/whisper/lib/python3.10/site-packages/huggingface_hub/repository.py:725: FutureWarning: Creating a repository through 'clone_from' is deprecated and will be removed in v0.12. Please create the repository first using `create_repo(..., exists_ok=True)`.\n",
2102
- " warnings.warn(\n"
2103
- ]
2104
- },
2105
- {
2106
- "ename": "OSError",
2107
- "evalue": "Tried to clone a repository in a non-empty folder that isn't a git repository. If you really want to do this, do it manually:\ngit init && git remote add origin && git pull origin main\n or clone repo to a new folder and move your existing files there afterwards.",
2108
- "output_type": "error",
2109
- "traceback": [
2110
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
2111
- "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
2112
- "Cell \u001b[0;32mIn [53], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39m#access_token = \"hf_REDACTED\"\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m trainer\u001b[39m.\u001b[39;49mpush_to_hub(\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
2113
- "File \u001b[0;32m~/miniconda3/envs/whisper/lib/python3.10/site-packages/transformers/trainer.py:3456\u001b[0m, in \u001b[0;36mTrainer.push_to_hub\u001b[0;34m(self, commit_message, blocking, **kwargs)\u001b[0m\n\u001b[1;32m 3453\u001b[0m \u001b[39m# If a user calls manually `push_to_hub` with `self.args.push_to_hub = False`, we try to create the repo but\u001b[39;00m\n\u001b[1;32m 3454\u001b[0m \u001b[39m# it might fail.\u001b[39;00m\n\u001b[1;32m 3455\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mhasattr\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mrepo\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[0;32m-> 3456\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49minit_git_repo()\n\u001b[1;32m 3458\u001b[0m model_name \u001b[39m=\u001b[39m kwargs\u001b[39m.\u001b[39mpop(\u001b[39m\"\u001b[39m\u001b[39mmodel_name\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m)\n\u001b[1;32m 3459\u001b[0m \u001b[39mif\u001b[39;00m model_name \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39margs\u001b[39m.\u001b[39mshould_save:\n",
2114
- "File \u001b[0;32m~/miniconda3/envs/whisper/lib/python3.10/site-packages/transformers/trainer.py:3309\u001b[0m, in \u001b[0;36mTrainer.init_git_repo\u001b[0;34m(self, at_init)\u001b[0m\n\u001b[1;32m 3306\u001b[0m repo_name \u001b[39m=\u001b[39m get_full_repo_name(repo_name, token\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39margs\u001b[39m.\u001b[39mhub_token)\n\u001b[1;32m 3308\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 3309\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrepo \u001b[39m=\u001b[39m Repository(\n\u001b[1;32m 3310\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49margs\u001b[39m.\u001b[39;49moutput_dir,\n\u001b[1;32m 3311\u001b[0m clone_from\u001b[39m=\u001b[39;49mrepo_name,\n\u001b[1;32m 3312\u001b[0m use_auth_token\u001b[39m=\u001b[39;49muse_auth_token,\n\u001b[1;32m 3313\u001b[0m private\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49margs\u001b[39m.\u001b[39;49mhub_private_repo,\n\u001b[1;32m 3314\u001b[0m )\n\u001b[1;32m 3315\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mEnvironmentError\u001b[39;00m:\n\u001b[1;32m 3316\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39margs\u001b[39m.\u001b[39moverwrite_output_dir \u001b[39mand\u001b[39;00m at_init:\n\u001b[1;32m 3317\u001b[0m \u001b[39m# Try again after wiping output_dir\u001b[39;00m\n",
2115
- "File \u001b[0;32m~/miniconda3/envs/whisper/lib/python3.10/site-packages/huggingface_hub/utils/_deprecation.py:101\u001b[0m, in \u001b[0;36m_deprecate_arguments.<locals>._inner_deprecate_positional_args.<locals>.inner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 99\u001b[0m message \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m+\u001b[39m custom_message\n\u001b[1;32m 100\u001b[0m warnings\u001b[39m.\u001b[39mwarn(message, \u001b[39mFutureWarning\u001b[39;00m)\n\u001b[0;32m--> 101\u001b[0m \u001b[39mreturn\u001b[39;00m f(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
2116
- "File \u001b[0;32m~/miniconda3/envs/whisper/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:124\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[39mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 120\u001b[0m kwargs \u001b[39m=\u001b[39m smoothly_deprecate_use_auth_token(\n\u001b[1;32m 121\u001b[0m fn_name\u001b[39m=\u001b[39mfn\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m, has_token\u001b[39m=\u001b[39mhas_token, kwargs\u001b[39m=\u001b[39mkwargs\n\u001b[1;32m 122\u001b[0m )\n\u001b[0;32m--> 124\u001b[0m \u001b[39mreturn\u001b[39;00m fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
2117
- "File \u001b[0;32m~/miniconda3/envs/whisper/lib/python3.10/site-packages/huggingface_hub/repository.py:528\u001b[0m, in \u001b[0;36mRepository.__init__\u001b[0;34m(self, local_dir, clone_from, repo_type, token, git_user, git_email, revision, private, skip_lfs_files, client)\u001b[0m\n\u001b[1;32m 525\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhuggingface_token \u001b[39m=\u001b[39m HfFolder\u001b[39m.\u001b[39mget_token()\n\u001b[1;32m 527\u001b[0m \u001b[39mif\u001b[39;00m clone_from \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 528\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mclone_from(repo_url\u001b[39m=\u001b[39;49mclone_from)\n\u001b[1;32m 529\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 530\u001b[0m \u001b[39mif\u001b[39;00m is_git_repo(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlocal_dir):\n",
2118
- "File \u001b[0;32m~/miniconda3/envs/whisper/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:124\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[39mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 120\u001b[0m kwargs \u001b[39m=\u001b[39m smoothly_deprecate_use_auth_token(\n\u001b[1;32m 121\u001b[0m fn_name\u001b[39m=\u001b[39mfn\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m, has_token\u001b[39m=\u001b[39mhas_token, kwargs\u001b[39m=\u001b[39mkwargs\n\u001b[1;32m 122\u001b[0m )\n\u001b[0;32m--> 124\u001b[0m \u001b[39mreturn\u001b[39;00m fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
2119
- "File \u001b[0;32m~/miniconda3/envs/whisper/lib/python3.10/site-packages/huggingface_hub/repository.py:762\u001b[0m, in \u001b[0;36mRepository.clone_from\u001b[0;34m(self, repo_url, token)\u001b[0m\n\u001b[1;32m 759\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 760\u001b[0m \u001b[39m# Check if the folder is the root of a git repository\u001b[39;00m\n\u001b[1;32m 761\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m is_git_repo(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlocal_dir):\n\u001b[0;32m--> 762\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 763\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mTried to clone a repository in a non-empty folder that isn\u001b[39m\u001b[39m'\u001b[39m\u001b[39mt a\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 764\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m git repository. If you really want to do this, do it\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 765\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m manually:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39mgit init && git remote add origin && git pull\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 766\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m origin main\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m or clone repo to a new folder and move your\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 767\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m existing files there afterwards.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 768\u001b[0m )\n\u001b[1;32m 770\u001b[0m \u001b[39mif\u001b[39;00m is_local_clone(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlocal_dir, repo_url):\n\u001b[1;32m 771\u001b[0m logger\u001b[39m.\u001b[39mwarning(\n\u001b[1;32m 772\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlocal_dir\u001b[39m}\u001b[39;00m\u001b[39m is already a clone of 
\u001b[39m\u001b[39m{\u001b[39;00mclean_repo_url\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 773\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m Make sure you pull the latest changes with\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 774\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m `repo.git_pull()`.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 775\u001b[0m )\n",
2120
- "\u001b[0;31mOSError\u001b[0m: Tried to clone a repository in a non-empty folder that isn't a git repository. If you really want to do this, do it manually:\ngit init && git remote add origin && git pull origin main\n or clone repo to a new folder and move your existing files there afterwards."
2121
- ]
2122
- }
2123
- ],
2124
- "source": [
2125
- "\n",
2126
  "trainer.push_to_hub(**kwargs)"
2127
  ]
2128
  }
 
661
  ],
662
  "source": [
663
  "from datasets import IterableDatasetDict\n",
664
+ "# Please use a read access token below\n",
665
+ "access_token = 'use your token here'\n",
666
  "raw_datasets = IterableDatasetDict()\n",
667
  "\n",
668
  "raw_datasets[\"train\"] = load_whole_dataset(\"mozilla-foundation/common_voice_11_0\", \"el\", split=\"train+validation\", use_auth_token=access_token) \n",
 
2015
  "We can label our checkpoint with the `whisper-event` tag on push by setting the appropriate key-word arguments (kwargs):"
2016
  ]
2017
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2018
  {
2019
  "cell_type": "code",
2020
  "execution_count": 50,
 
2065
  ],
2066
  "source": [
2067
  "from huggingface_hub import notebook_login\n",
2068
+ "notebook_login()\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2069
  "trainer.push_to_hub(**kwargs)"
2070
  ]
2071
  }