kimbochen committed on
Commit
6f314a7
·
1 Parent(s): 3a561aa

End of training

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. fine-tune-whisper-streaming.ipynb +8 -9
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  language:
3
- - zh-tw
4
  license: apache-2.0
5
  tags:
6
  - whisper-event
 
1
  ---
2
  language:
3
+ - zh
4
  license: apache-2.0
5
  tags:
6
  - whisper-event
fine-tune-whisper-streaming.ipynb CHANGED
@@ -1115,7 +1115,7 @@
1115
  },
1116
  {
1117
  "cell_type": "code",
1118
- "execution_count": 34,
1119
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
1120
  "metadata": {},
1121
  "outputs": [],
@@ -1123,7 +1123,7 @@
1123
  "kwargs = {\n",
1124
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
1125
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
1126
- " \"language\": \"zh\",\n",
1127
  " \"model_name\": \"Whisper Small Chinese - Kimbo Chen\", # a 'pretty' name for your model\n",
1128
  " \"finetuned_from\": \"openai/whisper-small\",\n",
1129
  " \"tasks\": \"automatic-speech-recognition\",\n",
@@ -1141,7 +1141,7 @@
1141
  },
1142
  {
1143
  "cell_type": "code",
1144
- "execution_count": 35,
1145
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
1146
  "metadata": {},
1147
  "outputs": [
@@ -1156,14 +1156,13 @@
1156
  "tokenizer config file saved in ./tokenizer_config.json\n",
1157
  "Special tokens file saved in ./special_tokens_map.json\n",
1158
  "added tokens file saved in ./added_tokens.json\n",
1159
- "Several commits (11) will be pushed upstream.\n",
1160
  "The progress bars may be unreliable.\n",
1161
  "remote: Scanning LFS files for validity, may be slow... \n",
1162
  "remote: LFS file scan complete. \n",
1163
  "remote: ----------------------------------------------------------\u001b[0;31m \n",
1164
  "remote: Sorry, your push was rejected during YAML metadata verification: \n",
1165
- "remote: - Error: \"language[0]\" must only contain lowercase characters \n",
1166
- "remote: - Error: \"language[0]\" with value \"zh-TW\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m \n",
1167
  "remote: ---------------------------------------------------------- \n",
1168
  "remote: Please find the documentation at: \n",
1169
  "remote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m \n",
@@ -1176,7 +1175,7 @@
1176
  },
1177
  {
1178
  "ename": "OSError",
1179
- "evalue": "remote: Scanning LFS files for validity, may be slow... \nremote: LFS file scan complete. \nremote: ----------------------------------------------------------\u001b[0;31m \nremote: Sorry, your push was rejected during YAML metadata verification: \nremote: - Error: \"language[0]\" must only contain lowercase characters \nremote: - Error: \"language[0]\" with value \"zh-TW\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m \nremote: ---------------------------------------------------------- \nremote: Please find the documentation at: \nremote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m \nremote: ---------------------------------------------------------- \nTo https://huggingface.co/kimbochen/whisper-small-zh-tw\n ! [remote rejected] main -> main (pre-receive hook declined)\nerror: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n",
1180
  "output_type": "error",
1181
  "traceback": [
1182
  "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
@@ -1185,11 +1184,11 @@
1185
  "\u001b[0;31mCalledProcessError\u001b[0m: Command '['git', 'push', '--set-upstream', 'origin', 'main']' returned non-zero exit status 1.",
1186
  "\nDuring handling of the above exception, another exception occurred:\n",
1187
  "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
1188
- "Cell \u001b[0;32mIn[35], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpush_to_hub\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
1189
  "File \u001b[0;32m~/.venv/lib/python3.8/site-packages/transformers/trainer.py:3492\u001b[0m, in \u001b[0;36mTrainer.push_to_hub\u001b[0;34m(self, commit_message, blocking, **kwargs)\u001b[0m\n\u001b[1;32m 3489\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpush_in_progress\u001b[38;5;241m.\u001b[39m_process\u001b[38;5;241m.\u001b[39mkill()\n\u001b[1;32m 3490\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpush_in_progress \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 3492\u001b[0m git_head_commit_url \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrepo\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpush_to_hub\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3493\u001b[0m \u001b[43m \u001b[49m\u001b[43mcommit_message\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcommit_message\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblocking\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblocking\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mauto_lfs_prune\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\n\u001b[1;32m 3494\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3495\u001b[0m \u001b[38;5;66;03m# push separately the model card to be independant from the rest of the model\u001b[39;00m\n\u001b[1;32m 3496\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mshould_save:\n",
1190
  "File \u001b[0;32m~/.venv/lib/python3.8/site-packages/huggingface_hub/repository.py:1432\u001b[0m, in \u001b[0;36mRepository.push_to_hub\u001b[0;34m(self, commit_message, blocking, clean_ok, auto_lfs_prune)\u001b[0m\n\u001b[1;32m 1430\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgit_add(auto_lfs_track\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1431\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgit_commit(commit_message)\n\u001b[0;32m-> 1432\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgit_push\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1433\u001b[0m \u001b[43m \u001b[49m\u001b[43mupstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43morigin \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcurrent_branch\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1434\u001b[0m \u001b[43m \u001b[49m\u001b[43mblocking\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblocking\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1435\u001b[0m \u001b[43m \u001b[49m\u001b[43mauto_lfs_prune\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mauto_lfs_prune\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1436\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
1191
  "File \u001b[0;32m~/.venv/lib/python3.8/site-packages/huggingface_hub/repository.py:1212\u001b[0m, in \u001b[0;36mRepository.git_push\u001b[0;34m(self, upstream, blocking, auto_lfs_prune)\u001b[0m\n\u001b[1;32m 1207\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m subprocess\u001b[38;5;241m.\u001b[39mCalledProcessError(\n\u001b[1;32m 1208\u001b[0m return_code, process\u001b[38;5;241m.\u001b[39margs, output\u001b[38;5;241m=\u001b[39mstdout, stderr\u001b[38;5;241m=\u001b[39mstderr\n\u001b[1;32m 1209\u001b[0m )\n\u001b[1;32m 1211\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m subprocess\u001b[38;5;241m.\u001b[39mCalledProcessError \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m-> 1212\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(exc\u001b[38;5;241m.\u001b[39mstderr)\n\u001b[1;32m 1214\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m blocking:\n\u001b[1;32m 1216\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mstatus_method\u001b[39m():\n",
1192
- "\u001b[0;31mOSError\u001b[0m: remote: Scanning LFS files for validity, may be slow... \nremote: LFS file scan complete. \nremote: ----------------------------------------------------------\u001b[0;31m \nremote: Sorry, your push was rejected during YAML metadata verification: \nremote: - Error: \"language[0]\" must only contain lowercase characters \nremote: - Error: \"language[0]\" with value \"zh-TW\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m \nremote: ---------------------------------------------------------- \nremote: Please find the documentation at: \nremote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m \nremote: ---------------------------------------------------------- \nTo https://huggingface.co/kimbochen/whisper-small-zh-tw\n ! [remote rejected] main -> main (pre-receive hook declined)\nerror: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n"
1193
  ]
1194
  }
1195
  ],
 
1115
  },
1116
  {
1117
  "cell_type": "code",
1118
+ "execution_count": 37,
1119
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
1120
  "metadata": {},
1121
  "outputs": [],
 
1123
  "kwargs = {\n",
1124
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
1125
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
1126
+ " \"language\": \"zh-TW\",\n",
1127
  " \"model_name\": \"Whisper Small Chinese - Kimbo Chen\", # a 'pretty' name for your model\n",
1128
  " \"finetuned_from\": \"openai/whisper-small\",\n",
1129
  " \"tasks\": \"automatic-speech-recognition\",\n",
 
1141
  },
1142
  {
1143
  "cell_type": "code",
1144
+ "execution_count": 36,
1145
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
1146
  "metadata": {},
1147
  "outputs": [
 
1156
  "tokenizer config file saved in ./tokenizer_config.json\n",
1157
  "Special tokens file saved in ./special_tokens_map.json\n",
1158
  "added tokens file saved in ./added_tokens.json\n",
1159
+ "Several commits (12) will be pushed upstream.\n",
1160
  "The progress bars may be unreliable.\n",
1161
  "remote: Scanning LFS files for validity, may be slow... \n",
1162
  "remote: LFS file scan complete. \n",
1163
  "remote: ----------------------------------------------------------\u001b[0;31m \n",
1164
  "remote: Sorry, your push was rejected during YAML metadata verification: \n",
1165
+ "remote: - Error: \"language[0]\" with value \"zh-tw\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m \n",
 
1166
  "remote: ---------------------------------------------------------- \n",
1167
  "remote: Please find the documentation at: \n",
1168
  "remote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m \n",
 
1175
  },
1176
  {
1177
  "ename": "OSError",
1178
+ "evalue": "remote: Scanning LFS files for validity, may be slow... \nremote: LFS file scan complete. \nremote: ----------------------------------------------------------\u001b[0;31m \nremote: Sorry, your push was rejected during YAML metadata verification: \nremote: - Error: \"language[0]\" with value \"zh-tw\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m \nremote: ---------------------------------------------------------- \nremote: Please find the documentation at: \nremote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m \nremote: ---------------------------------------------------------- \nTo https://huggingface.co/kimbochen/whisper-small-zh-tw\n ! [remote rejected] main -> main (pre-receive hook declined)\nerror: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n",
1179
  "output_type": "error",
1180
  "traceback": [
1181
  "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 
1184
  "\u001b[0;31mCalledProcessError\u001b[0m: Command '['git', 'push', '--set-upstream', 'origin', 'main']' returned non-zero exit status 1.",
1185
  "\nDuring handling of the above exception, another exception occurred:\n",
1186
  "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
1187
+ "Cell \u001b[0;32mIn[36], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpush_to_hub\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
1188
  "File \u001b[0;32m~/.venv/lib/python3.8/site-packages/transformers/trainer.py:3492\u001b[0m, in \u001b[0;36mTrainer.push_to_hub\u001b[0;34m(self, commit_message, blocking, **kwargs)\u001b[0m\n\u001b[1;32m 3489\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpush_in_progress\u001b[38;5;241m.\u001b[39m_process\u001b[38;5;241m.\u001b[39mkill()\n\u001b[1;32m 3490\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpush_in_progress \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 3492\u001b[0m git_head_commit_url \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrepo\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpush_to_hub\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3493\u001b[0m \u001b[43m \u001b[49m\u001b[43mcommit_message\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcommit_message\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblocking\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblocking\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mauto_lfs_prune\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\n\u001b[1;32m 3494\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3495\u001b[0m \u001b[38;5;66;03m# push separately the model card to be independant from the rest of the model\u001b[39;00m\n\u001b[1;32m 3496\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mshould_save:\n",
1189
  "File \u001b[0;32m~/.venv/lib/python3.8/site-packages/huggingface_hub/repository.py:1432\u001b[0m, in \u001b[0;36mRepository.push_to_hub\u001b[0;34m(self, commit_message, blocking, clean_ok, auto_lfs_prune)\u001b[0m\n\u001b[1;32m 1430\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgit_add(auto_lfs_track\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1431\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgit_commit(commit_message)\n\u001b[0;32m-> 1432\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgit_push\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1433\u001b[0m \u001b[43m \u001b[49m\u001b[43mupstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43morigin \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcurrent_branch\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1434\u001b[0m \u001b[43m \u001b[49m\u001b[43mblocking\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblocking\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1435\u001b[0m \u001b[43m \u001b[49m\u001b[43mauto_lfs_prune\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mauto_lfs_prune\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1436\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
1190
  "File \u001b[0;32m~/.venv/lib/python3.8/site-packages/huggingface_hub/repository.py:1212\u001b[0m, in \u001b[0;36mRepository.git_push\u001b[0;34m(self, upstream, blocking, auto_lfs_prune)\u001b[0m\n\u001b[1;32m 1207\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m subprocess\u001b[38;5;241m.\u001b[39mCalledProcessError(\n\u001b[1;32m 1208\u001b[0m return_code, process\u001b[38;5;241m.\u001b[39margs, output\u001b[38;5;241m=\u001b[39mstdout, stderr\u001b[38;5;241m=\u001b[39mstderr\n\u001b[1;32m 1209\u001b[0m )\n\u001b[1;32m 1211\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m subprocess\u001b[38;5;241m.\u001b[39mCalledProcessError \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m-> 1212\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(exc\u001b[38;5;241m.\u001b[39mstderr)\n\u001b[1;32m 1214\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m blocking:\n\u001b[1;32m 1216\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mstatus_method\u001b[39m():\n",
1191
+ "\u001b[0;31mOSError\u001b[0m: remote: Scanning LFS files for validity, may be slow... \nremote: LFS file scan complete. \nremote: ----------------------------------------------------------\u001b[0;31m \nremote: Sorry, your push was rejected during YAML metadata verification: \nremote: - Error: \"language[0]\" with value \"zh-tw\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m \nremote: ---------------------------------------------------------- \nremote: Please find the documentation at: \nremote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m \nremote: ---------------------------------------------------------- \nTo https://huggingface.co/kimbochen/whisper-small-zh-tw\n ! [remote rejected] main -> main (pre-receive hook declined)\nerror: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n"
1192
  ]
1193
  }
1194
  ],