Training in progress, step 1000
Browse files- .ipynb_checkpoints/fine-tune-whisper-streaming-checkpoint.ipynb +11 -72
- fine-tune-whisper-streaming.ipynb +82 -220
- pytorch_model.bin +1 -1
- runs/Dec10_16-51-23_129-213-89-27/1670691100.0045364/events.out.tfevents.1670691100.129-213-89-27.2038299.1 +3 -0
- runs/Dec10_16-51-23_129-213-89-27/events.out.tfevents.1670691099.129-213-89-27.2038299.0 +3 -0
- runs/Dec10_16-52-02_129-213-89-27/1670691146.0308955/events.out.tfevents.1670691146.129-213-89-27.2038299.3 +3 -0
- runs/Dec10_16-52-02_129-213-89-27/events.out.tfevents.1670691146.129-213-89-27.2038299.2 +3 -0
- runs/Dec10_17-00-05_129-213-89-27/1670691616.7069395/events.out.tfevents.1670691616.129-213-89-27.2038299.5 +3 -0
- runs/Dec10_17-00-05_129-213-89-27/events.out.tfevents.1670691616.129-213-89-27.2038299.4 +3 -0
- runs/Dec10_17-02-04_129-213-89-27/1670691734.0866246/events.out.tfevents.1670691734.129-213-89-27.2038299.7 +3 -0
- runs/Dec10_17-02-04_129-213-89-27/events.out.tfevents.1670691734.129-213-89-27.2038299.6 +3 -0
- runs/Dec10_17-03-50_129-213-89-27/1670691839.2134347/events.out.tfevents.1670691839.129-213-89-27.2181513.1 +3 -0
- runs/Dec10_17-03-50_129-213-89-27/events.out.tfevents.1670691839.129-213-89-27.2181513.0 +3 -0
- training_args.bin +1 -1
.ipynb_checkpoints/fine-tune-whisper-streaming-checkpoint.ipynb
CHANGED
@@ -1066,8 +1066,8 @@
|
|
1066 |
"\n",
|
1067 |
" <div>\n",
|
1068 |
" \n",
|
1069 |
-
" <progress value='
|
1070 |
-
" [
|
1071 |
" </div>\n",
|
1072 |
" <table border=\"1\" class=\"dataframe\">\n",
|
1073 |
" <thead>\n",
|
@@ -1115,7 +1115,7 @@
|
|
1115 |
},
|
1116 |
{
|
1117 |
"cell_type": "code",
|
1118 |
-
"execution_count":
|
1119 |
"id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
|
1120 |
"metadata": {},
|
1121 |
"outputs": [],
|
@@ -1123,7 +1123,7 @@
|
|
1123 |
"kwargs = {\n",
|
1124 |
" \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
|
1125 |
" \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
|
1126 |
-
" \"language\": \"zh
|
1127 |
" \"model_name\": \"Whisper Small Chinese - Kimbo Chen\", # a 'pretty' name for your model\n",
|
1128 |
" \"finetuned_from\": \"openai/whisper-small\",\n",
|
1129 |
" \"tasks\": \"automatic-speech-recognition\",\n",
|
@@ -1141,7 +1141,7 @@
|
|
1141 |
},
|
1142 |
{
|
1143 |
"cell_type": "code",
|
1144 |
-
"execution_count":
|
1145 |
"id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
|
1146 |
"metadata": {},
|
1147 |
"outputs": [
|
@@ -1155,82 +1155,21 @@
|
|
1155 |
"Feature extractor saved in ./preprocessor_config.json\n",
|
1156 |
"tokenizer config file saved in ./tokenizer_config.json\n",
|
1157 |
"Special tokens file saved in ./special_tokens_map.json\n",
|
1158 |
-
"added tokens file saved in ./added_tokens.json\n"
|
1159 |
-
|
1160 |
-
|
1161 |
-
{
|
1162 |
-
"data": {
|
1163 |
-
"application/vnd.jupyter.widget-view+json": {
|
1164 |
-
"model_id": "dc59052a3b7f45b2b896c03763c79f57",
|
1165 |
-
"version_major": 2,
|
1166 |
-
"version_minor": 0
|
1167 |
-
},
|
1168 |
-
"text/plain": [
|
1169 |
-
"Upload file pytorch_model.bin: 0%| | 32.0k/922M [00:00<?, ?B/s]"
|
1170 |
-
]
|
1171 |
-
},
|
1172 |
-
"metadata": {},
|
1173 |
-
"output_type": "display_data"
|
1174 |
-
},
|
1175 |
-
{
|
1176 |
-
"data": {
|
1177 |
-
"application/vnd.jupyter.widget-view+json": {
|
1178 |
-
"model_id": "1c58442a44e84af9a6dff915e036de83",
|
1179 |
-
"version_major": 2,
|
1180 |
-
"version_minor": 0
|
1181 |
-
},
|
1182 |
-
"text/plain": [
|
1183 |
-
"Upload file training_args.bin: 100%|##########| 3.50k/3.50k [00:00<?, ?B/s]"
|
1184 |
-
]
|
1185 |
-
},
|
1186 |
-
"metadata": {},
|
1187 |
-
"output_type": "display_data"
|
1188 |
-
},
|
1189 |
-
{
|
1190 |
-
"name": "stderr",
|
1191 |
-
"output_type": "stream",
|
1192 |
-
"text": [
|
1193 |
-
"remote: Scanning LFS files for validity, may be slow... \n",
|
1194 |
-
"remote: LFS file scan complete. \n",
|
1195 |
-
"To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
|
1196 |
-
" 2ee4cf3..214645d main -> main\n",
|
1197 |
-
"\n",
|
1198 |
-
"Dropping the following result as it does not have all the necessary fields:\n",
|
1199 |
-
"{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'dataset': {'name': 'Common Voice 11.0', 'type': 'mozilla-foundation/common_voice_11_0', 'config': 'zh-TW', 'split': 'test', 'args': 'zh-TW'}}\n",
|
1200 |
-
"remote: ----------------------------------------------------------\u001b[0;31m \n",
|
1201 |
-
"remote: Sorry, your push was rejected during YAML metadata verification: \n",
|
1202 |
-
"remote: - Error: \"language[0]\" must only contain lowercase characters \n",
|
1203 |
-
"remote: - Error: \"language[0]\" with value \"zh-TW\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m \n",
|
1204 |
-
"remote: ---------------------------------------------------------- \n",
|
1205 |
-
"remote: Please find the documentation at: \n",
|
1206 |
-
"remote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m \n",
|
1207 |
-
"remote: ---------------------------------------------------------- \n",
|
1208 |
-
"To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
|
1209 |
-
" ! [remote rejected] main -> main (pre-receive hook declined)\n",
|
1210 |
-
"error: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n",
|
1211 |
-
"\n",
|
1212 |
-
"Error pushing update to the model card. Please read logs and retry.\n",
|
1213 |
-
"$remote: ----------------------------------------------------------\u001b[0;31m \n",
|
1214 |
-
"remote: Sorry, your push was rejected during YAML metadata verification: \n",
|
1215 |
-
"remote: - Error: \"language[0]\" must only contain lowercase characters \n",
|
1216 |
-
"remote: - Error: \"language[0]\" with value \"zh-TW\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m \n",
|
1217 |
-
"remote: ---------------------------------------------------------- \n",
|
1218 |
-
"remote: Please find the documentation at: \n",
|
1219 |
-
"remote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m \n",
|
1220 |
-
"remote: ---------------------------------------------------------- \n",
|
1221 |
"To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
|
1222 |
-
"
|
1223 |
-
"error: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n",
|
1224 |
"\n"
|
1225 |
]
|
1226 |
},
|
1227 |
{
|
1228 |
"data": {
|
1229 |
"text/plain": [
|
1230 |
-
"'https://huggingface.co/kimbochen/whisper-small-zh-tw/commit/
|
1231 |
]
|
1232 |
},
|
1233 |
-
"execution_count":
|
1234 |
"metadata": {},
|
1235 |
"output_type": "execute_result"
|
1236 |
}
|
|
|
1066 |
"\n",
|
1067 |
" <div>\n",
|
1068 |
" \n",
|
1069 |
+
" <progress value='35' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
1070 |
+
" [ 35/5000 03:29 < 8:46:02, 0.16 it/s, Epoch 0.01/9223372036854775807]\n",
|
1071 |
" </div>\n",
|
1072 |
" <table border=\"1\" class=\"dataframe\">\n",
|
1073 |
" <thead>\n",
|
|
|
1115 |
},
|
1116 |
{
|
1117 |
"cell_type": "code",
|
1118 |
+
"execution_count": 40,
|
1119 |
"id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
|
1120 |
"metadata": {},
|
1121 |
"outputs": [],
|
|
|
1123 |
"kwargs = {\n",
|
1124 |
" \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
|
1125 |
" \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
|
1126 |
+
" \"language\": \"zh\",\n",
|
1127 |
" \"model_name\": \"Whisper Small Chinese - Kimbo Chen\", # a 'pretty' name for your model\n",
|
1128 |
" \"finetuned_from\": \"openai/whisper-small\",\n",
|
1129 |
" \"tasks\": \"automatic-speech-recognition\",\n",
|
|
|
1141 |
},
|
1142 |
{
|
1143 |
"cell_type": "code",
|
1144 |
+
"execution_count": 41,
|
1145 |
"id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
|
1146 |
"metadata": {},
|
1147 |
"outputs": [
|
|
|
1155 |
"Feature extractor saved in ./preprocessor_config.json\n",
|
1156 |
"tokenizer config file saved in ./tokenizer_config.json\n",
|
1157 |
"Special tokens file saved in ./special_tokens_map.json\n",
|
1158 |
+
"added tokens file saved in ./added_tokens.json\n",
|
1159 |
+
"Several commits (2) will be pushed upstream.\n",
|
1160 |
+
"The progress bars may be unreliable.\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1161 |
"To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
|
1162 |
+
" 61dfe27..a94bfc3 main -> main\n",
|
|
|
1163 |
"\n"
|
1164 |
]
|
1165 |
},
|
1166 |
{
|
1167 |
"data": {
|
1168 |
"text/plain": [
|
1169 |
+
"'https://huggingface.co/kimbochen/whisper-small-zh-tw/commit/a94bfc3cbb71b83e10525899df53ee0d4db96e32'"
|
1170 |
]
|
1171 |
},
|
1172 |
+
"execution_count": 41,
|
1173 |
"metadata": {},
|
1174 |
"output_type": "execute_result"
|
1175 |
}
|
fine-tune-whisper-streaming.ipynb
CHANGED
@@ -145,70 +145,15 @@
|
|
145 |
"execution_count": 2,
|
146 |
"id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
|
147 |
"metadata": {},
|
148 |
-
"outputs": [
|
149 |
-
{
|
150 |
-
"data": {
|
151 |
-
"application/vnd.jupyter.widget-view+json": {
|
152 |
-
"model_id": "ecce3a630cdb4ebab217a88a0163b257",
|
153 |
-
"version_major": 2,
|
154 |
-
"version_minor": 0
|
155 |
-
},
|
156 |
-
"text/plain": [
|
157 |
-
"Downloading builder script: 0%| | 0.00/8.30k [00:00<?, ?B/s]"
|
158 |
-
]
|
159 |
-
},
|
160 |
-
"metadata": {},
|
161 |
-
"output_type": "display_data"
|
162 |
-
},
|
163 |
-
{
|
164 |
-
"data": {
|
165 |
-
"application/vnd.jupyter.widget-view+json": {
|
166 |
-
"model_id": "b0141b068f944775867034bc494f88d7",
|
167 |
-
"version_major": 2,
|
168 |
-
"version_minor": 0
|
169 |
-
},
|
170 |
-
"text/plain": [
|
171 |
-
"Downloading readme: 0%| | 0.00/12.2k [00:00<?, ?B/s]"
|
172 |
-
]
|
173 |
-
},
|
174 |
-
"metadata": {},
|
175 |
-
"output_type": "display_data"
|
176 |
-
},
|
177 |
-
{
|
178 |
-
"data": {
|
179 |
-
"application/vnd.jupyter.widget-view+json": {
|
180 |
-
"model_id": "9dd1f4ded47c4160b55f1bcedce2694f",
|
181 |
-
"version_major": 2,
|
182 |
-
"version_minor": 0
|
183 |
-
},
|
184 |
-
"text/plain": [
|
185 |
-
"Downloading extra modules: 0%| | 0.00/3.44k [00:00<?, ?B/s]"
|
186 |
-
]
|
187 |
-
},
|
188 |
-
"metadata": {},
|
189 |
-
"output_type": "display_data"
|
190 |
-
},
|
191 |
-
{
|
192 |
-
"data": {
|
193 |
-
"application/vnd.jupyter.widget-view+json": {
|
194 |
-
"model_id": "a442da1e2a6b4271bae8ae0c655594b6",
|
195 |
-
"version_major": 2,
|
196 |
-
"version_minor": 0
|
197 |
-
},
|
198 |
-
"text/plain": [
|
199 |
-
"Downloading extra modules: 0%| | 0.00/60.9k [00:00<?, ?B/s]"
|
200 |
-
]
|
201 |
-
},
|
202 |
-
"metadata": {},
|
203 |
-
"output_type": "display_data"
|
204 |
-
}
|
205 |
-
],
|
206 |
"source": [
|
207 |
"from datasets import IterableDatasetDict\n",
|
208 |
"\n",
|
209 |
"raw_datasets = IterableDatasetDict()\n",
|
210 |
"\n",
|
211 |
-
"raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-
|
|
|
|
|
212 |
"raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"test\", use_auth_token=True)"
|
213 |
]
|
214 |
},
|
@@ -244,107 +189,10 @@
|
|
244 |
"cell_type": "code",
|
245 |
"execution_count": 3,
|
246 |
"id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
|
247 |
-
"metadata": {
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
"application/vnd.jupyter.widget-view+json": {
|
252 |
-
"model_id": "0d0c17f582474beebea009f021515946",
|
253 |
-
"version_major": 2,
|
254 |
-
"version_minor": 0
|
255 |
-
},
|
256 |
-
"text/plain": [
|
257 |
-
"Downloading: 0%| | 0.00/185k [00:00<?, ?B/s]"
|
258 |
-
]
|
259 |
-
},
|
260 |
-
"metadata": {},
|
261 |
-
"output_type": "display_data"
|
262 |
-
},
|
263 |
-
{
|
264 |
-
"data": {
|
265 |
-
"application/vnd.jupyter.widget-view+json": {
|
266 |
-
"model_id": "9f48049fe65c4045ba74c6fac892945e",
|
267 |
-
"version_major": 2,
|
268 |
-
"version_minor": 0
|
269 |
-
},
|
270 |
-
"text/plain": [
|
271 |
-
"Downloading: 0%| | 0.00/829 [00:00<?, ?B/s]"
|
272 |
-
]
|
273 |
-
},
|
274 |
-
"metadata": {},
|
275 |
-
"output_type": "display_data"
|
276 |
-
},
|
277 |
-
{
|
278 |
-
"data": {
|
279 |
-
"application/vnd.jupyter.widget-view+json": {
|
280 |
-
"model_id": "25615259dd364494bc5782b4e8231b05",
|
281 |
-
"version_major": 2,
|
282 |
-
"version_minor": 0
|
283 |
-
},
|
284 |
-
"text/plain": [
|
285 |
-
"Downloading: 0%| | 0.00/1.04M [00:00<?, ?B/s]"
|
286 |
-
]
|
287 |
-
},
|
288 |
-
"metadata": {},
|
289 |
-
"output_type": "display_data"
|
290 |
-
},
|
291 |
-
{
|
292 |
-
"data": {
|
293 |
-
"application/vnd.jupyter.widget-view+json": {
|
294 |
-
"model_id": "6867564094bf4c7d82d0046dccb173fe",
|
295 |
-
"version_major": 2,
|
296 |
-
"version_minor": 0
|
297 |
-
},
|
298 |
-
"text/plain": [
|
299 |
-
"Downloading: 0%| | 0.00/494k [00:00<?, ?B/s]"
|
300 |
-
]
|
301 |
-
},
|
302 |
-
"metadata": {},
|
303 |
-
"output_type": "display_data"
|
304 |
-
},
|
305 |
-
{
|
306 |
-
"data": {
|
307 |
-
"application/vnd.jupyter.widget-view+json": {
|
308 |
-
"model_id": "2cb3be77451542868602317c4d7eff85",
|
309 |
-
"version_major": 2,
|
310 |
-
"version_minor": 0
|
311 |
-
},
|
312 |
-
"text/plain": [
|
313 |
-
"Downloading: 0%| | 0.00/52.7k [00:00<?, ?B/s]"
|
314 |
-
]
|
315 |
-
},
|
316 |
-
"metadata": {},
|
317 |
-
"output_type": "display_data"
|
318 |
-
},
|
319 |
-
{
|
320 |
-
"data": {
|
321 |
-
"application/vnd.jupyter.widget-view+json": {
|
322 |
-
"model_id": "6dfc5dedce13459bbac6f2d695695ae0",
|
323 |
-
"version_major": 2,
|
324 |
-
"version_minor": 0
|
325 |
-
},
|
326 |
-
"text/plain": [
|
327 |
-
"Downloading: 0%| | 0.00/2.11k [00:00<?, ?B/s]"
|
328 |
-
]
|
329 |
-
},
|
330 |
-
"metadata": {},
|
331 |
-
"output_type": "display_data"
|
332 |
-
},
|
333 |
-
{
|
334 |
-
"data": {
|
335 |
-
"application/vnd.jupyter.widget-view+json": {
|
336 |
-
"model_id": "944cb945f9dd47178ab22d418aa2934b",
|
337 |
-
"version_major": 2,
|
338 |
-
"version_minor": 0
|
339 |
-
},
|
340 |
-
"text/plain": [
|
341 |
-
"Downloading: 0%| | 0.00/2.06k [00:00<?, ?B/s]"
|
342 |
-
]
|
343 |
-
},
|
344 |
-
"metadata": {},
|
345 |
-
"output_type": "display_data"
|
346 |
-
}
|
347 |
-
],
|
348 |
"source": [
|
349 |
"from transformers import WhisperProcessor\n",
|
350 |
"\n",
|
@@ -706,22 +554,7 @@
|
|
706 |
"execution_count": 14,
|
707 |
"id": "b22b4011-f31f-4b57-b684-c52332f92890",
|
708 |
"metadata": {},
|
709 |
-
"outputs": [
|
710 |
-
{
|
711 |
-
"data": {
|
712 |
-
"application/vnd.jupyter.widget-view+json": {
|
713 |
-
"model_id": "bafc0b31fe9a4d239eedc348d5521dfc",
|
714 |
-
"version_major": 2,
|
715 |
-
"version_minor": 0
|
716 |
-
},
|
717 |
-
"text/plain": [
|
718 |
-
"Downloading builder script: 0%| | 0.00/4.49k [00:00<?, ?B/s]"
|
719 |
-
]
|
720 |
-
},
|
721 |
-
"metadata": {},
|
722 |
-
"output_type": "display_data"
|
723 |
-
}
|
724 |
-
],
|
725 |
"source": [
|
726 |
"import evaluate\n",
|
727 |
"\n",
|
@@ -800,36 +633,7 @@
|
|
800 |
"execution_count": 16,
|
801 |
"id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
|
802 |
"metadata": {},
|
803 |
-
"outputs": [
|
804 |
-
{
|
805 |
-
"data": {
|
806 |
-
"application/vnd.jupyter.widget-view+json": {
|
807 |
-
"model_id": "e1d5d79e596a416aa96bde21be6fb551",
|
808 |
-
"version_major": 2,
|
809 |
-
"version_minor": 0
|
810 |
-
},
|
811 |
-
"text/plain": [
|
812 |
-
"Downloading: 0%| | 0.00/1.97k [00:00<?, ?B/s]"
|
813 |
-
]
|
814 |
-
},
|
815 |
-
"metadata": {},
|
816 |
-
"output_type": "display_data"
|
817 |
-
},
|
818 |
-
{
|
819 |
-
"data": {
|
820 |
-
"application/vnd.jupyter.widget-view+json": {
|
821 |
-
"model_id": "3d722a61d7a440479d0f5497a6200345",
|
822 |
-
"version_major": 2,
|
823 |
-
"version_minor": 0
|
824 |
-
},
|
825 |
-
"text/plain": [
|
826 |
-
"Downloading: 0%| | 0.00/967M [00:00<?, ?B/s]"
|
827 |
-
]
|
828 |
-
},
|
829 |
-
"metadata": {},
|
830 |
-
"output_type": "display_data"
|
831 |
-
}
|
832 |
-
],
|
833 |
"source": [
|
834 |
"from transformers import WhisperForConditionalGeneration\n",
|
835 |
"\n",
|
@@ -874,10 +678,18 @@
|
|
874 |
},
|
875 |
{
|
876 |
"cell_type": "code",
|
877 |
-
"execution_count":
|
878 |
"id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
|
879 |
"metadata": {},
|
880 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
881 |
"source": [
|
882 |
"from transformers import Seq2SeqTrainingArguments\n",
|
883 |
"\n",
|
@@ -953,7 +765,7 @@
|
|
953 |
},
|
954 |
{
|
955 |
"cell_type": "code",
|
956 |
-
"execution_count":
|
957 |
"id": "d546d7fe-0543-479a-b708-2ebabec19493",
|
958 |
"metadata": {},
|
959 |
"outputs": [
|
@@ -992,7 +804,7 @@
|
|
992 |
},
|
993 |
{
|
994 |
"cell_type": "code",
|
995 |
-
"execution_count":
|
996 |
"id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
|
997 |
"metadata": {},
|
998 |
"outputs": [
|
@@ -1056,7 +868,8 @@
|
|
1056 |
" Gradient Accumulation steps = 1\n",
|
1057 |
" Total optimization steps = 5000\n",
|
1058 |
" Number of trainable parameters = 241734912\n",
|
1059 |
-
"Reading metadata...: 6568it [00:00,
|
|
|
1060 |
"The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
|
1061 |
]
|
1062 |
},
|
@@ -1066,8 +879,8 @@
|
|
1066 |
"\n",
|
1067 |
" <div>\n",
|
1068 |
" \n",
|
1069 |
-
" <progress value='
|
1070 |
-
" [
|
1071 |
" </div>\n",
|
1072 |
" <table border=\"1\" class=\"dataframe\">\n",
|
1073 |
" <thead>\n",
|
@@ -1075,9 +888,16 @@
|
|
1075 |
" <th>Step</th>\n",
|
1076 |
" <th>Training Loss</th>\n",
|
1077 |
" <th>Validation Loss</th>\n",
|
|
|
1078 |
" </tr>\n",
|
1079 |
" </thead>\n",
|
1080 |
" <tbody>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
1081 |
" </tbody>\n",
|
1082 |
"</table><p>"
|
1083 |
],
|
@@ -1087,6 +907,40 @@
|
|
1087 |
},
|
1088 |
"metadata": {},
|
1089 |
"output_type": "display_data"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1090 |
}
|
1091 |
],
|
1092 |
"source": [
|
@@ -1115,7 +969,7 @@
|
|
1115 |
},
|
1116 |
{
|
1117 |
"cell_type": "code",
|
1118 |
-
"execution_count":
|
1119 |
"id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
|
1120 |
"metadata": {},
|
1121 |
"outputs": [],
|
@@ -1123,7 +977,7 @@
|
|
1123 |
"kwargs = {\n",
|
1124 |
" \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
|
1125 |
" \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
|
1126 |
-
" \"language\": \"zh
|
1127 |
" \"model_name\": \"Whisper Small Chinese - Kimbo Chen\", # a 'pretty' name for your model\n",
|
1128 |
" \"finetuned_from\": \"openai/whisper-small\",\n",
|
1129 |
" \"tasks\": \"automatic-speech-recognition\",\n",
|
@@ -1141,7 +995,7 @@
|
|
1141 |
},
|
1142 |
{
|
1143 |
"cell_type": "code",
|
1144 |
-
"execution_count":
|
1145 |
"id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
|
1146 |
"metadata": {},
|
1147 |
"outputs": [
|
@@ -1156,14 +1010,22 @@
|
|
1156 |
"tokenizer config file saved in ./tokenizer_config.json\n",
|
1157 |
"Special tokens file saved in ./special_tokens_map.json\n",
|
1158 |
"added tokens file saved in ./added_tokens.json\n",
|
1159 |
-
"Several commits (
|
1160 |
"The progress bars may be unreliable.\n",
|
1161 |
-
"remote: Scanning LFS files for validity, may be slow... \n",
|
1162 |
-
"remote: LFS file scan complete. \n",
|
1163 |
"To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
|
1164 |
-
"
|
1165 |
"\n"
|
1166 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1167 |
}
|
1168 |
],
|
1169 |
"source": [
|
|
|
145 |
"execution_count": 2,
|
146 |
"id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
|
147 |
"metadata": {},
|
148 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
"source": [
|
150 |
"from datasets import IterableDatasetDict\n",
|
151 |
"\n",
|
152 |
"raw_datasets = IterableDatasetDict()\n",
|
153 |
"\n",
|
154 |
+
"# raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-CN\", split=\"train\", use_auth_token=True) # set split=\"train+validation\" for low-resource\n",
|
155 |
+
"# raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-CN\", split=\"test\", use_auth_token=True)\n",
|
156 |
+
"raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"train+validation\", use_auth_token=True) # set split=\"train+validation\" for low-resource\n",
|
157 |
"raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"test\", use_auth_token=True)"
|
158 |
]
|
159 |
},
|
|
|
189 |
"cell_type": "code",
|
190 |
"execution_count": 3,
|
191 |
"id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
|
192 |
+
"metadata": {
|
193 |
+
"tags": []
|
194 |
+
},
|
195 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
"source": [
|
197 |
"from transformers import WhisperProcessor\n",
|
198 |
"\n",
|
|
|
554 |
"execution_count": 14,
|
555 |
"id": "b22b4011-f31f-4b57-b684-c52332f92890",
|
556 |
"metadata": {},
|
557 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
558 |
"source": [
|
559 |
"import evaluate\n",
|
560 |
"\n",
|
|
|
633 |
"execution_count": 16,
|
634 |
"id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
|
635 |
"metadata": {},
|
636 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
637 |
"source": [
|
638 |
"from transformers import WhisperForConditionalGeneration\n",
|
639 |
"\n",
|
|
|
678 |
},
|
679 |
{
|
680 |
"cell_type": "code",
|
681 |
+
"execution_count": 22,
|
682 |
"id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
|
683 |
"metadata": {},
|
684 |
+
"outputs": [
|
685 |
+
{
|
686 |
+
"name": "stderr",
|
687 |
+
"output_type": "stream",
|
688 |
+
"text": [
|
689 |
+
"PyTorch: setting up devices\n"
|
690 |
+
]
|
691 |
+
}
|
692 |
+
],
|
693 |
"source": [
|
694 |
"from transformers import Seq2SeqTrainingArguments\n",
|
695 |
"\n",
|
|
|
765 |
},
|
766 |
{
|
767 |
"cell_type": "code",
|
768 |
+
"execution_count": 23,
|
769 |
"id": "d546d7fe-0543-479a-b708-2ebabec19493",
|
770 |
"metadata": {},
|
771 |
"outputs": [
|
|
|
804 |
},
|
805 |
{
|
806 |
"cell_type": "code",
|
807 |
+
"execution_count": 24,
|
808 |
"id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
|
809 |
"metadata": {},
|
810 |
"outputs": [
|
|
|
868 |
" Gradient Accumulation steps = 1\n",
|
869 |
" Total optimization steps = 5000\n",
|
870 |
" Number of trainable parameters = 241734912\n",
|
871 |
+
"Reading metadata...: 6568it [00:00, 82605.00it/s]\n",
|
872 |
+
"Reading metadata...: 4709it [00:00, 34808.15it/s]\n",
|
873 |
"The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
|
874 |
]
|
875 |
},
|
|
|
879 |
"\n",
|
880 |
" <div>\n",
|
881 |
" \n",
|
882 |
+
" <progress value='1001' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
883 |
+
" [1001/5000 1:48:25 < 7:14:02, 0.15 it/s, Epoch 6.02/9223372036854775807]\n",
|
884 |
" </div>\n",
|
885 |
" <table border=\"1\" class=\"dataframe\">\n",
|
886 |
" <thead>\n",
|
|
|
888 |
" <th>Step</th>\n",
|
889 |
" <th>Training Loss</th>\n",
|
890 |
" <th>Validation Loss</th>\n",
|
891 |
+
" <th>Wer</th>\n",
|
892 |
" </tr>\n",
|
893 |
" </thead>\n",
|
894 |
" <tbody>\n",
|
895 |
+
" <tr>\n",
|
896 |
+
" <td>1000</td>\n",
|
897 |
+
" <td>0.004600</td>\n",
|
898 |
+
" <td>0.210189</td>\n",
|
899 |
+
" <td>41.801433</td>\n",
|
900 |
+
" </tr>\n",
|
901 |
" </tbody>\n",
|
902 |
"</table><p>"
|
903 |
],
|
|
|
907 |
},
|
908 |
"metadata": {},
|
909 |
"output_type": "display_data"
|
910 |
+
},
|
911 |
+
{
|
912 |
+
"name": "stderr",
|
913 |
+
"output_type": "stream",
|
914 |
+
"text": [
|
915 |
+
"Reading metadata...: 6568it [00:00, 59821.30it/s]\n",
|
916 |
+
"Reading metadata...: 4709it [00:00, 76452.83it/s]\n",
|
917 |
+
"Reading metadata...: 6568it [00:00, 88722.02it/s]\n",
|
918 |
+
"Reading metadata...: 4709it [00:00, 33936.10it/s]\n",
|
919 |
+
"Reading metadata...: 6568it [00:00, 20936.31it/s]\n",
|
920 |
+
"Reading metadata...: 4709it [00:00, 20573.38it/s]\n",
|
921 |
+
"Reading metadata...: 6568it [00:00, 67954.78it/s]\n",
|
922 |
+
"Reading metadata...: 4709it [00:00, 58312.87it/s]\n",
|
923 |
+
"Reading metadata...: 6568it [00:00, 89351.95it/s]\n",
|
924 |
+
"Reading metadata...: 4709it [00:00, 26579.06it/s]\n",
|
925 |
+
"Reading metadata...: 6568it [00:00, 56758.03it/s]\n",
|
926 |
+
"Reading metadata...: 4709it [00:00, 80017.25it/s]\n",
|
927 |
+
"***** Running Evaluation *****\n",
|
928 |
+
" Num examples: Unknown\n",
|
929 |
+
" Batch size = 8\n",
|
930 |
+
"Reading metadata...: 4709it [00:00, 31641.56it/s]\n",
|
931 |
+
"The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
|
932 |
+
"Saving model checkpoint to ./checkpoint-1000\n",
|
933 |
+
"Configuration saved in ./checkpoint-1000/config.json\n",
|
934 |
+
"Model weights saved in ./checkpoint-1000/pytorch_model.bin\n",
|
935 |
+
"Feature extractor saved in ./checkpoint-1000/preprocessor_config.json\n",
|
936 |
+
"tokenizer config file saved in ./checkpoint-1000/tokenizer_config.json\n",
|
937 |
+
"Special tokens file saved in ./checkpoint-1000/special_tokens_map.json\n",
|
938 |
+
"added tokens file saved in ./checkpoint-1000/added_tokens.json\n",
|
939 |
+
"Feature extractor saved in ./preprocessor_config.json\n",
|
940 |
+
"tokenizer config file saved in ./tokenizer_config.json\n",
|
941 |
+
"Special tokens file saved in ./special_tokens_map.json\n",
|
942 |
+
"added tokens file saved in ./added_tokens.json\n"
|
943 |
+
]
|
944 |
}
|
945 |
],
|
946 |
"source": [
|
|
|
969 |
},
|
970 |
{
|
971 |
"cell_type": "code",
|
972 |
+
"execution_count": 40,
|
973 |
"id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
|
974 |
"metadata": {},
|
975 |
"outputs": [],
|
|
|
977 |
"kwargs = {\n",
|
978 |
" \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
|
979 |
" \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
|
980 |
+
" \"language\": \"zh\",\n",
|
981 |
" \"model_name\": \"Whisper Small Chinese - Kimbo Chen\", # a 'pretty' name for your model\n",
|
982 |
" \"finetuned_from\": \"openai/whisper-small\",\n",
|
983 |
" \"tasks\": \"automatic-speech-recognition\",\n",
|
|
|
995 |
},
|
996 |
{
|
997 |
"cell_type": "code",
|
998 |
+
"execution_count": 41,
|
999 |
"id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
|
1000 |
"metadata": {},
|
1001 |
"outputs": [
|
|
|
1010 |
"tokenizer config file saved in ./tokenizer_config.json\n",
|
1011 |
"Special tokens file saved in ./special_tokens_map.json\n",
|
1012 |
"added tokens file saved in ./added_tokens.json\n",
|
1013 |
+
"Several commits (2) will be pushed upstream.\n",
|
1014 |
"The progress bars may be unreliable.\n",
|
|
|
|
|
1015 |
"To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
|
1016 |
+
" 61dfe27..a94bfc3 main -> main\n",
|
1017 |
"\n"
|
1018 |
]
|
1019 |
+
},
|
1020 |
+
{
|
1021 |
+
"data": {
|
1022 |
+
"text/plain": [
|
1023 |
+
"'https://huggingface.co/kimbochen/whisper-small-zh-tw/commit/a94bfc3cbb71b83e10525899df53ee0d4db96e32'"
|
1024 |
+
]
|
1025 |
+
},
|
1026 |
+
"execution_count": 41,
|
1027 |
+
"metadata": {},
|
1028 |
+
"output_type": "execute_result"
|
1029 |
}
|
1030 |
],
|
1031 |
"source": [
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 967102601
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a55e1c436f4a3d45cf396032864b1e41df706d4ce41f7465c29c839b34b723fb
|
3 |
size 967102601
|
runs/Dec10_16-51-23_129-213-89-27/1670691100.0045364/events.out.tfevents.1670691100.129-213-89-27.2038299.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:578751db7f962743d233f63d1b02e18773213fe5a7d8a669ae5aedba6e024fad
|
3 |
+
size 5863
|
runs/Dec10_16-51-23_129-213-89-27/events.out.tfevents.1670691099.129-213-89-27.2038299.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e5d4ec692e90a8cee704b0209ee2f2b3f07105c8d547ebacb9a6166b3a7ab7f
|
3 |
+
size 4286
|
runs/Dec10_16-52-02_129-213-89-27/1670691146.0308955/events.out.tfevents.1670691146.129-213-89-27.2038299.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99f1f7fb944bdd7c159c5be085a33c0b40536b0e3c87b4d3b17b747b14f50a50
|
3 |
+
size 5863
|
runs/Dec10_16-52-02_129-213-89-27/events.out.tfevents.1670691146.129-213-89-27.2038299.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:285e20376617c173f79f8cf671b83dc10034ddeead5195847ff160c50958462b
|
3 |
+
size 4593
|
runs/Dec10_17-00-05_129-213-89-27/1670691616.7069395/events.out.tfevents.1670691616.129-213-89-27.2038299.5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c04abfabe9b86260bd7f6852ff64646ea61afb4c32a1f538a24b7bc1044b7b98
|
3 |
+
size 5863
|
runs/Dec10_17-00-05_129-213-89-27/events.out.tfevents.1670691616.129-213-89-27.2038299.4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:070da7fa0ee0458c1a9fdff98dfae6c66ef3740e9a1be96a7e53464e908305e5
|
3 |
+
size 4285
|
runs/Dec10_17-02-04_129-213-89-27/1670691734.0866246/events.out.tfevents.1670691734.129-213-89-27.2038299.7
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7550e83d4a3fc05d7bdb2366917a5eef42a336652aee0eb90437a55e49155be3
|
3 |
+
size 5863
|
runs/Dec10_17-02-04_129-213-89-27/events.out.tfevents.1670691734.129-213-89-27.2038299.6
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0734aba51b0916f0b56bf72bc4f0d005b3bd3e23a6e1e5c2c22927b8da7dce2
|
3 |
+
size 4284
|
runs/Dec10_17-03-50_129-213-89-27/1670691839.2134347/events.out.tfevents.1670691839.129-213-89-27.2181513.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7888ed253cf7fa582a17034b7ed233827b92fbf03fea215affa25d85c205079e
|
3 |
+
size 5863
|
runs/Dec10_17-03-50_129-213-89-27/events.out.tfevents.1670691839.129-213-89-27.2181513.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfb0ec8513eb41db733ee3b6699345f1d1c595b8d3c079e96919b33438189525
|
3 |
+
size 10869
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3579
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dea523f572893e3c4ca713d3731c8194b372c3af7337897cc7ca69fa8dc28498
|
3 |
size 3579
|