Training in progress, step 1000
Browse files- .ipynb_checkpoints/fine-tune-whisper-non-streaming-id-augmented-checkpoint.ipynb +0 -0
- fine-tune-whisper-non-streaming-id-augmented.ipynb +75 -699
- pytorch_model.bin +1 -1
- runs/Dec18_12-13-24_150-136-94-223/events.out.tfevents.1671365617.150-136-94-223.67469.0 +2 -2
- runs/Dec18_13-16-20_150-136-94-223/1671369384.9831293/events.out.tfevents.1671369384.150-136-94-223.188704.1 +3 -0
- runs/Dec18_13-16-20_150-136-94-223/events.out.tfevents.1671369384.150-136-94-223.188704.0 +3 -0
- training_args.bin +1 -1
.ipynb_checkpoints/fine-tune-whisper-non-streaming-id-augmented-checkpoint.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
fine-tune-whisper-non-streaming-id-augmented.ipynb
CHANGED
@@ -155,408 +155,11 @@
|
|
155 |
"id": "d087b451",
|
156 |
"metadata": {},
|
157 |
"outputs": [
|
158 |
-
{
|
159 |
-
"data": {
|
160 |
-
"application/vnd.jupyter.widget-view+json": {
|
161 |
-
"model_id": "f06040b99e3a496a8b6a16cf575f5fe4",
|
162 |
-
"version_major": 2,
|
163 |
-
"version_minor": 0
|
164 |
-
},
|
165 |
-
"text/plain": [
|
166 |
-
"Downloading builder script: 0%| | 0.00/8.30k [00:00<?, ?B/s]"
|
167 |
-
]
|
168 |
-
},
|
169 |
-
"metadata": {},
|
170 |
-
"output_type": "display_data"
|
171 |
-
},
|
172 |
-
{
|
173 |
-
"data": {
|
174 |
-
"application/vnd.jupyter.widget-view+json": {
|
175 |
-
"model_id": "51c2249ae17f4e429c1673af03eea9e5",
|
176 |
-
"version_major": 2,
|
177 |
-
"version_minor": 0
|
178 |
-
},
|
179 |
-
"text/plain": [
|
180 |
-
"Downloading readme: 0%| | 0.00/12.2k [00:00<?, ?B/s]"
|
181 |
-
]
|
182 |
-
},
|
183 |
-
"metadata": {},
|
184 |
-
"output_type": "display_data"
|
185 |
-
},
|
186 |
-
{
|
187 |
-
"data": {
|
188 |
-
"application/vnd.jupyter.widget-view+json": {
|
189 |
-
"model_id": "7a58d7f740cb4a37aab92c2991882a94",
|
190 |
-
"version_major": 2,
|
191 |
-
"version_minor": 0
|
192 |
-
},
|
193 |
-
"text/plain": [
|
194 |
-
"Downloading extra modules: 0%| | 0.00/3.44k [00:00<?, ?B/s]"
|
195 |
-
]
|
196 |
-
},
|
197 |
-
"metadata": {},
|
198 |
-
"output_type": "display_data"
|
199 |
-
},
|
200 |
-
{
|
201 |
-
"data": {
|
202 |
-
"application/vnd.jupyter.widget-view+json": {
|
203 |
-
"model_id": "5b8e88450d8a411a8b6c52e1c77d50fc",
|
204 |
-
"version_major": 2,
|
205 |
-
"version_minor": 0
|
206 |
-
},
|
207 |
-
"text/plain": [
|
208 |
-
"Downloading extra modules: 0%| | 0.00/60.9k [00:00<?, ?B/s]"
|
209 |
-
]
|
210 |
-
},
|
211 |
-
"metadata": {},
|
212 |
-
"output_type": "display_data"
|
213 |
-
},
|
214 |
-
{
|
215 |
-
"name": "stdout",
|
216 |
-
"output_type": "stream",
|
217 |
-
"text": [
|
218 |
-
"Downloading and preparing dataset common_voice_11_0/vi to /home/ubuntu/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/vi/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f...\n"
|
219 |
-
]
|
220 |
-
},
|
221 |
-
{
|
222 |
-
"data": {
|
223 |
-
"application/vnd.jupyter.widget-view+json": {
|
224 |
-
"model_id": "b76582cd2e5547f29eaba1f558f30bf0",
|
225 |
-
"version_major": 2,
|
226 |
-
"version_minor": 0
|
227 |
-
},
|
228 |
-
"text/plain": [
|
229 |
-
"Downloading data: 0%| | 0.00/12.2k [00:00<?, ?B/s]"
|
230 |
-
]
|
231 |
-
},
|
232 |
-
"metadata": {},
|
233 |
-
"output_type": "display_data"
|
234 |
-
},
|
235 |
-
{
|
236 |
-
"data": {
|
237 |
-
"application/vnd.jupyter.widget-view+json": {
|
238 |
-
"model_id": "81291c5ad37f4c03a40aaf6ba2e32604",
|
239 |
-
"version_major": 2,
|
240 |
-
"version_minor": 0
|
241 |
-
},
|
242 |
-
"text/plain": [
|
243 |
-
"Downloading data files: 0%| | 0/5 [00:00<?, ?it/s]"
|
244 |
-
]
|
245 |
-
},
|
246 |
-
"metadata": {},
|
247 |
-
"output_type": "display_data"
|
248 |
-
},
|
249 |
-
{
|
250 |
-
"data": {
|
251 |
-
"application/vnd.jupyter.widget-view+json": {
|
252 |
-
"model_id": "bca9e66a222b4ffc86f7bb4fbacabdc1",
|
253 |
-
"version_major": 2,
|
254 |
-
"version_minor": 0
|
255 |
-
},
|
256 |
-
"text/plain": [
|
257 |
-
"Downloading data: 0%| | 0.00/76.3M [00:00<?, ?B/s]"
|
258 |
-
]
|
259 |
-
},
|
260 |
-
"metadata": {},
|
261 |
-
"output_type": "display_data"
|
262 |
-
},
|
263 |
-
{
|
264 |
-
"data": {
|
265 |
-
"application/vnd.jupyter.widget-view+json": {
|
266 |
-
"model_id": "eef674aee9d5483799efeaea18d745b2",
|
267 |
-
"version_major": 2,
|
268 |
-
"version_minor": 0
|
269 |
-
},
|
270 |
-
"text/plain": [
|
271 |
-
"Downloading data: 0%| | 0.00/5.54M [00:00<?, ?B/s]"
|
272 |
-
]
|
273 |
-
},
|
274 |
-
"metadata": {},
|
275 |
-
"output_type": "display_data"
|
276 |
-
},
|
277 |
-
{
|
278 |
-
"data": {
|
279 |
-
"application/vnd.jupyter.widget-view+json": {
|
280 |
-
"model_id": "57657b868c554ee4ac2abab4e8ed7ddc",
|
281 |
-
"version_major": 2,
|
282 |
-
"version_minor": 0
|
283 |
-
},
|
284 |
-
"text/plain": [
|
285 |
-
"Downloading data: 0%| | 0.00/33.9M [00:00<?, ?B/s]"
|
286 |
-
]
|
287 |
-
},
|
288 |
-
"metadata": {},
|
289 |
-
"output_type": "display_data"
|
290 |
-
},
|
291 |
-
{
|
292 |
-
"data": {
|
293 |
-
"application/vnd.jupyter.widget-view+json": {
|
294 |
-
"model_id": "74497f7b338b46a2a53b4c3fbbdd5749",
|
295 |
-
"version_major": 2,
|
296 |
-
"version_minor": 0
|
297 |
-
},
|
298 |
-
"text/plain": [
|
299 |
-
"Downloading data: 0%| | 0.00/274M [00:00<?, ?B/s]"
|
300 |
-
]
|
301 |
-
},
|
302 |
-
"metadata": {},
|
303 |
-
"output_type": "display_data"
|
304 |
-
},
|
305 |
-
{
|
306 |
-
"data": {
|
307 |
-
"application/vnd.jupyter.widget-view+json": {
|
308 |
-
"model_id": "e6f531a4fcee44fd961ac763e2aa2b9b",
|
309 |
-
"version_major": 2,
|
310 |
-
"version_minor": 0
|
311 |
-
},
|
312 |
-
"text/plain": [
|
313 |
-
"Downloading data: 0%| | 0.00/10.4M [00:00<?, ?B/s]"
|
314 |
-
]
|
315 |
-
},
|
316 |
-
"metadata": {},
|
317 |
-
"output_type": "display_data"
|
318 |
-
},
|
319 |
-
{
|
320 |
-
"data": {
|
321 |
-
"application/vnd.jupyter.widget-view+json": {
|
322 |
-
"model_id": "316ae0693cc6435e8216509394cc3361",
|
323 |
-
"version_major": 2,
|
324 |
-
"version_minor": 0
|
325 |
-
},
|
326 |
-
"text/plain": [
|
327 |
-
"Extracting data files: 0%| | 0/5 [00:00<?, ?it/s]"
|
328 |
-
]
|
329 |
-
},
|
330 |
-
"metadata": {},
|
331 |
-
"output_type": "display_data"
|
332 |
-
},
|
333 |
-
{
|
334 |
-
"data": {
|
335 |
-
"application/vnd.jupyter.widget-view+json": {
|
336 |
-
"model_id": "cb80410a982c418c8154892029ba9eaa",
|
337 |
-
"version_major": 2,
|
338 |
-
"version_minor": 0
|
339 |
-
},
|
340 |
-
"text/plain": [
|
341 |
-
"Downloading data files: 0%| | 0/5 [00:00<?, ?it/s]"
|
342 |
-
]
|
343 |
-
},
|
344 |
-
"metadata": {},
|
345 |
-
"output_type": "display_data"
|
346 |
-
},
|
347 |
-
{
|
348 |
-
"data": {
|
349 |
-
"application/vnd.jupyter.widget-view+json": {
|
350 |
-
"model_id": "c66ce3d597764ab2a448778c0c663c8e",
|
351 |
-
"version_major": 2,
|
352 |
-
"version_minor": 0
|
353 |
-
},
|
354 |
-
"text/plain": [
|
355 |
-
"Downloading data: 0%| | 0.00/562k [00:00<?, ?B/s]"
|
356 |
-
]
|
357 |
-
},
|
358 |
-
"metadata": {},
|
359 |
-
"output_type": "display_data"
|
360 |
-
},
|
361 |
-
{
|
362 |
-
"data": {
|
363 |
-
"application/vnd.jupyter.widget-view+json": {
|
364 |
-
"model_id": "ecba3e1244db47a0844d26bdd99dbb13",
|
365 |
-
"version_major": 2,
|
366 |
-
"version_minor": 0
|
367 |
-
},
|
368 |
-
"text/plain": [
|
369 |
-
"Downloading data: 0%| | 0.00/53.3k [00:00<?, ?B/s]"
|
370 |
-
]
|
371 |
-
},
|
372 |
-
"metadata": {},
|
373 |
-
"output_type": "display_data"
|
374 |
-
},
|
375 |
-
{
|
376 |
-
"data": {
|
377 |
-
"application/vnd.jupyter.widget-view+json": {
|
378 |
-
"model_id": "eea8dd83dc404e398ffb189e22e480bb",
|
379 |
-
"version_major": 2,
|
380 |
-
"version_minor": 0
|
381 |
-
},
|
382 |
-
"text/plain": [
|
383 |
-
"Downloading data: 0%| | 0.00/272k [00:00<?, ?B/s]"
|
384 |
-
]
|
385 |
-
},
|
386 |
-
"metadata": {},
|
387 |
-
"output_type": "display_data"
|
388 |
-
},
|
389 |
-
{
|
390 |
-
"data": {
|
391 |
-
"application/vnd.jupyter.widget-view+json": {
|
392 |
-
"model_id": "f6e7163feffd4bf8812c6c0319832fa6",
|
393 |
-
"version_major": 2,
|
394 |
-
"version_minor": 0
|
395 |
-
},
|
396 |
-
"text/plain": [
|
397 |
-
"Downloading data: 0%| | 0.00/2.52M [00:00<?, ?B/s]"
|
398 |
-
]
|
399 |
-
},
|
400 |
-
"metadata": {},
|
401 |
-
"output_type": "display_data"
|
402 |
-
},
|
403 |
-
{
|
404 |
-
"data": {
|
405 |
-
"application/vnd.jupyter.widget-view+json": {
|
406 |
-
"model_id": "cb1db21354f14d26baf884b6095cdc94",
|
407 |
-
"version_major": 2,
|
408 |
-
"version_minor": 0
|
409 |
-
},
|
410 |
-
"text/plain": [
|
411 |
-
"Downloading data: 0%| | 0.00/74.7k [00:00<?, ?B/s]"
|
412 |
-
]
|
413 |
-
},
|
414 |
-
"metadata": {},
|
415 |
-
"output_type": "display_data"
|
416 |
-
},
|
417 |
-
{
|
418 |
-
"data": {
|
419 |
-
"application/vnd.jupyter.widget-view+json": {
|
420 |
-
"model_id": "3abc46d3685c432f827dabab6dd13ffb",
|
421 |
-
"version_major": 2,
|
422 |
-
"version_minor": 0
|
423 |
-
},
|
424 |
-
"text/plain": [
|
425 |
-
"Extracting data files: 0%| | 0/5 [00:00<?, ?it/s]"
|
426 |
-
]
|
427 |
-
},
|
428 |
-
"metadata": {},
|
429 |
-
"output_type": "display_data"
|
430 |
-
},
|
431 |
-
{
|
432 |
-
"data": {
|
433 |
-
"application/vnd.jupyter.widget-view+json": {
|
434 |
-
"model_id": "",
|
435 |
-
"version_major": 2,
|
436 |
-
"version_minor": 0
|
437 |
-
},
|
438 |
-
"text/plain": [
|
439 |
-
"Generating train split: 0 examples [00:00, ? examples/s]"
|
440 |
-
]
|
441 |
-
},
|
442 |
-
"metadata": {},
|
443 |
-
"output_type": "display_data"
|
444 |
-
},
|
445 |
-
{
|
446 |
-
"name": "stderr",
|
447 |
-
"output_type": "stream",
|
448 |
-
"text": [
|
449 |
-
"\n",
|
450 |
-
"Reading metadata...: 2525it [00:00, 140488.93it/s]\n"
|
451 |
-
]
|
452 |
-
},
|
453 |
-
{
|
454 |
-
"data": {
|
455 |
-
"application/vnd.jupyter.widget-view+json": {
|
456 |
-
"model_id": "",
|
457 |
-
"version_major": 2,
|
458 |
-
"version_minor": 0
|
459 |
-
},
|
460 |
-
"text/plain": [
|
461 |
-
"Generating validation split: 0 examples [00:00, ? examples/s]"
|
462 |
-
]
|
463 |
-
},
|
464 |
-
"metadata": {},
|
465 |
-
"output_type": "display_data"
|
466 |
-
},
|
467 |
-
{
|
468 |
-
"name": "stderr",
|
469 |
-
"output_type": "stream",
|
470 |
-
"text": [
|
471 |
-
"\n",
|
472 |
-
"\n",
|
473 |
-
"Reading metadata...: 248it [00:00, 133597.15it/s]\n"
|
474 |
-
]
|
475 |
-
},
|
476 |
-
{
|
477 |
-
"data": {
|
478 |
-
"application/vnd.jupyter.widget-view+json": {
|
479 |
-
"model_id": "",
|
480 |
-
"version_major": 2,
|
481 |
-
"version_minor": 0
|
482 |
-
},
|
483 |
-
"text/plain": [
|
484 |
-
"Generating test split: 0 examples [00:00, ? examples/s]"
|
485 |
-
]
|
486 |
-
},
|
487 |
-
"metadata": {},
|
488 |
-
"output_type": "display_data"
|
489 |
-
},
|
490 |
-
{
|
491 |
-
"name": "stderr",
|
492 |
-
"output_type": "stream",
|
493 |
-
"text": [
|
494 |
-
"\n",
|
495 |
-
"\n",
|
496 |
-
"\n",
|
497 |
-
"Reading metadata...: 1237it [00:00, 141122.10it/s]\n"
|
498 |
-
]
|
499 |
-
},
|
500 |
-
{
|
501 |
-
"data": {
|
502 |
-
"application/vnd.jupyter.widget-view+json": {
|
503 |
-
"model_id": "",
|
504 |
-
"version_major": 2,
|
505 |
-
"version_minor": 0
|
506 |
-
},
|
507 |
-
"text/plain": [
|
508 |
-
"Generating other split: 0 examples [00:00, ? examples/s]"
|
509 |
-
]
|
510 |
-
},
|
511 |
-
"metadata": {},
|
512 |
-
"output_type": "display_data"
|
513 |
-
},
|
514 |
-
{
|
515 |
-
"name": "stderr",
|
516 |
-
"output_type": "stream",
|
517 |
-
"text": [
|
518 |
-
"\n",
|
519 |
-
"\n",
|
520 |
-
"\n",
|
521 |
-
"Reading metadata...: 11476it [00:00, 144676.38it/s]\n"
|
522 |
-
]
|
523 |
-
},
|
524 |
-
{
|
525 |
-
"data": {
|
526 |
-
"application/vnd.jupyter.widget-view+json": {
|
527 |
-
"model_id": "",
|
528 |
-
"version_major": 2,
|
529 |
-
"version_minor": 0
|
530 |
-
},
|
531 |
-
"text/plain": [
|
532 |
-
"Generating invalidated split: 0 examples [00:00, ? examples/s]"
|
533 |
-
]
|
534 |
-
},
|
535 |
-
"metadata": {},
|
536 |
-
"output_type": "display_data"
|
537 |
-
},
|
538 |
-
{
|
539 |
-
"name": "stderr",
|
540 |
-
"output_type": "stream",
|
541 |
-
"text": [
|
542 |
-
"\n",
|
543 |
-
"\n",
|
544 |
-
"\n",
|
545 |
-
"\n",
|
546 |
-
"Reading metadata...: 337it [00:00, 119250.86it/s][A\n"
|
547 |
-
]
|
548 |
-
},
|
549 |
-
{
|
550 |
-
"name": "stdout",
|
551 |
-
"output_type": "stream",
|
552 |
-
"text": [
|
553 |
-
"Dataset common_voice_11_0 downloaded and prepared to /home/ubuntu/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/vi/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f. Subsequent calls will reuse this data.\n"
|
554 |
-
]
|
555 |
-
},
|
556 |
{
|
557 |
"name": "stderr",
|
558 |
"output_type": "stream",
|
559 |
"text": [
|
|
|
560 |
"Found cached dataset common_voice_11_0 (/home/ubuntu/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/vi/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f)\n"
|
561 |
]
|
562 |
},
|
@@ -667,26 +270,11 @@
|
|
667 |
{
|
668 |
"cell_type": "code",
|
669 |
"execution_count": 3,
|
670 |
-
"id": "bc77d7bb-f9e2-47f5-b663-30f7a4321ce5",
|
671 |
-
"metadata": {
|
672 |
-
"id": "bc77d7bb-f9e2-47f5-b663-30f7a4321ce5"
|
673 |
-
},
|
674 |
-
"outputs": [
|
675 |
-
{
|
676 |
-
"data": {
|
677 |
-
"application/vnd.jupyter.widget-view+json": {
|
678 |
-
"model_id": "56fc56162a2848afaee1d9943a8c545f",
|
679 |
-
"version_major": 2,
|
680 |
-
"version_minor": 0
|
681 |
-
},
|
682 |
-
"text/plain": [
|
683 |
-
"Downloading: 0%| | 0.00/185k [00:00<?, ?B/s]"
|
684 |
-
]
|
685 |
-
},
|
686 |
-
"metadata": {},
|
687 |
-
"output_type": "display_data"
|
688 |
-
}
|
689 |
-
],
|
690 |
"source": [
|
691 |
"from transformers import WhisperFeatureExtractor\n",
|
692 |
"\n",
|
@@ -724,92 +312,7 @@
|
|
724 |
"id": "c7b07f9b-ae0e-4f89-98f0-0c50d432eab6",
|
725 |
"outputId": "5c004b44-86e7-4e00-88be-39e0af5eed69"
|
726 |
},
|
727 |
-
"outputs": [
|
728 |
-
{
|
729 |
-
"data": {
|
730 |
-
"application/vnd.jupyter.widget-view+json": {
|
731 |
-
"model_id": "3d2f8de076bd4fbb8e777da01a5bff36",
|
732 |
-
"version_major": 2,
|
733 |
-
"version_minor": 0
|
734 |
-
},
|
735 |
-
"text/plain": [
|
736 |
-
"Downloading: 0%| | 0.00/830 [00:00<?, ?B/s]"
|
737 |
-
]
|
738 |
-
},
|
739 |
-
"metadata": {},
|
740 |
-
"output_type": "display_data"
|
741 |
-
},
|
742 |
-
{
|
743 |
-
"data": {
|
744 |
-
"application/vnd.jupyter.widget-view+json": {
|
745 |
-
"model_id": "22c630ec1f564e16b4d2066702666490",
|
746 |
-
"version_major": 2,
|
747 |
-
"version_minor": 0
|
748 |
-
},
|
749 |
-
"text/plain": [
|
750 |
-
"Downloading: 0%| | 0.00/1.04M [00:00<?, ?B/s]"
|
751 |
-
]
|
752 |
-
},
|
753 |
-
"metadata": {},
|
754 |
-
"output_type": "display_data"
|
755 |
-
},
|
756 |
-
{
|
757 |
-
"data": {
|
758 |
-
"application/vnd.jupyter.widget-view+json": {
|
759 |
-
"model_id": "2f4cb16bef8e45f5b8abf176d675fe98",
|
760 |
-
"version_major": 2,
|
761 |
-
"version_minor": 0
|
762 |
-
},
|
763 |
-
"text/plain": [
|
764 |
-
"Downloading: 0%| | 0.00/494k [00:00<?, ?B/s]"
|
765 |
-
]
|
766 |
-
},
|
767 |
-
"metadata": {},
|
768 |
-
"output_type": "display_data"
|
769 |
-
},
|
770 |
-
{
|
771 |
-
"data": {
|
772 |
-
"application/vnd.jupyter.widget-view+json": {
|
773 |
-
"model_id": "5c3601ab893747c398323f80faa415d1",
|
774 |
-
"version_major": 2,
|
775 |
-
"version_minor": 0
|
776 |
-
},
|
777 |
-
"text/plain": [
|
778 |
-
"Downloading: 0%| | 0.00/52.7k [00:00<?, ?B/s]"
|
779 |
-
]
|
780 |
-
},
|
781 |
-
"metadata": {},
|
782 |
-
"output_type": "display_data"
|
783 |
-
},
|
784 |
-
{
|
785 |
-
"data": {
|
786 |
-
"application/vnd.jupyter.widget-view+json": {
|
787 |
-
"model_id": "424550d6926c44cf85dfb8b538cbc450",
|
788 |
-
"version_major": 2,
|
789 |
-
"version_minor": 0
|
790 |
-
},
|
791 |
-
"text/plain": [
|
792 |
-
"Downloading: 0%| | 0.00/2.11k [00:00<?, ?B/s]"
|
793 |
-
]
|
794 |
-
},
|
795 |
-
"metadata": {},
|
796 |
-
"output_type": "display_data"
|
797 |
-
},
|
798 |
-
{
|
799 |
-
"data": {
|
800 |
-
"application/vnd.jupyter.widget-view+json": {
|
801 |
-
"model_id": "d263b9aaed5d4b9e941798363994658c",
|
802 |
-
"version_major": 2,
|
803 |
-
"version_minor": 0
|
804 |
-
},
|
805 |
-
"text/plain": [
|
806 |
-
"Downloading: 0%| | 0.00/2.06k [00:00<?, ?B/s]"
|
807 |
-
]
|
808 |
-
},
|
809 |
-
"metadata": {},
|
810 |
-
"output_type": "display_data"
|
811 |
-
}
|
812 |
-
],
|
813 |
"source": [
|
814 |
"from transformers import WhisperTokenizer\n",
|
815 |
"\n",
|
@@ -914,7 +417,7 @@
|
|
914 |
},
|
915 |
{
|
916 |
"cell_type": "code",
|
917 |
-
"execution_count":
|
918 |
"id": "4c79b333",
|
919 |
"metadata": {},
|
920 |
"outputs": [],
|
@@ -940,7 +443,7 @@
|
|
940 |
},
|
941 |
{
|
942 |
"cell_type": "code",
|
943 |
-
"execution_count":
|
944 |
"id": "c085911c-a10a-41ef-8874-306e0503e9bb",
|
945 |
"metadata": {},
|
946 |
"outputs": [],
|
@@ -978,23 +481,16 @@
|
|
978 |
},
|
979 |
{
|
980 |
"cell_type": "code",
|
981 |
-
"execution_count":
|
982 |
"id": "db271164",
|
983 |
"metadata": {},
|
984 |
"outputs": [
|
985 |
{
|
986 |
-
"
|
987 |
-
|
988 |
-
|
989 |
-
|
990 |
-
|
991 |
-
},
|
992 |
-
"text/plain": [
|
993 |
-
" 0%| | 0/2773 [00:00<?, ?ex/s]"
|
994 |
-
]
|
995 |
-
},
|
996 |
-
"metadata": {},
|
997 |
-
"output_type": "display_data"
|
998 |
}
|
999 |
],
|
1000 |
"source": [
|
@@ -1003,28 +499,21 @@
|
|
1003 |
},
|
1004 |
{
|
1005 |
"cell_type": "code",
|
1006 |
-
"execution_count":
|
1007 |
"id": "b459b0c5",
|
1008 |
"metadata": {},
|
1009 |
"outputs": [
|
1010 |
{
|
1011 |
-
"
|
1012 |
-
|
1013 |
-
|
1014 |
-
|
1015 |
-
|
1016 |
-
},
|
1017 |
-
"text/plain": [
|
1018 |
-
" 0%| | 0/2773 [00:00<?, ?ex/s]"
|
1019 |
-
]
|
1020 |
-
},
|
1021 |
-
"metadata": {},
|
1022 |
-
"output_type": "display_data"
|
1023 |
},
|
1024 |
{
|
1025 |
"data": {
|
1026 |
"application/vnd.jupyter.widget-view+json": {
|
1027 |
-
"model_id": "
|
1028 |
"version_major": 2,
|
1029 |
"version_minor": 0
|
1030 |
},
|
@@ -1050,7 +539,7 @@
|
|
1050 |
},
|
1051 |
{
|
1052 |
"cell_type": "code",
|
1053 |
-
"execution_count":
|
1054 |
"id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
|
1055 |
"metadata": {},
|
1056 |
"outputs": [],
|
@@ -1071,23 +560,16 @@
|
|
1071 |
},
|
1072 |
{
|
1073 |
"cell_type": "code",
|
1074 |
-
"execution_count":
|
1075 |
"id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
|
1076 |
"metadata": {},
|
1077 |
"outputs": [
|
1078 |
{
|
1079 |
-
"
|
1080 |
-
|
1081 |
-
|
1082 |
-
|
1083 |
-
|
1084 |
-
},
|
1085 |
-
"text/plain": [
|
1086 |
-
" 0%| | 0/3 [00:00<?, ?ba/s]"
|
1087 |
-
]
|
1088 |
-
},
|
1089 |
-
"metadata": {},
|
1090 |
-
"output_type": "display_data"
|
1091 |
}
|
1092 |
],
|
1093 |
"source": [
|
@@ -1167,7 +649,7 @@
|
|
1167 |
},
|
1168 |
{
|
1169 |
"cell_type": "code",
|
1170 |
-
"execution_count":
|
1171 |
"id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
|
1172 |
"metadata": {
|
1173 |
"id": "8326221e-ec13-4731-bb4e-51e5fc1486c5"
|
@@ -1219,7 +701,7 @@
|
|
1219 |
},
|
1220 |
{
|
1221 |
"cell_type": "code",
|
1222 |
-
"execution_count":
|
1223 |
"id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
|
1224 |
"metadata": {
|
1225 |
"id": "fc834702-c0d3-4a96-b101-7b87be32bf42"
|
@@ -1252,41 +734,12 @@
|
|
1252 |
},
|
1253 |
{
|
1254 |
"cell_type": "code",
|
1255 |
-
"execution_count":
|
1256 |
"id": "b22b4011-f31f-4b57-b684-c52332f92890",
|
1257 |
"metadata": {
|
1258 |
"id": "b22b4011-f31f-4b57-b684-c52332f92890"
|
1259 |
},
|
1260 |
-
"outputs": [
|
1261 |
-
{
|
1262 |
-
"data": {
|
1263 |
-
"application/vnd.jupyter.widget-view+json": {
|
1264 |
-
"model_id": "cb228b59a10f45bc860a59f7f96b085b",
|
1265 |
-
"version_major": 2,
|
1266 |
-
"version_minor": 0
|
1267 |
-
},
|
1268 |
-
"text/plain": [
|
1269 |
-
"Downloading builder script: 0%| | 0.00/4.49k [00:00<?, ?B/s]"
|
1270 |
-
]
|
1271 |
-
},
|
1272 |
-
"metadata": {},
|
1273 |
-
"output_type": "display_data"
|
1274 |
-
},
|
1275 |
-
{
|
1276 |
-
"data": {
|
1277 |
-
"application/vnd.jupyter.widget-view+json": {
|
1278 |
-
"model_id": "36844d00e5864685a7913b31a3510284",
|
1279 |
-
"version_major": 2,
|
1280 |
-
"version_minor": 0
|
1281 |
-
},
|
1282 |
-
"text/plain": [
|
1283 |
-
"Downloading builder script: 0%| | 0.00/5.60k [00:00<?, ?B/s]"
|
1284 |
-
]
|
1285 |
-
},
|
1286 |
-
"metadata": {},
|
1287 |
-
"output_type": "display_data"
|
1288 |
-
}
|
1289 |
-
],
|
1290 |
"source": [
|
1291 |
"import evaluate\n",
|
1292 |
"\n",
|
@@ -1315,7 +768,7 @@
|
|
1315 |
},
|
1316 |
{
|
1317 |
"cell_type": "code",
|
1318 |
-
"execution_count":
|
1319 |
"id": "23959a70-22d0-4ffe-9fa1-72b61e75bb52",
|
1320 |
"metadata": {
|
1321 |
"id": "23959a70-22d0-4ffe-9fa1-72b61e75bb52"
|
@@ -1369,41 +822,12 @@
|
|
1369 |
},
|
1370 |
{
|
1371 |
"cell_type": "code",
|
1372 |
-
"execution_count":
|
1373 |
"id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
|
1374 |
"metadata": {
|
1375 |
"id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f"
|
1376 |
},
|
1377 |
-
"outputs": [
|
1378 |
-
{
|
1379 |
-
"data": {
|
1380 |
-
"application/vnd.jupyter.widget-view+json": {
|
1381 |
-
"model_id": "9c0d9c343202473290cce99917b65ea0",
|
1382 |
-
"version_major": 2,
|
1383 |
-
"version_minor": 0
|
1384 |
-
},
|
1385 |
-
"text/plain": [
|
1386 |
-
"Downloading: 0%| | 0.00/1.97k [00:00<?, ?B/s]"
|
1387 |
-
]
|
1388 |
-
},
|
1389 |
-
"metadata": {},
|
1390 |
-
"output_type": "display_data"
|
1391 |
-
},
|
1392 |
-
{
|
1393 |
-
"data": {
|
1394 |
-
"application/vnd.jupyter.widget-view+json": {
|
1395 |
-
"model_id": "e0d6c82f82ed4aa292d1fbee51ebb865",
|
1396 |
-
"version_major": 2,
|
1397 |
-
"version_minor": 0
|
1398 |
-
},
|
1399 |
-
"text/plain": [
|
1400 |
-
"Downloading: 0%| | 0.00/3.06G [00:00<?, ?B/s]"
|
1401 |
-
]
|
1402 |
-
},
|
1403 |
-
"metadata": {},
|
1404 |
-
"output_type": "display_data"
|
1405 |
-
}
|
1406 |
-
],
|
1407 |
"source": [
|
1408 |
"from transformers import WhisperForConditionalGeneration\n",
|
1409 |
"\n",
|
@@ -1422,7 +846,7 @@
|
|
1422 |
},
|
1423 |
{
|
1424 |
"cell_type": "code",
|
1425 |
-
"execution_count":
|
1426 |
"id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
|
1427 |
"metadata": {
|
1428 |
"id": "62038ba3-88ed-4fce-84db-338f50dcd04f"
|
@@ -1456,7 +880,7 @@
|
|
1456 |
},
|
1457 |
{
|
1458 |
"cell_type": "code",
|
1459 |
-
"execution_count":
|
1460 |
"id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
|
1461 |
"metadata": {
|
1462 |
"id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a"
|
@@ -1469,17 +893,17 @@
|
|
1469 |
" output_dir=\"./\",\n",
|
1470 |
" per_device_train_batch_size=32,\n",
|
1471 |
" gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
|
1472 |
-
" learning_rate=
|
1473 |
-
" warmup_steps=
|
1474 |
-
" max_steps=
|
1475 |
" gradient_checkpointing=True,\n",
|
1476 |
" fp16=True,\n",
|
1477 |
" evaluation_strategy=\"steps\",\n",
|
1478 |
" per_device_eval_batch_size=16,\n",
|
1479 |
" predict_with_generate=True,\n",
|
1480 |
" generation_max_length=225,\n",
|
1481 |
-
" save_steps=
|
1482 |
-
" eval_steps=
|
1483 |
" logging_steps=25,\n",
|
1484 |
" report_to=[\"tensorboard\"],\n",
|
1485 |
" load_best_model_at_end=True,\n",
|
@@ -1513,7 +937,7 @@
|
|
1513 |
},
|
1514 |
{
|
1515 |
"cell_type": "code",
|
1516 |
-
"execution_count":
|
1517 |
"id": "d546d7fe-0543-479a-b708-2ebabec19493",
|
1518 |
"metadata": {
|
1519 |
"id": "d546d7fe-0543-479a-b708-2ebabec19493"
|
@@ -1555,7 +979,7 @@
|
|
1555 |
},
|
1556 |
{
|
1557 |
"cell_type": "code",
|
1558 |
-
"execution_count":
|
1559 |
"id": "-2zQwMfEOBJq",
|
1560 |
"metadata": {
|
1561 |
"id": "-2zQwMfEOBJq"
|
@@ -1608,7 +1032,7 @@
|
|
1608 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
1609 |
"metadata": {
|
1610 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
1611 |
-
"scrolled":
|
1612 |
},
|
1613 |
"outputs": [
|
1614 |
{
|
@@ -1620,11 +1044,11 @@
|
|
1620 |
" warnings.warn(\n",
|
1621 |
"***** Running training *****\n",
|
1622 |
" Num examples = 2773\n",
|
1623 |
-
" Num Epochs =
|
1624 |
" Instantaneous batch size per device = 32\n",
|
1625 |
" Total train batch size (w. parallel, distributed & accumulation) = 32\n",
|
1626 |
" Gradient Accumulation steps = 1\n",
|
1627 |
-
" Total optimization steps =
|
1628 |
" Number of trainable parameters = 763857920\n"
|
1629 |
]
|
1630 |
},
|
@@ -1634,8 +1058,8 @@
|
|
1634 |
"\n",
|
1635 |
" <div>\n",
|
1636 |
" \n",
|
1637 |
-
" <progress value='
|
1638 |
-
" [
|
1639 |
" </div>\n",
|
1640 |
" <table border=\"1\" class=\"dataframe\">\n",
|
1641 |
" <thead>\n",
|
@@ -1643,17 +1067,20 @@
|
|
1643 |
" <th>Step</th>\n",
|
1644 |
" <th>Training Loss</th>\n",
|
1645 |
" <th>Validation Loss</th>\n",
|
|
|
|
|
1646 |
" </tr>\n",
|
1647 |
" </thead>\n",
|
1648 |
" <tbody>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1649 |
" </tbody>\n",
|
1650 |
-
"</table><p
|
1651 |
-
" <div>\n",
|
1652 |
-
" \n",
|
1653 |
-
" <progress value='77' max='78' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
1654 |
-
" [77/78 10:16 < 00:08, 0.12 it/s]\n",
|
1655 |
-
" </div>\n",
|
1656 |
-
" "
|
1657 |
],
|
1658 |
"text/plain": [
|
1659 |
"<IPython.core.display.HTML object>"
|
@@ -2853,7 +2280,18 @@
|
|
2853 |
" \"transformers_version\": \"4.26.0.dev0\",\n",
|
2854 |
" \"use_cache\": false\n",
|
2855 |
"}\n",
|
2856 |
-
"\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2857 |
]
|
2858 |
}
|
2859 |
],
|
@@ -2920,14 +2358,8 @@
|
|
2920 |
"metadata": {},
|
2921 |
"outputs": [],
|
2922 |
"source": [
|
2923 |
-
"fleurs_results = trainer.evaluate(fleurs['test'])\n",
|
2924 |
-
"print(fleurs_results)\n",
|
2925 |
-
"\n",
|
2926 |
"cv_results = trainer.evaluate(cv['test'])\n",
|
2927 |
-
"print(cv_results)
|
2928 |
-
"\n",
|
2929 |
-
"lbv_results = trainer.evaluate(lbv['test'])\n",
|
2930 |
-
"print(lbv_results)"
|
2931 |
]
|
2932 |
},
|
2933 |
{
|
@@ -2937,34 +2369,6 @@
|
|
2937 |
"metadata": {},
|
2938 |
"outputs": [],
|
2939 |
"source": [
|
2940 |
-
"evaluate.push_to_hub(\n",
|
2941 |
-
" model_id='Scrya/whisper-medium-id',\n",
|
2942 |
-
" metric_value=round(fleurs_results['eval_wer'], 2),\n",
|
2943 |
-
" metric_type=\"wer\",\n",
|
2944 |
-
" metric_name=\"WER\",\n",
|
2945 |
-
" dataset_name='google/fleurs',\n",
|
2946 |
-
" dataset_type='google/fleurs',\n",
|
2947 |
-
" dataset_split='test',\n",
|
2948 |
-
" dataset_config='id_id',\n",
|
2949 |
-
" task_type=\"automatic-speech-recognition\",\n",
|
2950 |
-
" task_name=\"Automatic Speech Recognition\",\n",
|
2951 |
-
" overwrite=True\n",
|
2952 |
-
" )\n",
|
2953 |
-
"\n",
|
2954 |
-
"evaluate.push_to_hub(\n",
|
2955 |
-
" model_id='Scrya/whisper-medium-id',\n",
|
2956 |
-
" metric_value=round(fleurs_results['eval_cer'], 2),\n",
|
2957 |
-
" metric_type=\"cer\",\n",
|
2958 |
-
" metric_name=\"CER\",\n",
|
2959 |
-
" dataset_name='google/fleurs',\n",
|
2960 |
-
" dataset_type='google/fleurs',\n",
|
2961 |
-
" dataset_split='test',\n",
|
2962 |
-
" dataset_config='id_id',\n",
|
2963 |
-
" task_type=\"automatic-speech-recognition\",\n",
|
2964 |
-
" task_name=\"Automatic Speech Recognition\",\n",
|
2965 |
-
" overwrite=True\n",
|
2966 |
-
" )\n",
|
2967 |
-
"\n",
|
2968 |
"evaluate.push_to_hub(\n",
|
2969 |
" model_id='Scrya/whisper-medium-id',\n",
|
2970 |
" metric_value=round(cv_results['eval_wer'], 2),\n",
|
@@ -2973,7 +2377,7 @@
|
|
2973 |
" dataset_name='mozilla-foundation/common_voice_11_0',\n",
|
2974 |
" dataset_type='mozilla-foundation/common_voice_11_0',\n",
|
2975 |
" dataset_split='test',\n",
|
2976 |
-
" dataset_config='
|
2977 |
" task_type=\"automatic-speech-recognition\",\n",
|
2978 |
" task_name=\"Automatic Speech Recognition\",\n",
|
2979 |
" overwrite=True\n",
|
@@ -2987,35 +2391,7 @@
|
|
2987 |
" dataset_name='mozilla-foundation/common_voice_11_0',\n",
|
2988 |
" dataset_type='mozilla-foundation/common_voice_11_0',\n",
|
2989 |
" dataset_split='test',\n",
|
2990 |
-
" dataset_config='
|
2991 |
-
" task_type=\"automatic-speech-recognition\",\n",
|
2992 |
-
" task_name=\"Automatic Speech Recognition\",\n",
|
2993 |
-
" overwrite=True\n",
|
2994 |
-
" )\n",
|
2995 |
-
"\n",
|
2996 |
-
"evaluate.push_to_hub(\n",
|
2997 |
-
" model_id='Scrya/whisper-medium-id',\n",
|
2998 |
-
" metric_value=round(lbv_results['eval_wer'], 2),\n",
|
2999 |
-
" metric_type=\"wer\",\n",
|
3000 |
-
" metric_name=\"WER\",\n",
|
3001 |
-
" dataset_name='indonesian-nlp/librivox-indonesia',\n",
|
3002 |
-
" dataset_type='indonesian-nlp/librivox-indonesia',\n",
|
3003 |
-
" dataset_split='test',\n",
|
3004 |
-
" dataset_config='ind',\n",
|
3005 |
-
" task_type=\"automatic-speech-recognition\",\n",
|
3006 |
-
" task_name=\"Automatic Speech Recognition\",\n",
|
3007 |
-
" overwrite=True\n",
|
3008 |
-
" )\n",
|
3009 |
-
"\n",
|
3010 |
-
"evaluate.push_to_hub(\n",
|
3011 |
-
" model_id='Scrya/whisper-medium-id',\n",
|
3012 |
-
" metric_value=round(lbv_results['eval_cer'], 2),\n",
|
3013 |
-
" metric_type=\"cer\",\n",
|
3014 |
-
" metric_name=\"CER\",\n",
|
3015 |
-
" dataset_name='indonesian-nlp/librivox-indonesia',\n",
|
3016 |
-
" dataset_type='indonesian-nlp/librivox-indonesia',\n",
|
3017 |
-
" dataset_split='test',\n",
|
3018 |
-
" dataset_config='ind',\n",
|
3019 |
" task_type=\"automatic-speech-recognition\",\n",
|
3020 |
" task_name=\"Automatic Speech Recognition\",\n",
|
3021 |
" overwrite=True\n",
|
|
|
155 |
"id": "d087b451",
|
156 |
"metadata": {},
|
157 |
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
{
|
159 |
"name": "stderr",
|
160 |
"output_type": "stream",
|
161 |
"text": [
|
162 |
+
"Found cached dataset common_voice_11_0 (/home/ubuntu/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/vi/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f)\n",
|
163 |
"Found cached dataset common_voice_11_0 (/home/ubuntu/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/vi/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f)\n"
|
164 |
]
|
165 |
},
|
|
|
270 |
{
|
271 |
"cell_type": "code",
|
272 |
"execution_count": 3,
|
273 |
+
"id": "bc77d7bb-f9e2-47f5-b663-30f7a4321ce5",
|
274 |
+
"metadata": {
|
275 |
+
"id": "bc77d7bb-f9e2-47f5-b663-30f7a4321ce5"
|
276 |
+
},
|
277 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
278 |
"source": [
|
279 |
"from transformers import WhisperFeatureExtractor\n",
|
280 |
"\n",
|
|
|
312 |
"id": "c7b07f9b-ae0e-4f89-98f0-0c50d432eab6",
|
313 |
"outputId": "5c004b44-86e7-4e00-88be-39e0af5eed69"
|
314 |
},
|
315 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
316 |
"source": [
|
317 |
"from transformers import WhisperTokenizer\n",
|
318 |
"\n",
|
|
|
417 |
},
|
418 |
{
|
419 |
"cell_type": "code",
|
420 |
+
"execution_count": 8,
|
421 |
"id": "4c79b333",
|
422 |
"metadata": {},
|
423 |
"outputs": [],
|
|
|
443 |
},
|
444 |
{
|
445 |
"cell_type": "code",
|
446 |
+
"execution_count": 9,
|
447 |
"id": "c085911c-a10a-41ef-8874-306e0503e9bb",
|
448 |
"metadata": {},
|
449 |
"outputs": [],
|
|
|
481 |
},
|
482 |
{
|
483 |
"cell_type": "code",
|
484 |
+
"execution_count": 10,
|
485 |
"id": "db271164",
|
486 |
"metadata": {},
|
487 |
"outputs": [
|
488 |
{
|
489 |
+
"name": "stderr",
|
490 |
+
"output_type": "stream",
|
491 |
+
"text": [
|
492 |
+
"Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/vi/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-43c2297beff94678.arrow\n"
|
493 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
494 |
}
|
495 |
],
|
496 |
"source": [
|
|
|
499 |
},
|
500 |
{
|
501 |
"cell_type": "code",
|
502 |
+
"execution_count": 11,
|
503 |
"id": "b459b0c5",
|
504 |
"metadata": {},
|
505 |
"outputs": [
|
506 |
{
|
507 |
+
"name": "stderr",
|
508 |
+
"output_type": "stream",
|
509 |
+
"text": [
|
510 |
+
"Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/vi/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-4ea982ccf5dc2951.arrow\n"
|
511 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
512 |
},
|
513 |
{
|
514 |
"data": {
|
515 |
"application/vnd.jupyter.widget-view+json": {
|
516 |
+
"model_id": "01fdf7e3e86f4080b11126a6ec744788",
|
517 |
"version_major": 2,
|
518 |
"version_minor": 0
|
519 |
},
|
|
|
539 |
},
|
540 |
{
|
541 |
"cell_type": "code",
|
542 |
+
"execution_count": 12,
|
543 |
"id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
|
544 |
"metadata": {},
|
545 |
"outputs": [],
|
|
|
560 |
},
|
561 |
{
|
562 |
"cell_type": "code",
|
563 |
+
"execution_count": 13,
|
564 |
"id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
|
565 |
"metadata": {},
|
566 |
"outputs": [
|
567 |
{
|
568 |
+
"name": "stderr",
|
569 |
+
"output_type": "stream",
|
570 |
+
"text": [
|
571 |
+
"Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/vi/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-93491e8b5f5bd854.arrow\n"
|
572 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
573 |
}
|
574 |
],
|
575 |
"source": [
|
|
|
649 |
},
|
650 |
{
|
651 |
"cell_type": "code",
|
652 |
+
"execution_count": 14,
|
653 |
"id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
|
654 |
"metadata": {
|
655 |
"id": "8326221e-ec13-4731-bb4e-51e5fc1486c5"
|
|
|
701 |
},
|
702 |
{
|
703 |
"cell_type": "code",
|
704 |
+
"execution_count": 15,
|
705 |
"id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
|
706 |
"metadata": {
|
707 |
"id": "fc834702-c0d3-4a96-b101-7b87be32bf42"
|
|
|
734 |
},
|
735 |
{
|
736 |
"cell_type": "code",
|
737 |
+
"execution_count": 16,
|
738 |
"id": "b22b4011-f31f-4b57-b684-c52332f92890",
|
739 |
"metadata": {
|
740 |
"id": "b22b4011-f31f-4b57-b684-c52332f92890"
|
741 |
},
|
742 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
743 |
"source": [
|
744 |
"import evaluate\n",
|
745 |
"\n",
|
|
|
768 |
},
|
769 |
{
|
770 |
"cell_type": "code",
|
771 |
+
"execution_count": 17,
|
772 |
"id": "23959a70-22d0-4ffe-9fa1-72b61e75bb52",
|
773 |
"metadata": {
|
774 |
"id": "23959a70-22d0-4ffe-9fa1-72b61e75bb52"
|
|
|
822 |
},
|
823 |
{
|
824 |
"cell_type": "code",
|
825 |
+
"execution_count": 18,
|
826 |
"id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
|
827 |
"metadata": {
|
828 |
"id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f"
|
829 |
},
|
830 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
831 |
"source": [
|
832 |
"from transformers import WhisperForConditionalGeneration\n",
|
833 |
"\n",
|
|
|
846 |
},
|
847 |
{
|
848 |
"cell_type": "code",
|
849 |
+
"execution_count": 19,
|
850 |
"id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
|
851 |
"metadata": {
|
852 |
"id": "62038ba3-88ed-4fce-84db-338f50dcd04f"
|
|
|
880 |
},
|
881 |
{
|
882 |
"cell_type": "code",
|
883 |
+
"execution_count": 20,
|
884 |
"id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
|
885 |
"metadata": {
|
886 |
"id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a"
|
|
|
893 |
" output_dir=\"./\",\n",
|
894 |
" per_device_train_batch_size=32,\n",
|
895 |
" gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
|
896 |
+
" learning_rate=1e-5,\n",
|
897 |
+
" warmup_steps=500,\n",
|
898 |
+
" max_steps=5000,\n",
|
899 |
" gradient_checkpointing=True,\n",
|
900 |
" fp16=True,\n",
|
901 |
" evaluation_strategy=\"steps\",\n",
|
902 |
" per_device_eval_batch_size=16,\n",
|
903 |
" predict_with_generate=True,\n",
|
904 |
" generation_max_length=225,\n",
|
905 |
+
" save_steps=1000,\n",
|
906 |
+
" eval_steps=1000,\n",
|
907 |
" logging_steps=25,\n",
|
908 |
" report_to=[\"tensorboard\"],\n",
|
909 |
" load_best_model_at_end=True,\n",
|
|
|
937 |
},
|
938 |
{
|
939 |
"cell_type": "code",
|
940 |
+
"execution_count": 21,
|
941 |
"id": "d546d7fe-0543-479a-b708-2ebabec19493",
|
942 |
"metadata": {
|
943 |
"id": "d546d7fe-0543-479a-b708-2ebabec19493"
|
|
|
979 |
},
|
980 |
{
|
981 |
"cell_type": "code",
|
982 |
+
"execution_count": 22,
|
983 |
"id": "-2zQwMfEOBJq",
|
984 |
"metadata": {
|
985 |
"id": "-2zQwMfEOBJq"
|
|
|
1032 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
1033 |
"metadata": {
|
1034 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
1035 |
+
"scrolled": false
|
1036 |
},
|
1037 |
"outputs": [
|
1038 |
{
|
|
|
1044 |
" warnings.warn(\n",
|
1045 |
"***** Running training *****\n",
|
1046 |
" Num examples = 2773\n",
|
1047 |
+
" Num Epochs = 58\n",
|
1048 |
" Instantaneous batch size per device = 32\n",
|
1049 |
" Total train batch size (w. parallel, distributed & accumulation) = 32\n",
|
1050 |
" Gradient Accumulation steps = 1\n",
|
1051 |
+
" Total optimization steps = 5000\n",
|
1052 |
" Number of trainable parameters = 763857920\n"
|
1053 |
]
|
1054 |
},
|
|
|
1058 |
"\n",
|
1059 |
" <div>\n",
|
1060 |
" \n",
|
1061 |
+
" <progress value='1001' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
1062 |
+
" [1001/5000 1:06:20 < 4:25:35, 0.25 it/s, Epoch 11.49/58]\n",
|
1063 |
" </div>\n",
|
1064 |
" <table border=\"1\" class=\"dataframe\">\n",
|
1065 |
" <thead>\n",
|
|
|
1067 |
" <th>Step</th>\n",
|
1068 |
" <th>Training Loss</th>\n",
|
1069 |
" <th>Validation Loss</th>\n",
|
1070 |
+
" <th>Wer</th>\n",
|
1071 |
+
" <th>Cer</th>\n",
|
1072 |
" </tr>\n",
|
1073 |
" </thead>\n",
|
1074 |
" <tbody>\n",
|
1075 |
+
" <tr>\n",
|
1076 |
+
" <td>1000</td>\n",
|
1077 |
+
" <td>0.005300</td>\n",
|
1078 |
+
" <td>0.542897</td>\n",
|
1079 |
+
" <td>18.128976</td>\n",
|
1080 |
+
" <td>8.464310</td>\n",
|
1081 |
+
" </tr>\n",
|
1082 |
" </tbody>\n",
|
1083 |
+
"</table><p>"
|
|
|
|
|
|
|
|
|
|
|
|
|
1084 |
],
|
1085 |
"text/plain": [
|
1086 |
"<IPython.core.display.HTML object>"
|
|
|
2280 |
" \"transformers_version\": \"4.26.0.dev0\",\n",
|
2281 |
" \"use_cache\": false\n",
|
2282 |
"}\n",
|
2283 |
+
"\n",
|
2284 |
+
"Saving model checkpoint to ./checkpoint-1000\n",
|
2285 |
+
"Configuration saved in ./checkpoint-1000/config.json\n",
|
2286 |
+
"Model weights saved in ./checkpoint-1000/pytorch_model.bin\n",
|
2287 |
+
"Feature extractor saved in ./checkpoint-1000/preprocessor_config.json\n"
|
2288 |
+
]
|
2289 |
+
},
|
2290 |
+
{
|
2291 |
+
"name": "stderr",
|
2292 |
+
"output_type": "stream",
|
2293 |
+
"text": [
|
2294 |
+
"Feature extractor saved in ./preprocessor_config.json\n"
|
2295 |
]
|
2296 |
}
|
2297 |
],
|
|
|
2358 |
"metadata": {},
|
2359 |
"outputs": [],
|
2360 |
"source": [
|
|
|
|
|
|
|
2361 |
"cv_results = trainer.evaluate(cv['test'])\n",
|
2362 |
+
"print(cv_results)"
|
|
|
|
|
|
|
2363 |
]
|
2364 |
},
|
2365 |
{
|
|
|
2369 |
"metadata": {},
|
2370 |
"outputs": [],
|
2371 |
"source": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2372 |
"evaluate.push_to_hub(\n",
|
2373 |
" model_id='Scrya/whisper-medium-id',\n",
|
2374 |
" metric_value=round(cv_results['eval_wer'], 2),\n",
|
|
|
2377 |
" dataset_name='mozilla-foundation/common_voice_11_0',\n",
|
2378 |
" dataset_type='mozilla-foundation/common_voice_11_0',\n",
|
2379 |
" dataset_split='test',\n",
|
2380 |
+
" dataset_config='vi',\n",
|
2381 |
" task_type=\"automatic-speech-recognition\",\n",
|
2382 |
" task_name=\"Automatic Speech Recognition\",\n",
|
2383 |
" overwrite=True\n",
|
|
|
2391 |
" dataset_name='mozilla-foundation/common_voice_11_0',\n",
|
2392 |
" dataset_type='mozilla-foundation/common_voice_11_0',\n",
|
2393 |
" dataset_split='test',\n",
|
2394 |
+
" dataset_config='vi',\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2395 |
" task_type=\"automatic-speech-recognition\",\n",
|
2396 |
" task_name=\"Automatic Speech Recognition\",\n",
|
2397 |
" overwrite=True\n",
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3055754841
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04a4c5aa4fd868f15c45ea43819bf1a6d4e1cd9a54087bf4c7e3542040491896
|
3 |
size 3055754841
|
runs/Dec18_12-13-24_150-136-94-223/events.out.tfevents.1671365617.150-136-94-223.67469.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82e79925d6935ad64abeab6957af5c2421302c5eaabde093f13d112431abb2e2
|
3 |
+
size 9034
|
runs/Dec18_13-16-20_150-136-94-223/1671369384.9831293/events.out.tfevents.1671369384.150-136-94-223.188704.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:30bb44e17063d501a2c481a6b2bc2d9942bccf3ec23df2ef39eeae3f60d9ec4e
|
3 |
+
size 5864
|
runs/Dec18_13-16-20_150-136-94-223/events.out.tfevents.1671369384.150-136-94-223.188704.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91d5c1e008fca368c38df82e9c94653dc1219462e0d29b6e634a51da2fe160f3
|
3 |
+
size 10920
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3579
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f973458f69771b6165ab9524285a6bd70a0e5439fe024937cb09b41b37c8902a
|
3 |
size 3579
|