GGarri committed on
Commit
c2d3ed2
1 Parent(s): 56665f4

Training in progress, step 1000

Browse files
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bb067030f872fb2b70e06a73454240f478fe01ec627e1b1bfdc3d253c2fcd57
3
  size 967102729
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c953e09032d3a83a5cc7c9ef098d1e59f4182bae3fcfc2975fc69db9c359c7b
3
  size 967102729
test_whisper_finetuned.ipynb CHANGED
@@ -2,51 +2,9 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 6,
6
  "metadata": {},
7
  "outputs": [
8
- {
9
- "data": {
10
- "application/vnd.jupyter.widget-view+json": {
11
- "model_id": "f7537db8beff4744b3555f4d360cb867",
12
- "version_major": 2,
13
- "version_minor": 0
14
- },
15
- "text/plain": [
16
- "Downloading data: 0%| | 0.00/10.3G [00:00<?, ?B/s]"
17
- ]
18
- },
19
- "metadata": {},
20
- "output_type": "display_data"
21
- },
22
- {
23
- "data": {
24
- "application/vnd.jupyter.widget-view+json": {
25
- "model_id": "0f469fd6738a484bafc3a42ff32bf21e",
26
- "version_major": 2,
27
- "version_minor": 0
28
- },
29
- "text/plain": [
30
- "Generating train split: 0 examples [00:00, ? examples/s]"
31
- ]
32
- },
33
- "metadata": {},
34
- "output_type": "display_data"
35
- },
36
- {
37
- "data": {
38
- "application/vnd.jupyter.widget-view+json": {
39
- "model_id": "ec98f490c01240eb8f89d294a2c59dde",
40
- "version_major": 2,
41
- "version_minor": 0
42
- },
43
- "text/plain": [
44
- "Generating test split: 0 examples [00:00, ? examples/s]"
45
- ]
46
- },
47
- "metadata": {},
48
- "output_type": "display_data"
49
- },
50
  {
51
  "name": "stdout",
52
  "output_type": "stream",
@@ -77,7 +35,7 @@
77
  },
78
  {
79
  "cell_type": "code",
80
- "execution_count": 7,
81
  "metadata": {},
82
  "outputs": [
83
  {
@@ -107,24 +65,9 @@
107
  },
108
  {
109
  "cell_type": "code",
110
- "execution_count": 8,
111
  "metadata": {},
112
- "outputs": [
113
- {
114
- "data": {
115
- "application/vnd.jupyter.widget-view+json": {
116
- "model_id": "909a04095ce84a70bb1cf0b693bfa843",
117
- "version_major": 2,
118
- "version_minor": 0
119
- },
120
- "text/plain": [
121
- "Downloading (…)rocessor_config.json: 0%| | 0.00/185k [00:00<?, ?B/s]"
122
- ]
123
- },
124
- "metadata": {},
125
- "output_type": "display_data"
126
- }
127
- ],
128
  "source": [
129
  "from transformers import WhisperFeatureExtractor\n",
130
  "\n",
@@ -133,107 +76,9 @@
133
  },
134
  {
135
  "cell_type": "code",
136
- "execution_count": 9,
137
  "metadata": {},
138
  "outputs": [
139
- {
140
- "data": {
141
- "application/vnd.jupyter.widget-view+json": {
142
- "model_id": "ac6c9f270ae44be7a07f3a2b2518a090",
143
- "version_major": 2,
144
- "version_minor": 0
145
- },
146
- "text/plain": [
147
- "Downloading (…)okenizer_config.json: 0%| | 0.00/805 [00:00<?, ?B/s]"
148
- ]
149
- },
150
- "metadata": {},
151
- "output_type": "display_data"
152
- },
153
- {
154
- "data": {
155
- "application/vnd.jupyter.widget-view+json": {
156
- "model_id": "c5ffc60826bb4582aef22999cd582ec1",
157
- "version_major": 2,
158
- "version_minor": 0
159
- },
160
- "text/plain": [
161
- "Downloading (…)olve/main/vocab.json: 0%| | 0.00/836k [00:00<?, ?B/s]"
162
- ]
163
- },
164
- "metadata": {},
165
- "output_type": "display_data"
166
- },
167
- {
168
- "data": {
169
- "application/vnd.jupyter.widget-view+json": {
170
- "model_id": "9aeafe7afc0241e296b4393281369665",
171
- "version_major": 2,
172
- "version_minor": 0
173
- },
174
- "text/plain": [
175
- "Downloading (…)/main/tokenizer.json: 0%| | 0.00/2.48M [00:00<?, ?B/s]"
176
- ]
177
- },
178
- "metadata": {},
179
- "output_type": "display_data"
180
- },
181
- {
182
- "data": {
183
- "application/vnd.jupyter.widget-view+json": {
184
- "model_id": "91ca2e2c0039401a95603600ccff5f1f",
185
- "version_major": 2,
186
- "version_minor": 0
187
- },
188
- "text/plain": [
189
- "Downloading (…)olve/main/merges.txt: 0%| | 0.00/494k [00:00<?, ?B/s]"
190
- ]
191
- },
192
- "metadata": {},
193
- "output_type": "display_data"
194
- },
195
- {
196
- "data": {
197
- "application/vnd.jupyter.widget-view+json": {
198
- "model_id": "67536c658ac64b96a09abf3f8e558533",
199
- "version_major": 2,
200
- "version_minor": 0
201
- },
202
- "text/plain": [
203
- "Downloading (…)main/normalizer.json: 0%| | 0.00/52.7k [00:00<?, ?B/s]"
204
- ]
205
- },
206
- "metadata": {},
207
- "output_type": "display_data"
208
- },
209
- {
210
- "data": {
211
- "application/vnd.jupyter.widget-view+json": {
212
- "model_id": "07d64b8f7c964df8b138f638629d84b7",
213
- "version_major": 2,
214
- "version_minor": 0
215
- },
216
- "text/plain": [
217
- "Downloading (…)in/added_tokens.json: 0%| | 0.00/34.6k [00:00<?, ?B/s]"
218
- ]
219
- },
220
- "metadata": {},
221
- "output_type": "display_data"
222
- },
223
- {
224
- "data": {
225
- "application/vnd.jupyter.widget-view+json": {
226
- "model_id": "c338b9a3ecb84de1b397a21489da4a2f",
227
- "version_major": 2,
228
- "version_minor": 0
229
- },
230
- "text/plain": [
231
- "Downloading (…)cial_tokens_map.json: 0%| | 0.00/2.08k [00:00<?, ?B/s]"
232
- ]
233
- },
234
- "metadata": {},
235
- "output_type": "display_data"
236
- },
237
  {
238
  "name": "stderr",
239
  "output_type": "stream",
@@ -250,7 +95,7 @@
250
  },
251
  {
252
  "cell_type": "code",
253
- "execution_count": 10,
254
  "metadata": {},
255
  "outputs": [
256
  {
@@ -269,7 +114,7 @@
269
  },
270
  {
271
  "cell_type": "code",
272
- "execution_count": 12,
273
  "metadata": {},
274
  "outputs": [
275
  {
@@ -287,7 +132,7 @@
287
  },
288
  {
289
  "cell_type": "code",
290
- "execution_count": 13,
291
  "metadata": {},
292
  "outputs": [],
293
  "source": [
@@ -299,7 +144,7 @@
299
  },
300
  {
301
  "cell_type": "code",
302
- "execution_count": 14,
303
  "metadata": {},
304
  "outputs": [
305
  {
@@ -317,7 +162,7 @@
317
  },
318
  {
319
  "cell_type": "code",
320
- "execution_count": 15,
321
  "metadata": {},
322
  "outputs": [],
323
  "source": [
@@ -331,7 +176,7 @@
331
  },
332
  {
333
  "cell_type": "code",
334
- "execution_count": 16,
335
  "metadata": {},
336
  "outputs": [],
337
  "source": [
@@ -358,13 +203,13 @@
358
  },
359
  {
360
  "cell_type": "code",
361
- "execution_count": 17,
362
  "metadata": {},
363
  "outputs": [
364
  {
365
  "data": {
366
  "application/vnd.jupyter.widget-view+json": {
367
- "model_id": "a65c6a2e3f5d4d56bec901dbf14f2921",
368
  "version_major": 2,
369
  "version_minor": 0
370
  },
@@ -378,7 +223,7 @@
378
  {
379
  "data": {
380
  "application/vnd.jupyter.widget-view+json": {
381
- "model_id": "fe2de34ac35a415e8fb7661b41852c69",
382
  "version_major": 2,
383
  "version_minor": 0
384
  },
@@ -396,7 +241,7 @@
396
  },
397
  {
398
  "cell_type": "code",
399
- "execution_count": 18,
400
  "metadata": {},
401
  "outputs": [],
402
  "source": [
@@ -409,13 +254,13 @@
409
  },
410
  {
411
  "cell_type": "code",
412
- "execution_count": 19,
413
  "metadata": {},
414
  "outputs": [
415
  {
416
  "data": {
417
  "application/vnd.jupyter.widget-view+json": {
418
- "model_id": "65b061edac6c4ef58f81cb8edec172e0",
419
  "version_major": 2,
420
  "version_minor": 0
421
  },
@@ -436,7 +281,7 @@
436
  },
437
  {
438
  "cell_type": "code",
439
- "execution_count": 20,
440
  "metadata": {},
441
  "outputs": [],
442
  "source": [
@@ -475,7 +320,7 @@
475
  },
476
  {
477
  "cell_type": "code",
478
- "execution_count": 21,
479
  "metadata": {},
480
  "outputs": [],
481
  "source": [
@@ -484,7 +329,7 @@
484
  },
485
  {
486
  "cell_type": "code",
487
- "execution_count": 25,
488
  "metadata": {},
489
  "outputs": [],
490
  "source": [
@@ -495,24 +340,9 @@
495
  },
496
  {
497
  "cell_type": "code",
498
- "execution_count": 26,
499
  "metadata": {},
500
- "outputs": [
501
- {
502
- "data": {
503
- "application/vnd.jupyter.widget-view+json": {
504
- "model_id": "a57f57bdfa4e4f1ba64458a6e381ebc1",
505
- "version_major": 2,
506
- "version_minor": 0
507
- },
508
- "text/plain": [
509
- "Downloading builder script: 0%| | 0.00/5.60k [00:00<?, ?B/s]"
510
- ]
511
- },
512
- "metadata": {},
513
- "output_type": "display_data"
514
- }
515
- ],
516
  "source": [
517
  "from evaluate import load\n",
518
  "cer_score = evaluate.load(\"cer\")"
@@ -550,7 +380,7 @@
550
  },
551
  {
552
  "cell_type": "code",
553
- "execution_count": 27,
554
  "metadata": {},
555
  "outputs": [],
556
  "source": [
@@ -580,52 +410,9 @@
580
  },
581
  {
582
  "cell_type": "code",
583
- "execution_count": 28,
584
  "metadata": {},
585
- "outputs": [
586
- {
587
- "data": {
588
- "application/vnd.jupyter.widget-view+json": {
589
- "model_id": "34f215d7229e419ba31b6db84d40dd4e",
590
- "version_major": 2,
591
- "version_minor": 0
592
- },
593
- "text/plain": [
594
- "Downloading (…)lve/main/config.json: 0%| | 0.00/1.97k [00:00<?, ?B/s]"
595
- ]
596
- },
597
- "metadata": {},
598
- "output_type": "display_data"
599
- },
600
- {
601
- "data": {
602
- "application/vnd.jupyter.widget-view+json": {
603
- "model_id": "9a10f9361120449ba5cec6baaf0064b3",
604
- "version_major": 2,
605
- "version_minor": 0
606
- },
607
- "text/plain": [
608
- "Downloading model.safetensors: 0%| | 0.00/967M [00:00<?, ?B/s]"
609
- ]
610
- },
611
- "metadata": {},
612
- "output_type": "display_data"
613
- },
614
- {
615
- "data": {
616
- "application/vnd.jupyter.widget-view+json": {
617
- "model_id": "37cb5f6cd0ec4c6c878d37afea648468",
618
- "version_major": 2,
619
- "version_minor": 0
620
- },
621
- "text/plain": [
622
- "Downloading (…)neration_config.json: 0%| | 0.00/3.84k [00:00<?, ?B/s]"
623
- ]
624
- },
625
- "metadata": {},
626
- "output_type": "display_data"
627
- }
628
- ],
629
  "source": [
630
  "from transformers import WhisperForConditionalGeneration\n",
631
  "\n",
@@ -634,7 +421,7 @@
634
  },
635
  {
636
  "cell_type": "code",
637
- "execution_count": 29,
638
  "metadata": {},
639
  "outputs": [],
640
  "source": [
@@ -645,7 +432,7 @@
645
  },
646
  {
647
  "cell_type": "code",
648
- "execution_count": 30,
649
  "metadata": {},
650
  "outputs": [],
651
  "source": [
@@ -677,7 +464,7 @@
677
  },
678
  {
679
  "cell_type": "code",
680
- "execution_count": 32,
681
  "metadata": {},
682
  "outputs": [],
683
  "source": [
@@ -696,7 +483,7 @@
696
  },
697
  {
698
  "cell_type": "code",
699
- "execution_count": 33,
700
  "metadata": {},
701
  "outputs": [],
702
  "source": [
@@ -705,41 +492,42 @@
705
  },
706
  {
707
  "cell_type": "code",
708
- "execution_count": 34,
709
  "metadata": {},
710
  "outputs": [
711
  {
712
  "data": {
713
  "application/vnd.jupyter.widget-view+json": {
714
- "model_id": "0303d20e3e2442dab77e2db5cc32f0bf",
715
  "version_major": 2,
716
  "version_minor": 0
717
  },
718
  "text/plain": [
719
- " 0%| | 0/5000 [00:00<?, ?it/s]"
720
  ]
721
  },
722
  "metadata": {},
723
  "output_type": "display_data"
724
  },
725
  {
726
- "ename": "OutOfMemoryError",
727
- "evalue": "CUDA out of memory. Tried to allocate 6.44 GiB (GPU 0; 23.64 GiB total capacity; 16.89 GiB already allocated; 211.81 MiB free; 22.32 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF",
728
- "output_type": "error",
729
- "traceback": [
730
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
731
- "\u001b[0;31mOutOfMemoryError\u001b[0m Traceback (most recent call last)",
732
- "\u001b[1;32m/home/kwon/kwon/01_Coding/hmi/test_whisper_finetuned.ipynb 25\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> <a href='vscode-notebook-cell:/home/kwon/kwon/01_Coding/hmi/test_whisper_finetuned.ipynb#X31sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m trainer\u001b[39m.\u001b[39;49mtrain()\n",
733
- "File \u001b[0;32m~/anaconda3/envs/stt/lib/python3.8/site-packages/transformers/trainer.py:1582\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1579\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1580\u001b[0m \u001b[39m# Disable progress bars when uploading models during checkpoints to avoid polluting stdout\u001b[39;00m\n\u001b[1;32m 1581\u001b[0m hf_hub_utils\u001b[39m.\u001b[39mdisable_progress_bars()\n\u001b[0;32m-> 1582\u001b[0m \u001b[39mreturn\u001b[39;00m inner_training_loop(\n\u001b[1;32m 1583\u001b[0m args\u001b[39m=\u001b[39;49margs,\n\u001b[1;32m 1584\u001b[0m resume_from_checkpoint\u001b[39m=\u001b[39;49mresume_from_checkpoint,\n\u001b[1;32m 1585\u001b[0m trial\u001b[39m=\u001b[39;49mtrial,\n\u001b[1;32m 1586\u001b[0m ignore_keys_for_eval\u001b[39m=\u001b[39;49mignore_keys_for_eval,\n\u001b[1;32m 1587\u001b[0m )\n\u001b[1;32m 1588\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m 1589\u001b[0m hf_hub_utils\u001b[39m.\u001b[39menable_progress_bars()\n",
734
- "File \u001b[0;32m~/anaconda3/envs/stt/lib/python3.8/site-packages/transformers/trainer.py:1892\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1889\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcontrol \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcallback_handler\u001b[39m.\u001b[39mon_step_begin(args, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstate, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcontrol)\n\u001b[1;32m 1891\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39maccelerator\u001b[39m.\u001b[39maccumulate(model):\n\u001b[0;32m-> 1892\u001b[0m tr_loss_step \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtraining_step(model, inputs)\n\u001b[1;32m 1894\u001b[0m \u001b[39mif\u001b[39;00m (\n\u001b[1;32m 1895\u001b[0m args\u001b[39m.\u001b[39mlogging_nan_inf_filter\n\u001b[1;32m 1896\u001b[0m \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m is_torch_tpu_available()\n\u001b[1;32m 1897\u001b[0m \u001b[39mand\u001b[39;00m (torch\u001b[39m.\u001b[39misnan(tr_loss_step) \u001b[39mor\u001b[39;00m torch\u001b[39m.\u001b[39misinf(tr_loss_step))\n\u001b[1;32m 1898\u001b[0m ):\n\u001b[1;32m 1899\u001b[0m \u001b[39m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[1;32m 1900\u001b[0m tr_loss \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m tr_loss \u001b[39m/\u001b[39m (\u001b[39m1\u001b[39m \u001b[39m+\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstate\u001b[39m.\u001b[39mglobal_step \u001b[39m-\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_globalstep_last_logged)\n",
735
- "File \u001b[0;32m~/anaconda3/envs/stt/lib/python3.8/site-packages/transformers/trainer.py:2787\u001b[0m, in \u001b[0;36mTrainer.training_step\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m 2785\u001b[0m scaled_loss\u001b[39m.\u001b[39mbackward()\n\u001b[1;32m 2786\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m-> 2787\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49maccelerator\u001b[39m.\u001b[39;49mbackward(loss)\n\u001b[1;32m 2789\u001b[0m \u001b[39mreturn\u001b[39;00m loss\u001b[39m.\u001b[39mdetach() \u001b[39m/\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39margs\u001b[39m.\u001b[39mgradient_accumulation_steps\n",
736
- "File \u001b[0;32m~/anaconda3/envs/stt/lib/python3.8/site-packages/accelerate/accelerator.py:1983\u001b[0m, in \u001b[0;36mAccelerator.backward\u001b[0;34m(self, loss, **kwargs)\u001b[0m\n\u001b[1;32m 1981\u001b[0m \u001b[39mreturn\u001b[39;00m\n\u001b[1;32m 1982\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mscaler \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 1983\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mscaler\u001b[39m.\u001b[39;49mscale(loss)\u001b[39m.\u001b[39;49mbackward(\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 1984\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1985\u001b[0m loss\u001b[39m.\u001b[39mbackward(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
737
- "File \u001b[0;32m~/anaconda3/envs/stt/lib/python3.8/site-packages/torch/_tensor.py:487\u001b[0m, in \u001b[0;36mTensor.backward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 477\u001b[0m \u001b[39mif\u001b[39;00m has_torch_function_unary(\u001b[39mself\u001b[39m):\n\u001b[1;32m 478\u001b[0m \u001b[39mreturn\u001b[39;00m handle_torch_function(\n\u001b[1;32m 479\u001b[0m Tensor\u001b[39m.\u001b[39mbackward,\n\u001b[1;32m 480\u001b[0m (\u001b[39mself\u001b[39m,),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 485\u001b[0m inputs\u001b[39m=\u001b[39minputs,\n\u001b[1;32m 486\u001b[0m )\n\u001b[0;32m--> 487\u001b[0m torch\u001b[39m.\u001b[39;49mautograd\u001b[39m.\u001b[39;49mbackward(\n\u001b[1;32m 488\u001b[0m \u001b[39mself\u001b[39;49m, gradient, retain_graph, create_graph, inputs\u001b[39m=\u001b[39;49minputs\n\u001b[1;32m 489\u001b[0m )\n",
738
- "File \u001b[0;32m~/anaconda3/envs/stt/lib/python3.8/site-packages/torch/autograd/__init__.py:197\u001b[0m, in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 192\u001b[0m retain_graph \u001b[39m=\u001b[39m create_graph\n\u001b[1;32m 194\u001b[0m \u001b[39m# The reason we repeat same the comment below is that\u001b[39;00m\n\u001b[1;32m 195\u001b[0m \u001b[39m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[1;32m 196\u001b[0m \u001b[39m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[0;32m--> 197\u001b[0m Variable\u001b[39m.\u001b[39;49m_execution_engine\u001b[39m.\u001b[39;49mrun_backward( \u001b[39m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[1;32m 198\u001b[0m tensors, grad_tensors_, retain_graph, create_graph, inputs,\n\u001b[1;32m 199\u001b[0m allow_unreachable\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, accumulate_grad\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n",
739
- "File \u001b[0;32m~/anaconda3/envs/stt/lib/python3.8/site-packages/torch/autograd/function.py:267\u001b[0m, in \u001b[0;36mBackwardCFunction.apply\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 263\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mImplementing both \u001b[39m\u001b[39m'\u001b[39m\u001b[39mbackward\u001b[39m\u001b[39m'\u001b[39m\u001b[39m and \u001b[39m\u001b[39m'\u001b[39m\u001b[39mvjp\u001b[39m\u001b[39m'\u001b[39m\u001b[39m for a custom \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 264\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mFunction is not allowed. You should only implement one \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 265\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mof them.\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 266\u001b[0m user_fn \u001b[39m=\u001b[39m vjp_fn \u001b[39mif\u001b[39;00m vjp_fn \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m Function\u001b[39m.\u001b[39mvjp \u001b[39melse\u001b[39;00m backward_fn\n\u001b[0;32m--> 267\u001b[0m \u001b[39mreturn\u001b[39;00m user_fn(\u001b[39mself\u001b[39;49m, \u001b[39m*\u001b[39;49margs)\n",
740
- "File \u001b[0;32m~/anaconda3/envs/stt/lib/python3.8/site-packages/torch/utils/checkpoint.py:157\u001b[0m, in \u001b[0;36mCheckpointFunction.backward\u001b[0;34m(ctx, *args)\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(outputs_with_grad) \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[1;32m 154\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[1;32m 155\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mnone of output has requires_grad=True,\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 156\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m this checkpoint() is not necessary\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 157\u001b[0m torch\u001b[39m.\u001b[39;49mautograd\u001b[39m.\u001b[39;49mbackward(outputs_with_grad, args_with_grad)\n\u001b[1;32m 158\u001b[0m grads \u001b[39m=\u001b[39m \u001b[39mtuple\u001b[39m(inp\u001b[39m.\u001b[39mgrad \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(inp, torch\u001b[39m.\u001b[39mTensor) \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 159\u001b[0m \u001b[39mfor\u001b[39;00m inp \u001b[39min\u001b[39;00m detached_inputs)\n\u001b[1;32m 161\u001b[0m \u001b[39mreturn\u001b[39;00m (\u001b[39mNone\u001b[39;00m, \u001b[39mNone\u001b[39;00m) \u001b[39m+\u001b[39m grads\n",
741
- "File \u001b[0;32m~/anaconda3/envs/stt/lib/python3.8/site-packages/torch/autograd/__init__.py:197\u001b[0m, in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 192\u001b[0m retain_graph \u001b[39m=\u001b[39m create_graph\n\u001b[1;32m 194\u001b[0m \u001b[39m# The reason we repeat same the comment below is that\u001b[39;00m\n\u001b[1;32m 195\u001b[0m \u001b[39m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[1;32m 196\u001b[0m \u001b[39m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[0;32m--> 197\u001b[0m Variable\u001b[39m.\u001b[39;49m_execution_engine\u001b[39m.\u001b[39;49mrun_backward( \u001b[39m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[1;32m 198\u001b[0m tensors, grad_tensors_, retain_graph, create_graph, inputs,\n\u001b[1;32m 199\u001b[0m allow_unreachable\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, accumulate_grad\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n",
742
- "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 6.44 GiB (GPU 0; 23.64 GiB total capacity; 16.89 GiB already allocated; 211.81 MiB free; 22.32 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"
 
743
  ]
744
  }
745
  ],
@@ -755,10 +543,10 @@
755
  "outputs": [],
756
  "source": [
757
  "kwargs = {\n",
758
- " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
759
  " \"dataset\": \"zeroth_korean\", # a 'pretty' name for the training dataset\n",
760
- " \"language\": \"hi\",\n",
761
- " \"model_name\": \"Whisper Small Hi - Sanchit Gandhi\", # a 'pretty' name for your model\n",
762
  " \"finetuned_from\": \"openai/whisper-small\",\n",
763
  " \"tasks\": \"automatic-speech-recognition\",\n",
764
  " \"tags\": \"whisper-event\",\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
 
35
  },
36
  {
37
  "cell_type": "code",
38
+ "execution_count": 2,
39
  "metadata": {},
40
  "outputs": [
41
  {
 
65
  },
66
  {
67
  "cell_type": "code",
68
+ "execution_count": 3,
69
  "metadata": {},
70
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  "source": [
72
  "from transformers import WhisperFeatureExtractor\n",
73
  "\n",
 
76
  },
77
  {
78
  "cell_type": "code",
79
+ "execution_count": 4,
80
  "metadata": {},
81
  "outputs": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  {
83
  "name": "stderr",
84
  "output_type": "stream",
 
95
  },
96
  {
97
  "cell_type": "code",
98
+ "execution_count": 5,
99
  "metadata": {},
100
  "outputs": [
101
  {
 
114
  },
115
  {
116
  "cell_type": "code",
117
+ "execution_count": 6,
118
  "metadata": {},
119
  "outputs": [
120
  {
 
132
  },
133
  {
134
  "cell_type": "code",
135
+ "execution_count": 7,
136
  "metadata": {},
137
  "outputs": [],
138
  "source": [
 
144
  },
145
  {
146
  "cell_type": "code",
147
+ "execution_count": 8,
148
  "metadata": {},
149
  "outputs": [
150
  {
 
162
  },
163
  {
164
  "cell_type": "code",
165
+ "execution_count": 9,
166
  "metadata": {},
167
  "outputs": [],
168
  "source": [
 
176
  },
177
  {
178
  "cell_type": "code",
179
+ "execution_count": 10,
180
  "metadata": {},
181
  "outputs": [],
182
  "source": [
 
203
  },
204
  {
205
  "cell_type": "code",
206
+ "execution_count": 11,
207
  "metadata": {},
208
  "outputs": [
209
  {
210
  "data": {
211
  "application/vnd.jupyter.widget-view+json": {
212
+ "model_id": "0c3d689f889f4a71a6ffe0350727d7d7",
213
  "version_major": 2,
214
  "version_minor": 0
215
  },
 
223
  {
224
  "data": {
225
  "application/vnd.jupyter.widget-view+json": {
226
+ "model_id": "f28073a5c15944e9a9c7da6c994fb3da",
227
  "version_major": 2,
228
  "version_minor": 0
229
  },
 
241
  },
242
  {
243
  "cell_type": "code",
244
+ "execution_count": 12,
245
  "metadata": {},
246
  "outputs": [],
247
  "source": [
 
254
  },
255
  {
256
  "cell_type": "code",
257
+ "execution_count": 13,
258
  "metadata": {},
259
  "outputs": [
260
  {
261
  "data": {
262
  "application/vnd.jupyter.widget-view+json": {
263
+ "model_id": "a0ff361ac7824e87a1360eb36314f126",
264
  "version_major": 2,
265
  "version_minor": 0
266
  },
 
281
  },
282
  {
283
  "cell_type": "code",
284
+ "execution_count": 14,
285
  "metadata": {},
286
  "outputs": [],
287
  "source": [
 
320
  },
321
  {
322
  "cell_type": "code",
323
+ "execution_count": 15,
324
  "metadata": {},
325
  "outputs": [],
326
  "source": [
 
329
  },
330
  {
331
  "cell_type": "code",
332
+ "execution_count": 16,
333
  "metadata": {},
334
  "outputs": [],
335
  "source": [
 
340
  },
341
  {
342
  "cell_type": "code",
343
+ "execution_count": 17,
344
  "metadata": {},
345
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  "source": [
347
  "from evaluate import load\n",
348
  "cer_score = evaluate.load(\"cer\")"
 
380
  },
381
  {
382
  "cell_type": "code",
383
+ "execution_count": 18,
384
  "metadata": {},
385
  "outputs": [],
386
  "source": [
 
410
  },
411
  {
412
  "cell_type": "code",
413
+ "execution_count": 19,
414
  "metadata": {},
415
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
  "source": [
417
  "from transformers import WhisperForConditionalGeneration\n",
418
  "\n",
 
421
  },
422
  {
423
  "cell_type": "code",
424
+ "execution_count": 20,
425
  "metadata": {},
426
  "outputs": [],
427
  "source": [
 
432
  },
433
  {
434
  "cell_type": "code",
435
+ "execution_count": 21,
436
  "metadata": {},
437
  "outputs": [],
438
  "source": [
 
464
  },
465
  {
466
  "cell_type": "code",
467
+ "execution_count": 22,
468
  "metadata": {},
469
  "outputs": [],
470
  "source": [
 
483
  },
484
  {
485
  "cell_type": "code",
486
+ "execution_count": 23,
487
  "metadata": {},
488
  "outputs": [],
489
  "source": [
 
492
  },
493
  {
494
  "cell_type": "code",
495
+ "execution_count": 24,
496
  "metadata": {},
497
  "outputs": [
498
  {
499
  "data": {
500
  "application/vnd.jupyter.widget-view+json": {
501
+ "model_id": "e65ce3bc72904c3999b034bfeb8a12b6",
502
  "version_major": 2,
503
  "version_minor": 0
504
  },
505
  "text/plain": [
506
+ " 0%| | 0/4000 [00:00<?, ?it/s]"
507
  ]
508
  },
509
  "metadata": {},
510
  "output_type": "display_data"
511
  },
512
  {
513
+ "name": "stdout",
514
+ "output_type": "stream",
515
+ "text": [
516
+ "{'loss': 2.0528, 'learning_rate': 4.4e-07, 'epoch': 0.02}\n",
517
+ "{'loss': 1.6367, 'learning_rate': 9.400000000000001e-07, 'epoch': 0.04}\n",
518
+ "{'loss': 1.2439, 'learning_rate': 1.44e-06, 'epoch': 0.05}\n",
519
+ "{'loss': 0.7302, 'learning_rate': 1.94e-06, 'epoch': 0.07}\n",
520
+ "{'loss': 0.5361, 'learning_rate': 2.4400000000000004e-06, 'epoch': 0.09}\n",
521
+ "{'loss': 0.4687, 'learning_rate': 2.9400000000000002e-06, 'epoch': 0.11}\n",
522
+ "{'loss': 0.4293, 'learning_rate': 3.44e-06, 'epoch': 0.13}\n",
523
+ "{'loss': 0.3663, 'learning_rate': 3.94e-06, 'epoch': 0.14}\n",
524
+ "{'loss': 0.3301, 'learning_rate': 4.440000000000001e-06, 'epoch': 0.16}\n",
525
+ "{'loss': 0.3001, 'learning_rate': 4.94e-06, 'epoch': 0.18}\n",
526
+ "{'loss': 0.2242, 'learning_rate': 5.4400000000000004e-06, 'epoch': 0.2}\n",
527
+ "{'loss': 0.2262, 'learning_rate': 5.94e-06, 'epoch': 0.22}\n",
528
+ "{'loss': 0.2143, 'learning_rate': 6.440000000000001e-06, 'epoch': 0.23}\n",
529
+ "{'loss': 0.2019, 'learning_rate': 6.9400000000000005e-06, 'epoch': 0.25}\n",
530
+ "{'loss': 0.1992, 'learning_rate': 7.440000000000001e-06, 'epoch': 0.27}\n"
531
  ]
532
  }
533
  ],
 
543
  "outputs": [],
544
  "source": [
545
  "kwargs = {\n",
546
+ " \"dataset_tags\": \"kresnik/zeroth_korean\",\n",
547
  " \"dataset\": \"zeroth_korean\", # a 'pretty' name for the training dataset\n",
548
+ " \"language\": \"ko\",\n",
549
+ " \"model_name\": \"Whisper Small Ko\", # a 'pretty' name for your model\n",
550
  " \"finetuned_from\": \"openai/whisper-small\",\n",
551
  " \"tasks\": \"automatic-speech-recognition\",\n",
552
  " \"tags\": \"whisper-event\",\n",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cddb675e1cf557b1a6326846a2385741c669e6a0d34dee3f65cc9f58add729e4
3
  size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62c514187e98b3a26a315cb738cd44d24e71b91b83d22b4b6e336329fe80abf2
3
  size 4155