ihanif committed
Commit 6df5c6e
1 Parent(s): 0e2b556

Training in progress, step 100

.gitignore CHANGED
@@ -1 +1 @@
- checkpoint-*/
+ #checkpoint-*/
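Commenting out the checkpoint-*/ ignore rule means checkpoint directories are no longer excluded from version control, which is what lets the checkpoint-100/ and checkpoint-200/ files below be committed and pushed.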
checkpoint-100/config.json ADDED
@@ -0,0 +1,42 @@
+ {
+ "_name_or_path": "openai/whisper-small",
+ "activation_dropout": 0.0,
+ "activation_function": "gelu",
+ "architectures": [
+ "WhisperForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "begin_suppress_tokens": [
+ 220,
+ 50257
+ ],
+ "bos_token_id": 50257,
+ "d_model": 768,
+ "decoder_attention_heads": 12,
+ "decoder_ffn_dim": 3072,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 12,
+ "decoder_start_token_id": 50258,
+ "dropout": 0.0,
+ "encoder_attention_heads": 12,
+ "encoder_ffn_dim": 3072,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 12,
+ "eos_token_id": 50257,
+ "forced_decoder_ids": null,
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "max_length": 448,
+ "max_source_positions": 1500,
+ "max_target_positions": 448,
+ "model_type": "whisper",
+ "num_hidden_layers": 12,
+ "num_mel_bins": 80,
+ "pad_token_id": 50257,
+ "scale_embedding": false,
+ "suppress_tokens": [],
+ "torch_dtype": "float32",
+ "transformers_version": "4.26.0.dev0",
+ "use_cache": false,
+ "vocab_size": 51865
+ }
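This is the stock openai/whisper-small configuration saved alongside the step-100 weights. As a sanity check, a checkpoint directory like this can be reloaded with the usual transformers API — a minimal sketch, assuming the tokenizer/processor files were also pushed to the repo root:

from transformers import WhisperForConditionalGeneration, WhisperProcessor

# Weights and config.json are read straight from the checkpoint directory.
model = WhisperForConditionalGeneration.from_pretrained("./checkpoint-100")
# The feature extractor + tokenizer live at the repo root in this setup.
processor = WhisperProcessor.from_pretrained(".")
model.eval()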
checkpoint-100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a1002d1d1f52543b717c6981da336cb2b7d5de41f31e4ce2d17156a34b040ee6
+ size 1934160645
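Each of these binary files is tracked with Git LFS, so the diff records only a pointer: the spec version, the sha256 object id, and the size in bytes. At ~1.93 GB the optimizer state is roughly twice the ~967 MB model, which is what you'd expect from Adam keeping two moment estimates per parameter.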
checkpoint-100/preprocessor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-100/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:946ddc68bfd507e62f2331943cb15a787bbdda6a4389be1fcfd25cff7522f970
+ size 967102601
checkpoint-100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3cb1efe7cf554d988385a8b3b6ee72bcb439a80ae12b6972729b5fd2605e1c6e
+ size 14511
checkpoint-100/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fc1d2baeb4b37bcdfe6e1f87922490255ada6e3a285d3888ea80d73c23b6b2a5
+ size 557
checkpoint-100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:27824508c9b835e63987aba694f6253319bfea6d8742d490d9270366f9e41f6c
+ size 627
checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,86 @@
+ {
+ "best_metric": 146.85230024213075,
+ "best_model_checkpoint": "./checkpoint-100",
+ "epoch": 2.376470588235294,
+ "global_step": 100,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.24,
+ "learning_rate": 2.666666666666667e-06,
+ "loss": 4.3134,
+ "step": 10
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 6e-06,
+ "loss": 2.9329,
+ "step": 20
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 9.333333333333334e-06,
+ "loss": 2.0584,
+ "step": 30
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 9.829787234042554e-06,
+ "loss": 1.566,
+ "step": 40
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 9.617021276595745e-06,
+ "loss": 1.3777,
+ "step": 50
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 9.404255319148937e-06,
+ "loss": 1.1469,
+ "step": 60
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 9.191489361702128e-06,
+ "loss": 1.0638,
+ "step": 70
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 8.97872340425532e-06,
+ "loss": 0.9974,
+ "step": 80
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 8.765957446808512e-06,
+ "loss": 0.9615,
+ "step": 90
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 8.553191489361703e-06,
+ "loss": 0.8262,
+ "step": 100
+ },
+ {
+ "epoch": 2.38,
+ "eval_cer": 149.33157314260887,
+ "eval_loss": 0.8188337683677673,
+ "eval_runtime": 459.9395,
+ "eval_samples_per_second": 1.113,
+ "eval_steps_per_second": 0.139,
+ "eval_wer": 146.85230024213075,
+ "step": 100
+ }
+ ],
+ "max_steps": 500,
+ "num_train_epochs": 12,
+ "total_flos": 1.86022149046272e+18,
+ "trial_name": null,
+ "trial_params": null
+ }
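The trainer state captures everything needed to resume mid-run: the log history, the best metric so far (eval_wer ≈ 146.85 at step 100), and the global step for the scheduler. A minimal sketch of resuming, assuming the same Seq2SeqTrainer built in whisper_small_ps_augmented.py:

# Restores optimizer.pt, scheduler.pt, rng_state.pth and scaler.pt saved above.
trainer.train(resume_from_checkpoint="./checkpoint-100")

# Alternatively, let the Trainer pick the latest checkpoint in output_dir:
# trainer.train(resume_from_checkpoint=True)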
checkpoint-100/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5518cd575512706c66c89476ae9c64328c1bcac98ec06151bdc9afebf6c503fb
+ size 3643
checkpoint-200/config.json ADDED
@@ -0,0 +1,42 @@
+ {
+ "_name_or_path": "openai/whisper-small",
+ "activation_dropout": 0.0,
+ "activation_function": "gelu",
+ "architectures": [
+ "WhisperForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "begin_suppress_tokens": [
+ 220,
+ 50257
+ ],
+ "bos_token_id": 50257,
+ "d_model": 768,
+ "decoder_attention_heads": 12,
+ "decoder_ffn_dim": 3072,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 12,
+ "decoder_start_token_id": 50258,
+ "dropout": 0.0,
+ "encoder_attention_heads": 12,
+ "encoder_ffn_dim": 3072,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 12,
+ "eos_token_id": 50257,
+ "forced_decoder_ids": null,
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "max_length": 448,
+ "max_source_positions": 1500,
+ "max_target_positions": 448,
+ "model_type": "whisper",
+ "num_hidden_layers": 12,
+ "num_mel_bins": 80,
+ "pad_token_id": 50257,
+ "scale_embedding": false,
+ "suppress_tokens": [],
+ "torch_dtype": "float32",
+ "transformers_version": "4.26.0.dev0",
+ "use_cache": false,
+ "vocab_size": 51865
+ }
checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:138a6ddac36f3135f9ce799bc0aa0c6ee6e8510cecdd696448298fce2403ddb0
+ size 1934160645
checkpoint-200/preprocessor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-200/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d091c8707cb0763632c8b311f2d581a51bd141ec488b3c948362989bfac0bdda
+ size 967102601
checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b22328e86e2f01e3b161f44df6b51159ec6b4a940fe644d7490ef269503f585
+ size 14511
checkpoint-200/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:02ee58f3dc2c47aec3ec5dbf8581a593f6a668e46904253580e157d97de3e149
+ size 557
checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e82e796ecfbb8fa98125f306d5ff56f81ebaf5a863f8fefc032c86533b2fdf8f
+ size 627
checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,156 @@
+ {
+ "best_metric": 139.37651331719127,
+ "best_model_checkpoint": "./checkpoint-100",
+ "epoch": 2.377581120943953,
+ "global_step": 200,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.12,
+ "learning_rate": 3e-06,
+ "loss": 4.1495,
+ "step": 10
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 6.333333333333333e-06,
+ "loss": 2.9287,
+ "step": 20
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 9.666666666666667e-06,
+ "loss": 2.0462,
+ "step": 30
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 9.666666666666667e-06,
+ "loss": 1.6138,
+ "step": 40
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 9.296296296296296e-06,
+ "loss": 1.3862,
+ "step": 50
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 8.925925925925927e-06,
+ "loss": 1.2604,
+ "step": 60
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 8.555555555555556e-06,
+ "loss": 1.1436,
+ "step": 70
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 8.185185185185187e-06,
+ "loss": 1.168,
+ "step": 80
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 7.814814814814816e-06,
+ "loss": 1.1041,
+ "step": 90
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 7.444444444444445e-06,
+ "loss": 0.9683,
+ "step": 100
+ },
+ {
+ "epoch": 1.19,
+ "eval_cer": 131.61659035460045,
+ "eval_loss": 0.8811978697776794,
+ "eval_runtime": 593.3196,
+ "eval_samples_per_second": 0.863,
+ "eval_steps_per_second": 0.431,
+ "eval_wer": 139.37651331719127,
+ "step": 100
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 7.074074074074074e-06,
+ "loss": 0.909,
+ "step": 110
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 6.703703703703704e-06,
+ "loss": 0.9213,
+ "step": 120
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 6.333333333333333e-06,
+ "loss": 0.9092,
+ "step": 130
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 5.962962962962963e-06,
+ "loss": 0.8481,
+ "step": 140
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 5.5925925925925926e-06,
+ "loss": 0.8471,
+ "step": 150
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 5.2222222222222226e-06,
+ "loss": 0.8504,
+ "step": 160
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 4.851851851851852e-06,
+ "loss": 0.8264,
+ "step": 170
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 4.481481481481482e-06,
+ "loss": 0.7236,
+ "step": 180
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 4.111111111111111e-06,
+ "loss": 0.6898,
+ "step": 190
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 3.740740740740741e-06,
+ "loss": 0.6848,
+ "step": 200
+ },
+ {
+ "epoch": 2.38,
+ "eval_cer": 151.33685371478225,
+ "eval_loss": 0.7542899250984192,
+ "eval_runtime": 551.6472,
+ "eval_samples_per_second": 0.928,
+ "eval_steps_per_second": 0.464,
+ "eval_wer": 145.9972760290557,
+ "step": 200
+ }
+ ],
+ "max_steps": 300,
+ "num_train_epochs": 4,
+ "total_flos": 1.86022149046272e+18,
+ "trial_name": null,
+ "trial_params": null
+ }
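Note that eval_wer and eval_cer exceed 100 here (eval_wer ≈ 146.0 at step 200), so the step-100 eval (139.38) remains the best and best_model_checkpoint stays at ./checkpoint-100. WER is (substitutions + deletions + insertions) divided by the number of reference words, so heavy insertion errors — common early in fine-tuning on a new language — can push it past 100%. A minimal sketch of how such a score is typically computed with the evaluate library (the strings are placeholders):

import evaluate

wer_metric = evaluate.load("wer")
predictions = ["a hypothetical output with many extra inserted words"]
references = ["a short reference"]
# compute() returns a ratio; multiply by 100 for the percentages logged above.
print(100 * wer_metric.compute(predictions=predictions, references=references))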
checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bcf3a24325bb49c25270193816ed6035a253ca3ae300c31ffcec0afeb4229266
+ size 3579
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3659cbd57caabfa6834081314e1044f720d4f82db5a36a341158bdc9fc0cf4f2
+ oid sha256:946ddc68bfd507e62f2331943cb15a787bbdda6a4389be1fcfd25cff7522f970
  size 967102601
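The new object id matches checkpoint-100/pytorch_model.bin above — the top-level model weights were replaced with the step-100 checkpoint's weights. The size is unchanged, since only the parameter values differ.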
runs/Dec20_20-02-45_129-146-32-172/1671566571.0086486/events.out.tfevents.1671566571.129-146-32-172.146952.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3ebfd8972b7d001f37f279924678679a975977b7be87a6d8f31555aad29e3833
+ size 5862
runs/Dec20_20-02-45_129-146-32-172/events.out.tfevents.1671566571.129-146-32-172.146952.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c8bad0c4902c1b9b166697cb4e7edeb7f56d7c5dec9f45409cd1c9c00f0c914
+ size 4747
runs/Dec20_20-22-25_129-146-32-172/1671567753.387425/events.out.tfevents.1671567753.129-146-32-172.150576.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62a9e3c9153b155528dd9febed01a15398f438aafea941f78410a763a9be2cb2
+ size 5862
runs/Dec20_20-22-25_129-146-32-172/events.out.tfevents.1671567753.129-146-32-172.150576.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:961a3b8618713dfc24c483c8f04cfa559969b9e3a329008c3b26019ce9fabdd4
+ size 4903
runs/Dec20_20-28-50_129-146-32-172/1671568135.4799154/events.out.tfevents.1671568135.129-146-32-172.151517.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5843873d279e134418900972f42218231fa7cff59eac0ce951355225ec162fa1
+ size 5862
runs/Dec20_20-28-50_129-146-32-172/events.out.tfevents.1671568135.129-146-32-172.151517.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:782028836de28c83d061b5870d65ffd6d0cfb2cf7e8386ba8180ee926b2c5967
+ size 6185
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bcf3a24325bb49c25270193816ed6035a253ca3ae300c31ffcec0afeb4229266
- size 3579
+ oid sha256:5518cd575512706c66c89476ae9c64328c1bcac98ec06151bdc9afebf6c503fb
+ size 3643
whisper_small_ps_augmented.py CHANGED
@@ -251,16 +251,16 @@ In the final step, we define all the parameters related to training. For more de
 
 training_args = Seq2SeqTrainingArguments(
     output_dir="./",
-    per_device_train_batch_size=2,
+    per_device_train_batch_size=16,
     # increase by 2x for every 2x decrease in batch size
-    gradient_accumulation_steps=16,
+    gradient_accumulation_steps=4,
     learning_rate=1e-5,
     warmup_steps=30,
     max_steps=500,
     gradient_checkpointing=True,
     fp16=True,
     evaluation_strategy="steps",
-    per_device_eval_batch_size=2,
+    per_device_eval_batch_size=8,
     predict_with_generate=True,
     generation_max_length=225,
     save_steps=100,
@@ -272,7 +272,8 @@ training_args = Seq2SeqTrainingArguments(
     greater_is_better=False,
     push_to_hub=True,
     #optim='adamw_bnb_8bit', # 'adamw_bnb_8bit',
-    overwrite_output_dir="False"
+    overwrite_output_dir="False",
+    resume_from_checkpoint="True"
 )
 
 
@@ -298,7 +299,7 @@ trainer.train()
 kwargs = {
     "dataset_tags": "google/fleurs",
     "dataset": "google/fleurs", # a 'pretty' name for the training dataset
-    "language": "ps_af",
+    "language": "ps",
     "model_name": "Whisper Small Pashto - Augmented", # a 'pretty' name for your model
     "finetuned_from": "openai/whisper-small",
     "tasks": "automatic-speech-recognition",