steja committed on
Commit
5bf5f2f
1 Parent(s): 1e2bd91

persian whisper small ft

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +92 -0
  2. added_tokens.json +109 -0
  3. all_results.json +12 -0
  4. checkpoint-1000/config.json +41 -0
  5. checkpoint-1000/optimizer.pt +3 -0
  6. checkpoint-1000/preprocessor_config.json +0 -0
  7. checkpoint-1000/pytorch_model.bin +3 -0
  8. checkpoint-1000/rng_state_0.pth +3 -0
  9. checkpoint-1000/rng_state_1.pth +3 -0
  10. checkpoint-1000/rng_state_2.pth +3 -0
  11. checkpoint-1000/rng_state_3.pth +3 -0
  12. checkpoint-1000/scaler.pt +3 -0
  13. checkpoint-1000/scheduler.pt +3 -0
  14. checkpoint-1000/trainer_state.json +274 -0
  15. checkpoint-1000/training_args.bin +3 -0
  16. checkpoint-1500/config.json +41 -0
  17. checkpoint-1500/optimizer.pt +3 -0
  18. checkpoint-1500/preprocessor_config.json +0 -0
  19. checkpoint-1500/pytorch_model.bin +3 -0
  20. checkpoint-1500/rng_state_0.pth +3 -0
  21. checkpoint-1500/rng_state_1.pth +3 -0
  22. checkpoint-1500/rng_state_2.pth +3 -0
  23. checkpoint-1500/rng_state_3.pth +3 -0
  24. checkpoint-1500/scaler.pt +3 -0
  25. checkpoint-1500/scheduler.pt +3 -0
  26. checkpoint-1500/trainer_state.json +403 -0
  27. checkpoint-1500/training_args.bin +3 -0
  28. checkpoint-2000/config.json +41 -0
  29. checkpoint-2000/optimizer.pt +3 -0
  30. checkpoint-2000/preprocessor_config.json +0 -0
  31. checkpoint-2000/pytorch_model.bin +3 -0
  32. checkpoint-2000/rng_state_0.pth +3 -0
  33. checkpoint-2000/rng_state_1.pth +3 -0
  34. checkpoint-2000/rng_state_2.pth +3 -0
  35. checkpoint-2000/rng_state_3.pth +3 -0
  36. checkpoint-2000/scaler.pt +3 -0
  37. checkpoint-2000/scheduler.pt +3 -0
  38. checkpoint-2000/trainer_state.json +532 -0
  39. checkpoint-2000/training_args.bin +3 -0
  40. checkpoint-2500/config.json +41 -0
  41. checkpoint-2500/optimizer.pt +3 -0
  42. checkpoint-2500/preprocessor_config.json +0 -0
  43. checkpoint-2500/pytorch_model.bin +3 -0
  44. checkpoint-2500/rng_state_0.pth +3 -0
  45. checkpoint-2500/rng_state_1.pth +3 -0
  46. checkpoint-2500/rng_state_2.pth +3 -0
  47. checkpoint-2500/rng_state_3.pth +3 -0
  48. checkpoint-2500/scaler.pt +3 -0
  49. checkpoint-2500/scheduler.pt +3 -0
  50. checkpoint-2500/trainer_state.json +661 -0
README.md ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - fa
4
+ license: apache-2.0
5
+ tags:
6
+ - whisper-event
7
+ - generated_from_trainer
8
+ datasets:
9
+ - mozilla-foundation/common_voice_11_0
10
+ metrics:
11
+ - wer
12
+ model-index:
13
+ - name: Whisper small Persian
14
+ results:
15
+ - task:
16
+ name: Automatic Speech Recognition
17
+ type: automatic-speech-recognition
18
+ dataset:
19
+ name: mozilla-foundation/common_voice_11_0 fa
20
+ type: mozilla-foundation/common_voice_11_0
21
+ config: null
22
+ split: None
23
+ metrics:
24
+ - name: Wer
25
+ type: wer
26
 + value: 39.89950864725314
27
+ ---
28
+
29
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
30
+ should probably proofread and complete it, then remove this comment. -->
31
+
32
+ # Whisper small Persian
33
+
34
+ This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the mozilla-foundation/common_voice_11_0 fa dataset.
35
+ It achieves the following results on the evaluation set:
36
+ - Loss: 0.4924
37
+ - Wer: 39.8995
38
+
39
+ ## Model description
40
+
41
+ More information needed
42
+
43
+ ## Intended uses & limitations
44
+
45
+ More information needed
46
+
47
+ ## Training and evaluation data
48
+
49
+ More information needed
50
+
51
+ ## Training procedure
52
+
53
+ ### Training hyperparameters
54
+
55
+ The following hyperparameters were used during training:
56
+ - learning_rate: 1e-06
57
+ - train_batch_size: 8
58
+ - eval_batch_size: 16
59
+ - seed: 42
60
+ - distributed_type: multi-GPU
61
+ - num_devices: 4
62
+ - gradient_accumulation_steps: 2
63
+ - total_train_batch_size: 64
64
+ - total_eval_batch_size: 64
65
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
66
+ - lr_scheduler_type: linear
67
+ - lr_scheduler_warmup_steps: 500
68
+ - training_steps: 5000
69
+ - mixed_precision_training: Native AMP
70
+
71
+ ### Training results
72
+
73
+ | Training Loss | Epoch | Step | Validation Loss | Wer |
74
+ |:-------------:|:-----:|:----:|:---------------:|:-------:|
75
+ | 0.5533 | 1.56 | 500 | 0.7044 | 54.5499 |
76
+ | 0.3951 | 3.12 | 1000 | 0.5893 | 47.5210 |
77
+ | 0.3296 | 4.67 | 1500 | 0.5429 | 42.6451 |
78
+ | 0.2662 | 6.23 | 2000 | 0.5223 | 40.6644 |
79
+ | 0.2535 | 7.79 | 2500 | 0.5045 | 38.5304 |
80
+ | 0.224 | 9.35 | 3000 | 0.5002 | 36.8822 |
81
+ | 0.2204 | 10.9 | 3500 | 0.4967 | 35.3076 |
82
+ | 0.2024 | 12.46 | 4000 | 0.4951 | 34.9883 |
83
+ | 0.2099 | 14.02 | 4500 | 0.4921 | 34.9842 |
84
+ | 0.1836 | 15.58 | 5000 | 0.4924 | 34.8995 |
85
+
86
+
87
+ ### Framework versions
88
+
89
+ - Transformers 4.25.1
90
+ - Pytorch 1.13.0+cu117
91
+ - Datasets 2.7.1
92
+ - Tokenizers 0.13.2
added_tokens.json ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<|af|>": 50327,
3
+ "<|am|>": 50334,
4
+ "<|ar|>": 50272,
5
+ "<|as|>": 50350,
6
+ "<|az|>": 50304,
7
+ "<|ba|>": 50355,
8
+ "<|be|>": 50330,
9
+ "<|bg|>": 50292,
10
+ "<|bn|>": 50302,
11
+ "<|bo|>": 50347,
12
+ "<|br|>": 50309,
13
+ "<|bs|>": 50315,
14
+ "<|ca|>": 50270,
15
+ "<|cs|>": 50283,
16
+ "<|cy|>": 50297,
17
+ "<|da|>": 50285,
18
+ "<|de|>": 50261,
19
+ "<|el|>": 50281,
20
+ "<|endoftext|>": 50257,
21
+ "<|en|>": 50259,
22
+ "<|es|>": 50262,
23
+ "<|et|>": 50307,
24
+ "<|eu|>": 50310,
25
+ "<|fa|>": 50300,
26
+ "<|fi|>": 50277,
27
+ "<|fo|>": 50338,
28
+ "<|fr|>": 50265,
29
+ "<|gl|>": 50319,
30
+ "<|gu|>": 50333,
31
+ "<|haw|>": 50352,
32
+ "<|ha|>": 50354,
33
+ "<|hi|>": 50276,
34
+ "<|hr|>": 50291,
35
+ "<|ht|>": 50339,
36
+ "<|hu|>": 50286,
37
+ "<|hy|>": 50312,
38
+ "<|id|>": 50275,
39
+ "<|is|>": 50311,
40
+ "<|it|>": 50274,
41
+ "<|iw|>": 50279,
42
+ "<|ja|>": 50266,
43
+ "<|jw|>": 50356,
44
+ "<|ka|>": 50329,
45
+ "<|kk|>": 50316,
46
+ "<|km|>": 50323,
47
+ "<|kn|>": 50306,
48
+ "<|ko|>": 50264,
49
+ "<|la|>": 50294,
50
+ "<|lb|>": 50345,
51
+ "<|ln|>": 50353,
52
+ "<|lo|>": 50336,
53
+ "<|lt|>": 50293,
54
+ "<|lv|>": 50301,
55
+ "<|mg|>": 50349,
56
+ "<|mi|>": 50295,
57
+ "<|mk|>": 50308,
58
+ "<|ml|>": 50296,
59
+ "<|mn|>": 50314,
60
+ "<|mr|>": 50320,
61
+ "<|ms|>": 50282,
62
+ "<|mt|>": 50343,
63
+ "<|my|>": 50346,
64
+ "<|ne|>": 50313,
65
+ "<|nl|>": 50271,
66
+ "<|nn|>": 50342,
67
+ "<|nocaptions|>": 50362,
68
+ "<|notimestamps|>": 50363,
69
+ "<|no|>": 50288,
70
+ "<|oc|>": 50328,
71
+ "<|pa|>": 50321,
72
+ "<|pl|>": 50269,
73
+ "<|ps|>": 50340,
74
+ "<|pt|>": 50267,
75
+ "<|ro|>": 50284,
76
+ "<|ru|>": 50263,
77
+ "<|sa|>": 50344,
78
+ "<|sd|>": 50332,
79
+ "<|si|>": 50322,
80
+ "<|sk|>": 50298,
81
+ "<|sl|>": 50305,
82
+ "<|sn|>": 50324,
83
+ "<|so|>": 50326,
84
+ "<|sq|>": 50317,
85
+ "<|sr|>": 50303,
86
+ "<|startoflm|>": 50360,
87
+ "<|startofprev|>": 50361,
88
+ "<|startoftranscript|>": 50258,
89
+ "<|su|>": 50357,
90
+ "<|sv|>": 50273,
91
+ "<|sw|>": 50318,
92
+ "<|ta|>": 50287,
93
+ "<|te|>": 50299,
94
+ "<|tg|>": 50331,
95
+ "<|th|>": 50289,
96
+ "<|tk|>": 50341,
97
+ "<|tl|>": 50348,
98
+ "<|transcribe|>": 50359,
99
+ "<|translate|>": 50358,
100
+ "<|tr|>": 50268,
101
+ "<|tt|>": 50351,
102
+ "<|uk|>": 50280,
103
+ "<|ur|>": 50290,
104
+ "<|uz|>": 50337,
105
+ "<|vi|>": 50278,
106
+ "<|yi|>": 50335,
107
+ "<|yo|>": 50325,
108
+ "<|zh|>": 50260
109
+ }
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.58,
3
+ "eval_loss": 0.4923795163631439,
4
+ "eval_runtime": 1370.1044,
5
+ "eval_samples_per_second": 7.509,
6
+ "eval_steps_per_second": 0.118,
7
+ "eval_wer": 39.89950864725314,
8
+ "train_loss": 0.3303420036315918,
9
+ "train_runtime": 21571.4866,
10
+ "train_samples_per_second": 14.834,
11
+ "train_steps_per_second": 0.232
12
+ }
checkpoint-1000/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai/whisper-small",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "architectures": [
6
+ "WhisperForConditionalGeneration"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "begin_suppress_tokens": [
10
+ 220,
11
+ 50257
12
+ ],
13
+ "bos_token_id": 50257,
14
+ "d_model": 768,
15
+ "decoder_attention_heads": 12,
16
+ "decoder_ffn_dim": 3072,
17
+ "decoder_layerdrop": 0.0,
18
+ "decoder_layers": 12,
19
+ "decoder_start_token_id": 50258,
20
+ "dropout": 0.0,
21
+ "encoder_attention_heads": 12,
22
+ "encoder_ffn_dim": 3072,
23
+ "encoder_layerdrop": 0.0,
24
+ "encoder_layers": 12,
25
+ "eos_token_id": 50257,
26
+ "forced_decoder_ids": null,
27
+ "init_std": 0.02,
28
+ "is_encoder_decoder": true,
29
+ "max_length": 448,
30
+ "max_source_positions": 1500,
31
+ "max_target_positions": 448,
32
+ "model_type": "whisper",
33
+ "num_hidden_layers": 12,
34
+ "num_mel_bins": 80,
35
+ "pad_token_id": 50257,
36
+ "scale_embedding": false,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.25.1",
39
+ "use_cache": false,
40
+ "vocab_size": 51865
41
+ }
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6e47e4300e4e3bd21b0c869c587bdfcea35e8bf36cebb1e53270fe67f5f88ff
3
+ size 1934161093
checkpoint-1000/preprocessor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:effd83aa50562711333009da3acee71cb8d4cc97401631fc6051fb672aa01d45
3
+ size 967102601
checkpoint-1000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc55a315e5b7e256395762f330f90c34727ffc733709c8144bb9eb79a2f1f225
3
+ size 14519
checkpoint-1000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b943b140f7b35b4e08fe884358b1d5cd34372a8626a75ebdf6d8895e2b886e3
3
+ size 14519
checkpoint-1000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a72f4ae227453cc8ce249882042f38fc236801402867a065febe98ec49fcc3eb
3
+ size 14519
checkpoint-1000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8045f82b260f2c2d7af0513d9c0de4f50e24893816efc96d254dea13220280a
3
+ size 14583
checkpoint-1000/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dce1d2ae382ebe376f59073c1e3e94fa0133976fb67ca102a1b392494adc915
3
+ size 557
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f250987d7a2989ae5f1dfe2a3d3533de6b31bfde834549e82ed7a565b251efa
3
+ size 627
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 47.521028231962916,
3
+ "best_model_checkpoint": "./whisper-small-Persian/checkpoint-1000",
4
+ "epoch": 3.1150855365474337,
5
+ "global_step": 1000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.08,
12
+ "learning_rate": 4.4e-08,
13
+ "loss": 1.4232,
14
+ "step": 25
15
+ },
16
+ {
17
+ "epoch": 0.16,
18
+ "learning_rate": 9.4e-08,
19
+ "loss": 1.4027,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 0.23,
24
+ "learning_rate": 1.44e-07,
25
+ "loss": 1.3731,
26
+ "step": 75
27
+ },
28
+ {
29
+ "epoch": 0.31,
30
+ "learning_rate": 1.94e-07,
31
+ "loss": 1.2969,
32
+ "step": 100
33
+ },
34
+ {
35
+ "epoch": 0.39,
36
+ "learning_rate": 2.4399999999999996e-07,
37
+ "loss": 1.1964,
38
+ "step": 125
39
+ },
40
+ {
41
+ "epoch": 0.47,
42
+ "learning_rate": 2.9399999999999996e-07,
43
+ "loss": 1.0995,
44
+ "step": 150
45
+ },
46
+ {
47
+ "epoch": 0.54,
48
+ "learning_rate": 3.4399999999999996e-07,
49
+ "loss": 1.0068,
50
+ "step": 175
51
+ },
52
+ {
53
+ "epoch": 0.62,
54
+ "learning_rate": 3.94e-07,
55
+ "loss": 0.8949,
56
+ "step": 200
57
+ },
58
+ {
59
+ "epoch": 0.7,
60
+ "learning_rate": 4.44e-07,
61
+ "loss": 0.8521,
62
+ "step": 225
63
+ },
64
+ {
65
+ "epoch": 0.78,
66
+ "learning_rate": 4.94e-07,
67
+ "loss": 0.7694,
68
+ "step": 250
69
+ },
70
+ {
71
+ "epoch": 0.86,
72
+ "learning_rate": 5.44e-07,
73
+ "loss": 0.7196,
74
+ "step": 275
75
+ },
76
+ {
77
+ "epoch": 0.93,
78
+ "learning_rate": 5.939999999999999e-07,
79
+ "loss": 0.6902,
80
+ "step": 300
81
+ },
82
+ {
83
+ "epoch": 1.01,
84
+ "learning_rate": 6.44e-07,
85
+ "loss": 0.6748,
86
+ "step": 325
87
+ },
88
+ {
89
+ "epoch": 1.09,
90
+ "learning_rate": 6.939999999999999e-07,
91
+ "loss": 0.6247,
92
+ "step": 350
93
+ },
94
+ {
95
+ "epoch": 1.17,
96
+ "learning_rate": 7.44e-07,
97
+ "loss": 0.63,
98
+ "step": 375
99
+ },
100
+ {
101
+ "epoch": 1.25,
102
+ "learning_rate": 7.94e-07,
103
+ "loss": 0.5899,
104
+ "step": 400
105
+ },
106
+ {
107
+ "epoch": 1.32,
108
+ "learning_rate": 8.439999999999999e-07,
109
+ "loss": 0.5628,
110
+ "step": 425
111
+ },
112
+ {
113
+ "epoch": 1.4,
114
+ "learning_rate": 8.939999999999999e-07,
115
+ "loss": 0.5634,
116
+ "step": 450
117
+ },
118
+ {
119
+ "epoch": 1.48,
120
+ "learning_rate": 9.439999999999999e-07,
121
+ "loss": 0.5549,
122
+ "step": 475
123
+ },
124
+ {
125
+ "epoch": 1.56,
126
+ "learning_rate": 9.94e-07,
127
+ "loss": 0.5533,
128
+ "step": 500
129
+ },
130
+ {
131
+ "epoch": 1.56,
132
+ "eval_loss": 0.7044046521186829,
133
+ "eval_runtime": 1265.1903,
134
+ "eval_samples_per_second": 8.132,
135
+ "eval_steps_per_second": 0.127,
136
+ "eval_wer": 54.54987091580379,
137
+ "step": 500
138
+ },
139
+ {
140
+ "epoch": 1.63,
141
+ "learning_rate": 9.95111111111111e-07,
142
+ "loss": 0.5329,
143
+ "step": 525
144
+ },
145
+ {
146
+ "epoch": 1.71,
147
+ "learning_rate": 9.895555555555554e-07,
148
+ "loss": 0.5116,
149
+ "step": 550
150
+ },
151
+ {
152
+ "epoch": 1.79,
153
+ "learning_rate": 9.84e-07,
154
+ "loss": 0.511,
155
+ "step": 575
156
+ },
157
+ {
158
+ "epoch": 1.87,
159
+ "learning_rate": 9.784444444444444e-07,
160
+ "loss": 0.4963,
161
+ "step": 600
162
+ },
163
+ {
164
+ "epoch": 1.95,
165
+ "learning_rate": 9.728888888888888e-07,
166
+ "loss": 0.5177,
167
+ "step": 625
168
+ },
169
+ {
170
+ "epoch": 2.02,
171
+ "learning_rate": 9.673333333333332e-07,
172
+ "loss": 0.4947,
173
+ "step": 650
174
+ },
175
+ {
176
+ "epoch": 2.1,
177
+ "learning_rate": 9.617777777777776e-07,
178
+ "loss": 0.4529,
179
+ "step": 675
180
+ },
181
+ {
182
+ "epoch": 2.18,
183
+ "learning_rate": 9.562222222222223e-07,
184
+ "loss": 0.4543,
185
+ "step": 700
186
+ },
187
+ {
188
+ "epoch": 2.26,
189
+ "learning_rate": 9.506666666666667e-07,
190
+ "loss": 0.4478,
191
+ "step": 725
192
+ },
193
+ {
194
+ "epoch": 2.34,
195
+ "learning_rate": 9.451111111111111e-07,
196
+ "loss": 0.4421,
197
+ "step": 750
198
+ },
199
+ {
200
+ "epoch": 2.41,
201
+ "learning_rate": 9.395555555555556e-07,
202
+ "loss": 0.4465,
203
+ "step": 775
204
+ },
205
+ {
206
+ "epoch": 2.49,
207
+ "learning_rate": 9.34e-07,
208
+ "loss": 0.4164,
209
+ "step": 800
210
+ },
211
+ {
212
+ "epoch": 2.57,
213
+ "learning_rate": 9.284444444444444e-07,
214
+ "loss": 0.4291,
215
+ "step": 825
216
+ },
217
+ {
218
+ "epoch": 2.65,
219
+ "learning_rate": 9.228888888888888e-07,
220
+ "loss": 0.4149,
221
+ "step": 850
222
+ },
223
+ {
224
+ "epoch": 2.72,
225
+ "learning_rate": 9.173333333333333e-07,
226
+ "loss": 0.4242,
227
+ "step": 875
228
+ },
229
+ {
230
+ "epoch": 2.8,
231
+ "learning_rate": 9.117777777777778e-07,
232
+ "loss": 0.4329,
233
+ "step": 900
234
+ },
235
+ {
236
+ "epoch": 2.88,
237
+ "learning_rate": 9.062222222222222e-07,
238
+ "loss": 0.4165,
239
+ "step": 925
240
+ },
241
+ {
242
+ "epoch": 2.96,
243
+ "learning_rate": 9.006666666666666e-07,
244
+ "loss": 0.4166,
245
+ "step": 950
246
+ },
247
+ {
248
+ "epoch": 3.04,
249
+ "learning_rate": 8.95111111111111e-07,
250
+ "loss": 0.409,
251
+ "step": 975
252
+ },
253
+ {
254
+ "epoch": 3.12,
255
+ "learning_rate": 8.895555555555555e-07,
256
+ "loss": 0.3951,
257
+ "step": 1000
258
+ },
259
+ {
260
+ "epoch": 3.12,
261
+ "eval_loss": 0.5893104076385498,
262
+ "eval_runtime": 1326.5175,
263
+ "eval_samples_per_second": 7.756,
264
+ "eval_steps_per_second": 0.121,
265
+ "eval_wer": 47.521028231962916,
266
+ "step": 1000
267
+ }
268
+ ],
269
+ "max_steps": 5000,
270
+ "num_train_epochs": 16,
271
+ "total_flos": 1.849716994470209e+19,
272
+ "trial_name": null,
273
+ "trial_params": null
274
+ }
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda70737a84052aee98fe01ec95a00d27783f83ee9889ee9d6ffa0984826347e
3
+ size 3579
checkpoint-1500/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai/whisper-small",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "architectures": [
6
+ "WhisperForConditionalGeneration"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "begin_suppress_tokens": [
10
+ 220,
11
+ 50257
12
+ ],
13
+ "bos_token_id": 50257,
14
+ "d_model": 768,
15
+ "decoder_attention_heads": 12,
16
+ "decoder_ffn_dim": 3072,
17
+ "decoder_layerdrop": 0.0,
18
+ "decoder_layers": 12,
19
+ "decoder_start_token_id": 50258,
20
+ "dropout": 0.0,
21
+ "encoder_attention_heads": 12,
22
+ "encoder_ffn_dim": 3072,
23
+ "encoder_layerdrop": 0.0,
24
+ "encoder_layers": 12,
25
+ "eos_token_id": 50257,
26
+ "forced_decoder_ids": null,
27
+ "init_std": 0.02,
28
+ "is_encoder_decoder": true,
29
+ "max_length": 448,
30
+ "max_source_positions": 1500,
31
+ "max_target_positions": 448,
32
+ "model_type": "whisper",
33
+ "num_hidden_layers": 12,
34
+ "num_mel_bins": 80,
35
+ "pad_token_id": 50257,
36
+ "scale_embedding": false,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.25.1",
39
+ "use_cache": false,
40
+ "vocab_size": 51865
41
+ }
checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:847a61d4fd60ee9d944adfb2acfc7039ceb8a4ed184bb09e0dfc78c7bb78a9d0
3
+ size 1934161093
checkpoint-1500/preprocessor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1500/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b773c4e1eb73396fab8cc0254287f1ac0298a07d6af06b9ba6b8ca0a99a6bc5
3
+ size 967102601
checkpoint-1500/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c241fb277d290b4db104171998a510d48dd6d96c6d37fdb5ea448dd0b2e6f723
3
+ size 14647
checkpoint-1500/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4db9742f4079692a73cad94c5651453235f2bbd8dc05620d82692f1f2189526a
3
+ size 14583
checkpoint-1500/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97940afe4e3f5dd75207e0c11d204341b59b0e48f3c15c64a35e306180ab69a0
3
+ size 14583
checkpoint-1500/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1143ff2c3189c690c8d13b34f0ede037dd914c70a4495c1c1e60700f6a62dbc5
3
+ size 14519
checkpoint-1500/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a1080490c5f23904ed7607a160b86a63d0fbf35f6ea0f13b4c1b02ec4a7646b
3
+ size 557
checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c9b0e7f2e678434af32e69234b2356b6547a35c00d4d4f5a4d4e7b59be2066b
3
+ size 627
checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,403 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 44.64508786053355,
3
+ "best_model_checkpoint": "./whisper-small-Persian/checkpoint-1500",
4
+ "epoch": 4.671850699844479,
5
+ "global_step": 1500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.08,
12
+ "learning_rate": 4.4e-08,
13
+ "loss": 1.4232,
14
+ "step": 25
15
+ },
16
+ {
17
+ "epoch": 0.16,
18
+ "learning_rate": 9.4e-08,
19
+ "loss": 1.4027,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 0.23,
24
+ "learning_rate": 1.44e-07,
25
+ "loss": 1.3731,
26
+ "step": 75
27
+ },
28
+ {
29
+ "epoch": 0.31,
30
+ "learning_rate": 1.94e-07,
31
+ "loss": 1.2969,
32
+ "step": 100
33
+ },
34
+ {
35
+ "epoch": 0.39,
36
+ "learning_rate": 2.4399999999999996e-07,
37
+ "loss": 1.1964,
38
+ "step": 125
39
+ },
40
+ {
41
+ "epoch": 0.47,
42
+ "learning_rate": 2.9399999999999996e-07,
43
+ "loss": 1.0995,
44
+ "step": 150
45
+ },
46
+ {
47
+ "epoch": 0.54,
48
+ "learning_rate": 3.4399999999999996e-07,
49
+ "loss": 1.0068,
50
+ "step": 175
51
+ },
52
+ {
53
+ "epoch": 0.62,
54
+ "learning_rate": 3.94e-07,
55
+ "loss": 0.8949,
56
+ "step": 200
57
+ },
58
+ {
59
+ "epoch": 0.7,
60
+ "learning_rate": 4.44e-07,
61
+ "loss": 0.8521,
62
+ "step": 225
63
+ },
64
+ {
65
+ "epoch": 0.78,
66
+ "learning_rate": 4.94e-07,
67
+ "loss": 0.7694,
68
+ "step": 250
69
+ },
70
+ {
71
+ "epoch": 0.86,
72
+ "learning_rate": 5.44e-07,
73
+ "loss": 0.7196,
74
+ "step": 275
75
+ },
76
+ {
77
+ "epoch": 0.93,
78
+ "learning_rate": 5.939999999999999e-07,
79
+ "loss": 0.6902,
80
+ "step": 300
81
+ },
82
+ {
83
+ "epoch": 1.01,
84
+ "learning_rate": 6.44e-07,
85
+ "loss": 0.6748,
86
+ "step": 325
87
+ },
88
+ {
89
+ "epoch": 1.09,
90
+ "learning_rate": 6.939999999999999e-07,
91
+ "loss": 0.6247,
92
+ "step": 350
93
+ },
94
+ {
95
+ "epoch": 1.17,
96
+ "learning_rate": 7.44e-07,
97
+ "loss": 0.63,
98
+ "step": 375
99
+ },
100
+ {
101
+ "epoch": 1.25,
102
+ "learning_rate": 7.94e-07,
103
+ "loss": 0.5899,
104
+ "step": 400
105
+ },
106
+ {
107
+ "epoch": 1.32,
108
+ "learning_rate": 8.439999999999999e-07,
109
+ "loss": 0.5628,
110
+ "step": 425
111
+ },
112
+ {
113
+ "epoch": 1.4,
114
+ "learning_rate": 8.939999999999999e-07,
115
+ "loss": 0.5634,
116
+ "step": 450
117
+ },
118
+ {
119
+ "epoch": 1.48,
120
+ "learning_rate": 9.439999999999999e-07,
121
+ "loss": 0.5549,
122
+ "step": 475
123
+ },
124
+ {
125
+ "epoch": 1.56,
126
+ "learning_rate": 9.94e-07,
127
+ "loss": 0.5533,
128
+ "step": 500
129
+ },
130
+ {
131
+ "epoch": 1.56,
132
+ "eval_loss": 0.7044046521186829,
133
+ "eval_runtime": 1265.1903,
134
+ "eval_samples_per_second": 8.132,
135
+ "eval_steps_per_second": 0.127,
136
+ "eval_wer": 54.54987091580379,
137
+ "step": 500
138
+ },
139
+ {
140
+ "epoch": 1.63,
141
+ "learning_rate": 9.95111111111111e-07,
142
+ "loss": 0.5329,
143
+ "step": 525
144
+ },
145
+ {
146
+ "epoch": 1.71,
147
+ "learning_rate": 9.895555555555554e-07,
148
+ "loss": 0.5116,
149
+ "step": 550
150
+ },
151
+ {
152
+ "epoch": 1.79,
153
+ "learning_rate": 9.84e-07,
154
+ "loss": 0.511,
155
+ "step": 575
156
+ },
157
+ {
158
+ "epoch": 1.87,
159
+ "learning_rate": 9.784444444444444e-07,
160
+ "loss": 0.4963,
161
+ "step": 600
162
+ },
163
+ {
164
+ "epoch": 1.95,
165
+ "learning_rate": 9.728888888888888e-07,
166
+ "loss": 0.5177,
167
+ "step": 625
168
+ },
169
+ {
170
+ "epoch": 2.02,
171
+ "learning_rate": 9.673333333333332e-07,
172
+ "loss": 0.4947,
173
+ "step": 650
174
+ },
175
+ {
176
+ "epoch": 2.1,
177
+ "learning_rate": 9.617777777777776e-07,
178
+ "loss": 0.4529,
179
+ "step": 675
180
+ },
181
+ {
182
+ "epoch": 2.18,
183
+ "learning_rate": 9.562222222222223e-07,
184
+ "loss": 0.4543,
185
+ "step": 700
186
+ },
187
+ {
188
+ "epoch": 2.26,
189
+ "learning_rate": 9.506666666666667e-07,
190
+ "loss": 0.4478,
191
+ "step": 725
192
+ },
193
+ {
194
+ "epoch": 2.34,
195
+ "learning_rate": 9.451111111111111e-07,
196
+ "loss": 0.4421,
197
+ "step": 750
198
+ },
199
+ {
200
+ "epoch": 2.41,
201
+ "learning_rate": 9.395555555555556e-07,
202
+ "loss": 0.4465,
203
+ "step": 775
204
+ },
205
+ {
206
+ "epoch": 2.49,
207
+ "learning_rate": 9.34e-07,
208
+ "loss": 0.4164,
209
+ "step": 800
210
+ },
211
+ {
212
+ "epoch": 2.57,
213
+ "learning_rate": 9.284444444444444e-07,
214
+ "loss": 0.4291,
215
+ "step": 825
216
+ },
217
+ {
218
+ "epoch": 2.65,
219
+ "learning_rate": 9.228888888888888e-07,
220
+ "loss": 0.4149,
221
+ "step": 850
222
+ },
223
+ {
224
+ "epoch": 2.72,
225
+ "learning_rate": 9.173333333333333e-07,
226
+ "loss": 0.4242,
227
+ "step": 875
228
+ },
229
+ {
230
+ "epoch": 2.8,
231
+ "learning_rate": 9.117777777777778e-07,
232
+ "loss": 0.4329,
233
+ "step": 900
234
+ },
235
+ {
236
+ "epoch": 2.88,
237
+ "learning_rate": 9.062222222222222e-07,
238
+ "loss": 0.4165,
239
+ "step": 925
240
+ },
241
+ {
242
+ "epoch": 2.96,
243
+ "learning_rate": 9.006666666666666e-07,
244
+ "loss": 0.4166,
245
+ "step": 950
246
+ },
247
+ {
248
+ "epoch": 3.04,
249
+ "learning_rate": 8.95111111111111e-07,
250
+ "loss": 0.409,
251
+ "step": 975
252
+ },
253
+ {
254
+ "epoch": 3.12,
255
+ "learning_rate": 8.895555555555555e-07,
256
+ "loss": 0.3951,
257
+ "step": 1000
258
+ },
259
+ {
260
+ "epoch": 3.12,
261
+ "eval_loss": 0.5893104076385498,
262
+ "eval_runtime": 1326.5175,
263
+ "eval_samples_per_second": 7.756,
264
+ "eval_steps_per_second": 0.121,
265
+ "eval_wer": 47.521028231962916,
266
+ "step": 1000
267
+ },
268
+ {
269
+ "epoch": 3.19,
270
+ "learning_rate": 8.839999999999999e-07,
271
+ "loss": 0.3808,
272
+ "step": 1025
273
+ },
274
+ {
275
+ "epoch": 3.27,
276
+ "learning_rate": 8.784444444444444e-07,
277
+ "loss": 0.3786,
278
+ "step": 1050
279
+ },
280
+ {
281
+ "epoch": 3.35,
282
+ "learning_rate": 8.728888888888889e-07,
283
+ "loss": 0.3811,
284
+ "step": 1075
285
+ },
286
+ {
287
+ "epoch": 3.43,
288
+ "learning_rate": 8.673333333333332e-07,
289
+ "loss": 0.3644,
290
+ "step": 1100
291
+ },
292
+ {
293
+ "epoch": 3.5,
294
+ "learning_rate": 8.617777777777777e-07,
295
+ "loss": 0.3737,
296
+ "step": 1125
297
+ },
298
+ {
299
+ "epoch": 3.58,
300
+ "learning_rate": 8.562222222222222e-07,
301
+ "loss": 0.3698,
302
+ "step": 1150
303
+ },
304
+ {
305
+ "epoch": 3.66,
306
+ "learning_rate": 8.506666666666667e-07,
307
+ "loss": 0.3601,
308
+ "step": 1175
309
+ },
310
+ {
311
+ "epoch": 3.74,
312
+ "learning_rate": 8.451111111111111e-07,
313
+ "loss": 0.3675,
314
+ "step": 1200
315
+ },
316
+ {
317
+ "epoch": 3.81,
318
+ "learning_rate": 8.395555555555556e-07,
319
+ "loss": 0.3588,
320
+ "step": 1225
321
+ },
322
+ {
323
+ "epoch": 3.89,
324
+ "learning_rate": 8.34e-07,
325
+ "loss": 0.3541,
326
+ "step": 1250
327
+ },
328
+ {
329
+ "epoch": 3.97,
330
+ "learning_rate": 8.284444444444444e-07,
331
+ "loss": 0.3537,
332
+ "step": 1275
333
+ },
334
+ {
335
+ "epoch": 4.05,
336
+ "learning_rate": 8.228888888888889e-07,
337
+ "loss": 0.3666,
338
+ "step": 1300
339
+ },
340
+ {
341
+ "epoch": 4.13,
342
+ "learning_rate": 8.173333333333333e-07,
343
+ "loss": 0.3242,
344
+ "step": 1325
345
+ },
346
+ {
347
+ "epoch": 4.21,
348
+ "learning_rate": 8.117777777777778e-07,
349
+ "loss": 0.3212,
350
+ "step": 1350
351
+ },
352
+ {
353
+ "epoch": 4.28,
354
+ "learning_rate": 8.062222222222221e-07,
355
+ "loss": 0.3374,
356
+ "step": 1375
357
+ },
358
+ {
359
+ "epoch": 4.36,
360
+ "learning_rate": 8.006666666666666e-07,
361
+ "loss": 0.3259,
362
+ "step": 1400
363
+ },
364
+ {
365
+ "epoch": 4.44,
366
+ "learning_rate": 7.95111111111111e-07,
367
+ "loss": 0.3361,
368
+ "step": 1425
369
+ },
370
+ {
371
+ "epoch": 4.52,
372
+ "learning_rate": 7.895555555555555e-07,
373
+ "loss": 0.3355,
374
+ "step": 1450
375
+ },
376
+ {
377
+ "epoch": 4.59,
378
+ "learning_rate": 7.84e-07,
379
+ "loss": 0.3254,
380
+ "step": 1475
381
+ },
382
+ {
383
+ "epoch": 4.67,
384
+ "learning_rate": 7.784444444444444e-07,
385
+ "loss": 0.3296,
386
+ "step": 1500
387
+ },
388
+ {
389
+ "epoch": 4.67,
390
+ "eval_loss": 0.5428555011749268,
391
+ "eval_runtime": 1299.6224,
392
+ "eval_samples_per_second": 7.916,
393
+ "eval_steps_per_second": 0.124,
394
+ "eval_wer": 44.64508786053355,
395
+ "step": 1500
396
+ }
397
+ ],
398
+ "max_steps": 5000,
399
+ "num_train_epochs": 16,
400
+ "total_flos": 2.774113755233203e+19,
401
+ "trial_name": null,
402
+ "trial_params": null
403
+ }
checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda70737a84052aee98fe01ec95a00d27783f83ee9889ee9d6ffa0984826347e
3
+ size 3579
checkpoint-2000/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai/whisper-small",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "architectures": [
6
+ "WhisperForConditionalGeneration"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "begin_suppress_tokens": [
10
+ 220,
11
+ 50257
12
+ ],
13
+ "bos_token_id": 50257,
14
+ "d_model": 768,
15
+ "decoder_attention_heads": 12,
16
+ "decoder_ffn_dim": 3072,
17
+ "decoder_layerdrop": 0.0,
18
+ "decoder_layers": 12,
19
+ "decoder_start_token_id": 50258,
20
+ "dropout": 0.0,
21
+ "encoder_attention_heads": 12,
22
+ "encoder_ffn_dim": 3072,
23
+ "encoder_layerdrop": 0.0,
24
+ "encoder_layers": 12,
25
+ "eos_token_id": 50257,
26
+ "forced_decoder_ids": null,
27
+ "init_std": 0.02,
28
+ "is_encoder_decoder": true,
29
+ "max_length": 448,
30
+ "max_source_positions": 1500,
31
+ "max_target_positions": 448,
32
+ "model_type": "whisper",
33
+ "num_hidden_layers": 12,
34
+ "num_mel_bins": 80,
35
+ "pad_token_id": 50257,
36
+ "scale_embedding": false,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.25.1",
39
+ "use_cache": false,
40
+ "vocab_size": 51865
41
+ }
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55615f6bde936bb1e58c190a081c5e2667f693ae46397930b43e29e8d9523962
3
+ size 1934161093
checkpoint-2000/preprocessor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0c7544da5f411a2cce995c121ea9d8fe855d38a67dd9376d4ff9bdc152e3f95
3
+ size 967102601
checkpoint-2000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46b9c72c7eed1fd4c4f798e93fa1ede8521ac20b90fe4cf8875169df39ebe280
3
+ size 14583
checkpoint-2000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8080778388d210607de5b81682e40562c65c649583ce16519fb3b8c62009fc41
3
+ size 14583
checkpoint-2000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:073cd8a681f64f906aee66bc4e51e3745829cf0d7a5e40767cb64fd2ad8dcfb2
3
+ size 14519
checkpoint-2000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b8de99171336a8003c52583af34523efe031e9e49849784cd4024882cdb3efb
3
+ size 14583
checkpoint-2000/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:172321c3064ae4a11267185f654f9636260463be1a922f305c5370008f78b7f2
3
+ size 557
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07a414e9d8627cd29b61655f515b44908fdbf1b87394717d76f2608595276d9f
3
+ size 627
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,532 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 42.664408849901456,
3
+ "best_model_checkpoint": "./whisper-small-Persian/checkpoint-2000",
4
+ "epoch": 6.230171073094867,
5
+ "global_step": 2000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.08,
12
+ "learning_rate": 4.4e-08,
13
+ "loss": 1.4232,
14
+ "step": 25
15
+ },
16
+ {
17
+ "epoch": 0.16,
18
+ "learning_rate": 9.4e-08,
19
+ "loss": 1.4027,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 0.23,
24
+ "learning_rate": 1.44e-07,
25
+ "loss": 1.3731,
26
+ "step": 75
27
+ },
28
+ {
29
+ "epoch": 0.31,
30
+ "learning_rate": 1.94e-07,
31
+ "loss": 1.2969,
32
+ "step": 100
33
+ },
34
+ {
35
+ "epoch": 0.39,
36
+ "learning_rate": 2.4399999999999996e-07,
37
+ "loss": 1.1964,
38
+ "step": 125
39
+ },
40
+ {
41
+ "epoch": 0.47,
42
+ "learning_rate": 2.9399999999999996e-07,
43
+ "loss": 1.0995,
44
+ "step": 150
45
+ },
46
+ {
47
+ "epoch": 0.54,
48
+ "learning_rate": 3.4399999999999996e-07,
49
+ "loss": 1.0068,
50
+ "step": 175
51
+ },
52
+ {
53
+ "epoch": 0.62,
54
+ "learning_rate": 3.94e-07,
55
+ "loss": 0.8949,
56
+ "step": 200
57
+ },
58
+ {
59
+ "epoch": 0.7,
60
+ "learning_rate": 4.44e-07,
61
+ "loss": 0.8521,
62
+ "step": 225
63
+ },
64
+ {
65
+ "epoch": 0.78,
66
+ "learning_rate": 4.94e-07,
67
+ "loss": 0.7694,
68
+ "step": 250
69
+ },
70
+ {
71
+ "epoch": 0.86,
72
+ "learning_rate": 5.44e-07,
73
+ "loss": 0.7196,
74
+ "step": 275
75
+ },
76
+ {
77
+ "epoch": 0.93,
78
+ "learning_rate": 5.939999999999999e-07,
79
+ "loss": 0.6902,
80
+ "step": 300
81
+ },
82
+ {
83
+ "epoch": 1.01,
84
+ "learning_rate": 6.44e-07,
85
+ "loss": 0.6748,
86
+ "step": 325
87
+ },
88
+ {
89
+ "epoch": 1.09,
90
+ "learning_rate": 6.939999999999999e-07,
91
+ "loss": 0.6247,
92
+ "step": 350
93
+ },
94
+ {
95
+ "epoch": 1.17,
96
+ "learning_rate": 7.44e-07,
97
+ "loss": 0.63,
98
+ "step": 375
99
+ },
100
+ {
101
+ "epoch": 1.25,
102
+ "learning_rate": 7.94e-07,
103
+ "loss": 0.5899,
104
+ "step": 400
105
+ },
106
+ {
107
+ "epoch": 1.32,
108
+ "learning_rate": 8.439999999999999e-07,
109
+ "loss": 0.5628,
110
+ "step": 425
111
+ },
112
+ {
113
+ "epoch": 1.4,
114
+ "learning_rate": 8.939999999999999e-07,
115
+ "loss": 0.5634,
116
+ "step": 450
117
+ },
118
+ {
119
+ "epoch": 1.48,
120
+ "learning_rate": 9.439999999999999e-07,
121
+ "loss": 0.5549,
122
+ "step": 475
123
+ },
124
+ {
125
+ "epoch": 1.56,
126
+ "learning_rate": 9.94e-07,
127
+ "loss": 0.5533,
128
+ "step": 500
129
+ },
130
+ {
131
+ "epoch": 1.56,
132
+ "eval_loss": 0.7044046521186829,
133
+ "eval_runtime": 1265.1903,
134
+ "eval_samples_per_second": 8.132,
135
+ "eval_steps_per_second": 0.127,
136
+ "eval_wer": 54.54987091580379,
137
+ "step": 500
138
+ },
139
+ {
140
+ "epoch": 1.63,
141
+ "learning_rate": 9.95111111111111e-07,
142
+ "loss": 0.5329,
143
+ "step": 525
144
+ },
145
+ {
146
+ "epoch": 1.71,
147
+ "learning_rate": 9.895555555555554e-07,
148
+ "loss": 0.5116,
149
+ "step": 550
150
+ },
151
+ {
152
+ "epoch": 1.79,
153
+ "learning_rate": 9.84e-07,
154
+ "loss": 0.511,
155
+ "step": 575
156
+ },
157
+ {
158
+ "epoch": 1.87,
159
+ "learning_rate": 9.784444444444444e-07,
160
+ "loss": 0.4963,
161
+ "step": 600
162
+ },
163
+ {
164
+ "epoch": 1.95,
165
+ "learning_rate": 9.728888888888888e-07,
166
+ "loss": 0.5177,
167
+ "step": 625
168
+ },
169
+ {
170
+ "epoch": 2.02,
171
+ "learning_rate": 9.673333333333332e-07,
172
+ "loss": 0.4947,
173
+ "step": 650
174
+ },
175
+ {
176
+ "epoch": 2.1,
177
+ "learning_rate": 9.617777777777776e-07,
178
+ "loss": 0.4529,
179
+ "step": 675
180
+ },
181
+ {
182
+ "epoch": 2.18,
183
+ "learning_rate": 9.562222222222223e-07,
184
+ "loss": 0.4543,
185
+ "step": 700
186
+ },
187
+ {
188
+ "epoch": 2.26,
189
+ "learning_rate": 9.506666666666667e-07,
190
+ "loss": 0.4478,
191
+ "step": 725
192
+ },
193
+ {
194
+ "epoch": 2.34,
195
+ "learning_rate": 9.451111111111111e-07,
196
+ "loss": 0.4421,
197
+ "step": 750
198
+ },
199
+ {
200
+ "epoch": 2.41,
201
+ "learning_rate": 9.395555555555556e-07,
202
+ "loss": 0.4465,
203
+ "step": 775
204
+ },
205
+ {
206
+ "epoch": 2.49,
207
+ "learning_rate": 9.34e-07,
208
+ "loss": 0.4164,
209
+ "step": 800
210
+ },
211
+ {
212
+ "epoch": 2.57,
213
+ "learning_rate": 9.284444444444444e-07,
214
+ "loss": 0.4291,
215
+ "step": 825
216
+ },
217
+ {
218
+ "epoch": 2.65,
219
+ "learning_rate": 9.228888888888888e-07,
220
+ "loss": 0.4149,
221
+ "step": 850
222
+ },
223
+ {
224
+ "epoch": 2.72,
225
+ "learning_rate": 9.173333333333333e-07,
226
+ "loss": 0.4242,
227
+ "step": 875
228
+ },
229
+ {
230
+ "epoch": 2.8,
231
+ "learning_rate": 9.117777777777778e-07,
232
+ "loss": 0.4329,
233
+ "step": 900
234
+ },
235
+ {
236
+ "epoch": 2.88,
237
+ "learning_rate": 9.062222222222222e-07,
238
+ "loss": 0.4165,
239
+ "step": 925
240
+ },
241
+ {
242
+ "epoch": 2.96,
243
+ "learning_rate": 9.006666666666666e-07,
244
+ "loss": 0.4166,
245
+ "step": 950
246
+ },
247
+ {
248
+ "epoch": 3.04,
249
+ "learning_rate": 8.95111111111111e-07,
250
+ "loss": 0.409,
251
+ "step": 975
252
+ },
253
+ {
254
+ "epoch": 3.12,
255
+ "learning_rate": 8.895555555555555e-07,
256
+ "loss": 0.3951,
257
+ "step": 1000
258
+ },
259
+ {
260
+ "epoch": 3.12,
261
+ "eval_loss": 0.5893104076385498,
262
+ "eval_runtime": 1326.5175,
263
+ "eval_samples_per_second": 7.756,
264
+ "eval_steps_per_second": 0.121,
265
+ "eval_wer": 47.521028231962916,
266
+ "step": 1000
267
+ },
268
+ {
269
+ "epoch": 3.19,
270
+ "learning_rate": 8.839999999999999e-07,
271
+ "loss": 0.3808,
272
+ "step": 1025
273
+ },
274
+ {
275
+ "epoch": 3.27,
276
+ "learning_rate": 8.784444444444444e-07,
277
+ "loss": 0.3786,
278
+ "step": 1050
279
+ },
280
+ {
281
+ "epoch": 3.35,
282
+ "learning_rate": 8.728888888888889e-07,
283
+ "loss": 0.3811,
284
+ "step": 1075
285
+ },
286
+ {
287
+ "epoch": 3.43,
288
+ "learning_rate": 8.673333333333332e-07,
289
+ "loss": 0.3644,
290
+ "step": 1100
291
+ },
292
+ {
293
+ "epoch": 3.5,
294
+ "learning_rate": 8.617777777777777e-07,
295
+ "loss": 0.3737,
296
+ "step": 1125
297
+ },
298
+ {
299
+ "epoch": 3.58,
300
+ "learning_rate": 8.562222222222222e-07,
301
+ "loss": 0.3698,
302
+ "step": 1150
303
+ },
304
+ {
305
+ "epoch": 3.66,
306
+ "learning_rate": 8.506666666666667e-07,
307
+ "loss": 0.3601,
308
+ "step": 1175
309
+ },
310
+ {
311
+ "epoch": 3.74,
312
+ "learning_rate": 8.451111111111111e-07,
313
+ "loss": 0.3675,
314
+ "step": 1200
315
+ },
316
+ {
317
+ "epoch": 3.81,
318
+ "learning_rate": 8.395555555555556e-07,
319
+ "loss": 0.3588,
320
+ "step": 1225
321
+ },
322
+ {
323
+ "epoch": 3.89,
324
+ "learning_rate": 8.34e-07,
325
+ "loss": 0.3541,
326
+ "step": 1250
327
+ },
328
+ {
329
+ "epoch": 3.97,
330
+ "learning_rate": 8.284444444444444e-07,
331
+ "loss": 0.3537,
332
+ "step": 1275
333
+ },
334
+ {
335
+ "epoch": 4.05,
336
+ "learning_rate": 8.228888888888889e-07,
337
+ "loss": 0.3666,
338
+ "step": 1300
339
+ },
340
+ {
341
+ "epoch": 4.13,
342
+ "learning_rate": 8.173333333333333e-07,
343
+ "loss": 0.3242,
344
+ "step": 1325
345
+ },
346
+ {
347
+ "epoch": 4.21,
348
+ "learning_rate": 8.117777777777778e-07,
349
+ "loss": 0.3212,
350
+ "step": 1350
351
+ },
352
+ {
353
+ "epoch": 4.28,
354
+ "learning_rate": 8.062222222222221e-07,
355
+ "loss": 0.3374,
356
+ "step": 1375
357
+ },
358
+ {
359
+ "epoch": 4.36,
360
+ "learning_rate": 8.006666666666666e-07,
361
+ "loss": 0.3259,
362
+ "step": 1400
363
+ },
364
+ {
365
+ "epoch": 4.44,
366
+ "learning_rate": 7.95111111111111e-07,
367
+ "loss": 0.3361,
368
+ "step": 1425
369
+ },
370
+ {
371
+ "epoch": 4.52,
372
+ "learning_rate": 7.895555555555555e-07,
373
+ "loss": 0.3355,
374
+ "step": 1450
375
+ },
376
+ {
377
+ "epoch": 4.59,
378
+ "learning_rate": 7.84e-07,
379
+ "loss": 0.3254,
380
+ "step": 1475
381
+ },
382
+ {
383
+ "epoch": 4.67,
384
+ "learning_rate": 7.784444444444444e-07,
385
+ "loss": 0.3296,
386
+ "step": 1500
387
+ },
388
+ {
389
+ "epoch": 4.67,
390
+ "eval_loss": 0.5428555011749268,
391
+ "eval_runtime": 1299.6224,
392
+ "eval_samples_per_second": 7.916,
393
+ "eval_steps_per_second": 0.124,
394
+ "eval_wer": 44.64508786053355,
395
+ "step": 1500
396
+ },
397
+ {
398
+ "epoch": 4.75,
399
+ "learning_rate": 7.728888888888888e-07,
400
+ "loss": 0.3229,
401
+ "step": 1525
402
+ },
403
+ {
404
+ "epoch": 4.83,
405
+ "learning_rate": 7.673333333333332e-07,
406
+ "loss": 0.3372,
407
+ "step": 1550
408
+ },
409
+ {
410
+ "epoch": 4.91,
411
+ "learning_rate": 7.617777777777778e-07,
412
+ "loss": 0.3297,
413
+ "step": 1575
414
+ },
415
+ {
416
+ "epoch": 4.98,
417
+ "learning_rate": 7.562222222222222e-07,
418
+ "loss": 0.3239,
419
+ "step": 1600
420
+ },
421
+ {
422
+ "epoch": 5.06,
423
+ "learning_rate": 7.506666666666667e-07,
424
+ "loss": 0.3113,
425
+ "step": 1625
426
+ },
427
+ {
428
+ "epoch": 5.14,
429
+ "learning_rate": 7.451111111111111e-07,
430
+ "loss": 0.3142,
431
+ "step": 1650
432
+ },
433
+ {
434
+ "epoch": 5.22,
435
+ "learning_rate": 7.395555555555555e-07,
436
+ "loss": 0.303,
437
+ "step": 1675
438
+ },
439
+ {
440
+ "epoch": 5.3,
441
+ "learning_rate": 7.34e-07,
442
+ "loss": 0.2973,
443
+ "step": 1700
444
+ },
445
+ {
446
+ "epoch": 5.37,
447
+ "learning_rate": 7.284444444444444e-07,
448
+ "loss": 0.2998,
449
+ "step": 1725
450
+ },
451
+ {
452
+ "epoch": 5.45,
453
+ "learning_rate": 7.228888888888889e-07,
454
+ "loss": 0.2962,
455
+ "step": 1750
456
+ },
457
+ {
458
+ "epoch": 5.53,
459
+ "learning_rate": 7.173333333333333e-07,
460
+ "loss": 0.3007,
461
+ "step": 1775
462
+ },
463
+ {
464
+ "epoch": 5.61,
465
+ "learning_rate": 7.117777777777777e-07,
466
+ "loss": 0.2906,
467
+ "step": 1800
468
+ },
469
+ {
470
+ "epoch": 5.68,
471
+ "learning_rate": 7.062222222222222e-07,
472
+ "loss": 0.295,
473
+ "step": 1825
474
+ },
475
+ {
476
+ "epoch": 5.76,
477
+ "learning_rate": 7.006666666666666e-07,
478
+ "loss": 0.3083,
479
+ "step": 1850
480
+ },
481
+ {
482
+ "epoch": 5.84,
483
+ "learning_rate": 6.951111111111111e-07,
484
+ "loss": 0.2869,
485
+ "step": 1875
486
+ },
487
+ {
488
+ "epoch": 5.92,
489
+ "learning_rate": 6.895555555555555e-07,
490
+ "loss": 0.2936,
491
+ "step": 1900
492
+ },
493
+ {
494
+ "epoch": 6.0,
495
+ "learning_rate": 6.84e-07,
496
+ "loss": 0.2982,
497
+ "step": 1925
498
+ },
499
+ {
500
+ "epoch": 6.07,
501
+ "learning_rate": 6.784444444444443e-07,
502
+ "loss": 0.2885,
503
+ "step": 1950
504
+ },
505
+ {
506
+ "epoch": 6.15,
507
+ "learning_rate": 6.728888888888888e-07,
508
+ "loss": 0.2719,
509
+ "step": 1975
510
+ },
511
+ {
512
+ "epoch": 6.23,
513
+ "learning_rate": 6.673333333333334e-07,
514
+ "loss": 0.2662,
515
+ "step": 2000
516
+ },
517
+ {
518
+ "epoch": 6.23,
519
+ "eval_loss": 0.5223153829574585,
520
+ "eval_runtime": 1347.3336,
521
+ "eval_samples_per_second": 7.636,
522
+ "eval_steps_per_second": 0.119,
523
+ "eval_wer": 42.664408849901456,
524
+ "step": 2000
525
+ }
526
+ ],
527
+ "max_steps": 5000,
528
+ "num_train_epochs": 16,
529
+ "total_flos": 3.699433988940418e+19,
530
+ "trial_name": null,
531
+ "trial_params": null
532
+ }
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda70737a84052aee98fe01ec95a00d27783f83ee9889ee9d6ffa0984826347e
3
+ size 3579
checkpoint-2500/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai/whisper-small",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "architectures": [
6
+ "WhisperForConditionalGeneration"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "begin_suppress_tokens": [
10
+ 220,
11
+ 50257
12
+ ],
13
+ "bos_token_id": 50257,
14
+ "d_model": 768,
15
+ "decoder_attention_heads": 12,
16
+ "decoder_ffn_dim": 3072,
17
+ "decoder_layerdrop": 0.0,
18
+ "decoder_layers": 12,
19
+ "decoder_start_token_id": 50258,
20
+ "dropout": 0.0,
21
+ "encoder_attention_heads": 12,
22
+ "encoder_ffn_dim": 3072,
23
+ "encoder_layerdrop": 0.0,
24
+ "encoder_layers": 12,
25
+ "eos_token_id": 50257,
26
+ "forced_decoder_ids": null,
27
+ "init_std": 0.02,
28
+ "is_encoder_decoder": true,
29
+ "max_length": 448,
30
+ "max_source_positions": 1500,
31
+ "max_target_positions": 448,
32
+ "model_type": "whisper",
33
+ "num_hidden_layers": 12,
34
+ "num_mel_bins": 80,
35
+ "pad_token_id": 50257,
36
+ "scale_embedding": false,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.25.1",
39
+ "use_cache": false,
40
+ "vocab_size": 51865
41
+ }
checkpoint-2500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b1bac388e9ccb5c85426c16252364dc258a48aebb84a068f2b63513a42ffb4a
3
+ size 1934161093
checkpoint-2500/preprocessor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2500/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63e637219ffc3358d4ca0ea19cad9ea7f996d874a2c7b6a52a1e706fa59302e3
3
+ size 967102601
checkpoint-2500/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32d582dbdb4c1e2b98c61b22baf74e479d8d8e8a93b2151bd6293afe2786d2f8
3
+ size 14583
checkpoint-2500/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e07f727622efb4a24b1cfc5b9f0ab5af9a6aadd797759821cf175f71466f919b
3
+ size 14647
checkpoint-2500/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:105882858c82a984658b5d4f76b92652efc3df674f31929b33a1fae0cf114417
3
+ size 14583
checkpoint-2500/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7037146f98830f9a234ab8852ec5401c0cc6283a54ab1a65f2a9bd9059d87a43
3
+ size 14583
checkpoint-2500/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edf730497bda1dd7d810be24acd7ff911a283f9988fb87926ddea4a8506d9615
3
+ size 557
checkpoint-2500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a77720265737918b69cda81377c78110d51cb0982308e66f42db7acc9269820
3
+ size 627
checkpoint-2500/trainer_state.json ADDED
@@ -0,0 +1,661 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 41.53041112622491,
3
+ "best_model_checkpoint": "./whisper-small-Persian/checkpoint-2500",
4
+ "epoch": 7.786936236391913,
5
+ "global_step": 2500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.08,
12
+ "learning_rate": 4.4e-08,
13
+ "loss": 1.4232,
14
+ "step": 25
15
+ },
16
+ {
17
+ "epoch": 0.16,
18
+ "learning_rate": 9.4e-08,
19
+ "loss": 1.4027,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 0.23,
24
+ "learning_rate": 1.44e-07,
25
+ "loss": 1.3731,
26
+ "step": 75
27
+ },
28
+ {
29
+ "epoch": 0.31,
30
+ "learning_rate": 1.94e-07,
31
+ "loss": 1.2969,
32
+ "step": 100
33
+ },
34
+ {
35
+ "epoch": 0.39,
36
+ "learning_rate": 2.4399999999999996e-07,
37
+ "loss": 1.1964,
38
+ "step": 125
39
+ },
40
+ {
41
+ "epoch": 0.47,
42
+ "learning_rate": 2.9399999999999996e-07,
43
+ "loss": 1.0995,
44
+ "step": 150
45
+ },
46
+ {
47
+ "epoch": 0.54,
48
+ "learning_rate": 3.4399999999999996e-07,
49
+ "loss": 1.0068,
50
+ "step": 175
51
+ },
52
+ {
53
+ "epoch": 0.62,
54
+ "learning_rate": 3.94e-07,
55
+ "loss": 0.8949,
56
+ "step": 200
57
+ },
58
+ {
59
+ "epoch": 0.7,
60
+ "learning_rate": 4.44e-07,
61
+ "loss": 0.8521,
62
+ "step": 225
63
+ },
64
+ {
65
+ "epoch": 0.78,
66
+ "learning_rate": 4.94e-07,
67
+ "loss": 0.7694,
68
+ "step": 250
69
+ },
70
+ {
71
+ "epoch": 0.86,
72
+ "learning_rate": 5.44e-07,
73
+ "loss": 0.7196,
74
+ "step": 275
75
+ },
76
+ {
77
+ "epoch": 0.93,
78
+ "learning_rate": 5.939999999999999e-07,
79
+ "loss": 0.6902,
80
+ "step": 300
81
+ },
82
+ {
83
+ "epoch": 1.01,
84
+ "learning_rate": 6.44e-07,
85
+ "loss": 0.6748,
86
+ "step": 325
87
+ },
88
+ {
89
+ "epoch": 1.09,
90
+ "learning_rate": 6.939999999999999e-07,
91
+ "loss": 0.6247,
92
+ "step": 350
93
+ },
94
+ {
95
+ "epoch": 1.17,
96
+ "learning_rate": 7.44e-07,
97
+ "loss": 0.63,
98
+ "step": 375
99
+ },
100
+ {
101
+ "epoch": 1.25,
102
+ "learning_rate": 7.94e-07,
103
+ "loss": 0.5899,
104
+ "step": 400
105
+ },
106
+ {
107
+ "epoch": 1.32,
108
+ "learning_rate": 8.439999999999999e-07,
109
+ "loss": 0.5628,
110
+ "step": 425
111
+ },
112
+ {
113
+ "epoch": 1.4,
114
+ "learning_rate": 8.939999999999999e-07,
115
+ "loss": 0.5634,
116
+ "step": 450
117
+ },
118
+ {
119
+ "epoch": 1.48,
120
+ "learning_rate": 9.439999999999999e-07,
121
+ "loss": 0.5549,
122
+ "step": 475
123
+ },
124
+ {
125
+ "epoch": 1.56,
126
+ "learning_rate": 9.94e-07,
127
+ "loss": 0.5533,
128
+ "step": 500
129
+ },
130
+ {
131
+ "epoch": 1.56,
132
+ "eval_loss": 0.7044046521186829,
133
+ "eval_runtime": 1265.1903,
134
+ "eval_samples_per_second": 8.132,
135
+ "eval_steps_per_second": 0.127,
136
+ "eval_wer": 54.54987091580379,
137
+ "step": 500
138
+ },
139
+ {
140
+ "epoch": 1.63,
141
+ "learning_rate": 9.95111111111111e-07,
142
+ "loss": 0.5329,
143
+ "step": 525
144
+ },
145
+ {
146
+ "epoch": 1.71,
147
+ "learning_rate": 9.895555555555554e-07,
148
+ "loss": 0.5116,
149
+ "step": 550
150
+ },
151
+ {
152
+ "epoch": 1.79,
153
+ "learning_rate": 9.84e-07,
154
+ "loss": 0.511,
155
+ "step": 575
156
+ },
157
+ {
158
+ "epoch": 1.87,
159
+ "learning_rate": 9.784444444444444e-07,
160
+ "loss": 0.4963,
161
+ "step": 600
162
+ },
163
+ {
164
+ "epoch": 1.95,
165
+ "learning_rate": 9.728888888888888e-07,
166
+ "loss": 0.5177,
167
+ "step": 625
168
+ },
169
+ {
170
+ "epoch": 2.02,
171
+ "learning_rate": 9.673333333333332e-07,
172
+ "loss": 0.4947,
173
+ "step": 650
174
+ },
175
+ {
176
+ "epoch": 2.1,
177
+ "learning_rate": 9.617777777777776e-07,
178
+ "loss": 0.4529,
179
+ "step": 675
180
+ },
181
+ {
182
+ "epoch": 2.18,
183
+ "learning_rate": 9.562222222222223e-07,
184
+ "loss": 0.4543,
185
+ "step": 700
186
+ },
187
+ {
188
+ "epoch": 2.26,
189
+ "learning_rate": 9.506666666666667e-07,
190
+ "loss": 0.4478,
191
+ "step": 725
192
+ },
193
+ {
194
+ "epoch": 2.34,
195
+ "learning_rate": 9.451111111111111e-07,
196
+ "loss": 0.4421,
197
+ "step": 750
198
+ },
199
+ {
200
+ "epoch": 2.41,
201
+ "learning_rate": 9.395555555555556e-07,
202
+ "loss": 0.4465,
203
+ "step": 775
204
+ },
205
+ {
206
+ "epoch": 2.49,
207
+ "learning_rate": 9.34e-07,
208
+ "loss": 0.4164,
209
+ "step": 800
210
+ },
211
+ {
212
+ "epoch": 2.57,
213
+ "learning_rate": 9.284444444444444e-07,
214
+ "loss": 0.4291,
215
+ "step": 825
216
+ },
217
+ {
218
+ "epoch": 2.65,
219
+ "learning_rate": 9.228888888888888e-07,
220
+ "loss": 0.4149,
221
+ "step": 850
222
+ },
223
+ {
224
+ "epoch": 2.72,
225
+ "learning_rate": 9.173333333333333e-07,
226
+ "loss": 0.4242,
227
+ "step": 875
228
+ },
229
+ {
230
+ "epoch": 2.8,
231
+ "learning_rate": 9.117777777777778e-07,
232
+ "loss": 0.4329,
233
+ "step": 900
234
+ },
235
+ {
236
+ "epoch": 2.88,
237
+ "learning_rate": 9.062222222222222e-07,
238
+ "loss": 0.4165,
239
+ "step": 925
240
+ },
241
+ {
242
+ "epoch": 2.96,
243
+ "learning_rate": 9.006666666666666e-07,
244
+ "loss": 0.4166,
245
+ "step": 950
246
+ },
247
+ {
248
+ "epoch": 3.04,
249
+ "learning_rate": 8.95111111111111e-07,
250
+ "loss": 0.409,
251
+ "step": 975
252
+ },
253
+ {
254
+ "epoch": 3.12,
255
+ "learning_rate": 8.895555555555555e-07,
256
+ "loss": 0.3951,
257
+ "step": 1000
258
+ },
259
+ {
260
+ "epoch": 3.12,
261
+ "eval_loss": 0.5893104076385498,
262
+ "eval_runtime": 1326.5175,
263
+ "eval_samples_per_second": 7.756,
264
+ "eval_steps_per_second": 0.121,
265
+ "eval_wer": 47.521028231962916,
266
+ "step": 1000
267
+ },
268
+ {
269
+ "epoch": 3.19,
270
+ "learning_rate": 8.839999999999999e-07,
271
+ "loss": 0.3808,
272
+ "step": 1025
273
+ },
274
+ {
275
+ "epoch": 3.27,
276
+ "learning_rate": 8.784444444444444e-07,
277
+ "loss": 0.3786,
278
+ "step": 1050
279
+ },
280
+ {
281
+ "epoch": 3.35,
282
+ "learning_rate": 8.728888888888889e-07,
283
+ "loss": 0.3811,
284
+ "step": 1075
285
+ },
286
+ {
287
+ "epoch": 3.43,
288
+ "learning_rate": 8.673333333333332e-07,
289
+ "loss": 0.3644,
290
+ "step": 1100
291
+ },
292
+ {
293
+ "epoch": 3.5,
294
+ "learning_rate": 8.617777777777777e-07,
295
+ "loss": 0.3737,
296
+ "step": 1125
297
+ },
298
+ {
299
+ "epoch": 3.58,
300
+ "learning_rate": 8.562222222222222e-07,
301
+ "loss": 0.3698,
302
+ "step": 1150
303
+ },
304
+ {
305
+ "epoch": 3.66,
306
+ "learning_rate": 8.506666666666667e-07,
307
+ "loss": 0.3601,
308
+ "step": 1175
309
+ },
310
+ {
311
+ "epoch": 3.74,
312
+ "learning_rate": 8.451111111111111e-07,
313
+ "loss": 0.3675,
314
+ "step": 1200
315
+ },
316
+ {
317
+ "epoch": 3.81,
318
+ "learning_rate": 8.395555555555556e-07,
319
+ "loss": 0.3588,
320
+ "step": 1225
321
+ },
322
+ {
323
+ "epoch": 3.89,
324
+ "learning_rate": 8.34e-07,
325
+ "loss": 0.3541,
326
+ "step": 1250
327
+ },
328
+ {
329
+ "epoch": 3.97,
330
+ "learning_rate": 8.284444444444444e-07,
331
+ "loss": 0.3537,
332
+ "step": 1275
333
+ },
334
+ {
335
+ "epoch": 4.05,
336
+ "learning_rate": 8.228888888888889e-07,
337
+ "loss": 0.3666,
338
+ "step": 1300
339
+ },
340
+ {
341
+ "epoch": 4.13,
342
+ "learning_rate": 8.173333333333333e-07,
343
+ "loss": 0.3242,
344
+ "step": 1325
345
+ },
346
+ {
347
+ "epoch": 4.21,
348
+ "learning_rate": 8.117777777777778e-07,
349
+ "loss": 0.3212,
350
+ "step": 1350
351
+ },
352
+ {
353
+ "epoch": 4.28,
354
+ "learning_rate": 8.062222222222221e-07,
355
+ "loss": 0.3374,
356
+ "step": 1375
357
+ },
358
+ {
359
+ "epoch": 4.36,
360
+ "learning_rate": 8.006666666666666e-07,
361
+ "loss": 0.3259,
362
+ "step": 1400
363
+ },
364
+ {
365
+ "epoch": 4.44,
366
+ "learning_rate": 7.95111111111111e-07,
367
+ "loss": 0.3361,
368
+ "step": 1425
369
+ },
370
+ {
371
+ "epoch": 4.52,
372
+ "learning_rate": 7.895555555555555e-07,
373
+ "loss": 0.3355,
374
+ "step": 1450
375
+ },
376
+ {
377
+ "epoch": 4.59,
378
+ "learning_rate": 7.84e-07,
379
+ "loss": 0.3254,
380
+ "step": 1475
381
+ },
382
+ {
383
+ "epoch": 4.67,
384
+ "learning_rate": 7.784444444444444e-07,
385
+ "loss": 0.3296,
386
+ "step": 1500
387
+ },
388
+ {
389
+ "epoch": 4.67,
390
+ "eval_loss": 0.5428555011749268,
391
+ "eval_runtime": 1299.6224,
392
+ "eval_samples_per_second": 7.916,
393
+ "eval_steps_per_second": 0.124,
394
+ "eval_wer": 44.64508786053355,
395
+ "step": 1500
396
+ },
397
+ {
398
+ "epoch": 4.75,
399
+ "learning_rate": 7.728888888888888e-07,
400
+ "loss": 0.3229,
401
+ "step": 1525
402
+ },
403
+ {
404
+ "epoch": 4.83,
405
+ "learning_rate": 7.673333333333332e-07,
406
+ "loss": 0.3372,
407
+ "step": 1550
408
+ },
409
+ {
410
+ "epoch": 4.91,
411
+ "learning_rate": 7.617777777777778e-07,
412
+ "loss": 0.3297,
413
+ "step": 1575
414
+ },
415
+ {
416
+ "epoch": 4.98,
417
+ "learning_rate": 7.562222222222222e-07,
418
+ "loss": 0.3239,
419
+ "step": 1600
420
+ },
421
+ {
422
+ "epoch": 5.06,
423
+ "learning_rate": 7.506666666666667e-07,
424
+ "loss": 0.3113,
425
+ "step": 1625
426
+ },
427
+ {
428
+ "epoch": 5.14,
429
+ "learning_rate": 7.451111111111111e-07,
430
+ "loss": 0.3142,
431
+ "step": 1650
432
+ },
433
+ {
434
+ "epoch": 5.22,
435
+ "learning_rate": 7.395555555555555e-07,
436
+ "loss": 0.303,
437
+ "step": 1675
438
+ },
439
+ {
440
+ "epoch": 5.3,
441
+ "learning_rate": 7.34e-07,
442
+ "loss": 0.2973,
443
+ "step": 1700
444
+ },
445
+ {
446
+ "epoch": 5.37,
447
+ "learning_rate": 7.284444444444444e-07,
448
+ "loss": 0.2998,
449
+ "step": 1725
450
+ },
451
+ {
452
+ "epoch": 5.45,
453
+ "learning_rate": 7.228888888888889e-07,
454
+ "loss": 0.2962,
455
+ "step": 1750
456
+ },
457
+ {
458
+ "epoch": 5.53,
459
+ "learning_rate": 7.173333333333333e-07,
460
+ "loss": 0.3007,
461
+ "step": 1775
462
+ },
463
+ {
464
+ "epoch": 5.61,
465
+ "learning_rate": 7.117777777777777e-07,
466
+ "loss": 0.2906,
467
+ "step": 1800
468
+ },
469
+ {
470
+ "epoch": 5.68,
471
+ "learning_rate": 7.062222222222222e-07,
472
+ "loss": 0.295,
473
+ "step": 1825
474
+ },
475
+ {
476
+ "epoch": 5.76,
477
+ "learning_rate": 7.006666666666666e-07,
478
+ "loss": 0.3083,
479
+ "step": 1850
480
+ },
481
+ {
482
+ "epoch": 5.84,
483
+ "learning_rate": 6.951111111111111e-07,
484
+ "loss": 0.2869,
485
+ "step": 1875
486
+ },
487
+ {
488
+ "epoch": 5.92,
489
+ "learning_rate": 6.895555555555555e-07,
490
+ "loss": 0.2936,
491
+ "step": 1900
492
+ },
493
+ {
494
+ "epoch": 6.0,
495
+ "learning_rate": 6.84e-07,
496
+ "loss": 0.2982,
497
+ "step": 1925
498
+ },
499
+ {
500
+ "epoch": 6.07,
501
+ "learning_rate": 6.784444444444443e-07,
502
+ "loss": 0.2885,
503
+ "step": 1950
504
+ },
505
+ {
506
+ "epoch": 6.15,
507
+ "learning_rate": 6.728888888888888e-07,
508
+ "loss": 0.2719,
509
+ "step": 1975
510
+ },
511
+ {
512
+ "epoch": 6.23,
513
+ "learning_rate": 6.673333333333334e-07,
514
+ "loss": 0.2662,
515
+ "step": 2000
516
+ },
517
+ {
518
+ "epoch": 6.23,
519
+ "eval_loss": 0.5223153829574585,
520
+ "eval_runtime": 1347.3336,
521
+ "eval_samples_per_second": 7.636,
522
+ "eval_steps_per_second": 0.119,
523
+ "eval_wer": 42.664408849901456,
524
+ "step": 2000
525
+ },
526
+ {
527
+ "epoch": 6.31,
528
+ "learning_rate": 6.617777777777778e-07,
529
+ "loss": 0.2739,
530
+ "step": 2025
531
+ },
532
+ {
533
+ "epoch": 6.39,
534
+ "learning_rate": 6.562222222222223e-07,
535
+ "loss": 0.2694,
536
+ "step": 2050
537
+ },
538
+ {
539
+ "epoch": 6.46,
540
+ "learning_rate": 6.506666666666666e-07,
541
+ "loss": 0.2678,
542
+ "step": 2075
543
+ },
544
+ {
545
+ "epoch": 6.54,
546
+ "learning_rate": 6.451111111111111e-07,
547
+ "loss": 0.2716,
548
+ "step": 2100
549
+ },
550
+ {
551
+ "epoch": 6.62,
552
+ "learning_rate": 6.395555555555555e-07,
553
+ "loss": 0.27,
554
+ "step": 2125
555
+ },
556
+ {
557
+ "epoch": 6.7,
558
+ "learning_rate": 6.34e-07,
559
+ "loss": 0.2715,
560
+ "step": 2150
561
+ },
562
+ {
563
+ "epoch": 6.77,
564
+ "learning_rate": 6.284444444444445e-07,
565
+ "loss": 0.2705,
566
+ "step": 2175
567
+ },
568
+ {
569
+ "epoch": 6.85,
570
+ "learning_rate": 6.228888888888889e-07,
571
+ "loss": 0.2771,
572
+ "step": 2200
573
+ },
574
+ {
575
+ "epoch": 6.93,
576
+ "learning_rate": 6.173333333333333e-07,
577
+ "loss": 0.2814,
578
+ "step": 2225
579
+ },
580
+ {
581
+ "epoch": 7.01,
582
+ "learning_rate": 6.117777777777777e-07,
583
+ "loss": 0.302,
584
+ "step": 2250
585
+ },
586
+ {
587
+ "epoch": 7.09,
588
+ "learning_rate": 6.062222222222222e-07,
589
+ "loss": 0.2619,
590
+ "step": 2275
591
+ },
592
+ {
593
+ "epoch": 7.16,
594
+ "learning_rate": 6.006666666666666e-07,
595
+ "loss": 0.2561,
596
+ "step": 2300
597
+ },
598
+ {
599
+ "epoch": 7.24,
600
+ "learning_rate": 5.951111111111111e-07,
601
+ "loss": 0.2559,
602
+ "step": 2325
603
+ },
604
+ {
605
+ "epoch": 7.32,
606
+ "learning_rate": 5.895555555555555e-07,
607
+ "loss": 0.2524,
608
+ "step": 2350
609
+ },
610
+ {
611
+ "epoch": 7.4,
612
+ "learning_rate": 5.839999999999999e-07,
613
+ "loss": 0.2583,
614
+ "step": 2375
615
+ },
616
+ {
617
+ "epoch": 7.48,
618
+ "learning_rate": 5.784444444444444e-07,
619
+ "loss": 0.2434,
620
+ "step": 2400
621
+ },
622
+ {
623
+ "epoch": 7.55,
624
+ "learning_rate": 5.728888888888888e-07,
625
+ "loss": 0.2546,
626
+ "step": 2425
627
+ },
628
+ {
629
+ "epoch": 7.63,
630
+ "learning_rate": 5.673333333333334e-07,
631
+ "loss": 0.2492,
632
+ "step": 2450
633
+ },
634
+ {
635
+ "epoch": 7.71,
636
+ "learning_rate": 5.617777777777778e-07,
637
+ "loss": 0.2544,
638
+ "step": 2475
639
+ },
640
+ {
641
+ "epoch": 7.79,
642
+ "learning_rate": 5.562222222222222e-07,
643
+ "loss": 0.2535,
644
+ "step": 2500
645
+ },
646
+ {
647
+ "epoch": 7.79,
648
+ "eval_loss": 0.5044606924057007,
649
+ "eval_runtime": 1321.646,
650
+ "eval_samples_per_second": 7.784,
651
+ "eval_steps_per_second": 0.122,
652
+ "eval_wer": 41.53041112622491,
653
+ "step": 2500
654
+ }
655
+ ],
656
+ "max_steps": 5000,
657
+ "num_train_epochs": 16,
658
+ "total_flos": 4.623830749703412e+19,
659
+ "trial_name": null,
660
+ "trial_params": null
661
+ }