alicebz commited on
Commit
47ea28a
1 Parent(s): e81d92d

Upload 7 files

Browse files
Files changed (7) hide show
  1. config.json +116 -0
  2. preprocessor_config.json +10 -0
  3. rng_state.pth +3 -0
  4. scheduler.pt +3 -0
  5. trainer_state.json +2057 -0
  6. training_args.bin +3 -0
  7. vocab.json +1 -0
config.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
+ "activation_dropout": 0.0,
4
+ "adapter_attn_dim": null,
5
+ "adapter_kernel_size": 3,
6
+ "adapter_stride": 2,
7
+ "add_adapter": false,
8
+ "apply_spec_augment": true,
9
+ "architectures": [
10
+ "Wav2Vec2ForCTC"
11
+ ],
12
+ "attention_dropout": 0.1,
13
+ "bos_token_id": 1,
14
+ "classifier_proj_size": 256,
15
+ "codevector_dim": 768,
16
+ "contrastive_logits_temperature": 0.1,
17
+ "conv_bias": true,
18
+ "conv_dim": [
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512,
25
+ 512
26
+ ],
27
+ "conv_kernel": [
28
+ 10,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 3,
33
+ 2,
34
+ 2
35
+ ],
36
+ "conv_stride": [
37
+ 5,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2,
43
+ 2
44
+ ],
45
+ "ctc_loss_reduction": "mean",
46
+ "ctc_zero_infinity": false,
47
+ "diversity_loss_weight": 0.1,
48
+ "do_stable_layer_norm": true,
49
+ "eos_token_id": 2,
50
+ "feat_extract_activation": "gelu",
51
+ "feat_extract_dropout": 0.0,
52
+ "feat_extract_norm": "layer",
53
+ "feat_proj_dropout": 0.0,
54
+ "feat_quantizer_dropout": 0.0,
55
+ "final_dropout": 0.0,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.1,
58
+ "hidden_size": 1024,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.1,
63
+ "mask_channel_length": 10,
64
+ "mask_channel_min_space": 1,
65
+ "mask_channel_other": 0.0,
66
+ "mask_channel_prob": 0.0,
67
+ "mask_channel_selection": "static",
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_min_space": 1,
74
+ "mask_time_other": 0.0,
75
+ "mask_time_prob": 0.05,
76
+ "mask_time_selection": "static",
77
+ "model_type": "wav2vec2",
78
+ "num_adapter_layers": 3,
79
+ "num_attention_heads": 16,
80
+ "num_codevector_groups": 2,
81
+ "num_codevectors_per_group": 320,
82
+ "num_conv_pos_embedding_groups": 16,
83
+ "num_conv_pos_embeddings": 128,
84
+ "num_feat_extract_layers": 7,
85
+ "num_hidden_layers": 24,
86
+ "num_negatives": 100,
87
+ "output_hidden_size": 1024,
88
+ "pad_token_id": 26,
89
+ "proj_codevector_dim": 768,
90
+ "tdnn_dilation": [
91
+ 1,
92
+ 2,
93
+ 3,
94
+ 1,
95
+ 1
96
+ ],
97
+ "tdnn_dim": [
98
+ 512,
99
+ 512,
100
+ 512,
101
+ 512,
102
+ 1500
103
+ ],
104
+ "tdnn_kernel": [
105
+ 5,
106
+ 3,
107
+ 3,
108
+ 1,
109
+ 1
110
+ ],
111
+ "torch_dtype": "float32",
112
+ "transformers_version": "4.41.1",
113
+ "use_weighted_layer_sum": false,
114
+ "vocab_size": 29,
115
+ "xvector_output_dim": 512
116
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc6d8be337d23ba212f987b1383487c3e673290d8ec4a9a819f5aab5b08628c5
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac2ea29afe0c11f282dd386c9043289e8b142c9be1c58f45cdd17f545ae4647f
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,2057 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.923943661971831,
3
+ "best_model_checkpoint": "./ssw-finetune/checkpoint-1150",
4
+ "epoch": 115.0,
5
+ "eval_steps": 25,
6
+ "global_step": 1150,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.5,
13
+ "grad_norm": 5.561491012573242,
14
+ "learning_rate": 2.9999999999999997e-06,
15
+ "loss": 7.7799,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "grad_norm": 4.18166971206665,
21
+ "learning_rate": 6.749999999999999e-06,
22
+ "loss": 7.4713,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 1.5,
27
+ "grad_norm": 6.806884288787842,
28
+ "learning_rate": 1.05e-05,
29
+ "loss": 7.696,
30
+ "step": 15
31
+ },
32
+ {
33
+ "epoch": 2.0,
34
+ "grad_norm": NaN,
35
+ "learning_rate": 1.3499999999999998e-05,
36
+ "loss": 7.9462,
37
+ "step": 20
38
+ },
39
+ {
40
+ "epoch": 2.5,
41
+ "grad_norm": NaN,
42
+ "learning_rate": 1.6499999999999998e-05,
43
+ "loss": 8.0165,
44
+ "step": 25
45
+ },
46
+ {
47
+ "epoch": 2.5,
48
+ "eval_loss": 7.467132091522217,
49
+ "eval_runtime": 1.054,
50
+ "eval_samples_per_second": 22.77,
51
+ "eval_steps_per_second": 0.949,
52
+ "eval_wer": 1.0,
53
+ "step": 25
54
+ },
55
+ {
56
+ "epoch": 3.0,
57
+ "grad_norm": 2.7979044914245605,
58
+ "learning_rate": 2.025e-05,
59
+ "loss": 6.5204,
60
+ "step": 30
61
+ },
62
+ {
63
+ "epoch": 3.5,
64
+ "grad_norm": 9.716986656188965,
65
+ "learning_rate": 2.3999999999999997e-05,
66
+ "loss": 7.6715,
67
+ "step": 35
68
+ },
69
+ {
70
+ "epoch": 4.0,
71
+ "grad_norm": 3.3519299030303955,
72
+ "learning_rate": 2.7749999999999997e-05,
73
+ "loss": 7.0161,
74
+ "step": 40
75
+ },
76
+ {
77
+ "epoch": 4.5,
78
+ "grad_norm": 5.6749138832092285,
79
+ "learning_rate": 3.149999999999999e-05,
80
+ "loss": 8.0617,
81
+ "step": 45
82
+ },
83
+ {
84
+ "epoch": 5.0,
85
+ "grad_norm": 8.150848388671875,
86
+ "learning_rate": 3.5249999999999996e-05,
87
+ "loss": 6.3142,
88
+ "step": 50
89
+ },
90
+ {
91
+ "epoch": 5.0,
92
+ "eval_loss": 6.626723766326904,
93
+ "eval_runtime": 1.071,
94
+ "eval_samples_per_second": 22.409,
95
+ "eval_steps_per_second": 0.934,
96
+ "eval_wer": 1.0,
97
+ "step": 50
98
+ },
99
+ {
100
+ "epoch": 5.5,
101
+ "grad_norm": 6.339476585388184,
102
+ "learning_rate": 3.9e-05,
103
+ "loss": 6.2643,
104
+ "step": 55
105
+ },
106
+ {
107
+ "epoch": 6.0,
108
+ "grad_norm": 13.012835502624512,
109
+ "learning_rate": 4.2749999999999996e-05,
110
+ "loss": 7.1655,
111
+ "step": 60
112
+ },
113
+ {
114
+ "epoch": 6.5,
115
+ "grad_norm": 11.24893569946289,
116
+ "learning_rate": 4.65e-05,
117
+ "loss": 5.8178,
118
+ "step": 65
119
+ },
120
+ {
121
+ "epoch": 7.0,
122
+ "grad_norm": 24.677473068237305,
123
+ "learning_rate": 5.025e-05,
124
+ "loss": 5.5684,
125
+ "step": 70
126
+ },
127
+ {
128
+ "epoch": 7.5,
129
+ "grad_norm": 8.214367866516113,
130
+ "learning_rate": 5.399999999999999e-05,
131
+ "loss": 4.3185,
132
+ "step": 75
133
+ },
134
+ {
135
+ "epoch": 7.5,
136
+ "eval_loss": 3.72790789604187,
137
+ "eval_runtime": 1.0391,
138
+ "eval_samples_per_second": 23.097,
139
+ "eval_steps_per_second": 0.962,
140
+ "eval_wer": 1.0,
141
+ "step": 75
142
+ },
143
+ {
144
+ "epoch": 8.0,
145
+ "grad_norm": 8.643641471862793,
146
+ "learning_rate": 5.7749999999999994e-05,
147
+ "loss": 4.1807,
148
+ "step": 80
149
+ },
150
+ {
151
+ "epoch": 8.5,
152
+ "grad_norm": 10.54008674621582,
153
+ "learning_rate": 6.149999999999999e-05,
154
+ "loss": 3.7552,
155
+ "step": 85
156
+ },
157
+ {
158
+ "epoch": 9.0,
159
+ "grad_norm": 3.332289934158325,
160
+ "learning_rate": 6.525e-05,
161
+ "loss": 3.7053,
162
+ "step": 90
163
+ },
164
+ {
165
+ "epoch": 9.5,
166
+ "grad_norm": 4.925398349761963,
167
+ "learning_rate": 6.9e-05,
168
+ "loss": 3.3661,
169
+ "step": 95
170
+ },
171
+ {
172
+ "epoch": 10.0,
173
+ "grad_norm": 5.291933536529541,
174
+ "learning_rate": 7.274999999999999e-05,
175
+ "loss": 3.1777,
176
+ "step": 100
177
+ },
178
+ {
179
+ "epoch": 10.0,
180
+ "eval_loss": 3.050647735595703,
181
+ "eval_runtime": 1.0273,
182
+ "eval_samples_per_second": 23.362,
183
+ "eval_steps_per_second": 0.973,
184
+ "eval_wer": 1.0,
185
+ "step": 100
186
+ },
187
+ {
188
+ "epoch": 10.5,
189
+ "grad_norm": 1.6660319566726685,
190
+ "learning_rate": 7.649999999999999e-05,
191
+ "loss": 3.0435,
192
+ "step": 105
193
+ },
194
+ {
195
+ "epoch": 11.0,
196
+ "grad_norm": 0.681082546710968,
197
+ "learning_rate": 8.025e-05,
198
+ "loss": 3.254,
199
+ "step": 110
200
+ },
201
+ {
202
+ "epoch": 11.5,
203
+ "grad_norm": 2.713016986846924,
204
+ "learning_rate": 8.4e-05,
205
+ "loss": 2.972,
206
+ "step": 115
207
+ },
208
+ {
209
+ "epoch": 12.0,
210
+ "grad_norm": 7.211615085601807,
211
+ "learning_rate": 8.774999999999999e-05,
212
+ "loss": 3.1145,
213
+ "step": 120
214
+ },
215
+ {
216
+ "epoch": 12.5,
217
+ "grad_norm": 3.3372182846069336,
218
+ "learning_rate": 9.149999999999999e-05,
219
+ "loss": 3.0587,
220
+ "step": 125
221
+ },
222
+ {
223
+ "epoch": 12.5,
224
+ "eval_loss": 2.936924695968628,
225
+ "eval_runtime": 1.0424,
226
+ "eval_samples_per_second": 23.023,
227
+ "eval_steps_per_second": 0.959,
228
+ "eval_wer": 1.0,
229
+ "step": 125
230
+ },
231
+ {
232
+ "epoch": 13.0,
233
+ "grad_norm": 1.895374059677124,
234
+ "learning_rate": 9.525e-05,
235
+ "loss": 2.9096,
236
+ "step": 130
237
+ },
238
+ {
239
+ "epoch": 13.5,
240
+ "grad_norm": 8.356375694274902,
241
+ "learning_rate": 9.9e-05,
242
+ "loss": 3.3159,
243
+ "step": 135
244
+ },
245
+ {
246
+ "epoch": 14.0,
247
+ "grad_norm": 1.6825320720672607,
248
+ "learning_rate": 0.00010275,
249
+ "loss": 2.9022,
250
+ "step": 140
251
+ },
252
+ {
253
+ "epoch": 14.5,
254
+ "grad_norm": 0.7314967513084412,
255
+ "learning_rate": 0.00010649999999999999,
256
+ "loss": 2.9058,
257
+ "step": 145
258
+ },
259
+ {
260
+ "epoch": 15.0,
261
+ "grad_norm": 3.183772563934326,
262
+ "learning_rate": 0.00011024999999999998,
263
+ "loss": 3.0633,
264
+ "step": 150
265
+ },
266
+ {
267
+ "epoch": 15.0,
268
+ "eval_loss": 2.9296257495880127,
269
+ "eval_runtime": 1.0347,
270
+ "eval_samples_per_second": 23.194,
271
+ "eval_steps_per_second": 0.966,
272
+ "eval_wer": 1.0,
273
+ "step": 150
274
+ },
275
+ {
276
+ "epoch": 15.5,
277
+ "grad_norm": 1.4891362190246582,
278
+ "learning_rate": 0.00011399999999999999,
279
+ "loss": 2.9901,
280
+ "step": 155
281
+ },
282
+ {
283
+ "epoch": 16.0,
284
+ "grad_norm": 5.58284854888916,
285
+ "learning_rate": 0.00011774999999999999,
286
+ "loss": 2.9861,
287
+ "step": 160
288
+ },
289
+ {
290
+ "epoch": 16.5,
291
+ "grad_norm": 1.3804000616073608,
292
+ "learning_rate": 0.0001215,
293
+ "loss": 2.9584,
294
+ "step": 165
295
+ },
296
+ {
297
+ "epoch": 17.0,
298
+ "grad_norm": 1.6562563180923462,
299
+ "learning_rate": 0.00012524999999999998,
300
+ "loss": 3.0194,
301
+ "step": 170
302
+ },
303
+ {
304
+ "epoch": 17.5,
305
+ "grad_norm": 0.653541088104248,
306
+ "learning_rate": 0.000129,
307
+ "loss": 2.9639,
308
+ "step": 175
309
+ },
310
+ {
311
+ "epoch": 17.5,
312
+ "eval_loss": 2.926556348800659,
313
+ "eval_runtime": 1.0442,
314
+ "eval_samples_per_second": 22.985,
315
+ "eval_steps_per_second": 0.958,
316
+ "eval_wer": 1.0,
317
+ "step": 175
318
+ },
319
+ {
320
+ "epoch": 18.0,
321
+ "grad_norm": 9.456038475036621,
322
+ "learning_rate": 0.00013275,
323
+ "loss": 2.8944,
324
+ "step": 180
325
+ },
326
+ {
327
+ "epoch": 18.5,
328
+ "grad_norm": 0.3759576082229614,
329
+ "learning_rate": 0.00013649999999999998,
330
+ "loss": 2.9149,
331
+ "step": 185
332
+ },
333
+ {
334
+ "epoch": 19.0,
335
+ "grad_norm": 3.0567305088043213,
336
+ "learning_rate": 0.00014025,
337
+ "loss": 3.0321,
338
+ "step": 190
339
+ },
340
+ {
341
+ "epoch": 19.5,
342
+ "grad_norm": 8.436885833740234,
343
+ "learning_rate": 0.00014399999999999998,
344
+ "loss": 2.9683,
345
+ "step": 195
346
+ },
347
+ {
348
+ "epoch": 20.0,
349
+ "grad_norm": 1.9778860807418823,
350
+ "learning_rate": 0.00014774999999999999,
351
+ "loss": 2.9576,
352
+ "step": 200
353
+ },
354
+ {
355
+ "epoch": 20.0,
356
+ "eval_loss": 2.9644908905029297,
357
+ "eval_runtime": 1.0268,
358
+ "eval_samples_per_second": 23.374,
359
+ "eval_steps_per_second": 0.974,
360
+ "eval_wer": 1.0,
361
+ "step": 200
362
+ },
363
+ {
364
+ "epoch": 20.5,
365
+ "grad_norm": 0.6856608390808105,
366
+ "learning_rate": 0.00014976923076923077,
367
+ "loss": 2.9374,
368
+ "step": 205
369
+ },
370
+ {
371
+ "epoch": 21.0,
372
+ "grad_norm": 1.157402515411377,
373
+ "learning_rate": 0.00014919230769230767,
374
+ "loss": 2.875,
375
+ "step": 210
376
+ },
377
+ {
378
+ "epoch": 21.5,
379
+ "grad_norm": 0.42920613288879395,
380
+ "learning_rate": 0.0001486153846153846,
381
+ "loss": 2.9796,
382
+ "step": 215
383
+ },
384
+ {
385
+ "epoch": 22.0,
386
+ "grad_norm": 4.603660583496094,
387
+ "learning_rate": 0.00014803846153846152,
388
+ "loss": 2.9233,
389
+ "step": 220
390
+ },
391
+ {
392
+ "epoch": 22.5,
393
+ "grad_norm": 1.3661619424819946,
394
+ "learning_rate": 0.00014746153846153845,
395
+ "loss": 2.8708,
396
+ "step": 225
397
+ },
398
+ {
399
+ "epoch": 22.5,
400
+ "eval_loss": 2.9085776805877686,
401
+ "eval_runtime": 1.0387,
402
+ "eval_samples_per_second": 23.106,
403
+ "eval_steps_per_second": 0.963,
404
+ "eval_wer": 1.0,
405
+ "step": 225
406
+ },
407
+ {
408
+ "epoch": 23.0,
409
+ "grad_norm": 0.7445681691169739,
410
+ "learning_rate": 0.00014688461538461537,
411
+ "loss": 2.933,
412
+ "step": 230
413
+ },
414
+ {
415
+ "epoch": 23.5,
416
+ "grad_norm": 1.2040903568267822,
417
+ "learning_rate": 0.0001463076923076923,
418
+ "loss": 2.9217,
419
+ "step": 235
420
+ },
421
+ {
422
+ "epoch": 24.0,
423
+ "grad_norm": 4.538419246673584,
424
+ "learning_rate": 0.00014573076923076923,
425
+ "loss": 2.9043,
426
+ "step": 240
427
+ },
428
+ {
429
+ "epoch": 24.5,
430
+ "grad_norm": 0.36169031262397766,
431
+ "learning_rate": 0.00014515384615384615,
432
+ "loss": 2.8554,
433
+ "step": 245
434
+ },
435
+ {
436
+ "epoch": 25.0,
437
+ "grad_norm": 1.2133870124816895,
438
+ "learning_rate": 0.00014457692307692305,
439
+ "loss": 2.943,
440
+ "step": 250
441
+ },
442
+ {
443
+ "epoch": 25.0,
444
+ "eval_loss": 2.900446653366089,
445
+ "eval_runtime": 1.0279,
446
+ "eval_samples_per_second": 23.348,
447
+ "eval_steps_per_second": 0.973,
448
+ "eval_wer": 1.0,
449
+ "step": 250
450
+ },
451
+ {
452
+ "epoch": 25.5,
453
+ "grad_norm": 1.1455128192901611,
454
+ "learning_rate": 0.00014399999999999998,
455
+ "loss": 2.8775,
456
+ "step": 255
457
+ },
458
+ {
459
+ "epoch": 26.0,
460
+ "grad_norm": 3.7162177562713623,
461
+ "learning_rate": 0.0001434230769230769,
462
+ "loss": 2.9401,
463
+ "step": 260
464
+ },
465
+ {
466
+ "epoch": 26.5,
467
+ "grad_norm": 4.095553398132324,
468
+ "learning_rate": 0.00014284615384615383,
469
+ "loss": 2.9053,
470
+ "step": 265
471
+ },
472
+ {
473
+ "epoch": 27.0,
474
+ "grad_norm": 2.0302634239196777,
475
+ "learning_rate": 0.00014226923076923075,
476
+ "loss": 2.975,
477
+ "step": 270
478
+ },
479
+ {
480
+ "epoch": 27.5,
481
+ "grad_norm": 3.123234510421753,
482
+ "learning_rate": 0.00014169230769230768,
483
+ "loss": 2.9225,
484
+ "step": 275
485
+ },
486
+ {
487
+ "epoch": 27.5,
488
+ "eval_loss": 2.9469966888427734,
489
+ "eval_runtime": 1.023,
490
+ "eval_samples_per_second": 23.46,
491
+ "eval_steps_per_second": 0.978,
492
+ "eval_wer": 1.0,
493
+ "step": 275
494
+ },
495
+ {
496
+ "epoch": 28.0,
497
+ "grad_norm": 0.886202871799469,
498
+ "learning_rate": 0.0001411153846153846,
499
+ "loss": 2.8783,
500
+ "step": 280
501
+ },
502
+ {
503
+ "epoch": 28.5,
504
+ "grad_norm": 0.48980531096458435,
505
+ "learning_rate": 0.00014053846153846153,
506
+ "loss": 2.8977,
507
+ "step": 285
508
+ },
509
+ {
510
+ "epoch": 29.0,
511
+ "grad_norm": 2.4499869346618652,
512
+ "learning_rate": 0.00013996153846153843,
513
+ "loss": 2.9178,
514
+ "step": 290
515
+ },
516
+ {
517
+ "epoch": 29.5,
518
+ "grad_norm": 3.5155863761901855,
519
+ "learning_rate": 0.00013938461538461536,
520
+ "loss": 2.8955,
521
+ "step": 295
522
+ },
523
+ {
524
+ "epoch": 30.0,
525
+ "grad_norm": 3.8240697383880615,
526
+ "learning_rate": 0.00013880769230769228,
527
+ "loss": 2.9897,
528
+ "step": 300
529
+ },
530
+ {
531
+ "epoch": 30.0,
532
+ "eval_loss": 2.9530646800994873,
533
+ "eval_runtime": 1.0334,
534
+ "eval_samples_per_second": 23.224,
535
+ "eval_steps_per_second": 0.968,
536
+ "eval_wer": 1.0,
537
+ "step": 300
538
+ },
539
+ {
540
+ "epoch": 30.5,
541
+ "grad_norm": 1.4881560802459717,
542
+ "learning_rate": 0.0001382307692307692,
543
+ "loss": 2.8732,
544
+ "step": 305
545
+ },
546
+ {
547
+ "epoch": 31.0,
548
+ "grad_norm": 5.950206756591797,
549
+ "learning_rate": 0.00013765384615384613,
550
+ "loss": 2.9688,
551
+ "step": 310
552
+ },
553
+ {
554
+ "epoch": 31.5,
555
+ "grad_norm": 0.8825148940086365,
556
+ "learning_rate": 0.00013707692307692306,
557
+ "loss": 2.869,
558
+ "step": 315
559
+ },
560
+ {
561
+ "epoch": 32.0,
562
+ "grad_norm": 1.6368755102157593,
563
+ "learning_rate": 0.00013649999999999998,
564
+ "loss": 2.8843,
565
+ "step": 320
566
+ },
567
+ {
568
+ "epoch": 32.5,
569
+ "grad_norm": 1.556404709815979,
570
+ "learning_rate": 0.0001359230769230769,
571
+ "loss": 2.8514,
572
+ "step": 325
573
+ },
574
+ {
575
+ "epoch": 32.5,
576
+ "eval_loss": 2.911478042602539,
577
+ "eval_runtime": 1.021,
578
+ "eval_samples_per_second": 23.505,
579
+ "eval_steps_per_second": 0.979,
580
+ "eval_wer": 1.0,
581
+ "step": 325
582
+ },
583
+ {
584
+ "epoch": 33.0,
585
+ "grad_norm": 0.6802976131439209,
586
+ "learning_rate": 0.00013534615384615384,
587
+ "loss": 2.8542,
588
+ "step": 330
589
+ },
590
+ {
591
+ "epoch": 33.5,
592
+ "grad_norm": 0.5035978555679321,
593
+ "learning_rate": 0.00013476923076923076,
594
+ "loss": 2.9064,
595
+ "step": 335
596
+ },
597
+ {
598
+ "epoch": 34.0,
599
+ "grad_norm": 1.6443456411361694,
600
+ "learning_rate": 0.0001341923076923077,
601
+ "loss": 2.8498,
602
+ "step": 340
603
+ },
604
+ {
605
+ "epoch": 34.5,
606
+ "grad_norm": 0.6262179017066956,
607
+ "learning_rate": 0.0001336153846153846,
608
+ "loss": 2.8368,
609
+ "step": 345
610
+ },
611
+ {
612
+ "epoch": 35.0,
613
+ "grad_norm": 0.8266497850418091,
614
+ "learning_rate": 0.00013303846153846154,
615
+ "loss": 2.8681,
616
+ "step": 350
617
+ },
618
+ {
619
+ "epoch": 35.0,
620
+ "eval_loss": 2.9094789028167725,
621
+ "eval_runtime": 1.0369,
622
+ "eval_samples_per_second": 23.145,
623
+ "eval_steps_per_second": 0.964,
624
+ "eval_wer": 1.0,
625
+ "step": 350
626
+ },
627
+ {
628
+ "epoch": 35.5,
629
+ "grad_norm": 0.33677324652671814,
630
+ "learning_rate": 0.00013246153846153846,
631
+ "loss": 2.8163,
632
+ "step": 355
633
+ },
634
+ {
635
+ "epoch": 36.0,
636
+ "grad_norm": 0.6221341490745544,
637
+ "learning_rate": 0.0001318846153846154,
638
+ "loss": 2.8746,
639
+ "step": 360
640
+ },
641
+ {
642
+ "epoch": 36.5,
643
+ "grad_norm": 0.5015878677368164,
644
+ "learning_rate": 0.00013130769230769232,
645
+ "loss": 2.8477,
646
+ "step": 365
647
+ },
648
+ {
649
+ "epoch": 37.0,
650
+ "grad_norm": 0.6005992889404297,
651
+ "learning_rate": 0.00013073076923076921,
652
+ "loss": 2.838,
653
+ "step": 370
654
+ },
655
+ {
656
+ "epoch": 37.5,
657
+ "grad_norm": 0.4997330605983734,
658
+ "learning_rate": 0.00013015384615384614,
659
+ "loss": 2.8431,
660
+ "step": 375
661
+ },
662
+ {
663
+ "epoch": 37.5,
664
+ "eval_loss": 2.90104603767395,
665
+ "eval_runtime": 1.017,
666
+ "eval_samples_per_second": 23.599,
667
+ "eval_steps_per_second": 0.983,
668
+ "eval_wer": 1.0,
669
+ "step": 375
670
+ },
671
+ {
672
+ "epoch": 38.0,
673
+ "grad_norm": 1.342210292816162,
674
+ "learning_rate": 0.00012957692307692307,
675
+ "loss": 2.8672,
676
+ "step": 380
677
+ },
678
+ {
679
+ "epoch": 38.5,
680
+ "grad_norm": 1.2935914993286133,
681
+ "learning_rate": 0.000129,
682
+ "loss": 2.848,
683
+ "step": 385
684
+ },
685
+ {
686
+ "epoch": 39.0,
687
+ "grad_norm": 0.41487249732017517,
688
+ "learning_rate": 0.00012842307692307692,
689
+ "loss": 2.8244,
690
+ "step": 390
691
+ },
692
+ {
693
+ "epoch": 39.5,
694
+ "grad_norm": 1.1988450288772583,
695
+ "learning_rate": 0.00012784615384615384,
696
+ "loss": 2.8328,
697
+ "step": 395
698
+ },
699
+ {
700
+ "epoch": 40.0,
701
+ "grad_norm": 2.2671468257904053,
702
+ "learning_rate": 0.00012726923076923077,
703
+ "loss": 2.8843,
704
+ "step": 400
705
+ },
706
+ {
707
+ "epoch": 40.0,
708
+ "eval_loss": 2.9156665802001953,
709
+ "eval_runtime": 1.0579,
710
+ "eval_samples_per_second": 22.686,
711
+ "eval_steps_per_second": 0.945,
712
+ "eval_wer": 1.0,
713
+ "step": 400
714
+ },
715
+ {
716
+ "epoch": 40.5,
717
+ "grad_norm": 1.003772497177124,
718
+ "learning_rate": 0.0001266923076923077,
719
+ "loss": 2.8312,
720
+ "step": 405
721
+ },
722
+ {
723
+ "epoch": 41.0,
724
+ "grad_norm": 1.2402571439743042,
725
+ "learning_rate": 0.00012611538461538462,
726
+ "loss": 2.8291,
727
+ "step": 410
728
+ },
729
+ {
730
+ "epoch": 41.5,
731
+ "grad_norm": 0.29388442635536194,
732
+ "learning_rate": 0.00012553846153846152,
733
+ "loss": 2.8275,
734
+ "step": 415
735
+ },
736
+ {
737
+ "epoch": 42.0,
738
+ "grad_norm": 0.9477460980415344,
739
+ "learning_rate": 0.00012496153846153844,
740
+ "loss": 2.8384,
741
+ "step": 420
742
+ },
743
+ {
744
+ "epoch": 42.5,
745
+ "grad_norm": 1.4519686698913574,
746
+ "learning_rate": 0.00012438461538461537,
747
+ "loss": 2.9357,
748
+ "step": 425
749
+ },
750
+ {
751
+ "epoch": 42.5,
752
+ "eval_loss": 2.902658462524414,
753
+ "eval_runtime": 1.0363,
754
+ "eval_samples_per_second": 23.158,
755
+ "eval_steps_per_second": 0.965,
756
+ "eval_wer": 1.0,
757
+ "step": 425
758
+ },
759
+ {
760
+ "epoch": 43.0,
761
+ "grad_norm": 0.4391646087169647,
762
+ "learning_rate": 0.0001238076923076923,
763
+ "loss": 2.8395,
764
+ "step": 430
765
+ },
766
+ {
767
+ "epoch": 43.5,
768
+ "grad_norm": 2.1784377098083496,
769
+ "learning_rate": 0.00012323076923076922,
770
+ "loss": 2.8599,
771
+ "step": 435
772
+ },
773
+ {
774
+ "epoch": 44.0,
775
+ "grad_norm": 0.9729048609733582,
776
+ "learning_rate": 0.00012265384615384615,
777
+ "loss": 2.8489,
778
+ "step": 440
779
+ },
780
+ {
781
+ "epoch": 44.5,
782
+ "grad_norm": 0.5243009328842163,
783
+ "learning_rate": 0.00012207692307692307,
784
+ "loss": 2.83,
785
+ "step": 445
786
+ },
787
+ {
788
+ "epoch": 45.0,
789
+ "grad_norm": 0.7081323862075806,
790
+ "learning_rate": 0.0001215,
791
+ "loss": 2.8236,
792
+ "step": 450
793
+ },
794
+ {
795
+ "epoch": 45.0,
796
+ "eval_loss": 2.901521682739258,
797
+ "eval_runtime": 1.0318,
798
+ "eval_samples_per_second": 23.261,
799
+ "eval_steps_per_second": 0.969,
800
+ "eval_wer": 1.0,
801
+ "step": 450
802
+ },
803
+ {
804
+ "epoch": 45.5,
805
+ "grad_norm": 0.3105088770389557,
806
+ "learning_rate": 0.00012092307692307691,
807
+ "loss": 2.8189,
808
+ "step": 455
809
+ },
810
+ {
811
+ "epoch": 46.0,
812
+ "grad_norm": 0.6120209097862244,
813
+ "learning_rate": 0.00012034615384615384,
814
+ "loss": 2.8075,
815
+ "step": 460
816
+ },
817
+ {
818
+ "epoch": 46.5,
819
+ "grad_norm": 0.996507465839386,
820
+ "learning_rate": 0.00011976923076923076,
821
+ "loss": 2.8318,
822
+ "step": 465
823
+ },
824
+ {
825
+ "epoch": 47.0,
826
+ "grad_norm": 7.280458927154541,
827
+ "learning_rate": 0.00011919230769230767,
828
+ "loss": 2.871,
829
+ "step": 470
830
+ },
831
+ {
832
+ "epoch": 47.5,
833
+ "grad_norm": 0.8332684636116028,
834
+ "learning_rate": 0.0001186153846153846,
835
+ "loss": 2.8376,
836
+ "step": 475
837
+ },
838
+ {
839
+ "epoch": 47.5,
840
+ "eval_loss": 2.900068998336792,
841
+ "eval_runtime": 1.0322,
842
+ "eval_samples_per_second": 23.251,
843
+ "eval_steps_per_second": 0.969,
844
+ "eval_wer": 1.0,
845
+ "step": 475
846
+ },
847
+ {
848
+ "epoch": 48.0,
849
+ "grad_norm": 0.6555355191230774,
850
+ "learning_rate": 0.00011803846153846153,
851
+ "loss": 2.7954,
852
+ "step": 480
853
+ },
854
+ {
855
+ "epoch": 48.5,
856
+ "grad_norm": 1.127866268157959,
857
+ "learning_rate": 0.00011746153846153845,
858
+ "loss": 2.8494,
859
+ "step": 485
860
+ },
861
+ {
862
+ "epoch": 49.0,
863
+ "grad_norm": 0.7961714863777161,
864
+ "learning_rate": 0.00011688461538461538,
865
+ "loss": 2.8446,
866
+ "step": 490
867
+ },
868
+ {
869
+ "epoch": 49.5,
870
+ "grad_norm": 1.9832100868225098,
871
+ "learning_rate": 0.00011630769230769229,
872
+ "loss": 2.8353,
873
+ "step": 495
874
+ },
875
+ {
876
+ "epoch": 50.0,
877
+ "grad_norm": 0.9229313731193542,
878
+ "learning_rate": 0.00011573076923076922,
879
+ "loss": 2.8148,
880
+ "step": 500
881
+ },
882
+ {
883
+ "epoch": 50.0,
884
+ "eval_loss": 2.8878333568573,
885
+ "eval_runtime": 1.0279,
886
+ "eval_samples_per_second": 23.349,
887
+ "eval_steps_per_second": 0.973,
888
+ "eval_wer": 1.0,
889
+ "step": 500
890
+ },
891
+ {
892
+ "epoch": 50.5,
893
+ "grad_norm": 2.113555669784546,
894
+ "learning_rate": 0.00011515384615384614,
895
+ "loss": 2.816,
896
+ "step": 505
897
+ },
898
+ {
899
+ "epoch": 51.0,
900
+ "grad_norm": 2.10042667388916,
901
+ "learning_rate": 0.00011457692307692307,
902
+ "loss": 2.8544,
903
+ "step": 510
904
+ },
905
+ {
906
+ "epoch": 51.5,
907
+ "grad_norm": 0.48272839188575745,
908
+ "learning_rate": 0.00011399999999999999,
909
+ "loss": 2.8207,
910
+ "step": 515
911
+ },
912
+ {
913
+ "epoch": 52.0,
914
+ "grad_norm": 0.9009172320365906,
915
+ "learning_rate": 0.00011342307692307692,
916
+ "loss": 2.8008,
917
+ "step": 520
918
+ },
919
+ {
920
+ "epoch": 52.5,
921
+ "grad_norm": 1.0341640710830688,
922
+ "learning_rate": 0.00011284615384615384,
923
+ "loss": 2.8057,
924
+ "step": 525
925
+ },
926
+ {
927
+ "epoch": 52.5,
928
+ "eval_loss": 2.8624706268310547,
929
+ "eval_runtime": 1.037,
930
+ "eval_samples_per_second": 23.144,
931
+ "eval_steps_per_second": 0.964,
932
+ "eval_wer": 1.0,
933
+ "step": 525
934
+ },
935
+ {
936
+ "epoch": 53.0,
937
+ "grad_norm": 1.3395497798919678,
938
+ "learning_rate": 0.00011226923076923077,
939
+ "loss": 2.7866,
940
+ "step": 530
941
+ },
942
+ {
943
+ "epoch": 53.5,
944
+ "grad_norm": 0.3619355261325836,
945
+ "learning_rate": 0.00011169230769230768,
946
+ "loss": 2.7779,
947
+ "step": 535
948
+ },
949
+ {
950
+ "epoch": 54.0,
951
+ "grad_norm": 1.4029289484024048,
952
+ "learning_rate": 0.0001111153846153846,
953
+ "loss": 2.789,
954
+ "step": 540
955
+ },
956
+ {
957
+ "epoch": 54.5,
958
+ "grad_norm": 0.29736635088920593,
959
+ "learning_rate": 0.00011053846153846152,
960
+ "loss": 2.7452,
961
+ "step": 545
962
+ },
963
+ {
964
+ "epoch": 55.0,
965
+ "grad_norm": 1.7570823431015015,
966
+ "learning_rate": 0.00010996153846153845,
967
+ "loss": 2.7268,
968
+ "step": 550
969
+ },
970
+ {
971
+ "epoch": 55.0,
972
+ "eval_loss": 2.819674253463745,
973
+ "eval_runtime": 1.0343,
974
+ "eval_samples_per_second": 23.205,
975
+ "eval_steps_per_second": 0.967,
976
+ "eval_wer": 1.0,
977
+ "step": 550
978
+ },
979
+ {
980
+ "epoch": 55.5,
981
+ "grad_norm": 0.3762887418270111,
982
+ "learning_rate": 0.00010938461538461537,
983
+ "loss": 2.7224,
984
+ "step": 555
985
+ },
986
+ {
987
+ "epoch": 56.0,
988
+ "grad_norm": 1.0835281610488892,
989
+ "learning_rate": 0.0001088076923076923,
990
+ "loss": 2.7022,
991
+ "step": 560
992
+ },
993
+ {
994
+ "epoch": 56.5,
995
+ "grad_norm": 1.721433401107788,
996
+ "learning_rate": 0.00010823076923076922,
997
+ "loss": 2.6927,
998
+ "step": 565
999
+ },
1000
+ {
1001
+ "epoch": 57.0,
1002
+ "grad_norm": 2.9872403144836426,
1003
+ "learning_rate": 0.00010765384615384615,
1004
+ "loss": 2.7924,
1005
+ "step": 570
1006
+ },
1007
+ {
1008
+ "epoch": 57.5,
1009
+ "grad_norm": 0.5493649840354919,
1010
+ "learning_rate": 0.00010707692307692306,
1011
+ "loss": 2.6252,
1012
+ "step": 575
1013
+ },
1014
+ {
1015
+ "epoch": 57.5,
1016
+ "eval_loss": 2.807591676712036,
1017
+ "eval_runtime": 1.0323,
1018
+ "eval_samples_per_second": 23.25,
1019
+ "eval_steps_per_second": 0.969,
1020
+ "eval_wer": 1.0,
1021
+ "step": 575
1022
+ },
1023
+ {
1024
+ "epoch": 58.0,
1025
+ "grad_norm": 1.2353851795196533,
1026
+ "learning_rate": 0.00010649999999999999,
1027
+ "loss": 2.6458,
1028
+ "step": 580
1029
+ },
1030
+ {
1031
+ "epoch": 58.5,
1032
+ "grad_norm": 0.7240511775016785,
1033
+ "learning_rate": 0.00010592307692307691,
1034
+ "loss": 2.5911,
1035
+ "step": 585
1036
+ },
1037
+ {
1038
+ "epoch": 59.0,
1039
+ "grad_norm": 0.9982340335845947,
1040
+ "learning_rate": 0.00010534615384615384,
1041
+ "loss": 2.6489,
1042
+ "step": 590
1043
+ },
1044
+ {
1045
+ "epoch": 59.5,
1046
+ "grad_norm": 0.6784680485725403,
1047
+ "learning_rate": 0.00010476923076923076,
1048
+ "loss": 2.5169,
1049
+ "step": 595
1050
+ },
1051
+ {
1052
+ "epoch": 60.0,
1053
+ "grad_norm": 1.9756778478622437,
1054
+ "learning_rate": 0.00010419230769230769,
1055
+ "loss": 2.5511,
1056
+ "step": 600
1057
+ },
1058
+ {
1059
+ "epoch": 60.0,
1060
+ "eval_loss": 2.615316152572632,
1061
+ "eval_runtime": 1.0274,
1062
+ "eval_samples_per_second": 23.361,
1063
+ "eval_steps_per_second": 0.973,
1064
+ "eval_wer": 1.0056338028169014,
1065
+ "step": 600
1066
+ },
1067
+ {
1068
+ "epoch": 60.5,
1069
+ "grad_norm": 1.3284317255020142,
1070
+ "learning_rate": 0.00010361538461538462,
1071
+ "loss": 2.4731,
1072
+ "step": 605
1073
+ },
1074
+ {
1075
+ "epoch": 61.0,
1076
+ "grad_norm": 1.3110464811325073,
1077
+ "learning_rate": 0.00010303846153846154,
1078
+ "loss": 2.4817,
1079
+ "step": 610
1080
+ },
1081
+ {
1082
+ "epoch": 61.5,
1083
+ "grad_norm": 1.003812551498413,
1084
+ "learning_rate": 0.00010246153846153844,
1085
+ "loss": 2.3945,
1086
+ "step": 615
1087
+ },
1088
+ {
1089
+ "epoch": 62.0,
1090
+ "grad_norm": 1.148573398590088,
1091
+ "learning_rate": 0.00010188461538461537,
1092
+ "loss": 2.399,
1093
+ "step": 620
1094
+ },
1095
+ {
1096
+ "epoch": 62.5,
1097
+ "grad_norm": 0.5585479736328125,
1098
+ "learning_rate": 0.00010130769230769229,
1099
+ "loss": 2.323,
1100
+ "step": 625
1101
+ },
1102
+ {
1103
+ "epoch": 62.5,
1104
+ "eval_loss": 2.4444546699523926,
1105
+ "eval_runtime": 1.0272,
1106
+ "eval_samples_per_second": 23.365,
1107
+ "eval_steps_per_second": 0.974,
1108
+ "eval_wer": 1.0169014084507042,
1109
+ "step": 625
1110
+ },
1111
+ {
1112
+ "epoch": 63.0,
1113
+ "grad_norm": 2.2142958641052246,
1114
+ "learning_rate": 0.00010073076923076922,
1115
+ "loss": 2.2927,
1116
+ "step": 630
1117
+ },
1118
+ {
1119
+ "epoch": 63.5,
1120
+ "grad_norm": 1.0168890953063965,
1121
+ "learning_rate": 0.00010015384615384614,
1122
+ "loss": 2.2108,
1123
+ "step": 635
1124
+ },
1125
+ {
1126
+ "epoch": 64.0,
1127
+ "grad_norm": 1.312639832496643,
1128
+ "learning_rate": 9.957692307692307e-05,
1129
+ "loss": 2.1866,
1130
+ "step": 640
1131
+ },
1132
+ {
1133
+ "epoch": 64.5,
1134
+ "grad_norm": 0.5699294209480286,
1135
+ "learning_rate": 9.9e-05,
1136
+ "loss": 2.1114,
1137
+ "step": 645
1138
+ },
1139
+ {
1140
+ "epoch": 65.0,
1141
+ "grad_norm": 1.4273818731307983,
1142
+ "learning_rate": 9.842307692307692e-05,
1143
+ "loss": 2.1119,
1144
+ "step": 650
1145
+ },
1146
+ {
1147
+ "epoch": 65.0,
1148
+ "eval_loss": 2.2476181983947754,
1149
+ "eval_runtime": 1.0519,
1150
+ "eval_samples_per_second": 22.815,
1151
+ "eval_steps_per_second": 0.951,
1152
+ "eval_wer": 1.1183098591549296,
1153
+ "step": 650
1154
+ },
1155
+ {
1156
+ "epoch": 65.5,
1157
+ "grad_norm": 0.5214980244636536,
1158
+ "learning_rate": 9.784615384615383e-05,
1159
+ "loss": 2.0414,
1160
+ "step": 655
1161
+ },
1162
+ {
1163
+ "epoch": 66.0,
1164
+ "grad_norm": 2.480297803878784,
1165
+ "learning_rate": 9.726923076923076e-05,
1166
+ "loss": 2.0609,
1167
+ "step": 660
1168
+ },
1169
+ {
1170
+ "epoch": 66.5,
1171
+ "grad_norm": 3.5270726680755615,
1172
+ "learning_rate": 9.669230769230768e-05,
1173
+ "loss": 1.9963,
1174
+ "step": 665
1175
+ },
1176
+ {
1177
+ "epoch": 67.0,
1178
+ "grad_norm": 14.827882766723633,
1179
+ "learning_rate": 9.611538461538461e-05,
1180
+ "loss": 1.9333,
1181
+ "step": 670
1182
+ },
1183
+ {
1184
+ "epoch": 67.5,
1185
+ "grad_norm": 1.1005451679229736,
1186
+ "learning_rate": 9.553846153846153e-05,
1187
+ "loss": 1.8514,
1188
+ "step": 675
1189
+ },
1190
+ {
1191
+ "epoch": 67.5,
1192
+ "eval_loss": 2.173093318939209,
1193
+ "eval_runtime": 1.033,
1194
+ "eval_samples_per_second": 23.233,
1195
+ "eval_steps_per_second": 0.968,
1196
+ "eval_wer": 1.095774647887324,
1197
+ "step": 675
1198
+ },
1199
+ {
1200
+ "epoch": 68.0,
1201
+ "grad_norm": 1.5897767543792725,
1202
+ "learning_rate": 9.496153846153846e-05,
1203
+ "loss": 1.9986,
1204
+ "step": 680
1205
+ },
1206
+ {
1207
+ "epoch": 68.5,
1208
+ "grad_norm": 0.8863438963890076,
1209
+ "learning_rate": 9.438461538461539e-05,
1210
+ "loss": 1.8067,
1211
+ "step": 685
1212
+ },
1213
+ {
1214
+ "epoch": 69.0,
1215
+ "grad_norm": 1.305874228477478,
1216
+ "learning_rate": 9.380769230769231e-05,
1217
+ "loss": 1.7975,
1218
+ "step": 690
1219
+ },
1220
+ {
1221
+ "epoch": 69.5,
1222
+ "grad_norm": 0.6541560292243958,
1223
+ "learning_rate": 9.323076923076921e-05,
1224
+ "loss": 1.7655,
1225
+ "step": 695
1226
+ },
1227
+ {
1228
+ "epoch": 70.0,
1229
+ "grad_norm": 1.056104063987732,
1230
+ "learning_rate": 9.265384615384614e-05,
1231
+ "loss": 1.7094,
1232
+ "step": 700
1233
+ },
1234
+ {
1235
+ "epoch": 70.0,
1236
+ "eval_loss": 2.0642001628875732,
1237
+ "eval_runtime": 1.0377,
1238
+ "eval_samples_per_second": 23.129,
1239
+ "eval_steps_per_second": 0.964,
1240
+ "eval_wer": 1.0309859154929577,
1241
+ "step": 700
1242
+ },
1243
+ {
1244
+ "epoch": 70.5,
1245
+ "grad_norm": 0.5228053331375122,
1246
+ "learning_rate": 9.207692307692306e-05,
1247
+ "loss": 1.6764,
1248
+ "step": 705
1249
+ },
1250
+ {
1251
+ "epoch": 71.0,
1252
+ "grad_norm": 6.9655256271362305,
1253
+ "learning_rate": 9.149999999999999e-05,
1254
+ "loss": 1.7414,
1255
+ "step": 710
1256
+ },
1257
+ {
1258
+ "epoch": 71.5,
1259
+ "grad_norm": 0.6360809206962585,
1260
+ "learning_rate": 9.092307692307691e-05,
1261
+ "loss": 1.6232,
1262
+ "step": 715
1263
+ },
1264
+ {
1265
+ "epoch": 72.0,
1266
+ "grad_norm": 1.2141180038452148,
1267
+ "learning_rate": 9.034615384615384e-05,
1268
+ "loss": 1.6497,
1269
+ "step": 720
1270
+ },
1271
+ {
1272
+ "epoch": 72.5,
1273
+ "grad_norm": 0.874902606010437,
1274
+ "learning_rate": 8.976923076923077e-05,
1275
+ "loss": 1.6069,
1276
+ "step": 725
1277
+ },
1278
+ {
1279
+ "epoch": 72.5,
1280
+ "eval_loss": 2.0792412757873535,
1281
+ "eval_runtime": 1.0243,
1282
+ "eval_samples_per_second": 23.431,
1283
+ "eval_steps_per_second": 0.976,
1284
+ "eval_wer": 1.0788732394366196,
1285
+ "step": 725
1286
+ },
1287
+ {
1288
+ "epoch": 73.0,
1289
+ "grad_norm": 0.9335172176361084,
1290
+ "learning_rate": 8.919230769230769e-05,
1291
+ "loss": 1.4947,
1292
+ "step": 730
1293
+ },
1294
+ {
1295
+ "epoch": 73.5,
1296
+ "grad_norm": 1.299177885055542,
1297
+ "learning_rate": 8.861538461538462e-05,
1298
+ "loss": 1.5304,
1299
+ "step": 735
1300
+ },
1301
+ {
1302
+ "epoch": 74.0,
1303
+ "grad_norm": 1.6317135095596313,
1304
+ "learning_rate": 8.803846153846153e-05,
1305
+ "loss": 1.5218,
1306
+ "step": 740
1307
+ },
1308
+ {
1309
+ "epoch": 74.5,
1310
+ "grad_norm": 0.8083561062812805,
1311
+ "learning_rate": 8.746153846153845e-05,
1312
+ "loss": 1.5259,
1313
+ "step": 745
1314
+ },
1315
+ {
1316
+ "epoch": 75.0,
1317
+ "grad_norm": 1.805677890777588,
1318
+ "learning_rate": 8.688461538461538e-05,
1319
+ "loss": 1.4663,
1320
+ "step": 750
1321
+ },
1322
+ {
1323
+ "epoch": 75.0,
1324
+ "eval_loss": 2.0323963165283203,
1325
+ "eval_runtime": 1.0407,
1326
+ "eval_samples_per_second": 23.062,
1327
+ "eval_steps_per_second": 0.961,
1328
+ "eval_wer": 1.036619718309859,
1329
+ "step": 750
1330
+ },
1331
+ {
1332
+ "epoch": 75.5,
1333
+ "grad_norm": 0.8463692665100098,
1334
+ "learning_rate": 8.63076923076923e-05,
1335
+ "loss": 1.4244,
1336
+ "step": 755
1337
+ },
1338
+ {
1339
+ "epoch": 76.0,
1340
+ "grad_norm": 2.091686248779297,
1341
+ "learning_rate": 8.573076923076923e-05,
1342
+ "loss": 1.3791,
1343
+ "step": 760
1344
+ },
1345
+ {
1346
+ "epoch": 76.5,
1347
+ "grad_norm": 0.7040625810623169,
1348
+ "learning_rate": 8.515384615384614e-05,
1349
+ "loss": 1.3495,
1350
+ "step": 765
1351
+ },
1352
+ {
1353
+ "epoch": 77.0,
1354
+ "grad_norm": 1.7725024223327637,
1355
+ "learning_rate": 8.457692307692307e-05,
1356
+ "loss": 1.3497,
1357
+ "step": 770
1358
+ },
1359
+ {
1360
+ "epoch": 77.5,
1361
+ "grad_norm": 0.808942437171936,
1362
+ "learning_rate": 8.4e-05,
1363
+ "loss": 1.288,
1364
+ "step": 775
1365
+ },
1366
+ {
1367
+ "epoch": 77.5,
1368
+ "eval_loss": 2.0642640590667725,
1369
+ "eval_runtime": 1.0443,
1370
+ "eval_samples_per_second": 22.982,
1371
+ "eval_steps_per_second": 0.958,
1372
+ "eval_wer": 1.0929577464788733,
1373
+ "step": 775
1374
+ },
1375
+ {
1376
+ "epoch": 78.0,
1377
+ "grad_norm": 3.843997001647949,
1378
+ "learning_rate": 8.342307692307691e-05,
1379
+ "loss": 1.2597,
1380
+ "step": 780
1381
+ },
1382
+ {
1383
+ "epoch": 78.5,
1384
+ "grad_norm": 0.9082187414169312,
1385
+ "learning_rate": 8.284615384615383e-05,
1386
+ "loss": 1.2702,
1387
+ "step": 785
1388
+ },
1389
+ {
1390
+ "epoch": 79.0,
1391
+ "grad_norm": 1.4159339666366577,
1392
+ "learning_rate": 8.226923076923076e-05,
1393
+ "loss": 1.2833,
1394
+ "step": 790
1395
+ },
1396
+ {
1397
+ "epoch": 79.5,
1398
+ "grad_norm": 1.0848701000213623,
1399
+ "learning_rate": 8.169230769230768e-05,
1400
+ "loss": 1.2117,
1401
+ "step": 795
1402
+ },
1403
+ {
1404
+ "epoch": 80.0,
1405
+ "grad_norm": 2.275663137435913,
1406
+ "learning_rate": 8.111538461538461e-05,
1407
+ "loss": 1.262,
1408
+ "step": 800
1409
+ },
1410
+ {
1411
+ "epoch": 80.0,
1412
+ "eval_loss": 2.084003210067749,
1413
+ "eval_runtime": 1.0408,
1414
+ "eval_samples_per_second": 23.059,
1415
+ "eval_steps_per_second": 0.961,
1416
+ "eval_wer": 1.076056338028169,
1417
+ "step": 800
1418
+ },
1419
+ {
1420
+ "epoch": 80.5,
1421
+ "grad_norm": 0.9842613339424133,
1422
+ "learning_rate": 8.053846153846154e-05,
1423
+ "loss": 1.2799,
1424
+ "step": 805
1425
+ },
1426
+ {
1427
+ "epoch": 81.0,
1428
+ "grad_norm": 20.336593627929688,
1429
+ "learning_rate": 7.996153846153846e-05,
1430
+ "loss": 1.2903,
1431
+ "step": 810
1432
+ },
1433
+ {
1434
+ "epoch": 81.5,
1435
+ "grad_norm": 0.8291641473770142,
1436
+ "learning_rate": 7.938461538461539e-05,
1437
+ "loss": 1.1215,
1438
+ "step": 815
1439
+ },
1440
+ {
1441
+ "epoch": 82.0,
1442
+ "grad_norm": 1.6971830129623413,
1443
+ "learning_rate": 7.88076923076923e-05,
1444
+ "loss": 1.1435,
1445
+ "step": 820
1446
+ },
1447
+ {
1448
+ "epoch": 82.5,
1449
+ "grad_norm": 0.69861900806427,
1450
+ "learning_rate": 7.823076923076923e-05,
1451
+ "loss": 1.043,
1452
+ "step": 825
1453
+ },
1454
+ {
1455
+ "epoch": 82.5,
1456
+ "eval_loss": 2.149214506149292,
1457
+ "eval_runtime": 1.0296,
1458
+ "eval_samples_per_second": 23.311,
1459
+ "eval_steps_per_second": 0.971,
1460
+ "eval_wer": 1.0901408450704226,
1461
+ "step": 825
1462
+ },
1463
+ {
1464
+ "epoch": 83.0,
1465
+ "grad_norm": 1.7208884954452515,
1466
+ "learning_rate": 7.776923076923076e-05,
1467
+ "loss": 1.203,
1468
+ "step": 830
1469
+ },
1470
+ {
1471
+ "epoch": 83.5,
1472
+ "grad_norm": 0.8559800982475281,
1473
+ "learning_rate": 7.719230769230768e-05,
1474
+ "loss": 1.0825,
1475
+ "step": 835
1476
+ },
1477
+ {
1478
+ "epoch": 84.0,
1479
+ "grad_norm": 1.6605381965637207,
1480
+ "learning_rate": 7.661538461538461e-05,
1481
+ "loss": 1.1121,
1482
+ "step": 840
1483
+ },
1484
+ {
1485
+ "epoch": 84.5,
1486
+ "grad_norm": 1.077573537826538,
1487
+ "learning_rate": 7.603846153846154e-05,
1488
+ "loss": 1.0145,
1489
+ "step": 845
1490
+ },
1491
+ {
1492
+ "epoch": 85.0,
1493
+ "grad_norm": 2.7091293334960938,
1494
+ "learning_rate": 7.546153846153846e-05,
1495
+ "loss": 1.0501,
1496
+ "step": 850
1497
+ },
1498
+ {
1499
+ "epoch": 85.0,
1500
+ "eval_loss": 2.177476644515991,
1501
+ "eval_runtime": 1.0552,
1502
+ "eval_samples_per_second": 22.744,
1503
+ "eval_steps_per_second": 0.948,
1504
+ "eval_wer": 1.0591549295774647,
1505
+ "step": 850
1506
+ },
1507
+ {
1508
+ "epoch": 85.5,
1509
+ "grad_norm": 1.3562541007995605,
1510
+ "learning_rate": 7.488461538461539e-05,
1511
+ "loss": 1.1098,
1512
+ "step": 855
1513
+ },
1514
+ {
1515
+ "epoch": 86.0,
1516
+ "grad_norm": 2.6526386737823486,
1517
+ "learning_rate": 7.43076923076923e-05,
1518
+ "loss": 0.8642,
1519
+ "step": 860
1520
+ },
1521
+ {
1522
+ "epoch": 86.5,
1523
+ "grad_norm": 1.1710244417190552,
1524
+ "learning_rate": 7.373076923076922e-05,
1525
+ "loss": 0.9004,
1526
+ "step": 865
1527
+ },
1528
+ {
1529
+ "epoch": 87.0,
1530
+ "grad_norm": 2.9008164405822754,
1531
+ "learning_rate": 7.315384615384615e-05,
1532
+ "loss": 1.037,
1533
+ "step": 870
1534
+ },
1535
+ {
1536
+ "epoch": 87.5,
1537
+ "grad_norm": 0.6306678056716919,
1538
+ "learning_rate": 7.257692307692308e-05,
1539
+ "loss": 0.9726,
1540
+ "step": 875
1541
+ },
1542
+ {
1543
+ "epoch": 87.5,
1544
+ "eval_loss": 2.176731586456299,
1545
+ "eval_runtime": 1.0313,
1546
+ "eval_samples_per_second": 23.271,
1547
+ "eval_steps_per_second": 0.97,
1548
+ "eval_wer": 1.028169014084507,
1549
+ "step": 875
1550
+ },
1551
+ {
1552
+ "epoch": 88.0,
1553
+ "grad_norm": 1.6984366178512573,
1554
+ "learning_rate": 7.199999999999999e-05,
1555
+ "loss": 1.1201,
1556
+ "step": 880
1557
+ },
1558
+ {
1559
+ "epoch": 88.5,
1560
+ "grad_norm": 0.803970992565155,
1561
+ "learning_rate": 7.142307692307691e-05,
1562
+ "loss": 0.908,
1563
+ "step": 885
1564
+ },
1565
+ {
1566
+ "epoch": 89.0,
1567
+ "grad_norm": 2.103391408920288,
1568
+ "learning_rate": 7.084615384615384e-05,
1569
+ "loss": 0.8684,
1570
+ "step": 890
1571
+ },
1572
+ {
1573
+ "epoch": 89.5,
1574
+ "grad_norm": 0.9575273990631104,
1575
+ "learning_rate": 7.026923076923077e-05,
1576
+ "loss": 0.9791,
1577
+ "step": 895
1578
+ },
1579
+ {
1580
+ "epoch": 90.0,
1581
+ "grad_norm": 3.000880479812622,
1582
+ "learning_rate": 6.969230769230768e-05,
1583
+ "loss": 0.8079,
1584
+ "step": 900
1585
+ },
1586
+ {
1587
+ "epoch": 90.0,
1588
+ "eval_loss": 2.1965668201446533,
1589
+ "eval_runtime": 1.0433,
1590
+ "eval_samples_per_second": 23.003,
1591
+ "eval_steps_per_second": 0.958,
1592
+ "eval_wer": 0.9943661971830986,
1593
+ "step": 900
1594
+ },
1595
+ {
1596
+ "epoch": 90.5,
1597
+ "grad_norm": 0.6576473712921143,
1598
+ "learning_rate": 6.91153846153846e-05,
1599
+ "loss": 0.846,
1600
+ "step": 905
1601
+ },
1602
+ {
1603
+ "epoch": 91.0,
1604
+ "grad_norm": 2.2526416778564453,
1605
+ "learning_rate": 6.853846153846153e-05,
1606
+ "loss": 0.8868,
1607
+ "step": 910
1608
+ },
1609
+ {
1610
+ "epoch": 91.5,
1611
+ "grad_norm": 0.5678216814994812,
1612
+ "learning_rate": 6.796153846153845e-05,
1613
+ "loss": 0.8925,
1614
+ "step": 915
1615
+ },
1616
+ {
1617
+ "epoch": 92.0,
1618
+ "grad_norm": 2.549266815185547,
1619
+ "learning_rate": 6.738461538461538e-05,
1620
+ "loss": 1.0163,
1621
+ "step": 920
1622
+ },
1623
+ {
1624
+ "epoch": 92.5,
1625
+ "grad_norm": 0.7736966013908386,
1626
+ "learning_rate": 6.68076923076923e-05,
1627
+ "loss": 0.7198,
1628
+ "step": 925
1629
+ },
1630
+ {
1631
+ "epoch": 92.5,
1632
+ "eval_loss": 2.2433066368103027,
1633
+ "eval_runtime": 1.0523,
1634
+ "eval_samples_per_second": 22.808,
1635
+ "eval_steps_per_second": 0.95,
1636
+ "eval_wer": 1.0028169014084507,
1637
+ "step": 925
1638
+ },
1639
+ {
1640
+ "epoch": 93.0,
1641
+ "grad_norm": 3.742175817489624,
1642
+ "learning_rate": 6.623076923076923e-05,
1643
+ "loss": 1.011,
1644
+ "step": 930
1645
+ },
1646
+ {
1647
+ "epoch": 93.5,
1648
+ "grad_norm": 0.748150110244751,
1649
+ "learning_rate": 6.565384615384616e-05,
1650
+ "loss": 0.7659,
1651
+ "step": 935
1652
+ },
1653
+ {
1654
+ "epoch": 94.0,
1655
+ "grad_norm": 2.121845006942749,
1656
+ "learning_rate": 6.507692307692307e-05,
1657
+ "loss": 0.7862,
1658
+ "step": 940
1659
+ },
1660
+ {
1661
+ "epoch": 94.5,
1662
+ "grad_norm": 0.7966519594192505,
1663
+ "learning_rate": 6.45e-05,
1664
+ "loss": 0.8271,
1665
+ "step": 945
1666
+ },
1667
+ {
1668
+ "epoch": 95.0,
1669
+ "grad_norm": 1.6206731796264648,
1670
+ "learning_rate": 6.392307692307692e-05,
1671
+ "loss": 0.6312,
1672
+ "step": 950
1673
+ },
1674
+ {
1675
+ "epoch": 95.0,
1676
+ "eval_loss": 2.309884786605835,
1677
+ "eval_runtime": 1.062,
1678
+ "eval_samples_per_second": 22.599,
1679
+ "eval_steps_per_second": 0.942,
1680
+ "eval_wer": 0.9971830985915493,
1681
+ "step": 950
1682
+ },
1683
+ {
1684
+ "epoch": 95.5,
1685
+ "grad_norm": 3.240893602371216,
1686
+ "learning_rate": 6.334615384615385e-05,
1687
+ "loss": 0.723,
1688
+ "step": 955
1689
+ },
1690
+ {
1691
+ "epoch": 96.0,
1692
+ "grad_norm": 1.4926756620407104,
1693
+ "learning_rate": 6.276923076923076e-05,
1694
+ "loss": 0.7344,
1695
+ "step": 960
1696
+ },
1697
+ {
1698
+ "epoch": 96.5,
1699
+ "grad_norm": 0.8542086482048035,
1700
+ "learning_rate": 6.219230769230769e-05,
1701
+ "loss": 0.7649,
1702
+ "step": 965
1703
+ },
1704
+ {
1705
+ "epoch": 97.0,
1706
+ "grad_norm": 2.2014851570129395,
1707
+ "learning_rate": 6.161538461538461e-05,
1708
+ "loss": 0.6969,
1709
+ "step": 970
1710
+ },
1711
+ {
1712
+ "epoch": 97.5,
1713
+ "grad_norm": 0.6612327694892883,
1714
+ "learning_rate": 6.103846153846154e-05,
1715
+ "loss": 0.6336,
1716
+ "step": 975
1717
+ },
1718
+ {
1719
+ "epoch": 97.5,
1720
+ "eval_loss": 2.3546626567840576,
1721
+ "eval_runtime": 1.0484,
1722
+ "eval_samples_per_second": 22.893,
1723
+ "eval_steps_per_second": 0.954,
1724
+ "eval_wer": 0.9971830985915493,
1725
+ "step": 975
1726
+ },
1727
+ {
1728
+ "epoch": 98.0,
1729
+ "grad_norm": 2.117011547088623,
1730
+ "learning_rate": 6.0461538461538456e-05,
1731
+ "loss": 0.7537,
1732
+ "step": 980
1733
+ },
1734
+ {
1735
+ "epoch": 98.5,
1736
+ "grad_norm": 8.142460823059082,
1737
+ "learning_rate": 5.988461538461538e-05,
1738
+ "loss": 0.6593,
1739
+ "step": 985
1740
+ },
1741
+ {
1742
+ "epoch": 99.0,
1743
+ "grad_norm": 2.6468851566314697,
1744
+ "learning_rate": 5.93076923076923e-05,
1745
+ "loss": 0.8069,
1746
+ "step": 990
1747
+ },
1748
+ {
1749
+ "epoch": 99.5,
1750
+ "grad_norm": 1.392821192741394,
1751
+ "learning_rate": 5.8730769230769226e-05,
1752
+ "loss": 0.746,
1753
+ "step": 995
1754
+ },
1755
+ {
1756
+ "epoch": 100.0,
1757
+ "grad_norm": 2.0805888175964355,
1758
+ "learning_rate": 5.8153846153846145e-05,
1759
+ "loss": 0.9073,
1760
+ "step": 1000
1761
+ },
1762
+ {
1763
+ "epoch": 100.0,
1764
+ "eval_loss": 2.350856304168701,
1765
+ "eval_runtime": 1.0707,
1766
+ "eval_samples_per_second": 22.414,
1767
+ "eval_steps_per_second": 0.934,
1768
+ "eval_wer": 0.9943661971830986,
1769
+ "step": 1000
1770
+ },
1771
+ {
1772
+ "epoch": 100.5,
1773
+ "grad_norm": 18.686534881591797,
1774
+ "learning_rate": 5.757692307692307e-05,
1775
+ "loss": 0.7907,
1776
+ "step": 1005
1777
+ },
1778
+ {
1779
+ "epoch": 101.0,
1780
+ "grad_norm": 1.7688676118850708,
1781
+ "learning_rate": 5.6999999999999996e-05,
1782
+ "loss": 0.5693,
1783
+ "step": 1010
1784
+ },
1785
+ {
1786
+ "epoch": 101.5,
1787
+ "grad_norm": 0.9006216526031494,
1788
+ "learning_rate": 5.642307692307692e-05,
1789
+ "loss": 0.6408,
1790
+ "step": 1015
1791
+ },
1792
+ {
1793
+ "epoch": 102.0,
1794
+ "grad_norm": 2.382704496383667,
1795
+ "learning_rate": 5.584615384615384e-05,
1796
+ "loss": 0.7203,
1797
+ "step": 1020
1798
+ },
1799
+ {
1800
+ "epoch": 102.5,
1801
+ "grad_norm": 0.8852857351303101,
1802
+ "learning_rate": 5.526923076923076e-05,
1803
+ "loss": 0.6431,
1804
+ "step": 1025
1805
+ },
1806
+ {
1807
+ "epoch": 102.5,
1808
+ "eval_loss": 2.4202942848205566,
1809
+ "eval_runtime": 1.0529,
1810
+ "eval_samples_per_second": 22.794,
1811
+ "eval_steps_per_second": 0.95,
1812
+ "eval_wer": 1.0056338028169014,
1813
+ "step": 1025
1814
+ },
1815
+ {
1816
+ "epoch": 103.0,
1817
+ "grad_norm": 3.3610403537750244,
1818
+ "learning_rate": 5.4692307692307686e-05,
1819
+ "loss": 0.6476,
1820
+ "step": 1030
1821
+ },
1822
+ {
1823
+ "epoch": 103.5,
1824
+ "grad_norm": 0.8738270401954651,
1825
+ "learning_rate": 5.411538461538461e-05,
1826
+ "loss": 0.5492,
1827
+ "step": 1035
1828
+ },
1829
+ {
1830
+ "epoch": 104.0,
1831
+ "grad_norm": 2.4251339435577393,
1832
+ "learning_rate": 5.353846153846153e-05,
1833
+ "loss": 0.6005,
1834
+ "step": 1040
1835
+ },
1836
+ {
1837
+ "epoch": 104.5,
1838
+ "grad_norm": 0.7935536503791809,
1839
+ "learning_rate": 5.2961538461538456e-05,
1840
+ "loss": 0.5855,
1841
+ "step": 1045
1842
+ },
1843
+ {
1844
+ "epoch": 105.0,
1845
+ "grad_norm": 2.805385112762451,
1846
+ "learning_rate": 5.238461538461538e-05,
1847
+ "loss": 0.62,
1848
+ "step": 1050
1849
+ },
1850
+ {
1851
+ "epoch": 105.0,
1852
+ "eval_loss": 2.3933348655700684,
1853
+ "eval_runtime": 1.0674,
1854
+ "eval_samples_per_second": 22.485,
1855
+ "eval_steps_per_second": 0.937,
1856
+ "eval_wer": 0.9746478873239437,
1857
+ "step": 1050
1858
+ },
1859
+ {
1860
+ "epoch": 105.5,
1861
+ "grad_norm": 1.2249245643615723,
1862
+ "learning_rate": 5.180769230769231e-05,
1863
+ "loss": 0.652,
1864
+ "step": 1055
1865
+ },
1866
+ {
1867
+ "epoch": 106.0,
1868
+ "grad_norm": 1.2247533798217773,
1869
+ "learning_rate": 5.123076923076922e-05,
1870
+ "loss": 0.6108,
1871
+ "step": 1060
1872
+ },
1873
+ {
1874
+ "epoch": 106.5,
1875
+ "grad_norm": 0.8812918663024902,
1876
+ "learning_rate": 5.0653846153846146e-05,
1877
+ "loss": 0.6453,
1878
+ "step": 1065
1879
+ },
1880
+ {
1881
+ "epoch": 107.0,
1882
+ "grad_norm": 2.7638535499572754,
1883
+ "learning_rate": 5.007692307692307e-05,
1884
+ "loss": 0.568,
1885
+ "step": 1070
1886
+ },
1887
+ {
1888
+ "epoch": 107.5,
1889
+ "grad_norm": 1.3182368278503418,
1890
+ "learning_rate": 4.95e-05,
1891
+ "loss": 0.708,
1892
+ "step": 1075
1893
+ },
1894
+ {
1895
+ "epoch": 107.5,
1896
+ "eval_loss": 2.4381346702575684,
1897
+ "eval_runtime": 1.061,
1898
+ "eval_samples_per_second": 22.619,
1899
+ "eval_steps_per_second": 0.942,
1900
+ "eval_wer": 0.9690140845070423,
1901
+ "step": 1075
1902
+ },
1903
+ {
1904
+ "epoch": 108.0,
1905
+ "grad_norm": 2.4760406017303467,
1906
+ "learning_rate": 4.8923076923076916e-05,
1907
+ "loss": 0.6171,
1908
+ "step": 1080
1909
+ },
1910
+ {
1911
+ "epoch": 108.5,
1912
+ "grad_norm": 0.5409008264541626,
1913
+ "learning_rate": 4.834615384615384e-05,
1914
+ "loss": 0.5542,
1915
+ "step": 1085
1916
+ },
1917
+ {
1918
+ "epoch": 109.0,
1919
+ "grad_norm": 1.675410509109497,
1920
+ "learning_rate": 4.776923076923077e-05,
1921
+ "loss": 0.6491,
1922
+ "step": 1090
1923
+ },
1924
+ {
1925
+ "epoch": 109.5,
1926
+ "grad_norm": 0.8941754698753357,
1927
+ "learning_rate": 4.719230769230769e-05,
1928
+ "loss": 0.7266,
1929
+ "step": 1095
1930
+ },
1931
+ {
1932
+ "epoch": 110.0,
1933
+ "grad_norm": 1.9851211309432983,
1934
+ "learning_rate": 4.6615384615384605e-05,
1935
+ "loss": 0.6729,
1936
+ "step": 1100
1937
+ },
1938
+ {
1939
+ "epoch": 110.0,
1940
+ "eval_loss": 2.474308967590332,
1941
+ "eval_runtime": 1.0636,
1942
+ "eval_samples_per_second": 22.564,
1943
+ "eval_steps_per_second": 0.94,
1944
+ "eval_wer": 1.0,
1945
+ "step": 1100
1946
+ },
1947
+ {
1948
+ "epoch": 110.5,
1949
+ "grad_norm": 0.677306592464447,
1950
+ "learning_rate": 4.603846153846153e-05,
1951
+ "loss": 0.7625,
1952
+ "step": 1105
1953
+ },
1954
+ {
1955
+ "epoch": 111.0,
1956
+ "grad_norm": 2.572356700897217,
1957
+ "learning_rate": 4.546153846153846e-05,
1958
+ "loss": 0.5146,
1959
+ "step": 1110
1960
+ },
1961
+ {
1962
+ "epoch": 111.5,
1963
+ "grad_norm": 1.2789101600646973,
1964
+ "learning_rate": 4.488461538461538e-05,
1965
+ "loss": 0.5504,
1966
+ "step": 1115
1967
+ },
1968
+ {
1969
+ "epoch": 112.0,
1970
+ "grad_norm": 2.3920390605926514,
1971
+ "learning_rate": 4.430769230769231e-05,
1972
+ "loss": 0.4821,
1973
+ "step": 1120
1974
+ },
1975
+ {
1976
+ "epoch": 112.5,
1977
+ "grad_norm": 1.219436764717102,
1978
+ "learning_rate": 4.373076923076923e-05,
1979
+ "loss": 0.5779,
1980
+ "step": 1125
1981
+ },
1982
+ {
1983
+ "epoch": 112.5,
1984
+ "eval_loss": 2.492933988571167,
1985
+ "eval_runtime": 1.0274,
1986
+ "eval_samples_per_second": 23.36,
1987
+ "eval_steps_per_second": 0.973,
1988
+ "eval_wer": 0.9549295774647887,
1989
+ "step": 1125
1990
+ },
1991
+ {
1992
+ "epoch": 113.0,
1993
+ "grad_norm": 3.558155059814453,
1994
+ "learning_rate": 4.315384615384615e-05,
1995
+ "loss": 0.4743,
1996
+ "step": 1130
1997
+ },
1998
+ {
1999
+ "epoch": 113.5,
2000
+ "grad_norm": 0.9398171901702881,
2001
+ "learning_rate": 4.257692307692307e-05,
2002
+ "loss": 0.493,
2003
+ "step": 1135
2004
+ },
2005
+ {
2006
+ "epoch": 114.0,
2007
+ "grad_norm": 4.514529705047607,
2008
+ "learning_rate": 4.2e-05,
2009
+ "loss": 0.4341,
2010
+ "step": 1140
2011
+ },
2012
+ {
2013
+ "epoch": 114.5,
2014
+ "grad_norm": 1.015120029449463,
2015
+ "learning_rate": 4.142307692307692e-05,
2016
+ "loss": 0.5069,
2017
+ "step": 1145
2018
+ },
2019
+ {
2020
+ "epoch": 115.0,
2021
+ "grad_norm": 2.043063163757324,
2022
+ "learning_rate": 4.084615384615384e-05,
2023
+ "loss": 0.6303,
2024
+ "step": 1150
2025
+ },
2026
+ {
2027
+ "epoch": 115.0,
2028
+ "eval_loss": 2.5056331157684326,
2029
+ "eval_runtime": 1.0408,
2030
+ "eval_samples_per_second": 23.06,
2031
+ "eval_steps_per_second": 0.961,
2032
+ "eval_wer": 0.923943661971831,
2033
+ "step": 1150
2034
+ }
2035
+ ],
2036
+ "logging_steps": 5,
2037
+ "max_steps": 1500,
2038
+ "num_input_tokens_seen": 0,
2039
+ "num_train_epochs": 150,
2040
+ "save_steps": 25,
2041
+ "stateful_callbacks": {
2042
+ "TrainerControl": {
2043
+ "args": {
2044
+ "should_epoch_stop": false,
2045
+ "should_evaluate": false,
2046
+ "should_log": false,
2047
+ "should_save": true,
2048
+ "should_training_stop": false
2049
+ },
2050
+ "attributes": {}
2051
+ }
2052
+ },
2053
+ "total_flos": 4.771505223996499e+18,
2054
+ "train_batch_size": 16,
2055
+ "trial_name": null,
2056
+ "trial_params": null
2057
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d684a542fe2c2e932faa66172a7cc1f48ba500827ae1b3d29c9b64338152a672
3
+ size 5112
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"i": 0, "3": 1, "d": 2, "k": 3, "y": 5, "x": 6, "4": 7, "t": 8, "o": 9, "s": 10, "e": 11, "b": 12, "u": 13, "2": 14, "h": 15, "c": 16, "m": 17, "1": 18, "n": 19, "\u00f1": 20, "l": 21, "'": 22, "a": 23, "r": 24, "|": 4, "[UNK]": 25, "[PAD]": 26}