slplab commited on
Commit
5fa2549
1 Parent(s): a88148e

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +236 -0
trainer_state.json ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.4367816091954024,
5
+ "global_step": 1000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.07,
12
+ "learning_rate": 2.7599999999999997e-05,
13
+ "loss": 10.9066,
14
+ "step": 50
15
+ },
16
+ {
17
+ "epoch": 0.14,
18
+ "learning_rate": 5.6399999999999995e-05,
19
+ "loss": 7.3263,
20
+ "step": 100
21
+ },
22
+ {
23
+ "epoch": 0.14,
24
+ "eval_cer": 0.9998702550415184,
25
+ "eval_loss": 5.000309467315674,
26
+ "eval_runtime": 31.7393,
27
+ "eval_samples_per_second": 14.399,
28
+ "eval_steps_per_second": 0.914,
29
+ "eval_wer": 1.0,
30
+ "step": 100
31
+ },
32
+ {
33
+ "epoch": 0.22,
34
+ "learning_rate": 8.639999999999999e-05,
35
+ "loss": 3.4967,
36
+ "step": 150
37
+ },
38
+ {
39
+ "epoch": 0.29,
40
+ "learning_rate": 0.0001164,
41
+ "loss": 3.1464,
42
+ "step": 200
43
+ },
44
+ {
45
+ "epoch": 0.29,
46
+ "eval_cer": 0.9998702550415184,
47
+ "eval_loss": 3.5314910411834717,
48
+ "eval_runtime": 31.3912,
49
+ "eval_samples_per_second": 14.558,
50
+ "eval_steps_per_second": 0.924,
51
+ "eval_wer": 1.0,
52
+ "step": 200
53
+ },
54
+ {
55
+ "epoch": 0.36,
56
+ "learning_rate": 0.00014639999999999998,
57
+ "loss": 3.1238,
58
+ "step": 250
59
+ },
60
+ {
61
+ "epoch": 0.43,
62
+ "learning_rate": 0.00017639999999999998,
63
+ "loss": 3.1152,
64
+ "step": 300
65
+ },
66
+ {
67
+ "epoch": 0.43,
68
+ "eval_cer": 0.9998702550415184,
69
+ "eval_loss": 3.6040682792663574,
70
+ "eval_runtime": 31.267,
71
+ "eval_samples_per_second": 14.616,
72
+ "eval_steps_per_second": 0.927,
73
+ "eval_wer": 1.0,
74
+ "step": 300
75
+ },
76
+ {
77
+ "epoch": 0.5,
78
+ "learning_rate": 0.00020639999999999998,
79
+ "loss": 3.088,
80
+ "step": 350
81
+ },
82
+ {
83
+ "epoch": 0.57,
84
+ "learning_rate": 0.0002364,
85
+ "loss": 3.0626,
86
+ "step": 400
87
+ },
88
+ {
89
+ "epoch": 0.57,
90
+ "eval_cer": 0.9743290332147093,
91
+ "eval_loss": 3.385751247406006,
92
+ "eval_runtime": 31.1188,
93
+ "eval_samples_per_second": 14.686,
94
+ "eval_steps_per_second": 0.932,
95
+ "eval_wer": 0.9992471013401596,
96
+ "step": 400
97
+ },
98
+ {
99
+ "epoch": 0.65,
100
+ "learning_rate": 0.00026639999999999997,
101
+ "loss": 3.032,
102
+ "step": 450
103
+ },
104
+ {
105
+ "epoch": 0.72,
106
+ "learning_rate": 0.0002964,
107
+ "loss": 2.3387,
108
+ "step": 500
109
+ },
110
+ {
111
+ "epoch": 0.72,
112
+ "eval_cer": 0.3497553380782918,
113
+ "eval_loss": 1.502172589302063,
114
+ "eval_runtime": 31.3947,
115
+ "eval_samples_per_second": 14.557,
116
+ "eval_steps_per_second": 0.924,
117
+ "eval_wer": 1.0904984189128144,
118
+ "step": 500
119
+ },
120
+ {
121
+ "epoch": 0.79,
122
+ "learning_rate": 0.00029168765743073046,
123
+ "loss": 1.1185,
124
+ "step": 550
125
+ },
126
+ {
127
+ "epoch": 0.86,
128
+ "learning_rate": 0.0002822418136020151,
129
+ "loss": 0.7737,
130
+ "step": 600
131
+ },
132
+ {
133
+ "epoch": 0.86,
134
+ "eval_cer": 0.21517274614472123,
135
+ "eval_loss": 0.6938613057136536,
136
+ "eval_runtime": 31.1722,
137
+ "eval_samples_per_second": 14.661,
138
+ "eval_steps_per_second": 0.93,
139
+ "eval_wer": 0.872910706218943,
140
+ "step": 600
141
+ },
142
+ {
143
+ "epoch": 0.93,
144
+ "learning_rate": 0.00027279596977329974,
145
+ "loss": 0.647,
146
+ "step": 650
147
+ },
148
+ {
149
+ "epoch": 1.01,
150
+ "learning_rate": 0.00026335012594458433,
151
+ "loss": 0.5643,
152
+ "step": 700
153
+ },
154
+ {
155
+ "epoch": 1.01,
156
+ "eval_cer": 0.15467452550415184,
157
+ "eval_loss": 0.4780799448490143,
158
+ "eval_runtime": 30.8612,
159
+ "eval_samples_per_second": 14.808,
160
+ "eval_steps_per_second": 0.94,
161
+ "eval_wer": 0.7375395271796417,
162
+ "step": 700
163
+ },
164
+ {
165
+ "epoch": 1.08,
166
+ "learning_rate": 0.00025390428211586897,
167
+ "loss": 0.5188,
168
+ "step": 750
169
+ },
170
+ {
171
+ "epoch": 1.15,
172
+ "learning_rate": 0.0002444584382871536,
173
+ "loss": 0.4762,
174
+ "step": 800
175
+ },
176
+ {
177
+ "epoch": 1.15,
178
+ "eval_cer": 0.1396055753262159,
179
+ "eval_loss": 0.41990911960601807,
180
+ "eval_runtime": 31.306,
181
+ "eval_samples_per_second": 14.598,
182
+ "eval_steps_per_second": 0.926,
183
+ "eval_wer": 0.6905586508056015,
184
+ "step": 800
185
+ },
186
+ {
187
+ "epoch": 1.22,
188
+ "learning_rate": 0.00023501259445843828,
189
+ "loss": 0.4614,
190
+ "step": 850
191
+ },
192
+ {
193
+ "epoch": 1.29,
194
+ "learning_rate": 0.0002255667506297229,
195
+ "loss": 0.4371,
196
+ "step": 900
197
+ },
198
+ {
199
+ "epoch": 1.29,
200
+ "eval_cer": 0.12713152431791222,
201
+ "eval_loss": 0.382140189409256,
202
+ "eval_runtime": 31.4276,
203
+ "eval_samples_per_second": 14.541,
204
+ "eval_steps_per_second": 0.923,
205
+ "eval_wer": 0.6390603824725192,
206
+ "step": 900
207
+ },
208
+ {
209
+ "epoch": 1.36,
210
+ "learning_rate": 0.00021612090680100753,
211
+ "loss": 0.4259,
212
+ "step": 950
213
+ },
214
+ {
215
+ "epoch": 1.44,
216
+ "learning_rate": 0.00020667506297229217,
217
+ "loss": 0.4138,
218
+ "step": 1000
219
+ },
220
+ {
221
+ "epoch": 1.44,
222
+ "eval_cer": 0.12171930604982206,
223
+ "eval_loss": 0.36281564831733704,
224
+ "eval_runtime": 31.2629,
225
+ "eval_samples_per_second": 14.618,
226
+ "eval_steps_per_second": 0.928,
227
+ "eval_wer": 0.6143653064297545,
228
+ "step": 1000
229
+ }
230
+ ],
231
+ "max_steps": 2088,
232
+ "num_train_epochs": 3,
233
+ "total_flos": 8.198176189032398e+18,
234
+ "trial_name": null,
235
+ "trial_params": null
236
+ }