DrishtiSharma committed on
Commit
d74ba6c
1 Parent(s): 14d2638

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +9 -9
  2. eval_results.json +5 -5
  3. train_results.json +4 -4
  4. trainer_state.json +108 -108
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_loss": 1.2441085577011108,
4
- "eval_runtime": 12.8952,
5
  "eval_samples": 360,
6
- "eval_samples_per_second": 27.917,
7
- "eval_steps_per_second": 3.49,
8
- "eval_wer": 0.5350842806742454,
9
- "train_loss": 0.7377449184876901,
10
- "train_runtime": 5722.2661,
11
  "train_samples": 856,
12
- "train_samples_per_second": 14.959,
13
- "train_steps_per_second": 0.944
14
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_loss": 1.1507658958435059,
4
+ "eval_runtime": 14.1834,
5
  "eval_samples": 360,
6
+ "eval_samples_per_second": 25.382,
7
+ "eval_steps_per_second": 3.173,
8
+ "eval_wer": 0.49078792630341045,
9
+ "train_loss": 0.8475217450106586,
10
+ "train_runtime": 6078.829,
11
  "train_samples": 856,
12
+ "train_samples_per_second": 14.082,
13
+ "train_steps_per_second": 0.888
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_loss": 1.2441085577011108,
4
- "eval_runtime": 12.8952,
5
  "eval_samples": 360,
6
- "eval_samples_per_second": 27.917,
7
- "eval_steps_per_second": 3.49,
8
- "eval_wer": 0.5350842806742454
9
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_loss": 1.1507658958435059,
4
+ "eval_runtime": 14.1834,
5
  "eval_samples": 360,
6
+ "eval_samples_per_second": 25.382,
7
+ "eval_steps_per_second": 3.173,
8
+ "eval_wer": 0.49078792630341045
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 100.0,
3
- "train_loss": 0.7377449184876901,
4
- "train_runtime": 5722.2661,
5
  "train_samples": 856,
6
- "train_samples_per_second": 14.959,
7
- "train_steps_per_second": 0.944
8
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "train_loss": 0.8475217450106586,
4
+ "train_runtime": 6078.829,
5
  "train_samples": 856,
6
+ "train_samples_per_second": 14.082,
7
+ "train_steps_per_second": 0.888
8
  }
trainer_state.json CHANGED
@@ -9,264 +9,264 @@
9
  "log_history": [
10
  {
11
  "epoch": 3.7,
12
- "learning_rate": 5.909999999999999e-05,
13
- "loss": 11.4212,
14
  "step": 200
15
  },
16
  {
17
  "epoch": 7.41,
18
- "learning_rate": 0.0001191,
19
- "loss": 3.3931,
20
  "step": 400
21
  },
22
  {
23
  "epoch": 9.26,
24
- "eval_loss": 2.6012766361236572,
25
- "eval_runtime": 13.0763,
26
- "eval_samples_per_second": 27.531,
27
- "eval_steps_per_second": 3.441,
28
- "eval_wer": 0.991375931007448,
29
  "step": 500
30
  },
31
  {
32
  "epoch": 11.11,
33
- "learning_rate": 0.0001791,
34
- "loss": 2.2716,
35
  "step": 600
36
  },
37
  {
38
  "epoch": 14.81,
39
- "learning_rate": 0.00023909999999999998,
40
- "loss": 0.5568,
41
  "step": 800
42
  },
43
  {
44
  "epoch": 18.52,
45
- "learning_rate": 0.0002991,
46
- "loss": 0.3457,
47
  "step": 1000
48
  },
49
  {
50
  "epoch": 18.52,
51
- "eval_loss": 1.0315884351730347,
52
- "eval_runtime": 12.6921,
53
- "eval_samples_per_second": 28.364,
54
- "eval_steps_per_second": 3.546,
55
- "eval_wer": 0.6452371618972952,
56
  "step": 1000
57
  },
58
  {
59
  "epoch": 22.22,
60
- "learning_rate": 0.00032255581395348835,
61
- "loss": 0.2708,
62
  "step": 1200
63
  },
64
  {
65
  "epoch": 25.93,
66
- "learning_rate": 0.00030720697674418605,
67
- "loss": 0.2118,
68
  "step": 1400
69
  },
70
  {
71
  "epoch": 27.78,
72
- "eval_loss": 1.1182368993759155,
73
- "eval_runtime": 12.9665,
74
- "eval_samples_per_second": 27.764,
75
- "eval_steps_per_second": 3.47,
76
- "eval_wer": 0.6354370834966679,
77
  "step": 1500
78
  },
79
  {
80
  "epoch": 29.63,
81
- "learning_rate": 0.00029185813953488374,
82
- "loss": 0.1765,
83
  "step": 1600
84
  },
85
  {
86
  "epoch": 33.33,
87
- "learning_rate": 0.0002765093023255814,
88
- "loss": 0.1404,
89
  "step": 1800
90
  },
91
  {
92
  "epoch": 37.04,
93
- "learning_rate": 0.00026116046511627907,
94
- "loss": 0.1245,
95
  "step": 2000
96
  },
97
  {
98
  "epoch": 37.04,
99
- "eval_loss": 1.2044304609298706,
100
- "eval_runtime": 12.7752,
101
- "eval_samples_per_second": 28.18,
102
- "eval_steps_per_second": 3.522,
103
- "eval_wer": 0.6295570364562917,
104
  "step": 2000
105
  },
106
  {
107
  "epoch": 40.74,
108
- "learning_rate": 0.00024581162790697676,
109
- "loss": 0.1166,
110
  "step": 2200
111
  },
112
  {
113
  "epoch": 44.44,
114
- "learning_rate": 0.00023046279069767443,
115
- "loss": 0.1068,
116
  "step": 2400
117
  },
118
  {
119
  "epoch": 46.3,
120
- "eval_loss": 1.1400295495986938,
121
- "eval_runtime": 12.7392,
122
- "eval_samples_per_second": 28.259,
123
- "eval_steps_per_second": 3.532,
124
- "eval_wer": 0.582124656997256,
125
  "step": 2500
126
  },
127
  {
128
  "epoch": 48.15,
129
- "learning_rate": 0.0002151139534883721,
130
- "loss": 0.0917,
131
  "step": 2600
132
  },
133
  {
134
  "epoch": 51.85,
135
- "learning_rate": 0.00019976511627906976,
136
- "loss": 0.0842,
137
  "step": 2800
138
  },
139
  {
140
  "epoch": 55.56,
141
- "learning_rate": 0.00018441627906976745,
142
- "loss": 0.0806,
143
  "step": 3000
144
  },
145
  {
146
  "epoch": 55.56,
147
- "eval_loss": 1.2010185718536377,
148
- "eval_runtime": 12.8794,
149
- "eval_samples_per_second": 27.952,
150
- "eval_steps_per_second": 3.494,
151
- "eval_wer": 0.5785966287730302,
152
  "step": 3000
153
  },
154
  {
155
  "epoch": 59.26,
156
- "learning_rate": 0.00016906744186046509,
157
- "loss": 0.0719,
158
  "step": 3200
159
  },
160
  {
161
  "epoch": 62.96,
162
- "learning_rate": 0.00015371860465116278,
163
- "loss": 0.0679,
164
  "step": 3400
165
  },
166
  {
167
  "epoch": 64.81,
168
- "eval_loss": 1.1644504070281982,
169
- "eval_runtime": 12.9878,
170
- "eval_samples_per_second": 27.718,
171
- "eval_steps_per_second": 3.465,
172
- "eval_wer": 0.546452371618973,
173
  "step": 3500
174
  },
175
  {
176
  "epoch": 66.67,
177
- "learning_rate": 0.00013836976744186047,
178
- "loss": 0.0582,
179
  "step": 3600
180
  },
181
  {
182
  "epoch": 70.37,
183
- "learning_rate": 0.00012302093023255814,
184
- "loss": 0.0566,
185
  "step": 3800
186
  },
187
  {
188
  "epoch": 74.07,
189
- "learning_rate": 0.00010767209302325581,
190
- "loss": 0.0506,
191
  "step": 4000
192
  },
193
  {
194
  "epoch": 74.07,
195
- "eval_loss": 1.2748003005981445,
196
- "eval_runtime": 12.9412,
197
- "eval_samples_per_second": 27.818,
198
- "eval_steps_per_second": 3.477,
199
- "eval_wer": 0.5507644061152489,
200
  "step": 4000
201
  },
202
  {
203
  "epoch": 77.78,
204
- "learning_rate": 9.232325581395348e-05,
205
- "loss": 0.0443,
206
  "step": 4200
207
  },
208
  {
209
  "epoch": 81.48,
210
- "learning_rate": 7.697441860465116e-05,
211
- "loss": 0.0397,
212
  "step": 4400
213
  },
214
  {
215
  "epoch": 83.33,
216
- "eval_loss": 1.1869040727615356,
217
- "eval_runtime": 12.9633,
218
- "eval_samples_per_second": 27.771,
219
- "eval_steps_per_second": 3.471,
220
- "eval_wer": 0.5393963151705213,
221
  "step": 4500
222
  },
223
  {
224
  "epoch": 85.19,
225
- "learning_rate": 6.162558139534884e-05,
226
- "loss": 0.0353,
227
  "step": 4600
228
  },
229
  {
230
  "epoch": 88.89,
231
- "learning_rate": 4.6276744186046515e-05,
232
- "loss": 0.0282,
233
  "step": 4800
234
  },
235
  {
236
  "epoch": 92.59,
237
- "learning_rate": 3.092790697674419e-05,
238
- "loss": 0.0276,
239
  "step": 5000
240
  },
241
  {
242
  "epoch": 92.59,
243
- "eval_loss": 1.2239253520965576,
244
- "eval_runtime": 12.8775,
245
- "eval_samples_per_second": 27.956,
246
- "eval_steps_per_second": 3.494,
247
- "eval_wer": 0.5390043120344963,
248
  "step": 5000
249
  },
250
  {
251
  "epoch": 96.3,
252
- "learning_rate": 1.5579069767441862e-05,
253
- "loss": 0.0247,
254
  "step": 5200
255
  },
256
  {
257
  "epoch": 100.0,
258
- "learning_rate": 2.302325581395349e-07,
259
- "loss": 0.0218,
260
  "step": 5400
261
  },
262
  {
263
  "epoch": 100.0,
264
  "step": 5400,
265
  "total_flos": 1.3559072525710848e+19,
266
- "train_loss": 0.7377449184876901,
267
- "train_runtime": 5722.2661,
268
- "train_samples_per_second": 14.959,
269
- "train_steps_per_second": 0.944
270
  }
271
  ],
272
  "max_steps": 5400,
 
9
  "log_history": [
10
  {
11
  "epoch": 3.7,
12
+ "learning_rate": 3.9399999999999995e-05,
13
+ "loss": 12.8577,
14
  "step": 200
15
  },
16
  {
17
  "epoch": 7.41,
18
+ "learning_rate": 7.939999999999999e-05,
19
+ "loss": 3.5841,
20
  "step": 400
21
  },
22
  {
23
  "epoch": 9.26,
24
+ "eval_loss": 3.2514328956604004,
25
+ "eval_runtime": 14.446,
26
+ "eval_samples_per_second": 24.92,
27
+ "eval_steps_per_second": 3.115,
28
+ "eval_wer": 0.9941199529596236,
29
  "step": 500
30
  },
31
  {
32
  "epoch": 11.11,
33
+ "learning_rate": 0.0001194,
34
+ "loss": 3.0775,
35
  "step": 600
36
  },
37
  {
38
  "epoch": 14.81,
39
+ "learning_rate": 0.00015939999999999997,
40
+ "loss": 0.8996,
41
  "step": 800
42
  },
43
  {
44
  "epoch": 18.52,
45
+ "learning_rate": 0.00019939999999999997,
46
+ "loss": 0.3992,
47
  "step": 1000
48
  },
49
  {
50
  "epoch": 18.52,
51
+ "eval_loss": 0.8790363073348999,
52
+ "eval_runtime": 14.0346,
53
+ "eval_samples_per_second": 25.651,
54
+ "eval_steps_per_second": 3.206,
55
+ "eval_wer": 0.6107408859270874,
56
  "step": 1000
57
  },
58
  {
59
  "epoch": 22.22,
60
+ "learning_rate": 0.0002394,
61
+ "loss": 0.2879,
62
  "step": 1200
63
  },
64
  {
65
  "epoch": 25.93,
66
+ "learning_rate": 0.00027939999999999996,
67
+ "loss": 0.2409,
68
  "step": 1400
69
  },
70
  {
71
  "epoch": 27.78,
72
+ "eval_loss": 1.0011698007583618,
73
+ "eval_runtime": 14.3849,
74
+ "eval_samples_per_second": 25.026,
75
+ "eval_steps_per_second": 3.128,
76
+ "eval_wer": 0.6366130929047432,
77
  "step": 1500
78
  },
79
  {
80
  "epoch": 29.63,
81
+ "learning_rate": 0.0002925384615384615,
82
+ "loss": 0.2132,
83
  "step": 1600
84
  },
85
  {
86
  "epoch": 33.33,
87
+ "learning_rate": 0.00027715384615384614,
88
+ "loss": 0.1729,
89
  "step": 1800
90
  },
91
  {
92
  "epoch": 37.04,
93
+ "learning_rate": 0.00026176923076923073,
94
+ "loss": 0.1447,
95
  "step": 2000
96
  },
97
  {
98
  "epoch": 37.04,
99
+ "eval_loss": 1.0167392492294312,
100
+ "eval_runtime": 14.4093,
101
+ "eval_samples_per_second": 24.984,
102
+ "eval_steps_per_second": 3.123,
103
+ "eval_wer": 0.6275970207761662,
104
  "step": 2000
105
  },
106
  {
107
  "epoch": 40.74,
108
+ "learning_rate": 0.0002463846153846153,
109
+ "loss": 0.1311,
110
  "step": 2200
111
  },
112
  {
113
  "epoch": 44.44,
114
+ "learning_rate": 0.00023099999999999998,
115
+ "loss": 0.1109,
116
  "step": 2400
117
  },
118
  {
119
  "epoch": 46.3,
120
+ "eval_loss": 1.0637701749801636,
121
+ "eval_runtime": 13.6702,
122
+ "eval_samples_per_second": 26.335,
123
+ "eval_steps_per_second": 3.292,
124
+ "eval_wer": 0.5652685221481771,
125
  "step": 2500
126
  },
127
  {
128
  "epoch": 48.15,
129
+ "learning_rate": 0.0002156153846153846,
130
+ "loss": 0.0968,
131
  "step": 2600
132
  },
133
  {
134
  "epoch": 51.85,
135
+ "learning_rate": 0.00020023076923076922,
136
+ "loss": 0.0865,
137
  "step": 2800
138
  },
139
  {
140
  "epoch": 55.56,
141
+ "learning_rate": 0.00018484615384615385,
142
+ "loss": 0.0797,
143
  "step": 3000
144
  },
145
  {
146
  "epoch": 55.56,
147
+ "eval_loss": 1.1447213888168335,
148
+ "eval_runtime": 13.6804,
149
+ "eval_samples_per_second": 26.315,
150
+ "eval_steps_per_second": 3.289,
151
+ "eval_wer": 0.5715405723245786,
152
  "step": 3000
153
  },
154
  {
155
  "epoch": 59.26,
156
+ "learning_rate": 0.00016946153846153844,
157
+ "loss": 0.0728,
158
  "step": 3200
159
  },
160
  {
161
  "epoch": 62.96,
162
+ "learning_rate": 0.00015407692307692306,
163
+ "loss": 0.0636,
164
  "step": 3400
165
  },
166
  {
167
  "epoch": 64.81,
168
+ "eval_loss": 1.1502803564071655,
169
+ "eval_runtime": 14.7332,
170
+ "eval_samples_per_second": 24.435,
171
+ "eval_steps_per_second": 3.054,
172
+ "eval_wer": 0.5315562524500196,
173
  "step": 3500
174
  },
175
  {
176
  "epoch": 66.67,
177
+ "learning_rate": 0.0001386923076923077,
178
+ "loss": 0.0604,
179
  "step": 3600
180
  },
181
  {
182
  "epoch": 70.37,
183
+ "learning_rate": 0.0001233076923076923,
184
+ "loss": 0.0499,
185
  "step": 3800
186
  },
187
  {
188
  "epoch": 74.07,
189
+ "learning_rate": 0.0001079230769230769,
190
+ "loss": 0.0466,
191
  "step": 4000
192
  },
193
  {
194
  "epoch": 74.07,
195
+ "eval_loss": 1.2227113246917725,
196
+ "eval_runtime": 14.6209,
197
+ "eval_samples_per_second": 24.622,
198
+ "eval_steps_per_second": 3.078,
199
+ "eval_wer": 0.5386123088984712,
200
  "step": 4000
201
  },
202
  {
203
  "epoch": 77.78,
204
+ "learning_rate": 9.253846153846153e-05,
205
+ "loss": 0.0426,
206
  "step": 4200
207
  },
208
  {
209
  "epoch": 81.48,
210
+ "learning_rate": 7.715384615384615e-05,
211
+ "loss": 0.0372,
212
  "step": 4400
213
  },
214
  {
215
  "epoch": 83.33,
216
+ "eval_loss": 1.121378779411316,
217
+ "eval_runtime": 13.6605,
218
+ "eval_samples_per_second": 26.353,
219
+ "eval_steps_per_second": 3.294,
220
+ "eval_wer": 0.5225401803214426,
221
  "step": 4500
222
  },
223
  {
224
  "epoch": 85.19,
225
+ "learning_rate": 6.176923076923076e-05,
226
+ "loss": 0.0349,
227
  "step": 4600
228
  },
229
  {
230
  "epoch": 88.89,
231
+ "learning_rate": 4.6384615384615385e-05,
232
+ "loss": 0.029,
233
  "step": 4800
234
  },
235
  {
236
  "epoch": 92.59,
237
+ "learning_rate": 3.0999999999999995e-05,
238
+ "loss": 0.0239,
239
  "step": 5000
240
  },
241
  {
242
  "epoch": 92.59,
243
+ "eval_loss": 1.1375247240066528,
244
+ "eval_runtime": 14.3296,
245
+ "eval_samples_per_second": 25.123,
246
+ "eval_steps_per_second": 3.14,
247
+ "eval_wer": 0.4998039984319875,
248
  "step": 5000
249
  },
250
  {
251
  "epoch": 96.3,
252
+ "learning_rate": 1.5615384615384614e-05,
253
+ "loss": 0.0208,
254
  "step": 5200
255
  },
256
  {
257
  "epoch": 100.0,
258
+ "learning_rate": 2.3076923076923075e-07,
259
+ "loss": 0.0188,
260
  "step": 5400
261
  },
262
  {
263
  "epoch": 100.0,
264
  "step": 5400,
265
  "total_flos": 1.3559072525710848e+19,
266
+ "train_loss": 0.8475217450106586,
267
+ "train_runtime": 6078.829,
268
+ "train_samples_per_second": 14.082,
269
+ "train_steps_per_second": 0.888
270
  }
271
  ],
272
  "max_steps": 5400,