noflm commited on
Commit
578f0e2
1 Parent(s): ea7fd5b

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_loss": 0.46010294556617737,
4
- "eval_runtime": 1652.1311,
5
- "eval_samples_per_second": 2.787,
6
- "eval_steps_per_second": 0.349,
7
- "eval_wer": 22.51334731203637,
8
- "train_loss": 0.5294164781570434,
9
- "train_runtime": 5156.396,
10
- "train_samples_per_second": 3.103,
11
- "train_steps_per_second": 0.194
12
  }
 
1
  {
2
+ "epoch": 35.02,
3
+ "eval_loss": 0.6532349586486816,
4
+ "eval_runtime": 1529.5324,
5
+ "eval_samples_per_second": 3.01,
6
+ "eval_steps_per_second": 0.188,
7
+ "eval_wer": 21.991788980318223,
8
+ "train_loss": 0.06371040197610855,
9
+ "train_runtime": 79589.3253,
10
+ "train_samples_per_second": 4.021,
11
+ "train_steps_per_second": 0.126
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_loss": 0.46010294556617737,
4
- "eval_runtime": 1652.1311,
5
- "eval_samples_per_second": 2.787,
6
- "eval_steps_per_second": 0.349,
7
- "eval_wer": 22.51334731203637
8
  }
 
1
  {
2
+ "epoch": 35.02,
3
+ "eval_loss": 0.6532349586486816,
4
+ "eval_runtime": 1529.5324,
5
+ "eval_samples_per_second": 3.01,
6
+ "eval_steps_per_second": 0.188,
7
+ "eval_wer": 21.991788980318223
8
  }
runs/Dec27_09-55-14_srv/events.out.tfevents.1672216184.srv.1172111.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:376b9b587745cc14a971964a87d273e1eebf96fb6ea5d415eff9f0f00395a7a5
3
+ size 358
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 0.5294164781570434,
4
- "train_runtime": 5156.396,
5
- "train_samples_per_second": 3.103,
6
- "train_steps_per_second": 0.194
7
  }
 
1
  {
2
+ "epoch": 35.02,
3
+ "train_loss": 0.06371040197610855,
4
+ "train_runtime": 79589.3253,
5
+ "train_samples_per_second": 4.021,
6
+ "train_steps_per_second": 0.126
7
  }
trainer_state.json CHANGED
@@ -1,274 +1,235 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
- "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.03,
12
- "learning_rate": 4.800000000000001e-06,
13
- "loss": 0.7069,
14
- "step": 25
15
- },
16
- {
17
- "epoch": 0.05,
18
- "learning_rate": 9.800000000000001e-06,
19
- "loss": 0.5962,
20
- "step": 50
21
- },
22
- {
23
- "epoch": 0.07,
24
- "learning_rate": 1e-05,
25
- "loss": 0.4928,
26
- "step": 75
27
- },
28
- {
29
- "epoch": 0.1,
30
- "learning_rate": 1e-05,
31
- "loss": 0.5115,
32
- "step": 100
33
- },
34
- {
35
- "epoch": 0.12,
36
- "learning_rate": 1e-05,
37
- "loss": 0.4646,
38
- "step": 125
39
- },
40
- {
41
- "epoch": 0.15,
42
- "learning_rate": 1e-05,
43
- "loss": 0.5159,
44
- "step": 150
45
- },
46
- {
47
- "epoch": 0.17,
48
- "learning_rate": 1e-05,
49
- "loss": 0.4915,
50
- "step": 175
51
- },
52
- {
53
- "epoch": 0.2,
54
- "learning_rate": 1e-05,
55
- "loss": 0.4671,
56
- "step": 200
57
- },
58
- {
59
- "epoch": 0.23,
60
- "learning_rate": 1e-05,
61
- "loss": 0.4107,
62
- "step": 225
63
- },
64
- {
65
- "epoch": 0.25,
66
- "learning_rate": 1e-05,
67
- "loss": 0.4238,
68
- "step": 250
69
  },
70
  {
71
- "epoch": 0.28,
72
  "learning_rate": 1e-05,
73
- "loss": 0.4253,
74
- "step": 275
75
  },
76
  {
77
- "epoch": 0.3,
78
- "learning_rate": 1e-05,
79
- "loss": 0.4236,
80
- "step": 300
 
 
 
81
  },
82
  {
83
- "epoch": 0.33,
84
  "learning_rate": 1e-05,
85
- "loss": 0.3871,
86
- "step": 325
87
  },
88
  {
89
- "epoch": 0.35,
90
  "learning_rate": 1e-05,
91
- "loss": 0.4001,
92
- "step": 350
93
  },
94
  {
95
- "epoch": 0.38,
96
- "learning_rate": 1e-05,
97
- "loss": 0.3982,
98
- "step": 375
 
 
 
99
  },
100
  {
101
- "epoch": 0.4,
102
  "learning_rate": 1e-05,
103
- "loss": 0.3953,
104
- "step": 400
105
  },
106
  {
107
- "epoch": 0.42,
108
  "learning_rate": 1e-05,
109
- "loss": 0.4469,
110
- "step": 425
111
  },
112
  {
113
- "epoch": 0.45,
114
- "learning_rate": 1e-05,
115
- "loss": 0.4613,
116
- "step": 450
 
 
 
117
  },
118
  {
119
- "epoch": 0.47,
120
  "learning_rate": 1e-05,
121
- "loss": 0.5581,
122
- "step": 475
123
  },
124
  {
125
- "epoch": 0.5,
126
  "learning_rate": 1e-05,
127
- "loss": 0.5489,
128
- "step": 500
129
  },
130
  {
131
- "epoch": 0.53,
132
- "learning_rate": 1e-05,
133
- "loss": 0.5243,
134
- "step": 525
 
 
 
135
  },
136
  {
137
- "epoch": 0.55,
138
  "learning_rate": 1e-05,
139
- "loss": 0.5691,
140
- "step": 550
141
  },
142
  {
143
- "epoch": 0.57,
144
  "learning_rate": 1e-05,
145
- "loss": 0.5907,
146
- "step": 575
147
  },
148
  {
149
- "epoch": 0.6,
150
- "learning_rate": 1e-05,
151
- "loss": 0.5893,
152
- "step": 600
 
 
 
153
  },
154
  {
155
- "epoch": 0.62,
156
  "learning_rate": 1e-05,
157
- "loss": 0.6474,
158
- "step": 625
159
  },
160
  {
161
- "epoch": 0.65,
162
  "learning_rate": 1e-05,
163
- "loss": 0.6241,
164
- "step": 650
165
  },
166
  {
167
- "epoch": 0.68,
168
- "learning_rate": 1e-05,
169
- "loss": 0.6801,
170
- "step": 675
 
 
 
171
  },
172
  {
173
- "epoch": 0.7,
174
  "learning_rate": 1e-05,
175
- "loss": 0.6744,
176
- "step": 700
177
  },
178
  {
179
- "epoch": 0.72,
180
  "learning_rate": 1e-05,
181
- "loss": 0.6525,
182
- "step": 725
183
  },
184
  {
185
- "epoch": 0.75,
186
- "learning_rate": 1e-05,
187
- "loss": 0.6215,
188
- "step": 750
189
- },
190
- {
191
- "epoch": 0.78,
192
- "learning_rate": 1e-05,
193
- "loss": 0.6284,
194
- "step": 775
195
  },
196
  {
197
- "epoch": 0.8,
198
  "learning_rate": 1e-05,
199
- "loss": 0.5778,
200
- "step": 800
201
  },
202
  {
203
- "epoch": 0.82,
204
  "learning_rate": 1e-05,
205
- "loss": 0.6221,
206
- "step": 825
207
  },
208
  {
209
- "epoch": 0.85,
210
- "learning_rate": 1e-05,
211
- "loss": 0.5919,
212
- "step": 850
 
 
 
213
  },
214
  {
215
- "epoch": 0.88,
216
  "learning_rate": 1e-05,
217
- "loss": 0.4808,
218
- "step": 875
219
  },
220
  {
221
- "epoch": 0.9,
222
  "learning_rate": 1e-05,
223
- "loss": 0.5497,
224
- "step": 900
225
  },
226
  {
227
- "epoch": 0.93,
228
- "learning_rate": 1e-05,
229
- "loss": 0.4979,
230
- "step": 925
 
 
 
231
  },
232
  {
233
- "epoch": 0.95,
234
  "learning_rate": 1e-05,
235
- "loss": 0.4988,
236
- "step": 950
237
  },
238
  {
239
- "epoch": 0.97,
240
  "learning_rate": 1e-05,
241
- "loss": 0.528,
242
- "step": 975
243
  },
244
  {
245
- "epoch": 1.0,
246
- "learning_rate": 1e-05,
247
- "loss": 0.5019,
248
- "step": 1000
249
- },
250
- {
251
- "epoch": 1.0,
252
- "eval_loss": 0.46010294556617737,
253
- "eval_runtime": 1657.4757,
254
- "eval_samples_per_second": 2.778,
255
- "eval_steps_per_second": 0.348,
256
- "eval_wer": 22.51334731203637,
257
- "step": 1000
258
  },
259
  {
260
- "epoch": 1.0,
261
- "step": 1000,
262
- "total_flos": 5.1887996928e+17,
263
- "train_loss": 0.5294164781570434,
264
- "train_runtime": 5156.396,
265
- "train_samples_per_second": 3.103,
266
- "train_steps_per_second": 0.194
267
  }
268
  ],
269
- "max_steps": 1000,
270
  "num_train_epochs": 9223372036854775807,
271
- "total_flos": 5.1887996928e+17,
272
  "trial_name": null,
273
  "trial_params": null
274
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 35.0165,
5
+ "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.02,
12
+ "learning_rate": 9.960000000000001e-06,
13
+ "loss": 0.5615,
14
+ "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  },
16
  {
17
+ "epoch": 3.02,
18
  "learning_rate": 1e-05,
19
+ "loss": 0.3273,
20
+ "step": 1000
21
  },
22
  {
23
+ "epoch": 3.02,
24
+ "eval_loss": 0.4225325584411621,
25
+ "eval_runtime": 1489.9228,
26
+ "eval_samples_per_second": 3.09,
27
+ "eval_steps_per_second": 0.193,
28
+ "eval_wer": 20.82533081951615,
29
+ "step": 1000
30
  },
31
  {
32
+ "epoch": 5.01,
33
  "learning_rate": 1e-05,
34
+ "loss": 0.1859,
35
+ "step": 1500
36
  },
37
  {
38
+ "epoch": 7.0,
39
  "learning_rate": 1e-05,
40
+ "loss": 0.0923,
41
+ "step": 2000
42
  },
43
  {
44
+ "epoch": 7.0,
45
+ "eval_loss": 0.46434369683265686,
46
+ "eval_runtime": 1504.5618,
47
+ "eval_samples_per_second": 3.06,
48
+ "eval_steps_per_second": 0.191,
49
+ "eval_wer": 21.22002361108664,
50
+ "step": 2000
51
  },
52
  {
53
+ "epoch": 8.03,
54
  "learning_rate": 1e-05,
55
+ "loss": 0.0369,
56
+ "step": 2500
57
  },
58
  {
59
+ "epoch": 10.02,
60
  "learning_rate": 1e-05,
61
+ "loss": 0.0164,
62
+ "step": 3000
63
  },
64
  {
65
+ "epoch": 10.02,
66
+ "eval_loss": 0.5403426885604858,
67
+ "eval_runtime": 1504.7378,
68
+ "eval_samples_per_second": 3.06,
69
+ "eval_steps_per_second": 0.191,
70
+ "eval_wer": 22.962662766725987,
71
+ "step": 3000
72
  },
73
  {
74
+ "epoch": 12.01,
75
  "learning_rate": 1e-05,
76
+ "loss": 0.0089,
77
+ "step": 3500
78
  },
79
  {
80
+ "epoch": 14.01,
81
  "learning_rate": 1e-05,
82
+ "loss": 0.006,
83
+ "step": 4000
84
  },
85
  {
86
+ "epoch": 14.01,
87
+ "eval_loss": 0.5819889307022095,
88
+ "eval_runtime": 1478.853,
89
+ "eval_samples_per_second": 3.113,
90
+ "eval_steps_per_second": 0.195,
91
+ "eval_wer": 21.086109985375224,
92
+ "step": 4000
93
  },
94
  {
95
+ "epoch": 16.0,
96
  "learning_rate": 1e-05,
97
+ "loss": 0.0057,
98
+ "step": 4500
99
  },
100
  {
101
+ "epoch": 17.02,
102
  "learning_rate": 1e-05,
103
+ "loss": 0.0046,
104
+ "step": 5000
105
  },
106
  {
107
+ "epoch": 17.02,
108
+ "eval_loss": 0.5851565003395081,
109
+ "eval_runtime": 1532.3442,
110
+ "eval_samples_per_second": 3.005,
111
+ "eval_steps_per_second": 0.188,
112
+ "eval_wer": 22.072841964301446,
113
+ "step": 5000
114
  },
115
  {
116
+ "epoch": 19.02,
117
  "learning_rate": 1e-05,
118
+ "loss": 0.0042,
119
+ "step": 5500
120
  },
121
  {
122
+ "epoch": 21.01,
123
  "learning_rate": 1e-05,
124
+ "loss": 0.0034,
125
+ "step": 6000
126
  },
127
  {
128
+ "epoch": 21.01,
129
+ "eval_loss": 0.6112661361694336,
130
+ "eval_runtime": 1497.7325,
131
+ "eval_samples_per_second": 3.074,
132
+ "eval_steps_per_second": 0.192,
133
+ "eval_wer": 21.6622909802125,
134
+ "step": 6000
135
  },
136
  {
137
+ "epoch": 23.0,
138
  "learning_rate": 1e-05,
139
+ "loss": 0.004,
140
+ "step": 6500
141
  },
142
  {
143
+ "epoch": 24.03,
144
  "learning_rate": 1e-05,
145
+ "loss": 0.0028,
146
+ "step": 7000
147
  },
148
  {
149
+ "epoch": 24.03,
150
+ "eval_loss": 0.6582160592079163,
151
+ "eval_runtime": 1517.5965,
152
+ "eval_samples_per_second": 3.034,
153
+ "eval_steps_per_second": 0.19,
154
+ "eval_wer": 22.326573044596763,
155
+ "step": 7000
 
 
 
156
  },
157
  {
158
+ "epoch": 26.02,
159
  "learning_rate": 1e-05,
160
+ "loss": 0.0027,
161
+ "step": 7500
162
  },
163
  {
164
+ "epoch": 28.01,
165
  "learning_rate": 1e-05,
166
+ "loss": 0.0025,
167
+ "step": 8000
168
  },
169
  {
170
+ "epoch": 28.01,
171
+ "eval_loss": 0.6349595785140991,
172
+ "eval_runtime": 1539.4689,
173
+ "eval_samples_per_second": 2.991,
174
+ "eval_steps_per_second": 0.187,
175
+ "eval_wer": 22.23318591087696,
176
+ "step": 8000
177
  },
178
  {
179
+ "epoch": 30.01,
180
  "learning_rate": 1e-05,
181
+ "loss": 0.0027,
182
+ "step": 8500
183
  },
184
  {
185
+ "epoch": 32.0,
186
  "learning_rate": 1e-05,
187
+ "loss": 0.0029,
188
+ "step": 9000
189
  },
190
  {
191
+ "epoch": 32.0,
192
+ "eval_loss": 0.646816074848175,
193
+ "eval_runtime": 1531.6588,
194
+ "eval_samples_per_second": 3.006,
195
+ "eval_steps_per_second": 0.188,
196
+ "eval_wer": 22.10984441351118,
197
+ "step": 9000
198
  },
199
  {
200
+ "epoch": 33.02,
201
  "learning_rate": 1e-05,
202
+ "loss": 0.0021,
203
+ "step": 9500
204
  },
205
  {
206
+ "epoch": 35.02,
207
  "learning_rate": 1e-05,
208
+ "loss": 0.0014,
209
+ "step": 10000
210
  },
211
  {
212
+ "epoch": 35.02,
213
+ "eval_loss": 0.6532349586486816,
214
+ "eval_runtime": 1526.2981,
215
+ "eval_samples_per_second": 3.016,
216
+ "eval_steps_per_second": 0.189,
217
+ "eval_wer": 21.991788980318223,
218
+ "step": 10000
 
 
 
 
 
 
219
  },
220
  {
221
+ "epoch": 35.02,
222
+ "step": 10000,
223
+ "total_flos": 2.07052565741568e+19,
224
+ "train_loss": 0.06371040197610855,
225
+ "train_runtime": 79589.3253,
226
+ "train_samples_per_second": 4.021,
227
+ "train_steps_per_second": 0.126
228
  }
229
  ],
230
+ "max_steps": 10000,
231
  "num_train_epochs": 9223372036854775807,
232
+ "total_flos": 2.07052565741568e+19,
233
  "trial_name": null,
234
  "trial_params": null
235
  }