alexgrigore commited on
Commit
ad35605
1 Parent(s): 9662f4e

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. test_results.json +5 -5
  3. trainer_state.json +153 -233
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.242021276595745,
3
  "eval_accuracy": 0.7633136094674556,
4
- "eval_loss": 0.875207781791687,
5
- "eval_runtime": 11.2012,
6
- "eval_samples_per_second": 15.088,
7
- "eval_steps_per_second": 1.964
8
  }
 
1
  {
2
+ "epoch": 4.125,
3
  "eval_accuracy": 0.7633136094674556,
4
+ "eval_loss": 0.8172227144241333,
5
+ "eval_runtime": 11.0103,
6
+ "eval_samples_per_second": 15.349,
7
+ "eval_steps_per_second": 1.998
8
  }
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.242021276595745,
3
  "eval_accuracy": 0.7633136094674556,
4
- "eval_loss": 0.875207781791687,
5
- "eval_runtime": 11.2012,
6
- "eval_samples_per_second": 15.088,
7
- "eval_steps_per_second": 1.964
8
  }
 
1
  {
2
+ "epoch": 4.125,
3
  "eval_accuracy": 0.7633136094674556,
4
+ "eval_loss": 0.8172227144241333,
5
+ "eval_runtime": 11.0103,
6
+ "eval_samples_per_second": 15.349,
7
+ "eval_steps_per_second": 1.998
8
  }
trainer_state.json CHANGED
@@ -1,334 +1,245 @@
1
  {
2
  "best_metric": 0.7875,
3
  "best_model_checkpoint": "videomae-base-finetuned-gesturePhasev2/checkpoint-95",
4
- "epoch": 3.242021276595745,
5
  "eval_steps": 500,
6
- "global_step": 376,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.026595744680851064,
13
- "grad_norm": 13.211409568786621,
14
- "learning_rate": 2.631578947368421e-06,
15
- "loss": 1.594,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.05319148936170213,
20
- "grad_norm": 9.70760726928711,
21
- "learning_rate": 5.263157894736842e-06,
22
- "loss": 1.4073,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.0797872340425532,
27
- "grad_norm": 5.945734977722168,
28
- "learning_rate": 7.894736842105265e-06,
29
- "loss": 1.1347,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.10638297872340426,
34
- "grad_norm": 8.433605194091797,
35
- "learning_rate": 9.940828402366864e-06,
36
- "loss": 0.8755,
37
  "step": 40
38
  },
39
  {
40
- "epoch": 0.13297872340425532,
41
- "grad_norm": 5.273907661437988,
42
- "learning_rate": 9.644970414201184e-06,
43
- "loss": 0.6064,
 
 
 
 
 
 
 
 
 
44
  "step": 50
45
  },
46
  {
47
- "epoch": 0.1595744680851064,
48
- "grad_norm": 6.961372375488281,
49
- "learning_rate": 9.349112426035503e-06,
50
- "loss": 0.9865,
51
  "step": 60
52
  },
53
  {
54
- "epoch": 0.18617021276595744,
55
- "grad_norm": 6.787417411804199,
56
- "learning_rate": 9.053254437869822e-06,
57
- "loss": 0.7214,
58
  "step": 70
59
  },
60
  {
61
- "epoch": 0.2127659574468085,
62
- "grad_norm": 4.117732048034668,
63
- "learning_rate": 8.757396449704143e-06,
64
- "loss": 0.5468,
65
  "step": 80
66
  },
67
  {
68
- "epoch": 0.2393617021276596,
69
- "grad_norm": 5.041596412658691,
70
- "learning_rate": 8.461538461538462e-06,
71
- "loss": 0.806,
72
  "step": 90
73
  },
74
  {
75
- "epoch": 0.2526595744680851,
76
  "eval_accuracy": 0.7875,
77
- "eval_loss": 0.8249056935310364,
78
- "eval_runtime": 22.7472,
79
- "eval_samples_per_second": 7.034,
80
- "eval_steps_per_second": 0.879,
81
  "step": 95
82
  },
83
  {
84
- "epoch": 1.0132978723404256,
85
- "grad_norm": 3.2331671714782715,
86
- "learning_rate": 8.165680473372781e-06,
87
- "loss": 0.6714,
88
  "step": 100
89
  },
90
  {
91
- "epoch": 1.0398936170212767,
92
- "grad_norm": 5.518906116485596,
93
- "learning_rate": 7.869822485207102e-06,
94
- "loss": 0.7709,
95
  "step": 110
96
  },
97
  {
98
- "epoch": 1.0664893617021276,
99
- "grad_norm": 4.540952205657959,
100
- "learning_rate": 7.573964497041421e-06,
101
- "loss": 0.7454,
102
  "step": 120
103
  },
104
  {
105
- "epoch": 1.0930851063829787,
106
- "grad_norm": 5.326303482055664,
107
- "learning_rate": 7.278106508875741e-06,
108
- "loss": 0.6977,
109
  "step": 130
110
  },
111
  {
112
- "epoch": 1.1196808510638299,
113
- "grad_norm": 9.28279972076416,
114
- "learning_rate": 6.98224852071006e-06,
115
- "loss": 0.6382,
116
  "step": 140
117
  },
118
  {
119
- "epoch": 1.1462765957446808,
120
- "grad_norm": 3.8069820404052734,
121
- "learning_rate": 6.686390532544379e-06,
122
- "loss": 0.7963,
 
 
 
 
 
 
 
 
 
123
  "step": 150
124
  },
125
  {
126
- "epoch": 1.172872340425532,
127
- "grad_norm": 3.917985677719116,
128
- "learning_rate": 6.3905325443786995e-06,
129
- "loss": 0.6782,
130
  "step": 160
131
  },
132
  {
133
- "epoch": 1.199468085106383,
134
- "grad_norm": 5.1802802085876465,
135
- "learning_rate": 6.0946745562130185e-06,
136
- "loss": 0.6922,
137
  "step": 170
138
  },
139
  {
140
- "epoch": 1.226063829787234,
141
- "grad_norm": 2.9730751514434814,
142
- "learning_rate": 5.7988165680473375e-06,
143
- "loss": 0.7682,
144
  "step": 180
145
  },
146
  {
147
- "epoch": 1.252659574468085,
148
- "grad_norm": 6.959865570068359,
149
- "learning_rate": 5.502958579881657e-06,
150
- "loss": 0.8453,
151
  "step": 190
152
  },
153
  {
154
- "epoch": 1.252659574468085,
155
  "eval_accuracy": 0.7875,
156
- "eval_loss": 0.8071285486221313,
157
- "eval_runtime": 10.9141,
158
- "eval_samples_per_second": 14.66,
159
- "eval_steps_per_second": 1.832,
160
  "step": 190
161
  },
162
  {
163
- "epoch": 2.026595744680851,
164
- "grad_norm": 3.4485950469970703,
165
- "learning_rate": 5.207100591715976e-06,
166
- "loss": 0.7081,
167
  "step": 200
168
  },
169
  {
170
- "epoch": 2.0531914893617023,
171
- "grad_norm": 3.514798164367676,
172
- "learning_rate": 4.911242603550296e-06,
173
- "loss": 0.6794,
174
  "step": 210
175
  },
176
  {
177
- "epoch": 2.0797872340425534,
178
- "grad_norm": 4.564563751220703,
179
- "learning_rate": 4.615384615384616e-06,
180
- "loss": 0.8428,
181
  "step": 220
182
  },
183
  {
184
- "epoch": 2.106382978723404,
185
- "grad_norm": 5.189121723175049,
186
- "learning_rate": 4.319526627218935e-06,
187
- "loss": 0.8315,
188
  "step": 230
189
  },
190
  {
191
- "epoch": 2.132978723404255,
192
- "grad_norm": 6.5788044929504395,
193
- "learning_rate": 4.023668639053255e-06,
194
- "loss": 0.6639,
195
- "step": 240
196
- },
197
- {
198
- "epoch": 2.1595744680851063,
199
- "grad_norm": 6.100628852844238,
200
- "learning_rate": 3.7278106508875745e-06,
201
- "loss": 0.7014,
202
- "step": 250
203
- },
204
- {
205
- "epoch": 2.1861702127659575,
206
- "grad_norm": 5.754176616668701,
207
- "learning_rate": 3.4319526627218935e-06,
208
- "loss": 0.7219,
209
- "step": 260
210
- },
211
- {
212
- "epoch": 2.2127659574468086,
213
- "grad_norm": 6.1187424659729,
214
- "learning_rate": 3.1360946745562134e-06,
215
- "loss": 0.5346,
216
- "step": 270
217
- },
218
- {
219
- "epoch": 2.2393617021276597,
220
- "grad_norm": 4.662169933319092,
221
- "learning_rate": 2.840236686390533e-06,
222
- "loss": 0.8814,
223
- "step": 280
224
- },
225
- {
226
- "epoch": 2.252659574468085,
227
  "eval_accuracy": 0.7875,
228
- "eval_loss": 0.8307636380195618,
229
- "eval_runtime": 10.9219,
230
- "eval_samples_per_second": 14.649,
231
- "eval_steps_per_second": 1.831,
232
- "step": 285
233
- },
234
- {
235
- "epoch": 3.0132978723404253,
236
- "grad_norm": 5.648519992828369,
237
- "learning_rate": 2.5443786982248527e-06,
238
- "loss": 0.601,
239
- "step": 290
240
- },
241
- {
242
- "epoch": 3.0398936170212765,
243
- "grad_norm": 3.8109617233276367,
244
- "learning_rate": 2.2485207100591717e-06,
245
- "loss": 0.618,
246
- "step": 300
247
  },
248
  {
249
- "epoch": 3.0664893617021276,
250
- "grad_norm": 4.005825519561768,
251
- "learning_rate": 1.952662721893491e-06,
252
- "loss": 0.6362,
253
- "step": 310
 
 
254
  },
255
  {
256
- "epoch": 3.0930851063829787,
257
- "grad_norm": 6.642380237579346,
258
- "learning_rate": 1.656804733727811e-06,
259
- "loss": 0.8369,
260
- "step": 320
261
- },
262
- {
263
- "epoch": 3.11968085106383,
264
- "grad_norm": 4.232272624969482,
265
- "learning_rate": 1.3609467455621303e-06,
266
- "loss": 0.7755,
267
- "step": 330
268
- },
269
- {
270
- "epoch": 3.146276595744681,
271
- "grad_norm": 3.2232885360717773,
272
- "learning_rate": 1.06508875739645e-06,
273
- "loss": 0.8827,
274
- "step": 340
275
- },
276
- {
277
- "epoch": 3.172872340425532,
278
- "grad_norm": 5.880322456359863,
279
- "learning_rate": 7.692307692307694e-07,
280
- "loss": 0.6081,
281
- "step": 350
282
- },
283
- {
284
- "epoch": 3.199468085106383,
285
- "grad_norm": 6.9393310546875,
286
- "learning_rate": 4.733727810650888e-07,
287
- "loss": 0.8163,
288
- "step": 360
289
- },
290
- {
291
- "epoch": 3.226063829787234,
292
- "grad_norm": 6.549450874328613,
293
- "learning_rate": 1.775147928994083e-07,
294
- "loss": 0.8272,
295
- "step": 370
296
- },
297
- {
298
- "epoch": 3.242021276595745,
299
- "eval_accuracy": 0.7875,
300
- "eval_loss": 0.8259578943252563,
301
- "eval_runtime": 84.7179,
302
- "eval_samples_per_second": 1.889,
303
- "eval_steps_per_second": 0.236,
304
- "step": 376
305
- },
306
- {
307
- "epoch": 3.242021276595745,
308
- "step": 376,
309
- "total_flos": 3.7220613152994755e+18,
310
- "train_loss": 0.7830839626332546,
311
- "train_runtime": 814.9801,
312
- "train_samples_per_second": 3.691,
313
- "train_steps_per_second": 0.461
314
- },
315
- {
316
- "epoch": 3.242021276595745,
317
  "eval_accuracy": 0.7633136094674556,
318
- "eval_loss": 0.8752076625823975,
319
- "eval_runtime": 14.0179,
320
- "eval_samples_per_second": 12.056,
321
- "eval_steps_per_second": 1.569,
322
- "step": 376
323
  },
324
  {
325
- "epoch": 3.242021276595745,
326
  "eval_accuracy": 0.7633136094674556,
327
- "eval_loss": 0.875207781791687,
328
- "eval_runtime": 11.2012,
329
- "eval_samples_per_second": 15.088,
330
- "eval_steps_per_second": 1.964,
331
- "step": 376
332
  }
333
  ],
334
  "logging_steps": 10,
@@ -337,6 +248,15 @@
337
  "num_train_epochs": 9223372036854775807,
338
  "save_steps": 500,
339
  "stateful_callbacks": {
 
 
 
 
 
 
 
 
 
340
  "TrainerControl": {
341
  "args": {
342
  "should_epoch_stop": false,
@@ -348,7 +268,7 @@
348
  "attributes": {}
349
  }
350
  },
351
- "total_flos": 3.7220613152994755e+18,
352
  "train_batch_size": 8,
353
  "trial_name": null,
354
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7875,
3
  "best_model_checkpoint": "videomae-base-finetuned-gesturePhasev2/checkpoint-95",
4
+ "epoch": 4.125,
5
  "eval_steps": 500,
6
+ "global_step": 237,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.026595744680851064,
13
+ "grad_norm": 9.288494110107422,
14
+ "learning_rate": 6.578947368421053e-07,
15
+ "loss": 1.4443,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.05319148936170213,
20
+ "grad_norm": 10.334871292114258,
21
+ "learning_rate": 1.3157894736842106e-06,
22
+ "loss": 1.418,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.0797872340425532,
27
+ "grad_norm": 7.731433391571045,
28
+ "learning_rate": 1.973684210526316e-06,
29
+ "loss": 1.3362,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.10638297872340426,
34
+ "grad_norm": 10.242834091186523,
35
+ "learning_rate": 2.631578947368421e-06,
36
+ "loss": 1.1665,
37
  "step": 40
38
  },
39
  {
40
+ "epoch": 0.125,
41
+ "eval_accuracy": 0.775,
42
+ "eval_loss": 1.0642327070236206,
43
+ "eval_runtime": 15.1556,
44
+ "eval_samples_per_second": 10.557,
45
+ "eval_steps_per_second": 1.32,
46
+ "step": 47
47
+ },
48
+ {
49
+ "epoch": 1.0066489361702127,
50
+ "grad_norm": 5.333619594573975,
51
+ "learning_rate": 3.289473684210527e-06,
52
+ "loss": 1.0034,
53
  "step": 50
54
  },
55
  {
56
+ "epoch": 1.0332446808510638,
57
+ "grad_norm": 4.666859149932861,
58
+ "learning_rate": 3.947368421052632e-06,
59
+ "loss": 0.8562,
60
  "step": 60
61
  },
62
  {
63
+ "epoch": 1.059840425531915,
64
+ "grad_norm": 4.381842136383057,
65
+ "learning_rate": 4.605263157894737e-06,
66
+ "loss": 0.7196,
67
  "step": 70
68
  },
69
  {
70
+ "epoch": 1.086436170212766,
71
+ "grad_norm": 4.5117340087890625,
72
+ "learning_rate": 4.997807075247147e-06,
73
+ "loss": 0.7522,
74
  "step": 80
75
  },
76
  {
77
+ "epoch": 1.113031914893617,
78
+ "grad_norm": 3.7782816886901855,
79
+ "learning_rate": 4.973180832407471e-06,
80
+ "loss": 0.7316,
81
  "step": 90
82
  },
83
  {
84
+ "epoch": 1.1263297872340425,
85
  "eval_accuracy": 0.7875,
86
+ "eval_loss": 0.7825992703437805,
87
+ "eval_runtime": 11.6767,
88
+ "eval_samples_per_second": 13.703,
89
+ "eval_steps_per_second": 1.713,
90
  "step": 95
91
  },
92
  {
93
+ "epoch": 2.0132978723404253,
94
+ "grad_norm": 3.1058461666107178,
95
+ "learning_rate": 4.921457902821578e-06,
96
+ "loss": 0.785,
97
  "step": 100
98
  },
99
  {
100
+ "epoch": 2.0398936170212765,
101
+ "grad_norm": 3.0555734634399414,
102
+ "learning_rate": 4.84320497372973e-06,
103
+ "loss": 0.7795,
104
  "step": 110
105
  },
106
  {
107
+ "epoch": 2.0664893617021276,
108
+ "grad_norm": 3.8933866024017334,
109
+ "learning_rate": 4.7392794005985324e-06,
110
+ "loss": 0.774,
111
  "step": 120
112
  },
113
  {
114
+ "epoch": 2.0930851063829787,
115
+ "grad_norm": 4.3550591468811035,
116
+ "learning_rate": 4.610819813755038e-06,
117
+ "loss": 0.7218,
118
  "step": 130
119
  },
120
  {
121
+ "epoch": 2.11968085106383,
122
+ "grad_norm": 4.537750720977783,
123
+ "learning_rate": 4.4592336433146e-06,
124
+ "loss": 0.7259,
125
  "step": 140
126
  },
127
  {
128
+ "epoch": 2.125,
129
+ "eval_accuracy": 0.7875,
130
+ "eval_loss": 0.8042387962341309,
131
+ "eval_runtime": 10.5863,
132
+ "eval_samples_per_second": 15.114,
133
+ "eval_steps_per_second": 1.889,
134
+ "step": 142
135
+ },
136
+ {
137
+ "epoch": 3.0199468085106385,
138
+ "grad_norm": 3.3319995403289795,
139
+ "learning_rate": 4.286181699082008e-06,
140
+ "loss": 0.6222,
141
  "step": 150
142
  },
143
  {
144
+ "epoch": 3.046542553191489,
145
+ "grad_norm": 4.209911823272705,
146
+ "learning_rate": 4.093559974371725e-06,
147
+ "loss": 0.7533,
148
  "step": 160
149
  },
150
  {
151
+ "epoch": 3.0731382978723403,
152
+ "grad_norm": 2.9892210960388184,
153
+ "learning_rate": 3.88347887310836e-06,
154
+ "loss": 0.8408,
155
  "step": 170
156
  },
157
  {
158
+ "epoch": 3.0997340425531914,
159
+ "grad_norm": 4.7717814445495605,
160
+ "learning_rate": 3.658240087799655e-06,
161
+ "loss": 0.7287,
162
  "step": 180
163
  },
164
  {
165
+ "epoch": 3.1263297872340425,
166
+ "grad_norm": 5.496540546417236,
167
+ "learning_rate": 3.4203113817116955e-06,
168
+ "loss": 0.6643,
169
  "step": 190
170
  },
171
  {
172
+ "epoch": 3.1263297872340425,
173
  "eval_accuracy": 0.7875,
174
+ "eval_loss": 0.8022773861885071,
175
+ "eval_runtime": 10.4825,
176
+ "eval_samples_per_second": 15.264,
177
+ "eval_steps_per_second": 1.908,
178
  "step": 190
179
  },
180
  {
181
+ "epoch": 4.026595744680851,
182
+ "grad_norm": 3.0813848972320557,
183
+ "learning_rate": 3.1722995515381644e-06,
184
+ "loss": 0.6386,
185
  "step": 200
186
  },
187
  {
188
+ "epoch": 4.053191489361702,
189
+ "grad_norm": 3.0726428031921387,
190
+ "learning_rate": 2.9169218667902562e-06,
191
+ "loss": 0.6708,
192
  "step": 210
193
  },
194
  {
195
+ "epoch": 4.079787234042553,
196
+ "grad_norm": 5.47495174407959,
197
+ "learning_rate": 2.6569762988232838e-06,
198
+ "loss": 0.8216,
199
  "step": 220
200
  },
201
  {
202
+ "epoch": 4.1063829787234045,
203
+ "grad_norm": 3.801624059677124,
204
+ "learning_rate": 2.3953108656770018e-06,
205
+ "loss": 0.761,
206
  "step": 230
207
  },
208
  {
209
+ "epoch": 4.125,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  "eval_accuracy": 0.7875,
211
+ "eval_loss": 0.8077355623245239,
212
+ "eval_runtime": 16.5222,
213
+ "eval_samples_per_second": 9.684,
214
+ "eval_steps_per_second": 1.21,
215
+ "step": 237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  },
217
  {
218
+ "epoch": 4.125,
219
+ "step": 237,
220
+ "total_flos": 4.691516857081528e+18,
221
+ "train_loss": 0.8600665486814604,
222
+ "train_runtime": 867.0351,
223
+ "train_samples_per_second": 6.939,
224
+ "train_steps_per_second": 0.434
225
  },
226
  {
227
+ "epoch": 4.125,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  "eval_accuracy": 0.7633136094674556,
229
+ "eval_loss": 0.8172227144241333,
230
+ "eval_runtime": 60.5871,
231
+ "eval_samples_per_second": 2.789,
232
+ "eval_steps_per_second": 0.363,
233
+ "step": 237
234
  },
235
  {
236
+ "epoch": 4.125,
237
  "eval_accuracy": 0.7633136094674556,
238
+ "eval_loss": 0.8172227144241333,
239
+ "eval_runtime": 11.0103,
240
+ "eval_samples_per_second": 15.349,
241
+ "eval_steps_per_second": 1.998,
242
+ "step": 237
243
  }
244
  ],
245
  "logging_steps": 10,
 
248
  "num_train_epochs": 9223372036854775807,
249
  "save_steps": 500,
250
  "stateful_callbacks": {
251
+ "EarlyStoppingCallback": {
252
+ "args": {
253
+ "early_stopping_patience": 3,
254
+ "early_stopping_threshold": 0.0
255
+ },
256
+ "attributes": {
257
+ "early_stopping_patience_counter": 0
258
+ }
259
+ },
260
  "TrainerControl": {
261
  "args": {
262
  "should_epoch_stop": false,
 
268
  "attributes": {}
269
  }
270
  },
271
+ "total_flos": 4.691516857081528e+18,
272
  "train_batch_size": 8,
273
  "trial_name": null,
274
  "trial_params": null