josesantorcuato commited on
Commit
af9940f
1 Parent(s): fbc7c55

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.22972972972973,
3
- "eval_accuracy": 0.8903225806451613,
4
- "eval_loss": 0.4209001660346985,
5
- "eval_runtime": 15.6361,
6
- "eval_samples_per_second": 9.913,
7
- "eval_steps_per_second": 1.279
8
  }
 
1
  {
2
+ "epoch": 4.180952380952381,
3
+ "eval_accuracy": 0.8395061728395061,
4
+ "eval_loss": 0.38502195477485657,
5
+ "eval_runtime": 14.6842,
6
+ "eval_samples_per_second": 11.032,
7
+ "eval_steps_per_second": 1.43
8
  }
runs/Oct22_17-35-56_8e489873f98b/events.out.tfevents.1729619014.8e489873f98b.9878.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47a36eda906cf555f668758ce5203e214667baabdec9779c85890d4a76f8a2e1
3
- size 411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43ba3f9ec4e6ba556fd82059e569fc67ea6478a89e2bdf76bd106842ec7e0cde
3
+ size 734
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.22972972972973,
3
- "eval_accuracy": 0.8903225806451613,
4
- "eval_loss": 0.4209001660346985,
5
- "eval_runtime": 15.6361,
6
- "eval_samples_per_second": 9.913,
7
- "eval_steps_per_second": 1.279
8
  }
 
1
  {
2
+ "epoch": 4.180952380952381,
3
+ "eval_accuracy": 0.8395061728395061,
4
+ "eval_loss": 0.38502195477485657,
5
+ "eval_runtime": 14.6842,
6
+ "eval_samples_per_second": 11.032,
7
+ "eval_steps_per_second": 1.43
8
  }
trainer_state.json CHANGED
@@ -1,177 +1,235 @@
1
  {
2
- "best_metric": 0.8857142857142857,
3
- "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-148",
4
- "epoch": 3.22972972972973,
5
  "eval_steps": 500,
6
- "global_step": 148,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.06756756756756757,
13
- "grad_norm": 6.64535665512085,
14
- "learning_rate": 3.3333333333333335e-05,
15
- "loss": 2.3547,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.13513513513513514,
20
- "grad_norm": 6.657472610473633,
21
- "learning_rate": 4.81203007518797e-05,
22
- "loss": 2.1896,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.20270270270270271,
27
- "grad_norm": 8.04188346862793,
28
- "learning_rate": 4.43609022556391e-05,
29
- "loss": 2.0919,
30
  "step": 30
31
  },
32
  {
33
- "epoch": 0.25675675675675674,
34
- "eval_accuracy": 0.5285714285714286,
35
- "eval_loss": 1.8006081581115723,
36
- "eval_runtime": 7.0865,
37
- "eval_samples_per_second": 9.878,
38
- "eval_steps_per_second": 1.27,
39
- "step": 38
40
  },
41
  {
42
- "epoch": 1.0135135135135136,
43
- "grad_norm": 10.020313262939453,
44
- "learning_rate": 4.0601503759398494e-05,
45
- "loss": 1.9064,
46
- "step": 40
 
 
47
  },
48
  {
49
- "epoch": 1.0810810810810811,
50
- "grad_norm": 11.511276245117188,
51
- "learning_rate": 3.6842105263157895e-05,
52
- "loss": 1.5879,
53
  "step": 50
54
  },
55
  {
56
- "epoch": 1.1486486486486487,
57
- "grad_norm": 10.105522155761719,
58
- "learning_rate": 3.3082706766917295e-05,
59
- "loss": 1.2609,
60
  "step": 60
61
  },
62
  {
63
- "epoch": 1.2162162162162162,
64
- "grad_norm": 5.39172887802124,
65
- "learning_rate": 2.9323308270676693e-05,
66
- "loss": 0.9101,
67
  "step": 70
68
  },
69
  {
70
- "epoch": 1.2567567567567568,
71
- "eval_accuracy": 0.6857142857142857,
72
- "eval_loss": 0.9759382009506226,
73
- "eval_runtime": 6.7289,
74
- "eval_samples_per_second": 10.403,
75
- "eval_steps_per_second": 1.338,
76
- "step": 76
77
  },
78
  {
79
- "epoch": 2.027027027027027,
80
- "grad_norm": 6.035886764526367,
81
- "learning_rate": 2.556390977443609e-05,
82
- "loss": 0.9083,
83
- "step": 80
 
 
84
  },
85
  {
86
- "epoch": 2.0945945945945947,
87
- "grad_norm": 17.813278198242188,
88
- "learning_rate": 2.1804511278195487e-05,
89
- "loss": 0.6671,
90
  "step": 90
91
  },
92
  {
93
- "epoch": 2.1621621621621623,
94
- "grad_norm": 11.762858390808105,
95
- "learning_rate": 1.8045112781954888e-05,
96
- "loss": 0.5856,
97
  "step": 100
98
  },
99
  {
100
- "epoch": 2.22972972972973,
101
- "grad_norm": 4.943543434143066,
102
- "learning_rate": 1.4285714285714285e-05,
103
- "loss": 0.4855,
104
  "step": 110
105
  },
106
  {
107
- "epoch": 2.2567567567567566,
108
- "eval_accuracy": 0.8285714285714286,
109
- "eval_loss": 0.5223344564437866,
110
- "eval_runtime": 6.5279,
111
- "eval_samples_per_second": 10.723,
112
- "eval_steps_per_second": 1.379,
113
- "step": 114
114
  },
115
  {
116
- "epoch": 3.0405405405405403,
117
- "grad_norm": 3.349546194076538,
118
- "learning_rate": 1.0526315789473684e-05,
119
- "loss": 0.3393,
120
- "step": 120
 
 
121
  },
122
  {
123
- "epoch": 3.108108108108108,
124
- "grad_norm": 2.945603609085083,
125
- "learning_rate": 6.766917293233083e-06,
126
- "loss": 0.2937,
127
  "step": 130
128
  },
129
  {
130
- "epoch": 3.175675675675676,
131
- "grad_norm": 7.3917717933654785,
132
- "learning_rate": 3.007518796992481e-06,
133
- "loss": 0.2876,
134
  "step": 140
135
  },
136
  {
137
- "epoch": 3.22972972972973,
138
- "eval_accuracy": 0.8857142857142857,
139
- "eval_loss": 0.40336811542510986,
140
- "eval_runtime": 8.2936,
141
- "eval_samples_per_second": 8.44,
142
- "eval_steps_per_second": 1.085,
143
- "step": 148
144
- },
145
- {
146
- "epoch": 3.22972972972973,
147
- "step": 148,
148
- "total_flos": 1.460491890402263e+18,
149
- "train_loss": 1.086421162695498,
150
- "train_runtime": 248.8416,
151
- "train_samples_per_second": 4.758,
152
- "train_steps_per_second": 0.595
153
- },
154
- {
155
- "epoch": 3.22972972972973,
156
- "eval_accuracy": 0.8903225806451613,
157
- "eval_loss": 0.4209001958370209,
158
- "eval_runtime": 16.3349,
159
- "eval_samples_per_second": 9.489,
160
- "eval_steps_per_second": 1.224,
161
- "step": 148
162
- },
163
- {
164
- "epoch": 3.22972972972973,
165
- "eval_accuracy": 0.8903225806451613,
166
- "eval_loss": 0.4209001660346985,
167
- "eval_runtime": 15.6361,
168
- "eval_samples_per_second": 9.913,
169
- "eval_steps_per_second": 1.279,
170
- "step": 148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  }
172
  ],
173
  "logging_steps": 10,
174
- "max_steps": 148,
175
  "num_input_tokens_seen": 0,
176
  "num_train_epochs": 9223372036854775807,
177
  "save_steps": 500,
@@ -187,7 +245,7 @@
187
  "attributes": {}
188
  }
189
  },
190
- "total_flos": 1.460491890402263e+18,
191
  "train_batch_size": 8,
192
  "trial_name": null,
193
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9066666666666666,
3
+ "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-210",
4
+ "epoch": 4.180952380952381,
5
  "eval_steps": 500,
6
+ "global_step": 210,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.047619047619047616,
13
+ "grad_norm": 7.849704265594482,
14
+ "learning_rate": 2.380952380952381e-05,
15
+ "loss": 2.3976,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.09523809523809523,
20
+ "grad_norm": 7.76956033706665,
21
+ "learning_rate": 4.761904761904762e-05,
22
+ "loss": 2.3357,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 0.14285714285714285,
27
+ "grad_norm": 15.013307571411133,
28
+ "learning_rate": 4.761904761904762e-05,
29
+ "loss": 2.282,
30
  "step": 30
31
  },
32
  {
33
+ "epoch": 0.19047619047619047,
34
+ "grad_norm": 7.262270450592041,
35
+ "learning_rate": 4.4973544973544974e-05,
36
+ "loss": 2.0368,
37
+ "step": 40
 
 
38
  },
39
  {
40
+ "epoch": 0.20476190476190476,
41
+ "eval_accuracy": 0.29333333333333333,
42
+ "eval_loss": 2.0273361206054688,
43
+ "eval_runtime": 7.264,
44
+ "eval_samples_per_second": 10.325,
45
+ "eval_steps_per_second": 1.377,
46
+ "step": 43
47
  },
48
  {
49
+ "epoch": 1.0333333333333334,
50
+ "grad_norm": 8.3132963180542,
51
+ "learning_rate": 4.232804232804233e-05,
52
+ "loss": 1.947,
53
  "step": 50
54
  },
55
  {
56
+ "epoch": 1.0809523809523809,
57
+ "grad_norm": 14.42203426361084,
58
+ "learning_rate": 3.968253968253968e-05,
59
+ "loss": 1.594,
60
  "step": 60
61
  },
62
  {
63
+ "epoch": 1.1285714285714286,
64
+ "grad_norm": 11.382922172546387,
65
+ "learning_rate": 3.7037037037037037e-05,
66
+ "loss": 1.2509,
67
  "step": 70
68
  },
69
  {
70
+ "epoch": 1.1761904761904762,
71
+ "grad_norm": 9.775425910949707,
72
+ "learning_rate": 3.439153439153439e-05,
73
+ "loss": 1.1679,
74
+ "step": 80
 
 
75
  },
76
  {
77
+ "epoch": 1.2047619047619047,
78
+ "eval_accuracy": 0.56,
79
+ "eval_loss": 1.0918534994125366,
80
+ "eval_runtime": 7.1505,
81
+ "eval_samples_per_second": 10.489,
82
+ "eval_steps_per_second": 1.398,
83
+ "step": 86
84
  },
85
  {
86
+ "epoch": 2.019047619047619,
87
+ "grad_norm": 16.352764129638672,
88
+ "learning_rate": 3.1746031746031745e-05,
89
+ "loss": 1.0361,
90
  "step": 90
91
  },
92
  {
93
+ "epoch": 2.066666666666667,
94
+ "grad_norm": 5.250826358795166,
95
+ "learning_rate": 2.91005291005291e-05,
96
+ "loss": 0.7379,
97
  "step": 100
98
  },
99
  {
100
+ "epoch": 2.1142857142857143,
101
+ "grad_norm": 11.715250968933105,
102
+ "learning_rate": 2.6455026455026456e-05,
103
+ "loss": 0.7289,
104
  "step": 110
105
  },
106
  {
107
+ "epoch": 2.1619047619047618,
108
+ "grad_norm": 16.230215072631836,
109
+ "learning_rate": 2.380952380952381e-05,
110
+ "loss": 0.5097,
111
+ "step": 120
 
 
112
  },
113
  {
114
+ "epoch": 2.204761904761905,
115
+ "eval_accuracy": 0.8266666666666667,
116
+ "eval_loss": 0.5804122090339661,
117
+ "eval_runtime": 6.6096,
118
+ "eval_samples_per_second": 11.347,
119
+ "eval_steps_per_second": 1.513,
120
+ "step": 129
121
  },
122
  {
123
+ "epoch": 3.0047619047619047,
124
+ "grad_norm": 18.1605224609375,
125
+ "learning_rate": 2.1164021164021164e-05,
126
+ "loss": 0.4609,
127
  "step": 130
128
  },
129
  {
130
+ "epoch": 3.052380952380952,
131
+ "grad_norm": 7.160711288452148,
132
+ "learning_rate": 1.8518518518518518e-05,
133
+ "loss": 0.3043,
134
  "step": 140
135
  },
136
  {
137
+ "epoch": 3.1,
138
+ "grad_norm": 14.182634353637695,
139
+ "learning_rate": 1.5873015873015872e-05,
140
+ "loss": 0.336,
141
+ "step": 150
142
+ },
143
+ {
144
+ "epoch": 3.1476190476190475,
145
+ "grad_norm": 2.916877031326294,
146
+ "learning_rate": 1.3227513227513228e-05,
147
+ "loss": 0.2992,
148
+ "step": 160
149
+ },
150
+ {
151
+ "epoch": 3.1952380952380954,
152
+ "grad_norm": 8.268482208251953,
153
+ "learning_rate": 1.0582010582010582e-05,
154
+ "loss": 0.2293,
155
+ "step": 170
156
+ },
157
+ {
158
+ "epoch": 3.204761904761905,
159
+ "eval_accuracy": 0.8266666666666667,
160
+ "eval_loss": 0.4854774475097656,
161
+ "eval_runtime": 6.5331,
162
+ "eval_samples_per_second": 11.48,
163
+ "eval_steps_per_second": 1.531,
164
+ "step": 172
165
+ },
166
+ {
167
+ "epoch": 4.038095238095238,
168
+ "grad_norm": 7.389148712158203,
169
+ "learning_rate": 7.936507936507936e-06,
170
+ "loss": 0.3101,
171
+ "step": 180
172
+ },
173
+ {
174
+ "epoch": 4.085714285714285,
175
+ "grad_norm": 4.5858259201049805,
176
+ "learning_rate": 5.291005291005291e-06,
177
+ "loss": 0.1873,
178
+ "step": 190
179
+ },
180
+ {
181
+ "epoch": 4.133333333333334,
182
+ "grad_norm": 2.300278425216675,
183
+ "learning_rate": 2.6455026455026455e-06,
184
+ "loss": 0.2008,
185
+ "step": 200
186
+ },
187
+ {
188
+ "epoch": 4.180952380952381,
189
+ "grad_norm": 2.982243537902832,
190
+ "learning_rate": 0.0,
191
+ "loss": 0.1882,
192
+ "step": 210
193
+ },
194
+ {
195
+ "epoch": 4.180952380952381,
196
+ "eval_accuracy": 0.9066666666666666,
197
+ "eval_loss": 0.3435481786727905,
198
+ "eval_runtime": 6.9489,
199
+ "eval_samples_per_second": 10.793,
200
+ "eval_steps_per_second": 1.439,
201
+ "step": 210
202
+ },
203
+ {
204
+ "epoch": 4.180952380952381,
205
+ "step": 210,
206
+ "total_flos": 2.0736178988651643e+18,
207
+ "train_loss": 0.9781298257055737,
208
+ "train_runtime": 354.3465,
209
+ "train_samples_per_second": 4.741,
210
+ "train_steps_per_second": 0.593
211
+ },
212
+ {
213
+ "epoch": 4.180952380952381,
214
+ "eval_accuracy": 0.8395061728395061,
215
+ "eval_loss": 0.3850219249725342,
216
+ "eval_runtime": 60.1785,
217
+ "eval_samples_per_second": 2.692,
218
+ "eval_steps_per_second": 0.349,
219
+ "step": 210
220
+ },
221
+ {
222
+ "epoch": 4.180952380952381,
223
+ "eval_accuracy": 0.8395061728395061,
224
+ "eval_loss": 0.38502195477485657,
225
+ "eval_runtime": 14.6842,
226
+ "eval_samples_per_second": 11.032,
227
+ "eval_steps_per_second": 1.43,
228
+ "step": 210
229
  }
230
  ],
231
  "logging_steps": 10,
232
+ "max_steps": 210,
233
  "num_input_tokens_seen": 0,
234
  "num_train_epochs": 9223372036854775807,
235
  "save_steps": 500,
 
245
  "attributes": {}
246
  }
247
  },
248
+ "total_flos": 2.0736178988651643e+18,
249
  "train_batch_size": 8,
250
  "trial_name": null,
251
  "trial_params": null