paul commited on
Commit
f2489e4
1 Parent(s): d9ae9f5

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.89,
3
- "total_flos": 1.3244442196920238e+18,
4
- "train_loss": 0.9847671677668889,
5
- "train_runtime": 282.6595,
6
- "train_samples_per_second": 61.134,
7
- "train_steps_per_second": 0.212
8
  }
 
1
  {
2
+ "epoch": 19.89,
3
+ "total_flos": 2.6637698351109243e+18,
4
+ "train_loss": 0.012146842836712798,
5
+ "train_runtime": 613.8258,
6
+ "train_samples_per_second": 56.303,
7
+ "train_steps_per_second": 0.195
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:034114a0efe89d79a82c07eb97fde02ef1dfd83832450c7003a93a1da84dfaf7
3
  size 343334641
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74cb5e8f5d78070c153615d7560f7c2a8e140588589df74270cb3f3e6b92973e
3
  size 343334641
runs/Jan18_23-37-50_teesta/events.out.tfevents.1674065278.teesta.15918.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f4b076c59870f2500a2565d5f933453ba8b162363cbc5ab6f96f9ad64e25867
3
- size 5541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d07c2b0b551aff8ab1e371ef374baa7d4db26cda47fcaa404328a2e93be2f7ef
3
+ size 16534
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.89,
3
- "total_flos": 1.3244442196920238e+18,
4
- "train_loss": 0.9847671677668889,
5
- "train_runtime": 282.6595,
6
- "train_samples_per_second": 61.134,
7
- "train_steps_per_second": 0.212
8
  }
 
1
  {
2
+ "epoch": 19.89,
3
+ "total_flos": 2.6637698351109243e+18,
4
+ "train_loss": 0.012146842836712798,
5
+ "train_runtime": 613.8258,
6
+ "train_samples_per_second": 56.303,
7
+ "train_steps_per_second": 0.195
8
  }
trainer_state.json CHANGED
@@ -1,181 +1,337 @@
1
  {
2
- "best_metric": 0.875,
3
- "best_model_checkpoint": "google-vit-base-patch16-224-cartoon-face-recognition/checkpoint-48",
4
- "epoch": 9.88888888888889,
5
- "global_step": 60,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.89,
12
- "eval_accuracy": 0.24074074074074073,
13
- "eval_f1": 0.1643869480887188,
14
- "eval_loss": 2.792280435562134,
15
- "eval_precision": 0.20461543357832349,
16
- "eval_recall": 0.24074074074074073,
17
- "eval_runtime": 7.7909,
18
- "eval_samples_per_second": 55.449,
19
- "eval_steps_per_second": 0.898,
20
  "step": 6
21
  },
22
  {
23
  "epoch": 1.59,
24
- "learning_rate": 0.00011111111111111112,
25
- "loss": 3.0606,
26
  "step": 10
27
  },
28
  {
29
  "epoch": 1.89,
30
- "eval_accuracy": 0.5,
31
- "eval_f1": 0.4341708558476282,
32
- "eval_loss": 2.0185298919677734,
33
- "eval_precision": 0.50652873651949,
34
- "eval_recall": 0.5,
35
- "eval_runtime": 6.5876,
36
- "eval_samples_per_second": 65.578,
37
- "eval_steps_per_second": 1.063,
38
  "step": 12
39
  },
40
  {
41
  "epoch": 2.89,
42
- "eval_accuracy": 0.6620370370370371,
43
- "eval_f1": 0.6279977278642492,
44
- "eval_loss": 1.4164462089538574,
45
- "eval_precision": 0.6524558387748216,
46
- "eval_recall": 0.6620370370370371,
47
- "eval_runtime": 6.5774,
48
- "eval_samples_per_second": 65.679,
49
- "eval_steps_per_second": 1.064,
50
  "step": 18
51
  },
52
  {
53
  "epoch": 3.3,
54
- "learning_rate": 8.888888888888888e-05,
55
- "loss": 1.688,
56
  "step": 20
57
  },
58
  {
59
  "epoch": 3.89,
60
- "eval_accuracy": 0.7685185185185185,
61
- "eval_f1": 0.7520520641207192,
62
- "eval_loss": 1.0344423055648804,
63
- "eval_precision": 0.7687545261419606,
64
- "eval_recall": 0.7685185185185185,
65
- "eval_runtime": 6.6855,
66
- "eval_samples_per_second": 64.618,
67
- "eval_steps_per_second": 1.047,
68
  "step": 24
69
  },
70
  {
71
  "epoch": 4.89,
72
- "learning_rate": 6.666666666666667e-05,
73
- "loss": 0.6694,
74
  "step": 30
75
  },
76
  {
77
  "epoch": 4.89,
78
- "eval_accuracy": 0.8310185185185185,
79
- "eval_f1": 0.8204789843927855,
80
- "eval_loss": 0.8109782338142395,
81
- "eval_precision": 0.8419593068227221,
82
- "eval_recall": 0.8310185185185185,
83
- "eval_runtime": 6.4692,
84
- "eval_samples_per_second": 66.778,
85
- "eval_steps_per_second": 1.082,
86
  "step": 30
87
  },
88
  {
89
  "epoch": 5.89,
90
- "eval_accuracy": 0.8495370370370371,
91
- "eval_f1": 0.8431889196512385,
92
- "eval_loss": 0.680189847946167,
93
- "eval_precision": 0.8577619498091635,
94
- "eval_recall": 0.8495370370370371,
95
- "eval_runtime": 6.5265,
96
- "eval_samples_per_second": 66.192,
97
- "eval_steps_per_second": 1.073,
98
  "step": 36
99
  },
100
  {
101
  "epoch": 6.59,
102
- "learning_rate": 4.444444444444444e-05,
103
- "loss": 0.285,
104
  "step": 40
105
  },
106
  {
107
  "epoch": 6.89,
108
- "eval_accuracy": 0.8495370370370371,
109
- "eval_f1": 0.8436215206983917,
110
- "eval_loss": 0.6132100224494934,
111
- "eval_precision": 0.8540014281192293,
112
- "eval_recall": 0.8495370370370371,
113
- "eval_runtime": 6.6157,
114
- "eval_samples_per_second": 65.299,
115
- "eval_steps_per_second": 1.058,
116
  "step": 42
117
  },
118
  {
119
  "epoch": 7.89,
120
- "eval_accuracy": 0.875,
121
- "eval_f1": 0.8707588213347532,
122
- "eval_loss": 0.5606003403663635,
123
- "eval_precision": 0.8799090872795438,
124
- "eval_recall": 0.875,
125
- "eval_runtime": 6.6491,
126
- "eval_samples_per_second": 64.971,
127
- "eval_steps_per_second": 1.053,
128
  "step": 48
129
  },
130
  {
131
  "epoch": 8.3,
132
- "learning_rate": 2.222222222222222e-05,
133
- "loss": 0.1277,
134
  "step": 50
135
  },
136
  {
137
  "epoch": 8.89,
138
- "eval_accuracy": 0.8680555555555556,
139
- "eval_f1": 0.864629496735875,
140
- "eval_loss": 0.5443260669708252,
141
- "eval_precision": 0.8753500264010703,
142
- "eval_recall": 0.8680555555555556,
143
- "eval_runtime": 6.7428,
144
- "eval_samples_per_second": 64.068,
145
- "eval_steps_per_second": 1.038,
146
  "step": 54
147
  },
148
  {
149
  "epoch": 9.89,
150
- "learning_rate": 0.0,
151
- "loss": 0.0779,
152
  "step": 60
153
  },
154
  {
155
  "epoch": 9.89,
156
- "eval_accuracy": 0.8611111111111112,
157
- "eval_f1": 0.8553095447735699,
158
- "eval_loss": 0.5382117033004761,
159
- "eval_precision": 0.8637558340439363,
160
- "eval_recall": 0.8611111111111112,
161
- "eval_runtime": 6.6834,
162
- "eval_samples_per_second": 64.638,
163
- "eval_steps_per_second": 1.047,
164
  "step": 60
165
  },
166
  {
167
- "epoch": 9.89,
168
- "step": 60,
169
- "total_flos": 1.3244442196920238e+18,
170
- "train_loss": 0.9847671677668889,
171
- "train_runtime": 282.6595,
172
- "train_samples_per_second": 61.134,
173
- "train_steps_per_second": 0.212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  }
175
  ],
176
- "max_steps": 60,
177
- "num_train_epochs": 10,
178
- "total_flos": 1.3244442196920238e+18,
179
  "trial_name": null,
180
  "trial_params": null
181
  }
 
1
  {
2
+ "best_metric": 0.9050925925925926,
3
+ "best_model_checkpoint": "google-vit-base-patch16-224-cartoon-face-recognition/checkpoint-84",
4
+ "epoch": 19.88888888888889,
5
+ "global_step": 120,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.89,
12
+ "eval_accuracy": 0.8611111111111112,
13
+ "eval_f1": 0.8577219426933996,
14
+ "eval_loss": 0.5458505749702454,
15
+ "eval_precision": 0.8682746241112194,
16
+ "eval_recall": 0.8611111111111112,
17
+ "eval_runtime": 7.819,
18
+ "eval_samples_per_second": 55.25,
19
+ "eval_steps_per_second": 0.895,
20
  "step": 6
21
  },
22
  {
23
  "epoch": 1.59,
24
+ "learning_rate": 0.0001,
25
+ "loss": 0.0812,
26
  "step": 10
27
  },
28
  {
29
  "epoch": 1.89,
30
+ "eval_accuracy": 0.8796296296296297,
31
+ "eval_f1": 0.8763585304815898,
32
+ "eval_loss": 0.47026392817497253,
33
+ "eval_precision": 0.8832897696668488,
34
+ "eval_recall": 0.8796296296296297,
35
+ "eval_runtime": 6.6723,
36
+ "eval_samples_per_second": 64.746,
37
+ "eval_steps_per_second": 1.049,
38
  "step": 12
39
  },
40
  {
41
  "epoch": 2.89,
42
+ "eval_accuracy": 0.8935185185185185,
43
+ "eval_f1": 0.890600542893898,
44
+ "eval_loss": 0.44299086928367615,
45
+ "eval_precision": 0.896940900151719,
46
+ "eval_recall": 0.8935185185185185,
47
+ "eval_runtime": 7.4266,
48
+ "eval_samples_per_second": 58.169,
49
+ "eval_steps_per_second": 0.943,
50
  "step": 18
51
  },
52
  {
53
  "epoch": 3.3,
54
+ "learning_rate": 0.00011111111111111112,
55
+ "loss": 0.0307,
56
  "step": 20
57
  },
58
  {
59
  "epoch": 3.89,
60
+ "eval_accuracy": 0.8819444444444444,
61
+ "eval_f1": 0.876747873541574,
62
+ "eval_loss": 0.40446680784225464,
63
+ "eval_precision": 0.8848741291373776,
64
+ "eval_recall": 0.8819444444444444,
65
+ "eval_runtime": 6.4316,
66
+ "eval_samples_per_second": 67.168,
67
+ "eval_steps_per_second": 1.088,
68
  "step": 24
69
  },
70
  {
71
  "epoch": 4.89,
72
+ "learning_rate": 0.0001,
73
+ "loss": 0.0091,
74
  "step": 30
75
  },
76
  {
77
  "epoch": 4.89,
78
+ "eval_accuracy": 0.9004629629629629,
79
+ "eval_f1": 0.8979540324742182,
80
+ "eval_loss": 0.36718857288360596,
81
+ "eval_precision": 0.9025419548328308,
82
+ "eval_recall": 0.9004629629629629,
83
+ "eval_runtime": 6.6517,
84
+ "eval_samples_per_second": 64.946,
85
+ "eval_steps_per_second": 1.052,
86
  "step": 30
87
  },
88
  {
89
  "epoch": 5.89,
90
+ "eval_accuracy": 0.9027777777777778,
91
+ "eval_f1": 0.9010548586749486,
92
+ "eval_loss": 0.3840581178665161,
93
+ "eval_precision": 0.9124659232717037,
94
+ "eval_recall": 0.9027777777777778,
95
+ "eval_runtime": 6.6486,
96
+ "eval_samples_per_second": 64.976,
97
+ "eval_steps_per_second": 1.053,
98
  "step": 36
99
  },
100
  {
101
  "epoch": 6.59,
102
+ "learning_rate": 8.888888888888888e-05,
103
+ "loss": 0.0043,
104
  "step": 40
105
  },
106
  {
107
  "epoch": 6.89,
108
+ "eval_accuracy": 0.9004629629629629,
109
+ "eval_f1": 0.8971983578645838,
110
+ "eval_loss": 0.39255601167678833,
111
+ "eval_precision": 0.9073430487686229,
112
+ "eval_recall": 0.9004629629629629,
113
+ "eval_runtime": 6.6126,
114
+ "eval_samples_per_second": 65.329,
115
+ "eval_steps_per_second": 1.059,
116
  "step": 42
117
  },
118
  {
119
  "epoch": 7.89,
120
+ "eval_accuracy": 0.8958333333333334,
121
+ "eval_f1": 0.8931276138165111,
122
+ "eval_loss": 0.3785696029663086,
123
+ "eval_precision": 0.9004579824364817,
124
+ "eval_recall": 0.8958333333333334,
125
+ "eval_runtime": 6.493,
126
+ "eval_samples_per_second": 66.533,
127
+ "eval_steps_per_second": 1.078,
128
  "step": 48
129
  },
130
  {
131
  "epoch": 8.3,
132
+ "learning_rate": 7.777777777777778e-05,
133
+ "loss": 0.0031,
134
  "step": 50
135
  },
136
  {
137
  "epoch": 8.89,
138
+ "eval_accuracy": 0.9027777777777778,
139
+ "eval_f1": 0.9007047995444771,
140
+ "eval_loss": 0.3791252374649048,
141
+ "eval_precision": 0.9090957347391172,
142
+ "eval_recall": 0.9027777777777778,
143
+ "eval_runtime": 6.5347,
144
+ "eval_samples_per_second": 66.109,
145
+ "eval_steps_per_second": 1.071,
146
  "step": 54
147
  },
148
  {
149
  "epoch": 9.89,
150
+ "learning_rate": 6.666666666666667e-05,
151
+ "loss": 0.002,
152
  "step": 60
153
  },
154
  {
155
  "epoch": 9.89,
156
+ "eval_accuracy": 0.9027777777777778,
157
+ "eval_f1": 0.9000566322189608,
158
+ "eval_loss": 0.3677087724208832,
159
+ "eval_precision": 0.9105962655404846,
160
+ "eval_recall": 0.9027777777777778,
161
+ "eval_runtime": 6.4291,
162
+ "eval_samples_per_second": 67.194,
163
+ "eval_steps_per_second": 1.089,
164
  "step": 60
165
  },
166
  {
167
+ "epoch": 10.89,
168
+ "eval_accuracy": 0.9027777777777778,
169
+ "eval_f1": 0.9007267676235234,
170
+ "eval_loss": 0.37403494119644165,
171
+ "eval_precision": 0.9098558471011668,
172
+ "eval_recall": 0.9027777777777778,
173
+ "eval_runtime": 7.5533,
174
+ "eval_samples_per_second": 57.193,
175
+ "eval_steps_per_second": 0.927,
176
+ "step": 66
177
+ },
178
+ {
179
+ "epoch": 11.59,
180
+ "learning_rate": 5.555555555555556e-05,
181
+ "loss": 0.0027,
182
+ "step": 70
183
+ },
184
+ {
185
+ "epoch": 11.89,
186
+ "eval_accuracy": 0.8981481481481481,
187
+ "eval_f1": 0.8955661460989992,
188
+ "eval_loss": 0.3868783414363861,
189
+ "eval_precision": 0.904346797052174,
190
+ "eval_recall": 0.8981481481481481,
191
+ "eval_runtime": 6.6537,
192
+ "eval_samples_per_second": 64.926,
193
+ "eval_steps_per_second": 1.052,
194
+ "step": 72
195
+ },
196
+ {
197
+ "epoch": 12.89,
198
+ "eval_accuracy": 0.8981481481481481,
199
+ "eval_f1": 0.8953859158931289,
200
+ "eval_loss": 0.38008958101272583,
201
+ "eval_precision": 0.9021237317078286,
202
+ "eval_recall": 0.8981481481481481,
203
+ "eval_runtime": 6.4045,
204
+ "eval_samples_per_second": 67.453,
205
+ "eval_steps_per_second": 1.093,
206
+ "step": 78
207
+ },
208
+ {
209
+ "epoch": 13.3,
210
+ "learning_rate": 4.444444444444444e-05,
211
+ "loss": 0.004,
212
+ "step": 80
213
+ },
214
+ {
215
+ "epoch": 13.89,
216
+ "eval_accuracy": 0.9050925925925926,
217
+ "eval_f1": 0.9027775427273881,
218
+ "eval_loss": 0.3674483001232147,
219
+ "eval_precision": 0.9112982137223617,
220
+ "eval_recall": 0.9050925925925926,
221
+ "eval_runtime": 6.5221,
222
+ "eval_samples_per_second": 66.237,
223
+ "eval_steps_per_second": 1.073,
224
+ "step": 84
225
+ },
226
+ {
227
+ "epoch": 14.89,
228
+ "learning_rate": 3.3333333333333335e-05,
229
+ "loss": 0.0024,
230
+ "step": 90
231
+ },
232
+ {
233
+ "epoch": 14.89,
234
+ "eval_accuracy": 0.9050925925925926,
235
+ "eval_f1": 0.9027035229916751,
236
+ "eval_loss": 0.36199745535850525,
237
+ "eval_precision": 0.9096292505458038,
238
+ "eval_recall": 0.9050925925925926,
239
+ "eval_runtime": 6.6297,
240
+ "eval_samples_per_second": 65.161,
241
+ "eval_steps_per_second": 1.056,
242
+ "step": 90
243
+ },
244
+ {
245
+ "epoch": 15.89,
246
+ "eval_accuracy": 0.9027777777777778,
247
+ "eval_f1": 0.9005670379013408,
248
+ "eval_loss": 0.36699026823043823,
249
+ "eval_precision": 0.9088959728872165,
250
+ "eval_recall": 0.9027777777777778,
251
+ "eval_runtime": 7.1759,
252
+ "eval_samples_per_second": 60.202,
253
+ "eval_steps_per_second": 0.975,
254
+ "step": 96
255
+ },
256
+ {
257
+ "epoch": 16.59,
258
+ "learning_rate": 2.222222222222222e-05,
259
+ "loss": 0.0021,
260
+ "step": 100
261
+ },
262
+ {
263
+ "epoch": 16.89,
264
+ "eval_accuracy": 0.9004629629629629,
265
+ "eval_f1": 0.8980450039465582,
266
+ "eval_loss": 0.38269728422164917,
267
+ "eval_precision": 0.9064532394818394,
268
+ "eval_recall": 0.9004629629629629,
269
+ "eval_runtime": 8.0097,
270
+ "eval_samples_per_second": 53.935,
271
+ "eval_steps_per_second": 0.874,
272
+ "step": 102
273
+ },
274
+ {
275
+ "epoch": 17.89,
276
+ "eval_accuracy": 0.8981481481481481,
277
+ "eval_f1": 0.8957872163681325,
278
+ "eval_loss": 0.37475430965423584,
279
+ "eval_precision": 0.9049223762152907,
280
+ "eval_recall": 0.8981481481481481,
281
+ "eval_runtime": 7.8764,
282
+ "eval_samples_per_second": 54.848,
283
+ "eval_steps_per_second": 0.889,
284
+ "step": 108
285
+ },
286
+ {
287
+ "epoch": 18.3,
288
+ "learning_rate": 1.111111111111111e-05,
289
+ "loss": 0.0022,
290
+ "step": 110
291
+ },
292
+ {
293
+ "epoch": 18.89,
294
+ "eval_accuracy": 0.9027777777777778,
295
+ "eval_f1": 0.9005599048903711,
296
+ "eval_loss": 0.3825390636920929,
297
+ "eval_precision": 0.9100892619763822,
298
+ "eval_recall": 0.9027777777777778,
299
+ "eval_runtime": 6.4897,
300
+ "eval_samples_per_second": 66.567,
301
+ "eval_steps_per_second": 1.079,
302
+ "step": 114
303
+ },
304
+ {
305
+ "epoch": 19.89,
306
+ "learning_rate": 0.0,
307
+ "loss": 0.0019,
308
+ "step": 120
309
+ },
310
+ {
311
+ "epoch": 19.89,
312
+ "eval_accuracy": 0.9004629629629629,
313
+ "eval_f1": 0.8984296743444529,
314
+ "eval_loss": 0.370717316865921,
315
+ "eval_precision": 0.9066341895316832,
316
+ "eval_recall": 0.9004629629629629,
317
+ "eval_runtime": 6.4737,
318
+ "eval_samples_per_second": 66.732,
319
+ "eval_steps_per_second": 1.081,
320
+ "step": 120
321
+ },
322
+ {
323
+ "epoch": 19.89,
324
+ "step": 120,
325
+ "total_flos": 2.6637698351109243e+18,
326
+ "train_loss": 0.012146842836712798,
327
+ "train_runtime": 613.8258,
328
+ "train_samples_per_second": 56.303,
329
+ "train_steps_per_second": 0.195
330
  }
331
  ],
332
+ "max_steps": 120,
333
+ "num_train_epochs": 20,
334
+ "total_flos": 2.6637698351109243e+18,
335
  "trial_name": null,
336
  "trial_params": null
337
  }