{
  "best_metric": 6.141994476318359,
  "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example/checkpoint-36000",
  "epoch": 5.0,
  "global_step": 36155,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14,
      "learning_rate": 0.0001944682616512239,
      "loss": 8.6059,
      "step": 1000
    },
    {
      "epoch": 0.14,
      "eval_accuracy": 0.0002420302883618007,
      "eval_loss": 8.584440231323242,
      "eval_runtime": 155.6325,
      "eval_samples_per_second": 185.835,
      "eval_steps_per_second": 23.234,
      "step": 1000
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0001889365233024478,
      "loss": 8.5506,
      "step": 2000
    },
    {
      "epoch": 0.28,
      "eval_accuracy": 0.00100269690892746,
      "eval_loss": 8.518888473510742,
      "eval_runtime": 153.9354,
      "eval_samples_per_second": 187.884,
      "eval_steps_per_second": 23.49,
      "step": 2000
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00018341584843036926,
      "loss": 8.4931,
      "step": 3000
    },
    {
      "epoch": 0.41,
      "eval_accuracy": 0.0012101514418090035,
      "eval_loss": 8.464128494262695,
      "eval_runtime": 157.4956,
      "eval_samples_per_second": 183.637,
      "eval_steps_per_second": 22.959,
      "step": 3000
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00017788411008159315,
      "loss": 8.4223,
      "step": 4000
    },
    {
      "epoch": 0.55,
      "eval_accuracy": 0.0016250605075720905,
      "eval_loss": 8.349540710449219,
      "eval_runtime": 156.3508,
      "eval_samples_per_second": 184.982,
      "eval_steps_per_second": 23.127,
      "step": 4000
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00017235237173281705,
      "loss": 8.3144,
      "step": 5000
    },
    {
      "epoch": 0.69,
      "eval_accuracy": 0.0020745453288154346,
      "eval_loss": 8.255170822143555,
      "eval_runtime": 157.1252,
      "eval_samples_per_second": 184.07,
      "eval_steps_per_second": 23.013,
      "step": 5000
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.00016682063338404094,
      "loss": 8.1936,
      "step": 6000
    },
    {
      "epoch": 0.83,
      "eval_accuracy": 0.0023511513726574926,
      "eval_loss": 8.138504981994629,
      "eval_runtime": 157.2716,
      "eval_samples_per_second": 183.898,
      "eval_steps_per_second": 22.992,
      "step": 6000
    },
    {
      "epoch": 0.97,
      "learning_rate": 0.00016128889503526483,
      "loss": 8.0638,
      "step": 7000
    },
    {
      "epoch": 0.97,
      "eval_accuracy": 0.002835211949381094,
      "eval_loss": 7.992447376251221,
      "eval_runtime": 156.9147,
      "eval_samples_per_second": 184.317,
      "eval_steps_per_second": 23.044,
      "step": 7000
    },
    {
      "epoch": 1.11,
      "learning_rate": 0.00015575715668648873,
      "loss": 7.8485,
      "step": 8000
    },
    {
      "epoch": 1.11,
      "eval_accuracy": 0.0036304543254270104,
      "eval_loss": 7.836571216583252,
      "eval_runtime": 155.7627,
      "eval_samples_per_second": 185.68,
      "eval_steps_per_second": 23.215,
      "step": 8000
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.00015022541833771265,
      "loss": 7.6933,
      "step": 9000
    },
    {
      "epoch": 1.24,
      "eval_accuracy": 0.004494848212433442,
      "eval_loss": 7.659491062164307,
      "eval_runtime": 156.4028,
      "eval_samples_per_second": 184.92,
      "eval_steps_per_second": 23.12,
      "step": 9000
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.00014469367998893652,
      "loss": 7.5808,
      "step": 10000
    },
    {
      "epoch": 1.38,
      "eval_accuracy": 0.00615448447548579,
      "eval_loss": 7.523160934448242,
      "eval_runtime": 155.1944,
      "eval_samples_per_second": 186.36,
      "eval_steps_per_second": 23.3,
      "step": 10000
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.0001391674733785092,
      "loss": 7.4352,
      "step": 11000
    },
    {
      "epoch": 1.52,
      "eval_accuracy": 0.00701887836249222,
      "eval_loss": 7.381638526916504,
      "eval_runtime": 156.5342,
      "eval_samples_per_second": 184.765,
      "eval_steps_per_second": 23.1,
      "step": 11000
    },
    {
      "epoch": 1.66,
      "learning_rate": 0.0001336357350297331,
      "loss": 7.3279,
      "step": 12000
    },
    {
      "epoch": 1.66,
      "eval_accuracy": 0.008436484337182767,
      "eval_loss": 7.285288333892822,
      "eval_runtime": 157.1769,
      "eval_samples_per_second": 184.009,
      "eval_steps_per_second": 23.006,
      "step": 12000
    },
    {
      "epoch": 1.8,
      "learning_rate": 0.000128103996680957,
      "loss": 7.2141,
      "step": 13000
    },
    {
      "epoch": 1.8,
      "eval_accuracy": 0.010511029665998203,
      "eval_loss": 7.1552510261535645,
      "eval_runtime": 155.9751,
      "eval_samples_per_second": 185.427,
      "eval_steps_per_second": 23.183,
      "step": 13000
    },
    {
      "epoch": 1.94,
      "learning_rate": 0.0001225722583321809,
      "loss": 7.151,
      "step": 14000
    },
    {
      "epoch": 1.94,
      "eval_accuracy": 0.011859484129728235,
      "eval_loss": 7.085327625274658,
      "eval_runtime": 156.2266,
      "eval_samples_per_second": 185.129,
      "eval_steps_per_second": 23.146,
      "step": 14000
    },
    {
      "epoch": 2.07,
      "learning_rate": 0.00011704605172175355,
      "loss": 6.9695,
      "step": 15000
    },
    {
      "epoch": 2.07,
      "eval_accuracy": 0.013380817370859553,
      "eval_loss": 7.008819103240967,
      "eval_runtime": 155.749,
      "eval_samples_per_second": 185.696,
      "eval_steps_per_second": 23.217,
      "step": 15000
    },
    {
      "epoch": 2.21,
      "learning_rate": 0.00011151984511132624,
      "loss": 6.8563,
      "step": 16000
    },
    {
      "epoch": 2.21,
      "eval_accuracy": 0.013934029458543669,
      "eval_loss": 6.940927982330322,
      "eval_runtime": 155.6806,
      "eval_samples_per_second": 185.778,
      "eval_steps_per_second": 23.227,
      "step": 16000
    },
    {
      "epoch": 2.35,
      "learning_rate": 0.00010598810676255014,
      "loss": 6.8019,
      "step": 17000
    },
    {
      "epoch": 2.35,
      "eval_accuracy": 0.01583569600995782,
      "eval_loss": 6.863400936126709,
      "eval_runtime": 156.1188,
      "eval_samples_per_second": 185.256,
      "eval_steps_per_second": 23.162,
      "step": 17000
    },
    {
      "epoch": 2.49,
      "learning_rate": 0.00010045636841377403,
      "loss": 6.7372,
      "step": 18000
    },
    {
      "epoch": 2.49,
      "eval_accuracy": 0.017495332273010165,
      "eval_loss": 6.800116539001465,
      "eval_runtime": 156.6848,
      "eval_samples_per_second": 184.587,
      "eval_steps_per_second": 23.078,
      "step": 18000
    },
    {
      "epoch": 2.63,
      "learning_rate": 9.492463006499794e-05,
      "loss": 6.6903,
      "step": 19000
    },
    {
      "epoch": 2.63,
      "eval_accuracy": 0.019120392780582255,
      "eval_loss": 6.7322998046875,
      "eval_runtime": 156.774,
      "eval_samples_per_second": 184.482,
      "eval_steps_per_second": 23.065,
      "step": 19000
    },
    {
      "epoch": 2.77,
      "learning_rate": 8.93984234545706e-05,
      "loss": 6.6482,
      "step": 20000
    },
    {
      "epoch": 2.77,
      "eval_accuracy": 0.020710877532674088,
      "eval_loss": 6.66375732421875,
      "eval_runtime": 156.4579,
      "eval_samples_per_second": 184.855,
      "eval_steps_per_second": 23.112,
      "step": 20000
    },
    {
      "epoch": 2.9,
      "learning_rate": 8.38666851057945e-05,
      "loss": 6.5669,
      "step": 21000
    },
    {
      "epoch": 2.9,
      "eval_accuracy": 0.023891847036857754,
      "eval_loss": 6.609040260314941,
      "eval_runtime": 155.2279,
      "eval_samples_per_second": 186.32,
      "eval_steps_per_second": 23.295,
      "step": 21000
    },
    {
      "epoch": 3.04,
      "learning_rate": 7.834047849536717e-05,
      "loss": 6.4484,
      "step": 22000
    },
    {
      "epoch": 3.04,
      "eval_accuracy": 0.023995574303298527,
      "eval_loss": 6.544083595275879,
      "eval_runtime": 155.967,
      "eval_samples_per_second": 185.437,
      "eval_steps_per_second": 23.184,
      "step": 22000
    },
    {
      "epoch": 3.18,
      "learning_rate": 7.281427188493984e-05,
      "loss": 6.2568,
      "step": 23000
    },
    {
      "epoch": 3.18,
      "eval_accuracy": 0.02734942258488348,
      "eval_loss": 6.501514911651611,
      "eval_runtime": 156.6432,
      "eval_samples_per_second": 184.636,
      "eval_steps_per_second": 23.084,
      "step": 23000
    },
    {
      "epoch": 3.32,
      "learning_rate": 6.728253353616373e-05,
      "loss": 6.2452,
      "step": 24000
    },
    {
      "epoch": 3.32,
      "eval_accuracy": 0.030426664822626376,
      "eval_loss": 6.458929061889648,
      "eval_runtime": 156.5343,
      "eval_samples_per_second": 184.765,
      "eval_steps_per_second": 23.1,
      "step": 24000
    },
    {
      "epoch": 3.46,
      "learning_rate": 6.175632692573642e-05,
      "loss": 6.2002,
      "step": 25000
    },
    {
      "epoch": 3.46,
      "eval_accuracy": 0.031049028421271004,
      "eval_loss": 6.431183815002441,
      "eval_runtime": 156.3543,
      "eval_samples_per_second": 184.977,
      "eval_steps_per_second": 23.127,
      "step": 25000
    },
    {
      "epoch": 3.6,
      "learning_rate": 5.622458857696031e-05,
      "loss": 6.1699,
      "step": 26000
    },
    {
      "epoch": 3.6,
      "eval_accuracy": 0.03194799806375769,
      "eval_loss": 6.372282981872559,
      "eval_runtime": 156.6121,
      "eval_samples_per_second": 184.673,
      "eval_steps_per_second": 23.089,
      "step": 26000
    },
    {
      "epoch": 3.73,
      "learning_rate": 5.0692850228184205e-05,
      "loss": 6.1284,
      "step": 27000
    },
    {
      "epoch": 3.73,
      "eval_accuracy": 0.03429914943641518,
      "eval_loss": 6.332355976104736,
      "eval_runtime": 156.1088,
      "eval_samples_per_second": 185.268,
      "eval_steps_per_second": 23.163,
      "step": 27000
    },
    {
      "epoch": 3.87,
      "learning_rate": 4.516664361775688e-05,
      "loss": 6.1186,
      "step": 28000
    },
    {
      "epoch": 3.87,
      "eval_accuracy": 0.034956088790540074,
      "eval_loss": 6.3029093742370605,
      "eval_runtime": 154.5904,
      "eval_samples_per_second": 187.088,
      "eval_steps_per_second": 23.391,
      "step": 28000
    },
    {
      "epoch": 4.01,
      "learning_rate": 3.963490526898078e-05,
      "loss": 6.0611,
      "step": 29000
    },
    {
      "epoch": 4.01,
      "eval_accuracy": 0.03810248253924348,
      "eval_loss": 6.272293567657471,
      "eval_runtime": 156.0431,
      "eval_samples_per_second": 185.346,
      "eval_steps_per_second": 23.173,
      "step": 29000
    },
    {
      "epoch": 4.15,
      "learning_rate": 3.410316692020468e-05,
      "loss": 5.7883,
      "step": 30000
    },
    {
      "epoch": 4.15,
      "eval_accuracy": 0.03827536131664477,
      "eval_loss": 6.252718925476074,
      "eval_runtime": 145.8769,
      "eval_samples_per_second": 198.263,
      "eval_steps_per_second": 24.788,
      "step": 30000
    },
    {
      "epoch": 4.29,
      "learning_rate": 2.8576960309777352e-05,
      "loss": 5.7684,
      "step": 31000
    },
    {
      "epoch": 4.29,
      "eval_accuracy": 0.03924348247009197,
      "eval_loss": 6.218559741973877,
      "eval_runtime": 146.1199,
      "eval_samples_per_second": 197.933,
      "eval_steps_per_second": 24.747,
      "step": 31000
    },
    {
      "epoch": 4.43,
      "learning_rate": 2.3045221961001246e-05,
      "loss": 5.7701,
      "step": 32000
    },
    {
      "epoch": 4.43,
      "eval_accuracy": 0.04031533088997995,
      "eval_loss": 6.203105926513672,
      "eval_runtime": 145.5537,
      "eval_samples_per_second": 198.703,
      "eval_steps_per_second": 24.843,
      "step": 32000
    },
    {
      "epoch": 4.56,
      "learning_rate": 1.751901535057392e-05,
      "loss": 5.7473,
      "step": 33000
    },
    {
      "epoch": 4.56,
      "eval_accuracy": 0.04304681557292027,
      "eval_loss": 6.177652359008789,
      "eval_runtime": 145.7844,
      "eval_samples_per_second": 198.389,
      "eval_steps_per_second": 24.804,
      "step": 33000
    },
    {
      "epoch": 4.7,
      "learning_rate": 1.1987277001797816e-05,
      "loss": 5.735,
      "step": 34000
    },
    {
      "epoch": 4.7,
      "eval_accuracy": 0.044187815503768754,
      "eval_loss": 6.163440704345703,
      "eval_runtime": 146.1075,
      "eval_samples_per_second": 197.95,
      "eval_steps_per_second": 24.749,
      "step": 34000
    },
    {
      "epoch": 4.84,
      "learning_rate": 6.455538653021713e-06,
      "loss": 5.7324,
      "step": 35000
    },
    {
      "epoch": 4.84,
      "eval_accuracy": 0.04429154277020953,
      "eval_loss": 6.149352073669434,
      "eval_runtime": 145.8359,
      "eval_samples_per_second": 198.319,
      "eval_steps_per_second": 24.795,
      "step": 35000
    },
    {
      "epoch": 4.98,
      "learning_rate": 9.238003042456093e-07,
      "loss": 5.6949,
      "step": 36000
    },
    {
      "epoch": 4.98,
      "eval_accuracy": 0.044844754857893646,
      "eval_loss": 6.141994476318359,
      "eval_runtime": 145.8687,
      "eval_samples_per_second": 198.274,
      "eval_steps_per_second": 24.789,
      "step": 36000
    },
    {
      "epoch": 5.0,
      "step": 36155,
      "total_flos": 9.416417849072317e+19,
      "train_loss": 6.8843976465667565,
      "train_runtime": 17474.7904,
      "train_samples_per_second": 66.203,
      "train_steps_per_second": 2.069
    }
  ],
  "max_steps": 36155,
  "num_train_epochs": 5,
  "total_flos": 9.416417849072317e+19,
  "trial_name": null,
  "trial_params": null
}