{
  "best_metric": 1.0,
  "best_model_checkpoint": "vit-base-patch16-224-Trial007-YEL_STEM/checkpoint-29",
  "epoch": 44.44444444444444,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.44,
      "learning_rate": 5e-06,
      "loss": 0.7059,
      "step": 1
    },
    {
      "epoch": 0.89,
      "learning_rate": 1e-05,
      "loss": 0.7081,
      "step": 2
    },
    {
      "epoch": 0.89,
      "eval_accuracy": 0.5555555555555556,
      "eval_loss": 0.6817994713783264,
      "eval_runtime": 0.2599,
      "eval_samples_per_second": 207.804,
      "eval_steps_per_second": 3.848,
      "step": 2
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.5e-05,
      "loss": 0.6401,
      "step": 3
    },
    {
      "epoch": 1.78,
      "learning_rate": 2e-05,
      "loss": 0.6584,
      "step": 4
    },
    {
      "epoch": 1.78,
      "eval_accuracy": 0.7037037037037037,
      "eval_loss": 0.5915446877479553,
      "eval_runtime": 0.2575,
      "eval_samples_per_second": 209.714,
      "eval_steps_per_second": 3.884,
      "step": 4
    },
    {
      "epoch": 2.22,
      "learning_rate": 2.5e-05,
      "loss": 0.5794,
      "step": 5
    },
    {
      "epoch": 2.67,
      "learning_rate": 3e-05,
      "loss": 0.5552,
      "step": 6
    },
    {
      "epoch": 2.67,
      "eval_accuracy": 0.7407407407407407,
      "eval_loss": 0.5365740656852722,
      "eval_runtime": 0.2565,
      "eval_samples_per_second": 210.539,
      "eval_steps_per_second": 3.899,
      "step": 6
    },
    {
      "epoch": 3.11,
      "learning_rate": 3.5e-05,
      "loss": 0.4553,
      "step": 7
    },
    {
      "epoch": 3.56,
      "learning_rate": 4e-05,
      "loss": 0.5446,
      "step": 8
    },
    {
      "epoch": 4.0,
      "learning_rate": 4.5e-05,
      "loss": 0.3763,
      "step": 9
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8518518518518519,
      "eval_loss": 0.35601484775543213,
      "eval_runtime": 0.2638,
      "eval_samples_per_second": 204.731,
      "eval_steps_per_second": 3.791,
      "step": 9
    },
    {
      "epoch": 4.44,
      "learning_rate": 5e-05,
      "loss": 0.4622,
      "step": 10
    },
    {
      "epoch": 4.89,
      "learning_rate": 4.9444444444444446e-05,
      "loss": 0.397,
      "step": 11
    },
    {
      "epoch": 4.89,
      "eval_accuracy": 0.8518518518518519,
      "eval_loss": 0.2999265789985657,
      "eval_runtime": 0.2681,
      "eval_samples_per_second": 201.389,
      "eval_steps_per_second": 3.729,
      "step": 11
    },
    {
      "epoch": 5.33,
      "learning_rate": 4.888888888888889e-05,
      "loss": 0.2849,
      "step": 12
    },
    {
      "epoch": 5.78,
      "learning_rate": 4.8333333333333334e-05,
      "loss": 0.3313,
      "step": 13
    },
    {
      "epoch": 5.78,
      "eval_accuracy": 0.9074074074074074,
      "eval_loss": 0.23073385655879974,
      "eval_runtime": 0.2595,
      "eval_samples_per_second": 208.115,
      "eval_steps_per_second": 3.854,
      "step": 13
    },
    {
      "epoch": 6.22,
      "learning_rate": 4.7777777777777784e-05,
      "loss": 0.2448,
      "step": 14
    },
    {
      "epoch": 6.67,
      "learning_rate": 4.722222222222222e-05,
      "loss": 0.2957,
      "step": 15
    },
    {
      "epoch": 6.67,
      "eval_accuracy": 0.9259259259259259,
      "eval_loss": 0.1745777279138565,
      "eval_runtime": 0.2586,
      "eval_samples_per_second": 208.833,
      "eval_steps_per_second": 3.867,
      "step": 15
    },
    {
      "epoch": 7.11,
      "learning_rate": 4.666666666666667e-05,
      "loss": 0.2555,
      "step": 16
    },
    {
      "epoch": 7.56,
      "learning_rate": 4.6111111111111115e-05,
      "loss": 0.3153,
      "step": 17
    },
    {
      "epoch": 8.0,
      "learning_rate": 4.555555555555556e-05,
      "loss": 0.2383,
      "step": 18
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9444444444444444,
      "eval_loss": 0.14317429065704346,
      "eval_runtime": 0.2592,
      "eval_samples_per_second": 208.343,
      "eval_steps_per_second": 3.858,
      "step": 18
    },
    {
      "epoch": 8.44,
      "learning_rate": 4.5e-05,
      "loss": 0.1994,
      "step": 19
    },
    {
      "epoch": 8.89,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 0.2664,
      "step": 20
    },
    {
      "epoch": 8.89,
      "eval_accuracy": 0.9074074074074074,
      "eval_loss": 0.33199751377105713,
      "eval_runtime": 0.2627,
      "eval_samples_per_second": 205.531,
      "eval_steps_per_second": 3.806,
      "step": 20
    },
    {
      "epoch": 9.33,
      "learning_rate": 4.388888888888889e-05,
      "loss": 0.506,
      "step": 21
    },
    {
      "epoch": 9.78,
      "learning_rate": 4.3333333333333334e-05,
      "loss": 0.2242,
      "step": 22
    },
    {
      "epoch": 9.78,
      "eval_accuracy": 0.9629629629629629,
      "eval_loss": 0.11195674538612366,
      "eval_runtime": 0.2715,
      "eval_samples_per_second": 198.931,
      "eval_steps_per_second": 3.684,
      "step": 22
    },
    {
      "epoch": 10.22,
      "learning_rate": 4.277777777777778e-05,
      "loss": 0.1685,
      "step": 23
    },
    {
      "epoch": 10.67,
      "learning_rate": 4.222222222222222e-05,
      "loss": 0.2072,
      "step": 24
    },
    {
      "epoch": 10.67,
      "eval_accuracy": 0.9629629629629629,
      "eval_loss": 0.07178916037082672,
      "eval_runtime": 0.2631,
      "eval_samples_per_second": 205.262,
      "eval_steps_per_second": 3.801,
      "step": 24
    },
    {
      "epoch": 11.11,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.1653,
      "step": 25
    },
    {
      "epoch": 11.56,
      "learning_rate": 4.111111111111111e-05,
      "loss": 0.1573,
      "step": 26
    },
    {
      "epoch": 12.0,
      "learning_rate": 4.055555555555556e-05,
      "loss": 0.1399,
      "step": 27
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.9814814814814815,
      "eval_loss": 0.049447279423475266,
      "eval_runtime": 0.2574,
      "eval_samples_per_second": 209.772,
      "eval_steps_per_second": 3.885,
      "step": 27
    },
    {
      "epoch": 12.44,
      "learning_rate": 4e-05,
      "loss": 0.1696,
      "step": 28
    },
    {
      "epoch": 12.89,
      "learning_rate": 3.944444444444445e-05,
      "loss": 0.1846,
      "step": 29
    },
    {
      "epoch": 12.89,
      "eval_accuracy": 1.0,
      "eval_loss": 0.03731463849544525,
      "eval_runtime": 0.2604,
      "eval_samples_per_second": 207.336,
      "eval_steps_per_second": 3.84,
      "step": 29
    },
    {
      "epoch": 13.33,
      "learning_rate": 3.888888888888889e-05,
      "loss": 0.1438,
      "step": 30
    },
    {
      "epoch": 13.78,
      "learning_rate": 3.8333333333333334e-05,
      "loss": 0.1816,
      "step": 31
    },
    {
      "epoch": 13.78,
      "eval_accuracy": 1.0,
      "eval_loss": 0.03541439026594162,
      "eval_runtime": 0.2612,
      "eval_samples_per_second": 206.767,
      "eval_steps_per_second": 3.829,
      "step": 31
    },
    {
      "epoch": 14.22,
      "learning_rate": 3.777777777777778e-05,
      "loss": 0.2004,
      "step": 32
    },
    {
      "epoch": 14.67,
      "learning_rate": 3.722222222222222e-05,
      "loss": 0.1453,
      "step": 33
    },
    {
      "epoch": 14.67,
      "eval_accuracy": 0.9814814814814815,
      "eval_loss": 0.04606747254729271,
      "eval_runtime": 0.2706,
      "eval_samples_per_second": 199.556,
      "eval_steps_per_second": 3.695,
      "step": 33
    },
    {
      "epoch": 15.11,
      "learning_rate": 3.6666666666666666e-05,
      "loss": 0.1129,
      "step": 34
    },
    {
      "epoch": 15.56,
      "learning_rate": 3.611111111111111e-05,
      "loss": 0.1819,
      "step": 35
    },
    {
      "epoch": 16.0,
      "learning_rate": 3.555555555555556e-05,
      "loss": 0.1406,
      "step": 36
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.03328908979892731,
      "eval_runtime": 0.2597,
      "eval_samples_per_second": 207.893,
      "eval_steps_per_second": 3.85,
      "step": 36
    },
    {
      "epoch": 16.44,
      "learning_rate": 3.5e-05,
      "loss": 0.1389,
      "step": 37
    },
    {
      "epoch": 16.89,
      "learning_rate": 3.444444444444445e-05,
      "loss": 0.1749,
      "step": 38
    },
    {
      "epoch": 16.89,
      "eval_accuracy": 1.0,
      "eval_loss": 0.02746324986219406,
      "eval_runtime": 0.2621,
      "eval_samples_per_second": 206.026,
      "eval_steps_per_second": 3.815,
      "step": 38
    },
    {
      "epoch": 17.33,
      "learning_rate": 3.388888888888889e-05,
      "loss": 0.1079,
      "step": 39
    },
    {
      "epoch": 17.78,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.1383,
      "step": 40
    },
    {
      "epoch": 17.78,
      "eval_accuracy": 1.0,
      "eval_loss": 0.02032529003918171,
      "eval_runtime": 0.258,
      "eval_samples_per_second": 209.286,
      "eval_steps_per_second": 3.876,
      "step": 40
    },
    {
      "epoch": 18.22,
      "learning_rate": 3.277777777777778e-05,
      "loss": 0.1454,
      "step": 41
    },
    {
      "epoch": 18.67,
      "learning_rate": 3.222222222222223e-05,
      "loss": 0.1659,
      "step": 42
    },
    {
      "epoch": 18.67,
      "eval_accuracy": 1.0,
      "eval_loss": 0.018641581758856773,
      "eval_runtime": 0.2631,
      "eval_samples_per_second": 205.27,
      "eval_steps_per_second": 3.801,
      "step": 42
    },
    {
      "epoch": 19.11,
      "learning_rate": 3.1666666666666666e-05,
      "loss": 0.5264,
      "step": 43
    },
    {
      "epoch": 19.56,
      "learning_rate": 3.111111111111111e-05,
      "loss": 0.1586,
      "step": 44
    },
    {
      "epoch": 20.0,
      "learning_rate": 3.055555555555556e-05,
      "loss": 0.153,
      "step": 45
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.018401814624667168,
      "eval_runtime": 0.2579,
      "eval_samples_per_second": 209.351,
      "eval_steps_per_second": 3.877,
      "step": 45
    },
    {
      "epoch": 20.44,
      "learning_rate": 3e-05,
      "loss": 0.1323,
      "step": 46
    },
    {
      "epoch": 20.89,
      "learning_rate": 2.9444444444444448e-05,
      "loss": 0.1497,
      "step": 47
    },
    {
      "epoch": 20.89,
      "eval_accuracy": 1.0,
      "eval_loss": 0.021507540717720985,
      "eval_runtime": 0.2732,
      "eval_samples_per_second": 197.68,
      "eval_steps_per_second": 3.661,
      "step": 47
    },
    {
      "epoch": 21.33,
      "learning_rate": 2.8888888888888888e-05,
      "loss": 0.2375,
      "step": 48
    },
    {
      "epoch": 21.78,
      "learning_rate": 2.8333333333333335e-05,
      "loss": 0.1088,
      "step": 49
    },
    {
      "epoch": 21.78,
      "eval_accuracy": 0.9814814814814815,
      "eval_loss": 0.040791917592287064,
      "eval_runtime": 0.326,
      "eval_samples_per_second": 165.619,
      "eval_steps_per_second": 3.067,
      "step": 49
    },
    {
      "epoch": 22.22,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.0887,
      "step": 50
    },
    {
      "epoch": 22.67,
      "learning_rate": 2.7222222222222223e-05,
      "loss": 0.1796,
      "step": 51
    },
    {
      "epoch": 22.67,
      "eval_accuracy": 0.9814814814814815,
      "eval_loss": 0.03766679763793945,
      "eval_runtime": 0.2593,
      "eval_samples_per_second": 208.223,
      "eval_steps_per_second": 3.856,
      "step": 51
    },
    {
      "epoch": 23.11,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 0.1459,
      "step": 52
    },
    {
      "epoch": 23.56,
      "learning_rate": 2.6111111111111114e-05,
      "loss": 0.156,
      "step": 53
    },
    {
      "epoch": 24.0,
      "learning_rate": 2.5555555555555554e-05,
      "loss": 0.1041,
      "step": 54
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.9814814814814815,
      "eval_loss": 0.06314324587583542,
      "eval_runtime": 0.2592,
      "eval_samples_per_second": 208.351,
      "eval_steps_per_second": 3.858,
      "step": 54
    },
    {
      "epoch": 24.44,
      "learning_rate": 2.5e-05,
      "loss": 0.1796,
      "step": 55
    },
    {
      "epoch": 24.89,
      "learning_rate": 2.4444444444444445e-05,
      "loss": 0.1193,
      "step": 56
    },
    {
      "epoch": 24.89,
      "eval_accuracy": 0.9814814814814815,
      "eval_loss": 0.06367243826389313,
      "eval_runtime": 0.2815,
      "eval_samples_per_second": 191.858,
      "eval_steps_per_second": 3.553,
      "step": 56
    },
    {
      "epoch": 25.33,
      "learning_rate": 2.3888888888888892e-05,
      "loss": 0.0848,
      "step": 57
    },
    {
      "epoch": 25.78,
      "learning_rate": 2.3333333333333336e-05,
      "loss": 0.1653,
      "step": 58
    },
    {
      "epoch": 25.78,
      "eval_accuracy": 0.9814814814814815,
      "eval_loss": 0.07295241206884384,
      "eval_runtime": 0.2602,
      "eval_samples_per_second": 207.497,
      "eval_steps_per_second": 3.843,
      "step": 58
    },
    {
      "epoch": 26.22,
      "learning_rate": 2.277777777777778e-05,
      "loss": 0.0973,
      "step": 59
    },
    {
      "epoch": 26.67,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.1296,
      "step": 60
    },
    {
      "epoch": 26.67,
      "eval_accuracy": 0.9814814814814815,
      "eval_loss": 0.07793273031711578,
      "eval_runtime": 0.2603,
      "eval_samples_per_second": 207.468,
      "eval_steps_per_second": 3.842,
      "step": 60
    },
    {
      "epoch": 27.11,
      "learning_rate": 2.1666666666666667e-05,
      "loss": 0.1243,
      "step": 61
    },
    {
      "epoch": 27.56,
      "learning_rate": 2.111111111111111e-05,
      "loss": 0.1566,
      "step": 62
    },
    {
      "epoch": 28.0,
      "learning_rate": 2.0555555555555555e-05,
      "loss": 0.1036,
      "step": 63
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.9814814814814815,
      "eval_loss": 0.031223006546497345,
      "eval_runtime": 0.2604,
      "eval_samples_per_second": 207.369,
      "eval_steps_per_second": 3.84,
      "step": 63
    },
    {
      "epoch": 28.44,
      "learning_rate": 2e-05,
      "loss": 0.1376,
      "step": 64
    },
    {
      "epoch": 28.89,
      "learning_rate": 1.9444444444444445e-05,
      "loss": 0.1287,
      "step": 65
    },
    {
      "epoch": 28.89,
      "eval_accuracy": 1.0,
      "eval_loss": 0.011618535034358501,
      "eval_runtime": 0.2729,
      "eval_samples_per_second": 197.869,
      "eval_steps_per_second": 3.664,
      "step": 65
    },
    {
      "epoch": 29.33,
      "learning_rate": 1.888888888888889e-05,
      "loss": 0.0961,
      "step": 66
    },
    {
      "epoch": 29.78,
      "learning_rate": 1.8333333333333333e-05,
      "loss": 0.1307,
      "step": 67
    },
    {
      "epoch": 29.78,
      "eval_accuracy": 1.0,
      "eval_loss": 0.012949406169354916,
      "eval_runtime": 0.2593,
      "eval_samples_per_second": 208.256,
      "eval_steps_per_second": 3.857,
      "step": 67
    },
    {
      "epoch": 30.22,
      "learning_rate": 1.777777777777778e-05,
      "loss": 0.0873,
      "step": 68
    },
    {
      "epoch": 30.67,
      "learning_rate": 1.7222222222222224e-05,
      "loss": 0.1337,
      "step": 69
    },
    {
      "epoch": 30.67,
      "eval_accuracy": 1.0,
      "eval_loss": 0.01411823183298111,
      "eval_runtime": 0.2669,
      "eval_samples_per_second": 202.286,
      "eval_steps_per_second": 3.746,
      "step": 69
    },
    {
      "epoch": 31.11,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.0961,
      "step": 70
    },
    {
      "epoch": 31.56,
      "learning_rate": 1.6111111111111115e-05,
      "loss": 0.1316,
      "step": 71
    },
    {
      "epoch": 32.0,
      "learning_rate": 1.5555555555555555e-05,
      "loss": 0.1274,
      "step": 72
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.016076432541012764,
      "eval_runtime": 0.2582,
      "eval_samples_per_second": 209.143,
      "eval_steps_per_second": 3.873,
      "step": 72
    },
    {
      "epoch": 32.44,
      "learning_rate": 1.5e-05,
      "loss": 0.1192,
      "step": 73
    },
    {
      "epoch": 32.89,
      "learning_rate": 1.4444444444444444e-05,
      "loss": 0.1612,
      "step": 74
    },
    {
      "epoch": 32.89,
      "eval_accuracy": 1.0,
      "eval_loss": 0.017708102241158485,
      "eval_runtime": 0.2623,
      "eval_samples_per_second": 205.838,
      "eval_steps_per_second": 3.812,
      "step": 74
    },
    {
      "epoch": 33.33,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.2107,
      "step": 75
    },
    {
      "epoch": 33.78,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.1504,
      "step": 76
    },
    {
      "epoch": 33.78,
      "eval_accuracy": 1.0,
      "eval_loss": 0.018124129623174667,
      "eval_runtime": 0.2587,
      "eval_samples_per_second": 208.707,
      "eval_steps_per_second": 3.865,
      "step": 76
    },
    {
      "epoch": 34.22,
      "learning_rate": 1.2777777777777777e-05,
      "loss": 0.1108,
      "step": 77
    },
    {
      "epoch": 34.67,
      "learning_rate": 1.2222222222222222e-05,
      "loss": 0.1307,
      "step": 78
    },
    {
      "epoch": 34.67,
      "eval_accuracy": 1.0,
      "eval_loss": 0.017455147579312325,
      "eval_runtime": 0.2609,
      "eval_samples_per_second": 206.946,
      "eval_steps_per_second": 3.832,
      "step": 78
    },
    {
      "epoch": 35.11,
      "learning_rate": 1.1666666666666668e-05,
      "loss": 0.348,
      "step": 79
    },
    {
      "epoch": 35.56,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 0.1275,
      "step": 80
    },
    {
      "epoch": 36.0,
      "learning_rate": 1.0555555555555555e-05,
      "loss": 0.125,
      "step": 81
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.01702389493584633,
      "eval_runtime": 0.2603,
      "eval_samples_per_second": 207.432,
      "eval_steps_per_second": 3.841,
      "step": 81
    },
    {
      "epoch": 36.44,
      "learning_rate": 1e-05,
      "loss": 0.1085,
      "step": 82
    },
    {
      "epoch": 36.89,
      "learning_rate": 9.444444444444445e-06,
      "loss": 0.1357,
      "step": 83
    },
    {
      "epoch": 36.89,
      "eval_accuracy": 1.0,
      "eval_loss": 0.016507649794220924,
      "eval_runtime": 0.2662,
      "eval_samples_per_second": 202.839,
      "eval_steps_per_second": 3.756,
      "step": 83
    },
    {
      "epoch": 37.33,
      "learning_rate": 8.88888888888889e-06,
      "loss": 0.1008,
      "step": 84
    },
    {
      "epoch": 37.78,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.1033,
      "step": 85
    },
    {
      "epoch": 37.78,
      "eval_accuracy": 1.0,
      "eval_loss": 0.016153164207935333,
      "eval_runtime": 0.4053,
      "eval_samples_per_second": 133.244,
      "eval_steps_per_second": 2.467,
      "step": 85
    },
    {
      "epoch": 38.22,
      "learning_rate": 7.777777777777777e-06,
      "loss": 0.1346,
      "step": 86
    },
    {
      "epoch": 38.67,
      "learning_rate": 7.222222222222222e-06,
      "loss": 0.1749,
      "step": 87
    },
    {
      "epoch": 38.67,
      "eval_accuracy": 1.0,
      "eval_loss": 0.016394561156630516,
      "eval_runtime": 0.2605,
      "eval_samples_per_second": 207.331,
      "eval_steps_per_second": 3.839,
      "step": 87
    },
    {
      "epoch": 39.11,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.0824,
      "step": 88
    },
    {
      "epoch": 39.56,
      "learning_rate": 6.111111111111111e-06,
      "loss": 0.1232,
      "step": 89
    },
    {
      "epoch": 40.0,
      "learning_rate": 5.555555555555556e-06,
      "loss": 0.0906,
      "step": 90
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.015265186317265034,
      "eval_runtime": 0.2605,
      "eval_samples_per_second": 207.293,
      "eval_steps_per_second": 3.839,
      "step": 90
    },
    {
      "epoch": 40.44,
      "learning_rate": 5e-06,
      "loss": 0.1454,
      "step": 91
    },
    {
      "epoch": 40.89,
      "learning_rate": 4.444444444444445e-06,
      "loss": 0.1349,
      "step": 92
    },
    {
      "epoch": 40.89,
      "eval_accuracy": 1.0,
      "eval_loss": 0.01515868864953518,
      "eval_runtime": 0.26,
      "eval_samples_per_second": 207.728,
      "eval_steps_per_second": 3.847,
      "step": 92
    },
    {
      "epoch": 41.33,
      "learning_rate": 3.888888888888889e-06,
      "loss": 0.1047,
      "step": 93
    },
    {
      "epoch": 41.78,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.1056,
      "step": 94
    },
    {
      "epoch": 41.78,
      "eval_accuracy": 1.0,
      "eval_loss": 0.015033537521958351,
      "eval_runtime": 0.2588,
      "eval_samples_per_second": 208.667,
      "eval_steps_per_second": 3.864,
      "step": 94
    },
    {
      "epoch": 42.22,
      "learning_rate": 2.777777777777778e-06,
      "loss": 0.1196,
      "step": 95
    },
    {
      "epoch": 42.67,
      "learning_rate": 2.2222222222222225e-06,
      "loss": 0.1328,
      "step": 96
    },
    {
      "epoch": 42.67,
      "eval_accuracy": 1.0,
      "eval_loss": 0.014817849732935429,
      "eval_runtime": 0.2611,
      "eval_samples_per_second": 206.804,
      "eval_steps_per_second": 3.83,
      "step": 96
    },
    {
      "epoch": 43.11,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.1358,
      "step": 97
    },
    {
      "epoch": 43.56,
      "learning_rate": 1.1111111111111112e-06,
      "loss": 0.1221,
      "step": 98
    },
    {
      "epoch": 44.0,
      "learning_rate": 5.555555555555556e-07,
      "loss": 0.0742,
      "step": 99
    },
    {
      "epoch": 44.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.014790916815400124,
      "eval_runtime": 0.2592,
      "eval_samples_per_second": 208.324,
      "eval_steps_per_second": 3.858,
      "step": 99
    },
    {
      "epoch": 44.44,
      "learning_rate": 0.0,
      "loss": 0.0875,
      "step": 100
    },
    {
      "epoch": 44.44,
      "eval_accuracy": 1.0,
      "eval_loss": 0.014792277477681637,
      "eval_runtime": 0.2608,
      "eval_samples_per_second": 207.072,
      "eval_steps_per_second": 3.835,
      "step": 100
    },
    {
      "epoch": 44.44,
      "step": 100,
      "total_flos": 1.6586385457107272e+18,
      "train_loss": 0.20484884686768054,
      "train_runtime": 700.3363,
      "train_samples_per_second": 34.341,
      "train_steps_per_second": 0.143
    }
  ],
  "max_steps": 100,
  "num_train_epochs": 50,
  "total_flos": 1.6586385457107272e+18,
  "trial_name": null,
  "trial_params": null
}