|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 37.0, |
|
"global_step": 999, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.2262943855309169e-05, |
|
"loss": 3.1653, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.547526790579403, |
|
"eval_loss": 2.819357395172119, |
|
"eval_runtime": 8.7539, |
|
"eval_samples_per_second": 50.72, |
|
"eval_steps_per_second": 0.228, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.4841962570206113e-05, |
|
"loss": 2.7725, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5574515806383172, |
|
"eval_loss": 2.6864047050476074, |
|
"eval_runtime": 7.4551, |
|
"eval_samples_per_second": 59.556, |
|
"eval_steps_per_second": 0.268, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.6350591807078892e-05, |
|
"loss": 2.6256, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5682560989844753, |
|
"eval_loss": 2.602532148361206, |
|
"eval_runtime": 7.5503, |
|
"eval_samples_per_second": 58.806, |
|
"eval_steps_per_second": 0.265, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.7420981285103056e-05, |
|
"loss": 2.5044, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.601172152498224, |
|
"eval_loss": 2.341265916824341, |
|
"eval_runtime": 7.5499, |
|
"eval_samples_per_second": 58.809, |
|
"eval_steps_per_second": 0.265, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.825123986666868e-05, |
|
"loss": 2.4348, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6008952865794787, |
|
"eval_loss": 2.3422300815582275, |
|
"eval_runtime": 7.5767, |
|
"eval_samples_per_second": 58.601, |
|
"eval_steps_per_second": 0.264, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.892961052197583e-05, |
|
"loss": 2.42, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5931487564523698, |
|
"eval_loss": 2.3707046508789062, |
|
"eval_runtime": 7.5549, |
|
"eval_samples_per_second": 58.77, |
|
"eval_steps_per_second": 0.265, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.9503164738653782e-05, |
|
"loss": 2.334, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6161689510150978, |
|
"eval_loss": 2.23409366607666, |
|
"eval_runtime": 7.5922, |
|
"eval_samples_per_second": 58.481, |
|
"eval_steps_per_second": 0.263, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 2.288, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6175385999100584, |
|
"eval_loss": 2.2085084915161133, |
|
"eval_runtime": 7.5018, |
|
"eval_samples_per_second": 59.186, |
|
"eval_steps_per_second": 0.267, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.2954, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6139404159640247, |
|
"eval_loss": 2.2152740955352783, |
|
"eval_runtime": 7.565, |
|
"eval_samples_per_second": 58.691, |
|
"eval_steps_per_second": 0.264, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.2934, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6153211306628938, |
|
"eval_loss": 2.232224225997925, |
|
"eval_runtime": 7.5858, |
|
"eval_samples_per_second": 58.531, |
|
"eval_steps_per_second": 0.264, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.2283, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6139165373025295, |
|
"eval_loss": 2.231621503829956, |
|
"eval_runtime": 6.8069, |
|
"eval_samples_per_second": 65.228, |
|
"eval_steps_per_second": 0.294, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.2228, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6216490772616965, |
|
"eval_loss": 2.1547019481658936, |
|
"eval_runtime": 7.5498, |
|
"eval_samples_per_second": 58.809, |
|
"eval_steps_per_second": 0.265, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.2178, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6255061354433901, |
|
"eval_loss": 2.1324307918548584, |
|
"eval_runtime": 7.037, |
|
"eval_samples_per_second": 63.095, |
|
"eval_steps_per_second": 0.284, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.1743, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6387313908007641, |
|
"eval_loss": 2.018895387649536, |
|
"eval_runtime": 7.5158, |
|
"eval_samples_per_second": 59.076, |
|
"eval_steps_per_second": 0.266, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.0488, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6706725076959508, |
|
"eval_loss": 1.7761304378509521, |
|
"eval_runtime": 6.6483, |
|
"eval_samples_per_second": 66.784, |
|
"eval_steps_per_second": 0.301, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.9293, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6976124177972629, |
|
"eval_loss": 1.5710580348968506, |
|
"eval_runtime": 7.0351, |
|
"eval_samples_per_second": 63.112, |
|
"eval_steps_per_second": 0.284, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.824, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7190793783123921, |
|
"eval_loss": 1.4179129600524902, |
|
"eval_runtime": 7.5577, |
|
"eval_samples_per_second": 58.748, |
|
"eval_steps_per_second": 0.265, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.677, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7287676129762578, |
|
"eval_loss": 1.3340463638305664, |
|
"eval_runtime": 7.6139, |
|
"eval_samples_per_second": 58.315, |
|
"eval_steps_per_second": 0.263, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5681, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7393688419248894, |
|
"eval_loss": 1.262519121170044, |
|
"eval_runtime": 7.6652, |
|
"eval_samples_per_second": 57.924, |
|
"eval_steps_per_second": 0.261, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4292, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7635153050702649, |
|
"eval_loss": 1.123979091644287, |
|
"eval_runtime": 7.5316, |
|
"eval_samples_per_second": 58.951, |
|
"eval_steps_per_second": 0.266, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3347, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7732615942452775, |
|
"eval_loss": 1.0541319847106934, |
|
"eval_runtime": 7.5273, |
|
"eval_samples_per_second": 58.985, |
|
"eval_steps_per_second": 0.266, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2435, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7811072373166627, |
|
"eval_loss": 1.0116688013076782, |
|
"eval_runtime": 7.6371, |
|
"eval_samples_per_second": 58.137, |
|
"eval_steps_per_second": 0.262, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1747, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7842111556166677, |
|
"eval_loss": 0.9821351766586304, |
|
"eval_runtime": 7.6429, |
|
"eval_samples_per_second": 58.093, |
|
"eval_steps_per_second": 0.262, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1536, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7867322173128384, |
|
"eval_loss": 0.964832603931427, |
|
"eval_runtime": 6.912, |
|
"eval_samples_per_second": 64.236, |
|
"eval_steps_per_second": 0.289, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0971, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7866705302810779, |
|
"eval_loss": 0.9783045053482056, |
|
"eval_runtime": 7.5744, |
|
"eval_samples_per_second": 58.618, |
|
"eval_steps_per_second": 0.264, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0809, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7918671517427532, |
|
"eval_loss": 0.9400666356086731, |
|
"eval_runtime": 7.1416, |
|
"eval_samples_per_second": 62.171, |
|
"eval_steps_per_second": 0.28, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0653, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7834480395236599, |
|
"eval_loss": 0.9835608005523682, |
|
"eval_runtime": 7.5584, |
|
"eval_samples_per_second": 58.743, |
|
"eval_steps_per_second": 0.265, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0341, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7897628339837887, |
|
"eval_loss": 0.9648067951202393, |
|
"eval_runtime": 7.5415, |
|
"eval_samples_per_second": 58.874, |
|
"eval_steps_per_second": 0.265, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0056, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7985819252651742, |
|
"eval_loss": 0.9186079502105713, |
|
"eval_runtime": 7.5907, |
|
"eval_samples_per_second": 58.492, |
|
"eval_steps_per_second": 0.263, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9993, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7981126511353583, |
|
"eval_loss": 0.9101386666297913, |
|
"eval_runtime": 7.191, |
|
"eval_samples_per_second": 61.744, |
|
"eval_steps_per_second": 0.278, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9849, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7893089053803339, |
|
"eval_loss": 0.9685505628585815, |
|
"eval_runtime": 7.5619, |
|
"eval_samples_per_second": 58.716, |
|
"eval_steps_per_second": 0.264, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9799, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7988627229152401, |
|
"eval_loss": 0.9036659002304077, |
|
"eval_runtime": 7.5514, |
|
"eval_samples_per_second": 58.797, |
|
"eval_steps_per_second": 0.265, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.957, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7937915742793792, |
|
"eval_loss": 0.9258220195770264, |
|
"eval_runtime": 7.548, |
|
"eval_samples_per_second": 58.824, |
|
"eval_steps_per_second": 0.265, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9535, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7996154854496198, |
|
"eval_loss": 0.8886759877204895, |
|
"eval_runtime": 7.5277, |
|
"eval_samples_per_second": 58.982, |
|
"eval_steps_per_second": 0.266, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9343, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.8085179621980223, |
|
"eval_loss": 0.8577904105186462, |
|
"eval_runtime": 6.7518, |
|
"eval_samples_per_second": 65.76, |
|
"eval_steps_per_second": 0.296, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9346, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8009545281527245, |
|
"eval_loss": 0.8876005411148071, |
|
"eval_runtime": 6.6328, |
|
"eval_samples_per_second": 66.94, |
|
"eval_steps_per_second": 0.302, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9187, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.8115537848605577, |
|
"eval_loss": 0.8389037251472473, |
|
"eval_runtime": 7.5847, |
|
"eval_samples_per_second": 58.539, |
|
"eval_steps_per_second": 0.264, |
|
"step": 999 |
|
} |
|
], |
|
"max_steps": 1080, |
|
"num_train_epochs": 40, |
|
"total_flos": 311085194280960.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|