|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.4804803133010864, |
|
"eval_runtime": 2.9697, |
|
"eval_samples_per_second": 33.673, |
|
"eval_steps_per_second": 4.377, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.57, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.31524839997291565, |
|
"eval_runtime": 3.0246, |
|
"eval_samples_per_second": 33.062, |
|
"eval_steps_per_second": 4.298, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.59, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.30196473002433777, |
|
"eval_runtime": 3.0719, |
|
"eval_samples_per_second": 32.553, |
|
"eval_steps_per_second": 4.232, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.2893486022949219, |
|
"eval_runtime": 3.1068, |
|
"eval_samples_per_second": 32.188, |
|
"eval_steps_per_second": 4.184, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 0.2987529933452606, |
|
"eval_runtime": 3.1313, |
|
"eval_samples_per_second": 31.936, |
|
"eval_steps_per_second": 4.152, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.2916392982006073, |
|
"eval_runtime": 3.1469, |
|
"eval_samples_per_second": 31.777, |
|
"eval_steps_per_second": 4.131, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.29468733072280884, |
|
"eval_runtime": 3.1542, |
|
"eval_samples_per_second": 31.704, |
|
"eval_steps_per_second": 4.121, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.2887653708457947, |
|
"eval_runtime": 3.1629, |
|
"eval_samples_per_second": 31.617, |
|
"eval_steps_per_second": 4.11, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.53, |
|
"eval_loss": 0.2915412187576294, |
|
"eval_runtime": 3.1715, |
|
"eval_samples_per_second": 31.531, |
|
"eval_steps_per_second": 4.099, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.29383793473243713, |
|
"eval_runtime": 3.1658, |
|
"eval_samples_per_second": 31.587, |
|
"eval_steps_per_second": 4.106, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 9, |
|
"best_eval_accuracy": 0.63, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.36, |
|
"eval_loss": 0.29850146174430847, |
|
"eval_runtime": 3.1673, |
|
"eval_samples_per_second": 31.572, |
|
"eval_steps_per_second": 4.104, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 9, |
|
"best_eval_accuracy": 0.63, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.28536227345466614, |
|
"eval_runtime": 3.1687, |
|
"eval_samples_per_second": 31.558, |
|
"eval_steps_per_second": 4.103, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.49, |
|
"eval_loss": 0.2870297431945801, |
|
"eval_runtime": 3.1692, |
|
"eval_samples_per_second": 31.554, |
|
"eval_steps_per_second": 4.102, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.28021568059921265, |
|
"eval_runtime": 3.1713, |
|
"eval_samples_per_second": 31.533, |
|
"eval_steps_per_second": 4.099, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.2801496684551239, |
|
"eval_runtime": 3.1719, |
|
"eval_samples_per_second": 31.527, |
|
"eval_steps_per_second": 4.098, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2805960774421692, |
|
"eval_runtime": 3.1705, |
|
"eval_samples_per_second": 31.541, |
|
"eval_steps_per_second": 4.1, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.28096577525138855, |
|
"eval_runtime": 3.1719, |
|
"eval_samples_per_second": 31.527, |
|
"eval_steps_per_second": 4.098, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.2888098657131195, |
|
"eval_runtime": 3.1688, |
|
"eval_samples_per_second": 31.558, |
|
"eval_steps_per_second": 4.102, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 17, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.278043657541275, |
|
"eval_runtime": 3.1698, |
|
"eval_samples_per_second": 31.548, |
|
"eval_steps_per_second": 4.101, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 17, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.015, |
|
"loss": 0.6923, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.28032350540161133, |
|
"eval_runtime": 3.1696, |
|
"eval_samples_per_second": 31.549, |
|
"eval_steps_per_second": 4.101, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 17, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.2768295407295227, |
|
"eval_runtime": 3.1686, |
|
"eval_samples_per_second": 31.559, |
|
"eval_steps_per_second": 4.103, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 17, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.2743799388408661, |
|
"eval_runtime": 3.1675, |
|
"eval_samples_per_second": 31.57, |
|
"eval_steps_per_second": 4.104, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 17, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.2831212282180786, |
|
"eval_runtime": 3.1751, |
|
"eval_samples_per_second": 31.495, |
|
"eval_steps_per_second": 4.094, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 17, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.27428343892097473, |
|
"eval_runtime": 3.174, |
|
"eval_samples_per_second": 31.506, |
|
"eval_steps_per_second": 4.096, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 23, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.2847433388233185, |
|
"eval_runtime": 3.1729, |
|
"eval_samples_per_second": 31.517, |
|
"eval_steps_per_second": 4.097, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.273659884929657, |
|
"eval_runtime": 3.1699, |
|
"eval_samples_per_second": 31.547, |
|
"eval_steps_per_second": 4.101, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.28170689940452576, |
|
"eval_runtime": 3.1699, |
|
"eval_samples_per_second": 31.547, |
|
"eval_steps_per_second": 4.101, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.2770252525806427, |
|
"eval_runtime": 3.1658, |
|
"eval_samples_per_second": 31.587, |
|
"eval_steps_per_second": 4.106, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.28874772787094116, |
|
"eval_runtime": 3.1645, |
|
"eval_samples_per_second": 31.6, |
|
"eval_steps_per_second": 4.108, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.27803295850753784, |
|
"eval_runtime": 3.1654, |
|
"eval_samples_per_second": 31.592, |
|
"eval_steps_per_second": 4.107, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.270718514919281, |
|
"eval_runtime": 3.1669, |
|
"eval_samples_per_second": 31.577, |
|
"eval_steps_per_second": 4.105, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.2889265716075897, |
|
"eval_runtime": 3.1651, |
|
"eval_samples_per_second": 31.595, |
|
"eval_steps_per_second": 4.107, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.2821086645126343, |
|
"eval_runtime": 3.1646, |
|
"eval_samples_per_second": 31.6, |
|
"eval_steps_per_second": 4.108, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.2735106647014618, |
|
"eval_runtime": 3.1678, |
|
"eval_samples_per_second": 31.568, |
|
"eval_steps_per_second": 4.104, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.27724650502204895, |
|
"eval_runtime": 3.1727, |
|
"eval_samples_per_second": 31.519, |
|
"eval_steps_per_second": 4.097, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.27664193511009216, |
|
"eval_runtime": 3.1673, |
|
"eval_samples_per_second": 31.573, |
|
"eval_steps_per_second": 4.104, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.28623029589653015, |
|
"eval_runtime": 3.1643, |
|
"eval_samples_per_second": 31.602, |
|
"eval_steps_per_second": 4.108, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.27454671263694763, |
|
"eval_runtime": 3.1647, |
|
"eval_samples_per_second": 31.599, |
|
"eval_steps_per_second": 4.108, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.28275245428085327, |
|
"eval_runtime": 3.1658, |
|
"eval_samples_per_second": 31.587, |
|
"eval_steps_per_second": 4.106, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.01, |
|
"loss": 0.5864, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.3264109790325165, |
|
"eval_runtime": 3.1694, |
|
"eval_samples_per_second": 31.552, |
|
"eval_steps_per_second": 4.102, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.2749994993209839, |
|
"eval_runtime": 3.1811, |
|
"eval_samples_per_second": 31.436, |
|
"eval_steps_per_second": 4.087, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.28311243653297424, |
|
"eval_runtime": 3.1697, |
|
"eval_samples_per_second": 31.549, |
|
"eval_steps_per_second": 4.101, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.2724871039390564, |
|
"eval_runtime": 3.1778, |
|
"eval_samples_per_second": 31.468, |
|
"eval_steps_per_second": 4.091, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.2908870577812195, |
|
"eval_runtime": 3.1727, |
|
"eval_samples_per_second": 31.519, |
|
"eval_steps_per_second": 4.098, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.2840518653392792, |
|
"eval_runtime": 3.1714, |
|
"eval_samples_per_second": 31.532, |
|
"eval_steps_per_second": 4.099, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.31259122490882874, |
|
"eval_runtime": 3.1745, |
|
"eval_samples_per_second": 31.501, |
|
"eval_steps_per_second": 4.095, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.28924334049224854, |
|
"eval_runtime": 3.1749, |
|
"eval_samples_per_second": 31.497, |
|
"eval_steps_per_second": 4.095, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.2886520326137543, |
|
"eval_runtime": 3.177, |
|
"eval_samples_per_second": 31.476, |
|
"eval_steps_per_second": 4.092, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.2833870053291321, |
|
"eval_runtime": 3.176, |
|
"eval_samples_per_second": 31.486, |
|
"eval_steps_per_second": 4.093, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.2731325626373291, |
|
"eval_runtime": 3.1813, |
|
"eval_samples_per_second": 31.433, |
|
"eval_steps_per_second": 4.086, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.28878146409988403, |
|
"eval_runtime": 3.1811, |
|
"eval_samples_per_second": 31.436, |
|
"eval_steps_per_second": 4.087, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.3080463707447052, |
|
"eval_runtime": 3.1823, |
|
"eval_samples_per_second": 31.423, |
|
"eval_steps_per_second": 4.085, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.28622153401374817, |
|
"eval_runtime": 3.1939, |
|
"eval_samples_per_second": 31.309, |
|
"eval_steps_per_second": 4.07, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.27717000246047974, |
|
"eval_runtime": 3.1836, |
|
"eval_samples_per_second": 31.411, |
|
"eval_steps_per_second": 4.083, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.2790798544883728, |
|
"eval_runtime": 3.186, |
|
"eval_samples_per_second": 31.387, |
|
"eval_steps_per_second": 4.08, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.29302406311035156, |
|
"eval_runtime": 3.1924, |
|
"eval_samples_per_second": 31.325, |
|
"eval_steps_per_second": 4.072, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.27831196784973145, |
|
"eval_runtime": 3.1927, |
|
"eval_samples_per_second": 31.321, |
|
"eval_steps_per_second": 4.072, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.28552454710006714, |
|
"eval_runtime": 3.1926, |
|
"eval_samples_per_second": 31.322, |
|
"eval_steps_per_second": 4.072, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.28503143787384033, |
|
"eval_runtime": 3.19, |
|
"eval_samples_per_second": 31.348, |
|
"eval_steps_per_second": 4.075, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.005, |
|
"loss": 0.4926, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.28993481397628784, |
|
"eval_runtime": 3.1899, |
|
"eval_samples_per_second": 31.348, |
|
"eval_steps_per_second": 4.075, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.27968910336494446, |
|
"eval_runtime": 3.1909, |
|
"eval_samples_per_second": 31.339, |
|
"eval_steps_per_second": 4.074, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.33221057057380676, |
|
"eval_runtime": 3.1856, |
|
"eval_samples_per_second": 31.391, |
|
"eval_steps_per_second": 4.081, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.2761762738227844, |
|
"eval_runtime": 3.1864, |
|
"eval_samples_per_second": 31.384, |
|
"eval_steps_per_second": 4.08, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.2816024720668793, |
|
"eval_runtime": 3.1868, |
|
"eval_samples_per_second": 31.38, |
|
"eval_steps_per_second": 4.079, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.2951596677303314, |
|
"eval_runtime": 3.1855, |
|
"eval_samples_per_second": 31.393, |
|
"eval_steps_per_second": 4.081, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.2794454097747803, |
|
"eval_runtime": 3.1836, |
|
"eval_samples_per_second": 31.411, |
|
"eval_steps_per_second": 4.083, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.2873387932777405, |
|
"eval_runtime": 3.1872, |
|
"eval_samples_per_second": 31.375, |
|
"eval_steps_per_second": 4.079, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.2834796607494354, |
|
"eval_runtime": 3.1878, |
|
"eval_samples_per_second": 31.37, |
|
"eval_steps_per_second": 4.078, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.2908068597316742, |
|
"eval_runtime": 3.2004, |
|
"eval_samples_per_second": 31.246, |
|
"eval_steps_per_second": 4.062, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.300818532705307, |
|
"eval_runtime": 3.1872, |
|
"eval_samples_per_second": 31.376, |
|
"eval_steps_per_second": 4.079, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.2892753481864929, |
|
"eval_runtime": 3.1915, |
|
"eval_samples_per_second": 31.333, |
|
"eval_steps_per_second": 4.073, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.28255218267440796, |
|
"eval_runtime": 3.1936, |
|
"eval_samples_per_second": 31.313, |
|
"eval_steps_per_second": 4.071, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.29189300537109375, |
|
"eval_runtime": 3.189, |
|
"eval_samples_per_second": 31.358, |
|
"eval_steps_per_second": 4.077, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.2832392454147339, |
|
"eval_runtime": 3.1867, |
|
"eval_samples_per_second": 31.381, |
|
"eval_steps_per_second": 4.079, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.2830210328102112, |
|
"eval_runtime": 3.1895, |
|
"eval_samples_per_second": 31.353, |
|
"eval_steps_per_second": 4.076, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.2808878719806671, |
|
"eval_runtime": 3.1857, |
|
"eval_samples_per_second": 31.39, |
|
"eval_steps_per_second": 4.081, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.2822246849536896, |
|
"eval_runtime": 3.1912, |
|
"eval_samples_per_second": 31.336, |
|
"eval_steps_per_second": 4.074, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.2884049117565155, |
|
"eval_runtime": 3.1866, |
|
"eval_samples_per_second": 31.382, |
|
"eval_steps_per_second": 4.08, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.290973961353302, |
|
"eval_runtime": 3.191, |
|
"eval_samples_per_second": 31.339, |
|
"eval_steps_per_second": 4.074, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.4369, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.2896697521209717, |
|
"eval_runtime": 3.1895, |
|
"eval_samples_per_second": 31.353, |
|
"eval_steps_per_second": 4.076, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.5520365753173828, |
|
"train_runtime": 1695.1694, |
|
"train_samples_per_second": 18.877, |
|
"train_steps_per_second": 1.18 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|