|
{ |
|
"best_metric": 3.473081588745117, |
|
"best_model_checkpoint": "domain-adapted-contriever/checkpoint-1440", |
|
"epoch": 48.0, |
|
"global_step": 1440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 10.873493194580078, |
|
"eval_runtime": 0.0467, |
|
"eval_samples_per_second": 449.222, |
|
"eval_steps_per_second": 64.175, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 9.2319917678833, |
|
"eval_runtime": 0.0465, |
|
"eval_samples_per_second": 451.748, |
|
"eval_steps_per_second": 64.535, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 7.761892318725586, |
|
"eval_runtime": 0.0466, |
|
"eval_samples_per_second": 450.664, |
|
"eval_steps_per_second": 64.381, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 7.499742031097412, |
|
"eval_runtime": 0.0467, |
|
"eval_samples_per_second": 449.436, |
|
"eval_steps_per_second": 64.205, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 6.862554550170898, |
|
"eval_runtime": 0.0466, |
|
"eval_samples_per_second": 450.876, |
|
"eval_steps_per_second": 64.411, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 6.674304962158203, |
|
"eval_runtime": 0.0466, |
|
"eval_samples_per_second": 450.913, |
|
"eval_steps_per_second": 64.416, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 6.445440292358398, |
|
"eval_runtime": 0.0466, |
|
"eval_samples_per_second": 450.283, |
|
"eval_steps_per_second": 64.326, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 6.35595178604126, |
|
"eval_runtime": 0.0468, |
|
"eval_samples_per_second": 448.943, |
|
"eval_steps_per_second": 64.135, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 6.049570083618164, |
|
"eval_runtime": 0.0468, |
|
"eval_samples_per_second": 448.913, |
|
"eval_steps_per_second": 64.13, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 5.842072486877441, |
|
"eval_runtime": 0.0468, |
|
"eval_samples_per_second": 448.648, |
|
"eval_steps_per_second": 64.093, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 6.05306339263916, |
|
"eval_runtime": 0.0467, |
|
"eval_samples_per_second": 449.222, |
|
"eval_steps_per_second": 64.175, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 5.601278781890869, |
|
"eval_runtime": 0.0474, |
|
"eval_samples_per_second": 443.192, |
|
"eval_steps_per_second": 63.313, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 5.842929363250732, |
|
"eval_runtime": 0.048, |
|
"eval_samples_per_second": 437.952, |
|
"eval_steps_per_second": 62.565, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 5.403830528259277, |
|
"eval_runtime": 0.0476, |
|
"eval_samples_per_second": 440.999, |
|
"eval_steps_per_second": 63.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 5.1446332931518555, |
|
"eval_runtime": 0.0469, |
|
"eval_samples_per_second": 448.066, |
|
"eval_steps_per_second": 64.009, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 4.861954689025879, |
|
"eval_runtime": 0.0478, |
|
"eval_samples_per_second": 439.045, |
|
"eval_steps_per_second": 62.721, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 1.9840000000000003e-05, |
|
"loss": 6.8332, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 5.051825046539307, |
|
"eval_runtime": 0.0484, |
|
"eval_samples_per_second": 433.592, |
|
"eval_steps_per_second": 61.942, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 4.834855556488037, |
|
"eval_runtime": 0.0475, |
|
"eval_samples_per_second": 442.506, |
|
"eval_steps_per_second": 63.215, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 4.873433589935303, |
|
"eval_runtime": 0.0478, |
|
"eval_samples_per_second": 439.758, |
|
"eval_steps_per_second": 62.823, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 4.606621265411377, |
|
"eval_runtime": 0.0471, |
|
"eval_samples_per_second": 445.719, |
|
"eval_steps_per_second": 63.674, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 4.420989513397217, |
|
"eval_runtime": 0.0469, |
|
"eval_samples_per_second": 447.929, |
|
"eval_steps_per_second": 63.99, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 4.37644100189209, |
|
"eval_runtime": 0.0472, |
|
"eval_samples_per_second": 445.111, |
|
"eval_steps_per_second": 63.587, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 4.4230475425720215, |
|
"eval_runtime": 0.0469, |
|
"eval_samples_per_second": 447.941, |
|
"eval_steps_per_second": 63.992, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 4.332743167877197, |
|
"eval_runtime": 0.0471, |
|
"eval_samples_per_second": 445.731, |
|
"eval_steps_per_second": 63.676, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 4.291798114776611, |
|
"eval_runtime": 0.0471, |
|
"eval_samples_per_second": 445.429, |
|
"eval_steps_per_second": 63.633, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 4.197042942047119, |
|
"eval_runtime": 0.0471, |
|
"eval_samples_per_second": 445.494, |
|
"eval_steps_per_second": 63.642, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 4.085911273956299, |
|
"eval_runtime": 0.047, |
|
"eval_samples_per_second": 446.664, |
|
"eval_steps_per_second": 63.809, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 4.071910858154297, |
|
"eval_runtime": 0.0471, |
|
"eval_samples_per_second": 445.733, |
|
"eval_steps_per_second": 63.676, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 4.150127410888672, |
|
"eval_runtime": 0.0471, |
|
"eval_samples_per_second": 446.266, |
|
"eval_steps_per_second": 63.752, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 3.875171184539795, |
|
"eval_runtime": 0.0469, |
|
"eval_samples_per_second": 447.781, |
|
"eval_steps_per_second": 63.969, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 3.8007264137268066, |
|
"eval_runtime": 0.047, |
|
"eval_samples_per_second": 447.009, |
|
"eval_steps_per_second": 63.858, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 3.7663497924804688, |
|
"eval_runtime": 0.047, |
|
"eval_samples_per_second": 447.099, |
|
"eval_steps_per_second": 63.871, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 4.154388427734375, |
|
"eval_runtime": 0.0471, |
|
"eval_samples_per_second": 445.803, |
|
"eval_steps_per_second": 63.686, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 1.008e-05, |
|
"loss": 4.123, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 3.6973140239715576, |
|
"eval_runtime": 0.0469, |
|
"eval_samples_per_second": 448.23, |
|
"eval_steps_per_second": 64.033, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 4.152168273925781, |
|
"eval_runtime": 0.0469, |
|
"eval_samples_per_second": 448.137, |
|
"eval_steps_per_second": 64.02, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 3.746863603591919, |
|
"eval_runtime": 0.0473, |
|
"eval_samples_per_second": 443.569, |
|
"eval_steps_per_second": 63.367, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 3.8012099266052246, |
|
"eval_runtime": 0.0471, |
|
"eval_samples_per_second": 446.105, |
|
"eval_steps_per_second": 63.729, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 3.714829683303833, |
|
"eval_runtime": 0.0469, |
|
"eval_samples_per_second": 447.308, |
|
"eval_steps_per_second": 63.901, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 3.4752964973449707, |
|
"eval_runtime": 0.047, |
|
"eval_samples_per_second": 447.127, |
|
"eval_steps_per_second": 63.875, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 3.982607126235962, |
|
"eval_runtime": 0.0469, |
|
"eval_samples_per_second": 447.311, |
|
"eval_steps_per_second": 63.902, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 3.850808620452881, |
|
"eval_runtime": 0.0469, |
|
"eval_samples_per_second": 447.856, |
|
"eval_steps_per_second": 63.979, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 4.094546794891357, |
|
"eval_runtime": 0.0469, |
|
"eval_samples_per_second": 448.002, |
|
"eval_steps_per_second": 64.0, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_loss": 3.4853432178497314, |
|
"eval_runtime": 0.047, |
|
"eval_samples_per_second": 446.474, |
|
"eval_steps_per_second": 63.782, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 3.5112228393554688, |
|
"eval_runtime": 0.0471, |
|
"eval_samples_per_second": 446.02, |
|
"eval_steps_per_second": 63.717, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 3.7965281009674072, |
|
"eval_runtime": 0.047, |
|
"eval_samples_per_second": 446.759, |
|
"eval_steps_per_second": 63.823, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 3.6154186725616455, |
|
"eval_runtime": 0.0469, |
|
"eval_samples_per_second": 448.15, |
|
"eval_steps_per_second": 64.021, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_loss": 3.5226211547851562, |
|
"eval_runtime": 0.0468, |
|
"eval_samples_per_second": 448.964, |
|
"eval_steps_per_second": 64.138, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 3.473081588745117, |
|
"eval_runtime": 0.0469, |
|
"eval_samples_per_second": 447.779, |
|
"eval_steps_per_second": 63.968, |
|
"step": 1440 |
|
} |
|
], |
|
"max_steps": 1500, |
|
"num_train_epochs": 50, |
|
"total_flos": 739079114342400.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|