{
  "best_metric": 1.251373291015625,
  "best_model_checkpoint": "./saved_checkpoints/ethical/mistral/checkpoint-50",
  "epoch": 3.000749962501875,
  "eval_steps": 50,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "eval_loss": 1.251373291015625,
      "eval_runtime": 4559.2323,
      "eval_samples_per_second": 2.193,
      "eval_steps_per_second": 0.366,
      "step": 50
    },
    {
      "epoch": 0.12,
      "learning_rate": 9.842414742769675e-06,
      "loss": 1.2609,
      "step": 100
    },
    {
      "epoch": 0.12,
      "eval_loss": 1.2726035118103027,
      "eval_runtime": 4449.7033,
      "eval_samples_per_second": 2.247,
      "eval_steps_per_second": 0.375,
      "step": 100
    },
    {
      "epoch": 0.18,
      "eval_loss": 1.2832562923431396,
      "eval_runtime": 4453.6157,
      "eval_samples_per_second": 2.245,
      "eval_steps_per_second": 0.374,
      "step": 150
    },
    {
      "epoch": 0.24,
      "learning_rate": 1e-05,
      "loss": 1.2778,
      "step": 200
    },
    {
      "epoch": 0.24,
      "eval_loss": 1.2775572538375854,
      "eval_runtime": 4472.9332,
      "eval_samples_per_second": 2.236,
      "eval_steps_per_second": 0.373,
      "step": 200
    },
    {
      "epoch": 0.3,
      "eval_loss": 1.271767258644104,
      "eval_runtime": 4524.5689,
      "eval_samples_per_second": 2.21,
      "eval_steps_per_second": 0.368,
      "step": 250
    },
    {
      "epoch": 0.36,
      "learning_rate": 1e-05,
      "loss": 1.2742,
      "step": 300
    },
    {
      "epoch": 0.36,
      "eval_loss": 1.2736577987670898,
      "eval_runtime": 4552.8861,
      "eval_samples_per_second": 2.196,
      "eval_steps_per_second": 0.366,
      "step": 300
    },
    {
      "epoch": 0.42,
      "eval_loss": 1.2683041095733643,
      "eval_runtime": 4481.4789,
      "eval_samples_per_second": 2.231,
      "eval_steps_per_second": 0.372,
      "step": 350
    },
    {
      "epoch": 0.48,
      "learning_rate": 1e-05,
      "loss": 1.2707,
      "step": 400
    },
    {
      "epoch": 0.48,
      "eval_loss": 1.2667808532714844,
      "eval_runtime": 4512.9429,
      "eval_samples_per_second": 2.216,
      "eval_steps_per_second": 0.369,
      "step": 400
    },
    {
      "epoch": 0.54,
      "eval_loss": 1.2664167881011963,
      "eval_runtime": 4503.4031,
      "eval_samples_per_second": 2.221,
      "eval_steps_per_second": 0.37,
      "step": 450
    },
    {
      "epoch": 0.6,
      "learning_rate": 1e-05,
      "loss": 1.2633,
      "step": 500
    },
    {
      "epoch": 0.6,
      "eval_loss": 1.2625447511672974,
      "eval_runtime": 4523.2077,
      "eval_samples_per_second": 2.211,
      "eval_steps_per_second": 0.369,
      "step": 500
    },
    {
      "epoch": 0.66,
      "eval_loss": 1.2571557760238647,
      "eval_runtime": 4492.8054,
      "eval_samples_per_second": 2.226,
      "eval_steps_per_second": 0.371,
      "step": 550
    },
    {
      "epoch": 0.72,
      "learning_rate": 1e-05,
      "loss": 1.2605,
      "step": 600
    },
    {
      "epoch": 0.72,
      "eval_loss": 1.2611154317855835,
      "eval_runtime": 4465.1303,
      "eval_samples_per_second": 2.24,
      "eval_steps_per_second": 0.373,
      "step": 600
    },
    {
      "epoch": 0.78,
      "eval_loss": 1.261015772819519,
      "eval_runtime": 4573.2523,
      "eval_samples_per_second": 2.187,
      "eval_steps_per_second": 0.365,
      "step": 650
    },
    {
      "epoch": 0.84,
      "learning_rate": 1e-05,
      "loss": 1.2533,
      "step": 700
    },
    {
      "epoch": 0.84,
      "eval_loss": 1.2556573152542114,
      "eval_runtime": 4450.2442,
      "eval_samples_per_second": 2.247,
      "eval_steps_per_second": 0.375,
      "step": 700
    },
    {
      "epoch": 0.9,
      "eval_loss": 1.2533992528915405,
      "eval_runtime": 4528.9739,
      "eval_samples_per_second": 2.208,
      "eval_steps_per_second": 0.368,
      "step": 750
    },
    {
      "epoch": 0.96,
      "learning_rate": 1e-05,
      "loss": 1.2519,
      "step": 800
    },
    {
      "epoch": 0.96,
      "eval_loss": 1.2552576065063477,
      "eval_runtime": 4553.5444,
      "eval_samples_per_second": 2.196,
      "eval_steps_per_second": 0.366,
      "step": 800
    },
    {
      "epoch": 1.02,
      "eval_loss": 1.3253833055496216,
      "eval_runtime": 4454.7877,
      "eval_samples_per_second": 2.245,
      "eval_steps_per_second": 0.374,
      "step": 850
    },
    {
      "epoch": 1.08,
      "learning_rate": 1e-05,
      "loss": 0.7228,
      "step": 900
    },
    {
      "epoch": 1.08,
      "eval_loss": 1.3570994138717651,
      "eval_runtime": 4515.2615,
      "eval_samples_per_second": 2.215,
      "eval_steps_per_second": 0.369,
      "step": 900
    },
    {
      "epoch": 1.14,
      "eval_loss": 1.3670397996902466,
      "eval_runtime": 4433.2773,
      "eval_samples_per_second": 2.256,
      "eval_steps_per_second": 0.376,
      "step": 950
    },
    {
      "epoch": 1.2,
      "learning_rate": 1e-05,
      "loss": 0.7433,
      "step": 1000
    },
    {
      "epoch": 1.2,
      "eval_loss": 1.3700823783874512,
      "eval_runtime": 4573.0757,
      "eval_samples_per_second": 2.187,
      "eval_steps_per_second": 0.365,
      "step": 1000
    },
    {
      "epoch": 1.26,
      "eval_loss": 1.36603844165802,
      "eval_runtime": 4525.6502,
      "eval_samples_per_second": 2.21,
      "eval_steps_per_second": 0.368,
      "step": 1050
    },
    {
      "epoch": 1.32,
      "learning_rate": 1e-05,
      "loss": 0.758,
      "step": 1100
    },
    {
      "epoch": 1.32,
      "eval_loss": 1.3708640336990356,
      "eval_runtime": 4539.0904,
      "eval_samples_per_second": 2.203,
      "eval_steps_per_second": 0.367,
      "step": 1100
    },
    {
      "epoch": 1.38,
      "eval_loss": 1.3683034181594849,
      "eval_runtime": 4412.3439,
      "eval_samples_per_second": 2.266,
      "eval_steps_per_second": 0.378,
      "step": 1150
    },
    {
      "epoch": 1.44,
      "learning_rate": 1e-05,
      "loss": 0.7668,
      "step": 1200
    },
    {
      "epoch": 1.44,
      "eval_loss": 1.3628712892532349,
      "eval_runtime": 4468.3564,
      "eval_samples_per_second": 2.238,
      "eval_steps_per_second": 0.373,
      "step": 1200
    },
    {
      "epoch": 1.5,
      "eval_loss": 1.371540904045105,
      "eval_runtime": 4466.1065,
      "eval_samples_per_second": 2.239,
      "eval_steps_per_second": 0.373,
      "step": 1250
    },
    {
      "epoch": 1.56,
      "learning_rate": 1e-05,
      "loss": 0.7754,
      "step": 1300
    },
    {
      "epoch": 1.56,
      "eval_loss": 1.37712562084198,
      "eval_runtime": 4422.9951,
      "eval_samples_per_second": 2.261,
      "eval_steps_per_second": 0.377,
      "step": 1300
    },
    {
      "epoch": 1.62,
      "eval_loss": 1.3581366539001465,
      "eval_runtime": 4440.3874,
      "eval_samples_per_second": 2.252,
      "eval_steps_per_second": 0.375,
      "step": 1350
    },
    {
      "epoch": 1.68,
      "learning_rate": 1e-05,
      "loss": 0.7827,
      "step": 1400
    },
    {
      "epoch": 1.68,
      "eval_loss": 1.3591225147247314,
      "eval_runtime": 4566.96,
      "eval_samples_per_second": 2.19,
      "eval_steps_per_second": 0.365,
      "step": 1400
    },
    {
      "epoch": 1.74,
      "eval_loss": 1.3655798435211182,
      "eval_runtime": 4518.9623,
      "eval_samples_per_second": 2.213,
      "eval_steps_per_second": 0.369,
      "step": 1450
    },
    {
      "epoch": 1.8,
      "learning_rate": 1e-05,
      "loss": 0.7928,
      "step": 1500
    },
    {
      "epoch": 1.8,
      "eval_loss": 1.3691887855529785,
      "eval_runtime": 4550.0865,
      "eval_samples_per_second": 2.198,
      "eval_steps_per_second": 0.366,
      "step": 1500
    },
    {
      "epoch": 1.86,
      "eval_loss": 1.3695429563522339,
      "eval_runtime": 4559.5122,
      "eval_samples_per_second": 2.193,
      "eval_steps_per_second": 0.366,
      "step": 1550
    },
    {
      "epoch": 1.92,
      "learning_rate": 1e-05,
      "loss": 0.7998,
      "step": 1600
    },
    {
      "epoch": 1.92,
      "eval_loss": 1.3639189004898071,
      "eval_runtime": 4389.9035,
      "eval_samples_per_second": 2.278,
      "eval_steps_per_second": 0.38,
      "step": 1600
    },
    {
      "epoch": 1.98,
      "eval_loss": 1.3616678714752197,
      "eval_runtime": 4562.3432,
      "eval_samples_per_second": 2.192,
      "eval_steps_per_second": 0.365,
      "step": 1650
    },
    {
      "epoch": 2.04,
      "learning_rate": 1e-05,
      "loss": 0.5729,
      "step": 1700
    },
    {
      "epoch": 2.04,
      "eval_loss": 1.5398352146148682,
      "eval_runtime": 4441.3682,
      "eval_samples_per_second": 2.252,
      "eval_steps_per_second": 0.375,
      "step": 1700
    },
    {
      "epoch": 2.1,
      "eval_loss": 1.562680721282959,
      "eval_runtime": 4492.8359,
      "eval_samples_per_second": 2.226,
      "eval_steps_per_second": 0.371,
      "step": 1750
    },
    {
      "epoch": 2.16,
      "learning_rate": 1e-05,
      "loss": 0.4759,
      "step": 1800
    },
    {
      "epoch": 2.16,
      "eval_loss": 1.5819048881530762,
      "eval_runtime": 4460.5449,
      "eval_samples_per_second": 2.242,
      "eval_steps_per_second": 0.374,
      "step": 1800
    },
    {
      "epoch": 2.22,
      "eval_loss": 1.5582789182662964,
      "eval_runtime": 4553.8843,
      "eval_samples_per_second": 2.196,
      "eval_steps_per_second": 0.366,
      "step": 1850
    },
    {
      "epoch": 2.28,
      "learning_rate": 1e-05,
      "loss": 0.4857,
      "step": 1900
    },
    {
      "epoch": 2.28,
      "eval_loss": 1.54148530960083,
      "eval_runtime": 4406.6155,
      "eval_samples_per_second": 2.269,
      "eval_steps_per_second": 0.378,
      "step": 1900
    },
    {
      "epoch": 2.34,
      "eval_loss": 1.564630150794983,
      "eval_runtime": 4557.1018,
      "eval_samples_per_second": 2.194,
      "eval_steps_per_second": 0.366,
      "step": 1950
    },
    {
      "epoch": 2.4,
      "learning_rate": 1e-05,
      "loss": 0.4921,
      "step": 2000
    },
    {
      "epoch": 2.4,
      "eval_loss": 1.5717904567718506,
      "eval_runtime": 4430.1088,
      "eval_samples_per_second": 2.257,
      "eval_steps_per_second": 0.376,
      "step": 2000
    },
    {
      "epoch": 2.46,
      "eval_loss": 1.5744233131408691,
      "eval_runtime": 4555.2722,
      "eval_samples_per_second": 2.195,
      "eval_steps_per_second": 0.366,
      "step": 2050
    },
    {
      "epoch": 2.52,
      "learning_rate": 1e-05,
      "loss": 0.5034,
      "step": 2100
    },
    {
      "epoch": 2.52,
      "eval_loss": 1.5743005275726318,
      "eval_runtime": 4515.3853,
      "eval_samples_per_second": 2.215,
      "eval_steps_per_second": 0.369,
      "step": 2100
    },
    {
      "epoch": 2.58,
      "eval_loss": 1.5678597688674927,
      "eval_runtime": 4504.4169,
      "eval_samples_per_second": 2.22,
      "eval_steps_per_second": 0.37,
      "step": 2150
    },
    {
      "epoch": 2.64,
      "learning_rate": 1e-05,
      "loss": 0.5071,
      "step": 2200
    },
    {
      "epoch": 2.64,
      "eval_loss": 1.5610437393188477,
      "eval_runtime": 4513.3469,
      "eval_samples_per_second": 2.216,
      "eval_steps_per_second": 0.369,
      "step": 2200
    },
    {
      "epoch": 2.7,
      "eval_loss": 1.544044852256775,
      "eval_runtime": 4583.4964,
      "eval_samples_per_second": 2.182,
      "eval_steps_per_second": 0.364,
      "step": 2250
    },
    {
      "epoch": 2.76,
      "learning_rate": 1e-05,
      "loss": 0.5117,
      "step": 2300
    },
    {
      "epoch": 2.76,
      "eval_loss": 1.570798397064209,
      "eval_runtime": 4561.3159,
      "eval_samples_per_second": 2.192,
      "eval_steps_per_second": 0.365,
      "step": 2300
    },
    {
      "epoch": 2.82,
      "eval_loss": 1.5632375478744507,
      "eval_runtime": 4405.4774,
      "eval_samples_per_second": 2.27,
      "eval_steps_per_second": 0.378,
      "step": 2350
    },
    {
      "epoch": 2.88,
      "learning_rate": 1e-05,
      "loss": 0.5191,
      "step": 2400
    },
    {
      "epoch": 2.88,
      "eval_loss": 1.5500311851501465,
      "eval_runtime": 4512.5389,
      "eval_samples_per_second": 2.216,
      "eval_steps_per_second": 0.369,
      "step": 2400
    },
    {
      "epoch": 2.94,
      "eval_loss": 1.571359395980835,
      "eval_runtime": 4476.4923,
      "eval_samples_per_second": 2.234,
      "eval_steps_per_second": 0.372,
      "step": 2450
    },
    {
      "epoch": 3.0,
      "learning_rate": 1e-05,
      "loss": 0.52,
      "step": 2500
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.581729531288147,
      "eval_runtime": 4430.015,
      "eval_samples_per_second": 2.257,
      "eval_steps_per_second": 0.376,
      "step": 2500
    }
  ],
  "logging_steps": 100,
  "max_steps": 1000000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1201,
  "save_steps": 50,
  "total_flos": 27154959237120.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|