|
{ |
|
"best_metric": 3752.7509765625, |
|
"best_model_checkpoint": "mgh6/TCS_Pairing_VAE/checkpoint-7680", |
|
"epoch": 0.7565780359633163, |
|
"eval_steps": 512, |
|
"global_step": 10240, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.962169351263485e-05, |
|
"loss": 122076.6875, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.92433870252697e-05, |
|
"loss": 75620.8047, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 67218.640625, |
|
"eval_runtime": 49.08, |
|
"eval_samples_per_second": 69.234, |
|
"eval_steps_per_second": 69.234, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.886508053790455e-05, |
|
"loss": 71143.4766, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.848677405053938e-05, |
|
"loss": 65495.2617, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 55127.15234375, |
|
"eval_runtime": 57.5577, |
|
"eval_samples_per_second": 59.036, |
|
"eval_steps_per_second": 59.036, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.810846756317423e-05, |
|
"loss": 57857.6445, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.773016107580908e-05, |
|
"loss": 49931.2188, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 44795.0859375, |
|
"eval_runtime": 62.2532, |
|
"eval_samples_per_second": 54.584, |
|
"eval_steps_per_second": 54.584, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.735185458844393e-05, |
|
"loss": 42072.0, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.697354810107877e-05, |
|
"loss": 35028.5938, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 29700.298828125, |
|
"eval_runtime": 72.4329, |
|
"eval_samples_per_second": 46.912, |
|
"eval_steps_per_second": 46.912, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.659524161371362e-05, |
|
"loss": 27458.9082, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.621693512634847e-05, |
|
"loss": 21147.1016, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 19020.044921875, |
|
"eval_runtime": 49.0881, |
|
"eval_samples_per_second": 69.222, |
|
"eval_steps_per_second": 69.222, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.58386286389833e-05, |
|
"loss": 15475.9717, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.546032215161815e-05, |
|
"loss": 11322.8867, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 8387.529296875, |
|
"eval_runtime": 56.5145, |
|
"eval_samples_per_second": 60.126, |
|
"eval_steps_per_second": 60.126, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.5082015664253e-05, |
|
"loss": 8684.2373, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.470370917688785e-05, |
|
"loss": 6917.9409, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 5089.6796875, |
|
"eval_runtime": 52.3172, |
|
"eval_samples_per_second": 64.95, |
|
"eval_steps_per_second": 64.95, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.43254026895227e-05, |
|
"loss": 6025.4263, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.394709620215754e-05, |
|
"loss": 5538.1548, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 4470.64990234375, |
|
"eval_runtime": 49.8023, |
|
"eval_samples_per_second": 68.23, |
|
"eval_steps_per_second": 68.23, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.356878971479238e-05, |
|
"loss": 5257.3623, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.319048322742722e-05, |
|
"loss": 5375.1353, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 4827.8271484375, |
|
"eval_runtime": 49.3721, |
|
"eval_samples_per_second": 68.824, |
|
"eval_steps_per_second": 68.824, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.281217674006207e-05, |
|
"loss": 5494.5615, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.243387025269692e-05, |
|
"loss": 5258.2065, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 4288.33056640625, |
|
"eval_runtime": 51.0091, |
|
"eval_samples_per_second": 66.616, |
|
"eval_steps_per_second": 66.616, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.205556376533177e-05, |
|
"loss": 5085.8599, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.167725727796661e-05, |
|
"loss": 5071.4478, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 4449.0048828125, |
|
"eval_runtime": 55.4547, |
|
"eval_samples_per_second": 61.275, |
|
"eval_steps_per_second": 61.275, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.129895079060146e-05, |
|
"loss": 5510.5103, |
|
"step": 5888 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.09206443032363e-05, |
|
"loss": 5384.3877, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 7380.9560546875, |
|
"eval_runtime": 49.4679, |
|
"eval_samples_per_second": 68.691, |
|
"eval_steps_per_second": 68.691, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.054233781587114e-05, |
|
"loss": 5411.5742, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.016403132850599e-05, |
|
"loss": 5327.8291, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 6015.3486328125, |
|
"eval_runtime": 49.5302, |
|
"eval_samples_per_second": 68.605, |
|
"eval_steps_per_second": 68.605, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.978572484114084e-05, |
|
"loss": 5498.8262, |
|
"step": 6912 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.940741835377569e-05, |
|
"loss": 5376.377, |
|
"step": 7168 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 4817.3671875, |
|
"eval_runtime": 49.2566, |
|
"eval_samples_per_second": 68.986, |
|
"eval_steps_per_second": 68.986, |
|
"step": 7168 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.902911186641053e-05, |
|
"loss": 5066.939, |
|
"step": 7424 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.865080537904538e-05, |
|
"loss": 4955.6113, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 3752.7509765625, |
|
"eval_runtime": 51.0919, |
|
"eval_samples_per_second": 66.508, |
|
"eval_steps_per_second": 66.508, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.827249889168022e-05, |
|
"loss": 4972.3188, |
|
"step": 7936 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 8.789419240431506e-05, |
|
"loss": 5409.0205, |
|
"step": 8192 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 4419.3115234375, |
|
"eval_runtime": 56.0194, |
|
"eval_samples_per_second": 60.658, |
|
"eval_steps_per_second": 60.658, |
|
"step": 8192 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 8.751588591694991e-05, |
|
"loss": 4755.2881, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 8.713757942958476e-05, |
|
"loss": 4503.3687, |
|
"step": 8704 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 4440.9599609375, |
|
"eval_runtime": 50.1462, |
|
"eval_samples_per_second": 67.762, |
|
"eval_steps_per_second": 67.762, |
|
"step": 8704 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.67592729422196e-05, |
|
"loss": 4803.3394, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 8.638096645485444e-05, |
|
"loss": 5031.4937, |
|
"step": 9216 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 5361.60546875, |
|
"eval_runtime": 49.6714, |
|
"eval_samples_per_second": 68.41, |
|
"eval_steps_per_second": 68.41, |
|
"step": 9216 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.600265996748929e-05, |
|
"loss": 4789.9038, |
|
"step": 9472 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.562435348012414e-05, |
|
"loss": 5079.5186, |
|
"step": 9728 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 4070.673828125, |
|
"eval_runtime": 49.4243, |
|
"eval_samples_per_second": 68.752, |
|
"eval_steps_per_second": 68.752, |
|
"step": 9728 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 8.524604699275897e-05, |
|
"loss": 5474.73, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.486774050539382e-05, |
|
"loss": 4787.0361, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 4277.46337890625, |
|
"eval_runtime": 49.554, |
|
"eval_samples_per_second": 68.572, |
|
"eval_steps_per_second": 68.572, |
|
"step": 10240 |
|
} |
|
], |
|
"logging_steps": 256, |
|
"max_steps": 67670, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 2560, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|