|
{ |
|
"best_metric": 8206.45703125, |
|
"best_model_checkpoint": "mgh6/TCS_Pairing_VAE/checkpoint-10240", |
|
"epoch": 0.7565780359633163, |
|
"eval_steps": 512, |
|
"global_step": 10240, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.810846756317423e-05, |
|
"loss": 13895.6543, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.621693512634847e-05, |
|
"loss": 12089.9502, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 10878.3818359375, |
|
"eval_runtime": 52.2741, |
|
"eval_samples_per_second": 65.004, |
|
"eval_steps_per_second": 65.004, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.43254026895227e-05, |
|
"loss": 11585.8203, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.243387025269692e-05, |
|
"loss": 11148.8809, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 10289.9677734375, |
|
"eval_runtime": 52.2836, |
|
"eval_samples_per_second": 64.992, |
|
"eval_steps_per_second": 64.992, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.054233781587114e-05, |
|
"loss": 10962.8799, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.865080537904538e-05, |
|
"loss": 10798.7051, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 9891.8466796875, |
|
"eval_runtime": 52.2253, |
|
"eval_samples_per_second": 65.064, |
|
"eval_steps_per_second": 65.064, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.67592729422196e-05, |
|
"loss": 10776.9941, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.486774050539382e-05, |
|
"loss": 10478.6211, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 9580.3623046875, |
|
"eval_runtime": 54.3989, |
|
"eval_samples_per_second": 62.465, |
|
"eval_steps_per_second": 62.465, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.297620806856804e-05, |
|
"loss": 10421.6865, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.108467563174228e-05, |
|
"loss": 10349.1133, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 9357.197265625, |
|
"eval_runtime": 52.1384, |
|
"eval_samples_per_second": 65.173, |
|
"eval_steps_per_second": 65.173, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.919314319491651e-05, |
|
"loss": 10320.9795, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.730161075809073e-05, |
|
"loss": 10056.0762, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 9196.8330078125, |
|
"eval_runtime": 55.9285, |
|
"eval_samples_per_second": 60.756, |
|
"eval_steps_per_second": 60.756, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.541007832126497e-05, |
|
"loss": 10047.7236, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.35185458844392e-05, |
|
"loss": 9825.2148, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 9047.8759765625, |
|
"eval_runtime": 54.3128, |
|
"eval_samples_per_second": 62.564, |
|
"eval_steps_per_second": 62.564, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.162701344761342e-05, |
|
"loss": 9779.3662, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.973548101078765e-05, |
|
"loss": 9906.9102, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 8961.9609375, |
|
"eval_runtime": 53.0164, |
|
"eval_samples_per_second": 64.093, |
|
"eval_steps_per_second": 64.093, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.784394857396189e-05, |
|
"loss": 9788.2617, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.595241613713611e-05, |
|
"loss": 9622.2656, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 8833.9521484375, |
|
"eval_runtime": 74.0629, |
|
"eval_samples_per_second": 45.88, |
|
"eval_steps_per_second": 45.88, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.406088370031034e-05, |
|
"loss": 9643.9951, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.216935126348456e-05, |
|
"loss": 9615.2891, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 8750.201171875, |
|
"eval_runtime": 52.5556, |
|
"eval_samples_per_second": 64.655, |
|
"eval_steps_per_second": 64.655, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.0277818826658786e-05, |
|
"loss": 9523.9453, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.838628638983301e-05, |
|
"loss": 9480.083, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 8672.28515625, |
|
"eval_runtime": 55.0801, |
|
"eval_samples_per_second": 61.692, |
|
"eval_steps_per_second": 61.692, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.649475395300724e-05, |
|
"loss": 9486.6777, |
|
"step": 5888 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.460322151618147e-05, |
|
"loss": 9286.3037, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 8618.2421875, |
|
"eval_runtime": 51.0906, |
|
"eval_samples_per_second": 66.509, |
|
"eval_steps_per_second": 66.509, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.27116890793557e-05, |
|
"loss": 9377.9355, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.0820156642529925e-05, |
|
"loss": 9192.1064, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 8541.9248046875, |
|
"eval_runtime": 52.3848, |
|
"eval_samples_per_second": 64.866, |
|
"eval_steps_per_second": 64.866, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.892862420570416e-05, |
|
"loss": 9293.0908, |
|
"step": 6912 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.703709176887838e-05, |
|
"loss": 9297.0391, |
|
"step": 7168 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 8475.4150390625, |
|
"eval_runtime": 52.6534, |
|
"eval_samples_per_second": 64.535, |
|
"eval_steps_per_second": 64.535, |
|
"step": 7168 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.5145559332052614e-05, |
|
"loss": 9279.9678, |
|
"step": 7424 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.325402689522684e-05, |
|
"loss": 9241.9922, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 8431.3798828125, |
|
"eval_runtime": 51.9355, |
|
"eval_samples_per_second": 65.427, |
|
"eval_steps_per_second": 65.427, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.136249445840107e-05, |
|
"loss": 9255.4785, |
|
"step": 7936 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.947096202157529e-05, |
|
"loss": 9076.7949, |
|
"step": 8192 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 8366.28515625, |
|
"eval_runtime": 54.9213, |
|
"eval_samples_per_second": 61.87, |
|
"eval_steps_per_second": 61.87, |
|
"step": 8192 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.757942958474952e-05, |
|
"loss": 9096.5859, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.5687897147923746e-05, |
|
"loss": 9114.2637, |
|
"step": 8704 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 8331.78125, |
|
"eval_runtime": 52.2996, |
|
"eval_samples_per_second": 64.972, |
|
"eval_steps_per_second": 64.972, |
|
"step": 8704 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.379636471109798e-05, |
|
"loss": 9127.2627, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.19048322742722e-05, |
|
"loss": 9108.6582, |
|
"step": 9216 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 8280.7763671875, |
|
"eval_runtime": 52.2791, |
|
"eval_samples_per_second": 64.997, |
|
"eval_steps_per_second": 64.997, |
|
"step": 9216 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.0013299837446435e-05, |
|
"loss": 9030.9062, |
|
"step": 9472 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.8121767400620657e-05, |
|
"loss": 9014.1484, |
|
"step": 9728 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 8238.181640625, |
|
"eval_runtime": 52.4655, |
|
"eval_samples_per_second": 64.766, |
|
"eval_steps_per_second": 64.766, |
|
"step": 9728 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.6230234963794885e-05, |
|
"loss": 8962.5713, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.4338702526969114e-05, |
|
"loss": 8950.7715, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 8206.45703125, |
|
"eval_runtime": 52.6843, |
|
"eval_samples_per_second": 64.497, |
|
"eval_steps_per_second": 64.497, |
|
"step": 10240 |
|
} |
|
], |
|
"logging_steps": 256, |
|
"max_steps": 13534, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 2560, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|