TCS_Pairing_VAE / last-checkpoint /trainer_state.json
mgh6's picture
Training in progress, step 10240, checkpoint
41c1f83 verified
raw
history blame
No virus
9.46 kB
{
"best_metric": 8206.45703125,
"best_model_checkpoint": "mgh6/TCS_Pairing_VAE/checkpoint-10240",
"epoch": 0.7565780359633163,
"eval_steps": 512,
"global_step": 10240,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 9.810846756317423e-05,
"loss": 13895.6543,
"step": 256
},
{
"epoch": 0.04,
"learning_rate": 9.621693512634847e-05,
"loss": 12089.9502,
"step": 512
},
{
"epoch": 0.04,
"eval_loss": 10878.3818359375,
"eval_runtime": 52.2741,
"eval_samples_per_second": 65.004,
"eval_steps_per_second": 65.004,
"step": 512
},
{
"epoch": 0.06,
"learning_rate": 9.43254026895227e-05,
"loss": 11585.8203,
"step": 768
},
{
"epoch": 0.08,
"learning_rate": 9.243387025269692e-05,
"loss": 11148.8809,
"step": 1024
},
{
"epoch": 0.08,
"eval_loss": 10289.9677734375,
"eval_runtime": 52.2836,
"eval_samples_per_second": 64.992,
"eval_steps_per_second": 64.992,
"step": 1024
},
{
"epoch": 0.09,
"learning_rate": 9.054233781587114e-05,
"loss": 10962.8799,
"step": 1280
},
{
"epoch": 0.11,
"learning_rate": 8.865080537904538e-05,
"loss": 10798.7051,
"step": 1536
},
{
"epoch": 0.11,
"eval_loss": 9891.8466796875,
"eval_runtime": 52.2253,
"eval_samples_per_second": 65.064,
"eval_steps_per_second": 65.064,
"step": 1536
},
{
"epoch": 0.13,
"learning_rate": 8.67592729422196e-05,
"loss": 10776.9941,
"step": 1792
},
{
"epoch": 0.15,
"learning_rate": 8.486774050539382e-05,
"loss": 10478.6211,
"step": 2048
},
{
"epoch": 0.15,
"eval_loss": 9580.3623046875,
"eval_runtime": 54.3989,
"eval_samples_per_second": 62.465,
"eval_steps_per_second": 62.465,
"step": 2048
},
{
"epoch": 0.17,
"learning_rate": 8.297620806856804e-05,
"loss": 10421.6865,
"step": 2304
},
{
"epoch": 0.19,
"learning_rate": 8.108467563174228e-05,
"loss": 10349.1133,
"step": 2560
},
{
"epoch": 0.19,
"eval_loss": 9357.197265625,
"eval_runtime": 52.1384,
"eval_samples_per_second": 65.173,
"eval_steps_per_second": 65.173,
"step": 2560
},
{
"epoch": 0.21,
"learning_rate": 7.919314319491651e-05,
"loss": 10320.9795,
"step": 2816
},
{
"epoch": 0.23,
"learning_rate": 7.730161075809073e-05,
"loss": 10056.0762,
"step": 3072
},
{
"epoch": 0.23,
"eval_loss": 9196.8330078125,
"eval_runtime": 55.9285,
"eval_samples_per_second": 60.756,
"eval_steps_per_second": 60.756,
"step": 3072
},
{
"epoch": 0.25,
"learning_rate": 7.541007832126497e-05,
"loss": 10047.7236,
"step": 3328
},
{
"epoch": 0.26,
"learning_rate": 7.35185458844392e-05,
"loss": 9825.2148,
"step": 3584
},
{
"epoch": 0.26,
"eval_loss": 9047.8759765625,
"eval_runtime": 54.3128,
"eval_samples_per_second": 62.564,
"eval_steps_per_second": 62.564,
"step": 3584
},
{
"epoch": 0.28,
"learning_rate": 7.162701344761342e-05,
"loss": 9779.3662,
"step": 3840
},
{
"epoch": 0.3,
"learning_rate": 6.973548101078765e-05,
"loss": 9906.9102,
"step": 4096
},
{
"epoch": 0.3,
"eval_loss": 8961.9609375,
"eval_runtime": 53.0164,
"eval_samples_per_second": 64.093,
"eval_steps_per_second": 64.093,
"step": 4096
},
{
"epoch": 0.32,
"learning_rate": 6.784394857396189e-05,
"loss": 9788.2617,
"step": 4352
},
{
"epoch": 0.34,
"learning_rate": 6.595241613713611e-05,
"loss": 9622.2656,
"step": 4608
},
{
"epoch": 0.34,
"eval_loss": 8833.9521484375,
"eval_runtime": 74.0629,
"eval_samples_per_second": 45.88,
"eval_steps_per_second": 45.88,
"step": 4608
},
{
"epoch": 0.36,
"learning_rate": 6.406088370031034e-05,
"loss": 9643.9951,
"step": 4864
},
{
"epoch": 0.38,
"learning_rate": 6.216935126348456e-05,
"loss": 9615.2891,
"step": 5120
},
{
"epoch": 0.38,
"eval_loss": 8750.201171875,
"eval_runtime": 52.5556,
"eval_samples_per_second": 64.655,
"eval_steps_per_second": 64.655,
"step": 5120
},
{
"epoch": 0.4,
"learning_rate": 6.0277818826658786e-05,
"loss": 9523.9453,
"step": 5376
},
{
"epoch": 0.42,
"learning_rate": 5.838628638983301e-05,
"loss": 9480.083,
"step": 5632
},
{
"epoch": 0.42,
"eval_loss": 8672.28515625,
"eval_runtime": 55.0801,
"eval_samples_per_second": 61.692,
"eval_steps_per_second": 61.692,
"step": 5632
},
{
"epoch": 0.44,
"learning_rate": 5.649475395300724e-05,
"loss": 9486.6777,
"step": 5888
},
{
"epoch": 0.45,
"learning_rate": 5.460322151618147e-05,
"loss": 9286.3037,
"step": 6144
},
{
"epoch": 0.45,
"eval_loss": 8618.2421875,
"eval_runtime": 51.0906,
"eval_samples_per_second": 66.509,
"eval_steps_per_second": 66.509,
"step": 6144
},
{
"epoch": 0.47,
"learning_rate": 5.27116890793557e-05,
"loss": 9377.9355,
"step": 6400
},
{
"epoch": 0.49,
"learning_rate": 5.0820156642529925e-05,
"loss": 9192.1064,
"step": 6656
},
{
"epoch": 0.49,
"eval_loss": 8541.9248046875,
"eval_runtime": 52.3848,
"eval_samples_per_second": 64.866,
"eval_steps_per_second": 64.866,
"step": 6656
},
{
"epoch": 0.51,
"learning_rate": 4.892862420570416e-05,
"loss": 9293.0908,
"step": 6912
},
{
"epoch": 0.53,
"learning_rate": 4.703709176887838e-05,
"loss": 9297.0391,
"step": 7168
},
{
"epoch": 0.53,
"eval_loss": 8475.4150390625,
"eval_runtime": 52.6534,
"eval_samples_per_second": 64.535,
"eval_steps_per_second": 64.535,
"step": 7168
},
{
"epoch": 0.55,
"learning_rate": 4.5145559332052614e-05,
"loss": 9279.9678,
"step": 7424
},
{
"epoch": 0.57,
"learning_rate": 4.325402689522684e-05,
"loss": 9241.9922,
"step": 7680
},
{
"epoch": 0.57,
"eval_loss": 8431.3798828125,
"eval_runtime": 51.9355,
"eval_samples_per_second": 65.427,
"eval_steps_per_second": 65.427,
"step": 7680
},
{
"epoch": 0.59,
"learning_rate": 4.136249445840107e-05,
"loss": 9255.4785,
"step": 7936
},
{
"epoch": 0.61,
"learning_rate": 3.947096202157529e-05,
"loss": 9076.7949,
"step": 8192
},
{
"epoch": 0.61,
"eval_loss": 8366.28515625,
"eval_runtime": 54.9213,
"eval_samples_per_second": 61.87,
"eval_steps_per_second": 61.87,
"step": 8192
},
{
"epoch": 0.62,
"learning_rate": 3.757942958474952e-05,
"loss": 9096.5859,
"step": 8448
},
{
"epoch": 0.64,
"learning_rate": 3.5687897147923746e-05,
"loss": 9114.2637,
"step": 8704
},
{
"epoch": 0.64,
"eval_loss": 8331.78125,
"eval_runtime": 52.2996,
"eval_samples_per_second": 64.972,
"eval_steps_per_second": 64.972,
"step": 8704
},
{
"epoch": 0.66,
"learning_rate": 3.379636471109798e-05,
"loss": 9127.2627,
"step": 8960
},
{
"epoch": 0.68,
"learning_rate": 3.19048322742722e-05,
"loss": 9108.6582,
"step": 9216
},
{
"epoch": 0.68,
"eval_loss": 8280.7763671875,
"eval_runtime": 52.2791,
"eval_samples_per_second": 64.997,
"eval_steps_per_second": 64.997,
"step": 9216
},
{
"epoch": 0.7,
"learning_rate": 3.0013299837446435e-05,
"loss": 9030.9062,
"step": 9472
},
{
"epoch": 0.72,
"learning_rate": 2.8121767400620657e-05,
"loss": 9014.1484,
"step": 9728
},
{
"epoch": 0.72,
"eval_loss": 8238.181640625,
"eval_runtime": 52.4655,
"eval_samples_per_second": 64.766,
"eval_steps_per_second": 64.766,
"step": 9728
},
{
"epoch": 0.74,
"learning_rate": 2.6230234963794885e-05,
"loss": 8962.5713,
"step": 9984
},
{
"epoch": 0.76,
"learning_rate": 2.4338702526969114e-05,
"loss": 8950.7715,
"step": 10240
},
{
"epoch": 0.76,
"eval_loss": 8206.45703125,
"eval_runtime": 52.6843,
"eval_samples_per_second": 64.497,
"eval_steps_per_second": 64.497,
"step": 10240
}
],
"logging_steps": 256,
"max_steps": 13534,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 2560,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}