TCS_Pairing_VAE / last-checkpoint /trainer_state.json
mgh6's picture
Training in progress, step 10240, checkpoint
2928e6f verified
{
"best_metric": 3752.7509765625,
"best_model_checkpoint": "mgh6/TCS_Pairing_VAE/checkpoint-7680",
"epoch": 0.7565780359633163,
"eval_steps": 512,
"global_step": 10240,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 9.962169351263485e-05,
"loss": 122076.6875,
"step": 256
},
{
"epoch": 0.04,
"learning_rate": 9.92433870252697e-05,
"loss": 75620.8047,
"step": 512
},
{
"epoch": 0.04,
"eval_loss": 67218.640625,
"eval_runtime": 49.08,
"eval_samples_per_second": 69.234,
"eval_steps_per_second": 69.234,
"step": 512
},
{
"epoch": 0.06,
"learning_rate": 9.886508053790455e-05,
"loss": 71143.4766,
"step": 768
},
{
"epoch": 0.08,
"learning_rate": 9.848677405053938e-05,
"loss": 65495.2617,
"step": 1024
},
{
"epoch": 0.08,
"eval_loss": 55127.15234375,
"eval_runtime": 57.5577,
"eval_samples_per_second": 59.036,
"eval_steps_per_second": 59.036,
"step": 1024
},
{
"epoch": 0.09,
"learning_rate": 9.810846756317423e-05,
"loss": 57857.6445,
"step": 1280
},
{
"epoch": 0.11,
"learning_rate": 9.773016107580908e-05,
"loss": 49931.2188,
"step": 1536
},
{
"epoch": 0.11,
"eval_loss": 44795.0859375,
"eval_runtime": 62.2532,
"eval_samples_per_second": 54.584,
"eval_steps_per_second": 54.584,
"step": 1536
},
{
"epoch": 0.13,
"learning_rate": 9.735185458844393e-05,
"loss": 42072.0,
"step": 1792
},
{
"epoch": 0.15,
"learning_rate": 9.697354810107877e-05,
"loss": 35028.5938,
"step": 2048
},
{
"epoch": 0.15,
"eval_loss": 29700.298828125,
"eval_runtime": 72.4329,
"eval_samples_per_second": 46.912,
"eval_steps_per_second": 46.912,
"step": 2048
},
{
"epoch": 0.17,
"learning_rate": 9.659524161371362e-05,
"loss": 27458.9082,
"step": 2304
},
{
"epoch": 0.19,
"learning_rate": 9.621693512634847e-05,
"loss": 21147.1016,
"step": 2560
},
{
"epoch": 0.19,
"eval_loss": 19020.044921875,
"eval_runtime": 49.0881,
"eval_samples_per_second": 69.222,
"eval_steps_per_second": 69.222,
"step": 2560
},
{
"epoch": 0.21,
"learning_rate": 9.58386286389833e-05,
"loss": 15475.9717,
"step": 2816
},
{
"epoch": 0.23,
"learning_rate": 9.546032215161815e-05,
"loss": 11322.8867,
"step": 3072
},
{
"epoch": 0.23,
"eval_loss": 8387.529296875,
"eval_runtime": 56.5145,
"eval_samples_per_second": 60.126,
"eval_steps_per_second": 60.126,
"step": 3072
},
{
"epoch": 0.25,
"learning_rate": 9.5082015664253e-05,
"loss": 8684.2373,
"step": 3328
},
{
"epoch": 0.26,
"learning_rate": 9.470370917688785e-05,
"loss": 6917.9409,
"step": 3584
},
{
"epoch": 0.26,
"eval_loss": 5089.6796875,
"eval_runtime": 52.3172,
"eval_samples_per_second": 64.95,
"eval_steps_per_second": 64.95,
"step": 3584
},
{
"epoch": 0.28,
"learning_rate": 9.43254026895227e-05,
"loss": 6025.4263,
"step": 3840
},
{
"epoch": 0.3,
"learning_rate": 9.394709620215754e-05,
"loss": 5538.1548,
"step": 4096
},
{
"epoch": 0.3,
"eval_loss": 4470.64990234375,
"eval_runtime": 49.8023,
"eval_samples_per_second": 68.23,
"eval_steps_per_second": 68.23,
"step": 4096
},
{
"epoch": 0.32,
"learning_rate": 9.356878971479238e-05,
"loss": 5257.3623,
"step": 4352
},
{
"epoch": 0.34,
"learning_rate": 9.319048322742722e-05,
"loss": 5375.1353,
"step": 4608
},
{
"epoch": 0.34,
"eval_loss": 4827.8271484375,
"eval_runtime": 49.3721,
"eval_samples_per_second": 68.824,
"eval_steps_per_second": 68.824,
"step": 4608
},
{
"epoch": 0.36,
"learning_rate": 9.281217674006207e-05,
"loss": 5494.5615,
"step": 4864
},
{
"epoch": 0.38,
"learning_rate": 9.243387025269692e-05,
"loss": 5258.2065,
"step": 5120
},
{
"epoch": 0.38,
"eval_loss": 4288.33056640625,
"eval_runtime": 51.0091,
"eval_samples_per_second": 66.616,
"eval_steps_per_second": 66.616,
"step": 5120
},
{
"epoch": 0.4,
"learning_rate": 9.205556376533177e-05,
"loss": 5085.8599,
"step": 5376
},
{
"epoch": 0.42,
"learning_rate": 9.167725727796661e-05,
"loss": 5071.4478,
"step": 5632
},
{
"epoch": 0.42,
"eval_loss": 4449.0048828125,
"eval_runtime": 55.4547,
"eval_samples_per_second": 61.275,
"eval_steps_per_second": 61.275,
"step": 5632
},
{
"epoch": 0.44,
"learning_rate": 9.129895079060146e-05,
"loss": 5510.5103,
"step": 5888
},
{
"epoch": 0.45,
"learning_rate": 9.09206443032363e-05,
"loss": 5384.3877,
"step": 6144
},
{
"epoch": 0.45,
"eval_loss": 7380.9560546875,
"eval_runtime": 49.4679,
"eval_samples_per_second": 68.691,
"eval_steps_per_second": 68.691,
"step": 6144
},
{
"epoch": 0.47,
"learning_rate": 9.054233781587114e-05,
"loss": 5411.5742,
"step": 6400
},
{
"epoch": 0.49,
"learning_rate": 9.016403132850599e-05,
"loss": 5327.8291,
"step": 6656
},
{
"epoch": 0.49,
"eval_loss": 6015.3486328125,
"eval_runtime": 49.5302,
"eval_samples_per_second": 68.605,
"eval_steps_per_second": 68.605,
"step": 6656
},
{
"epoch": 0.51,
"learning_rate": 8.978572484114084e-05,
"loss": 5498.8262,
"step": 6912
},
{
"epoch": 0.53,
"learning_rate": 8.940741835377569e-05,
"loss": 5376.377,
"step": 7168
},
{
"epoch": 0.53,
"eval_loss": 4817.3671875,
"eval_runtime": 49.2566,
"eval_samples_per_second": 68.986,
"eval_steps_per_second": 68.986,
"step": 7168
},
{
"epoch": 0.55,
"learning_rate": 8.902911186641053e-05,
"loss": 5066.939,
"step": 7424
},
{
"epoch": 0.57,
"learning_rate": 8.865080537904538e-05,
"loss": 4955.6113,
"step": 7680
},
{
"epoch": 0.57,
"eval_loss": 3752.7509765625,
"eval_runtime": 51.0919,
"eval_samples_per_second": 66.508,
"eval_steps_per_second": 66.508,
"step": 7680
},
{
"epoch": 0.59,
"learning_rate": 8.827249889168022e-05,
"loss": 4972.3188,
"step": 7936
},
{
"epoch": 0.61,
"learning_rate": 8.789419240431506e-05,
"loss": 5409.0205,
"step": 8192
},
{
"epoch": 0.61,
"eval_loss": 4419.3115234375,
"eval_runtime": 56.0194,
"eval_samples_per_second": 60.658,
"eval_steps_per_second": 60.658,
"step": 8192
},
{
"epoch": 0.62,
"learning_rate": 8.751588591694991e-05,
"loss": 4755.2881,
"step": 8448
},
{
"epoch": 0.64,
"learning_rate": 8.713757942958476e-05,
"loss": 4503.3687,
"step": 8704
},
{
"epoch": 0.64,
"eval_loss": 4440.9599609375,
"eval_runtime": 50.1462,
"eval_samples_per_second": 67.762,
"eval_steps_per_second": 67.762,
"step": 8704
},
{
"epoch": 0.66,
"learning_rate": 8.67592729422196e-05,
"loss": 4803.3394,
"step": 8960
},
{
"epoch": 0.68,
"learning_rate": 8.638096645485444e-05,
"loss": 5031.4937,
"step": 9216
},
{
"epoch": 0.68,
"eval_loss": 5361.60546875,
"eval_runtime": 49.6714,
"eval_samples_per_second": 68.41,
"eval_steps_per_second": 68.41,
"step": 9216
},
{
"epoch": 0.7,
"learning_rate": 8.600265996748929e-05,
"loss": 4789.9038,
"step": 9472
},
{
"epoch": 0.72,
"learning_rate": 8.562435348012414e-05,
"loss": 5079.5186,
"step": 9728
},
{
"epoch": 0.72,
"eval_loss": 4070.673828125,
"eval_runtime": 49.4243,
"eval_samples_per_second": 68.752,
"eval_steps_per_second": 68.752,
"step": 9728
},
{
"epoch": 0.74,
"learning_rate": 8.524604699275897e-05,
"loss": 5474.73,
"step": 9984
},
{
"epoch": 0.76,
"learning_rate": 8.486774050539382e-05,
"loss": 4787.0361,
"step": 10240
},
{
"epoch": 0.76,
"eval_loss": 4277.46337890625,
"eval_runtime": 49.554,
"eval_samples_per_second": 68.572,
"eval_steps_per_second": 68.572,
"step": 10240
}
],
"logging_steps": 256,
"max_steps": 67670,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 2560,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}