{ "best_metric": 3752.7509765625, "best_model_checkpoint": "mgh6/TCS_Pairing_VAE/checkpoint-7680", "epoch": 0.7565780359633163, "eval_steps": 512, "global_step": 10240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 9.962169351263485e-05, "loss": 122076.6875, "step": 256 }, { "epoch": 0.04, "learning_rate": 9.92433870252697e-05, "loss": 75620.8047, "step": 512 }, { "epoch": 0.04, "eval_loss": 67218.640625, "eval_runtime": 49.08, "eval_samples_per_second": 69.234, "eval_steps_per_second": 69.234, "step": 512 }, { "epoch": 0.06, "learning_rate": 9.886508053790455e-05, "loss": 71143.4766, "step": 768 }, { "epoch": 0.08, "learning_rate": 9.848677405053938e-05, "loss": 65495.2617, "step": 1024 }, { "epoch": 0.08, "eval_loss": 55127.15234375, "eval_runtime": 57.5577, "eval_samples_per_second": 59.036, "eval_steps_per_second": 59.036, "step": 1024 }, { "epoch": 0.09, "learning_rate": 9.810846756317423e-05, "loss": 57857.6445, "step": 1280 }, { "epoch": 0.11, "learning_rate": 9.773016107580908e-05, "loss": 49931.2188, "step": 1536 }, { "epoch": 0.11, "eval_loss": 44795.0859375, "eval_runtime": 62.2532, "eval_samples_per_second": 54.584, "eval_steps_per_second": 54.584, "step": 1536 }, { "epoch": 0.13, "learning_rate": 9.735185458844393e-05, "loss": 42072.0, "step": 1792 }, { "epoch": 0.15, "learning_rate": 9.697354810107877e-05, "loss": 35028.5938, "step": 2048 }, { "epoch": 0.15, "eval_loss": 29700.298828125, "eval_runtime": 72.4329, "eval_samples_per_second": 46.912, "eval_steps_per_second": 46.912, "step": 2048 }, { "epoch": 0.17, "learning_rate": 9.659524161371362e-05, "loss": 27458.9082, "step": 2304 }, { "epoch": 0.19, "learning_rate": 9.621693512634847e-05, "loss": 21147.1016, "step": 2560 }, { "epoch": 0.19, "eval_loss": 19020.044921875, "eval_runtime": 49.0881, "eval_samples_per_second": 69.222, "eval_steps_per_second": 69.222, "step": 2560 }, { "epoch": 0.21, "learning_rate": 9.58386286389833e-05, "loss": 15475.9717, "step": 2816 }, { "epoch": 0.23, "learning_rate": 9.546032215161815e-05, "loss": 11322.8867, "step": 3072 }, { "epoch": 0.23, "eval_loss": 8387.529296875, "eval_runtime": 56.5145, "eval_samples_per_second": 60.126, "eval_steps_per_second": 60.126, "step": 3072 }, { "epoch": 0.25, "learning_rate": 9.5082015664253e-05, "loss": 8684.2373, "step": 3328 }, { "epoch": 0.26, "learning_rate": 9.470370917688785e-05, "loss": 6917.9409, "step": 3584 }, { "epoch": 0.26, "eval_loss": 5089.6796875, "eval_runtime": 52.3172, "eval_samples_per_second": 64.95, "eval_steps_per_second": 64.95, "step": 3584 }, { "epoch": 0.28, "learning_rate": 9.43254026895227e-05, "loss": 6025.4263, "step": 3840 }, { "epoch": 0.3, "learning_rate": 9.394709620215754e-05, "loss": 5538.1548, "step": 4096 }, { "epoch": 0.3, "eval_loss": 4470.64990234375, "eval_runtime": 49.8023, "eval_samples_per_second": 68.23, "eval_steps_per_second": 68.23, "step": 4096 }, { "epoch": 0.32, "learning_rate": 9.356878971479238e-05, "loss": 5257.3623, "step": 4352 }, { "epoch": 0.34, "learning_rate": 9.319048322742722e-05, "loss": 5375.1353, "step": 4608 }, { "epoch": 0.34, "eval_loss": 4827.8271484375, "eval_runtime": 49.3721, "eval_samples_per_second": 68.824, "eval_steps_per_second": 68.824, "step": 4608 }, { "epoch": 0.36, "learning_rate": 9.281217674006207e-05, "loss": 5494.5615, "step": 4864 }, { "epoch": 0.38, "learning_rate": 9.243387025269692e-05, "loss": 5258.2065, "step": 5120 }, { "epoch": 0.38, "eval_loss": 4288.33056640625, "eval_runtime": 51.0091, "eval_samples_per_second": 66.616, "eval_steps_per_second": 66.616, "step": 5120 }, { "epoch": 0.4, "learning_rate": 9.205556376533177e-05, "loss": 5085.8599, "step": 5376 }, { "epoch": 0.42, "learning_rate": 9.167725727796661e-05, "loss": 5071.4478, "step": 5632 }, { "epoch": 0.42, "eval_loss": 4449.0048828125, "eval_runtime": 55.4547, "eval_samples_per_second": 61.275, "eval_steps_per_second": 61.275, "step": 5632 }, { "epoch": 0.44, "learning_rate": 9.129895079060146e-05, "loss": 5510.5103, "step": 5888 }, { "epoch": 0.45, "learning_rate": 9.09206443032363e-05, "loss": 5384.3877, "step": 6144 }, { "epoch": 0.45, "eval_loss": 7380.9560546875, "eval_runtime": 49.4679, "eval_samples_per_second": 68.691, "eval_steps_per_second": 68.691, "step": 6144 }, { "epoch": 0.47, "learning_rate": 9.054233781587114e-05, "loss": 5411.5742, "step": 6400 }, { "epoch": 0.49, "learning_rate": 9.016403132850599e-05, "loss": 5327.8291, "step": 6656 }, { "epoch": 0.49, "eval_loss": 6015.3486328125, "eval_runtime": 49.5302, "eval_samples_per_second": 68.605, "eval_steps_per_second": 68.605, "step": 6656 }, { "epoch": 0.51, "learning_rate": 8.978572484114084e-05, "loss": 5498.8262, "step": 6912 }, { "epoch": 0.53, "learning_rate": 8.940741835377569e-05, "loss": 5376.377, "step": 7168 }, { "epoch": 0.53, "eval_loss": 4817.3671875, "eval_runtime": 49.2566, "eval_samples_per_second": 68.986, "eval_steps_per_second": 68.986, "step": 7168 }, { "epoch": 0.55, "learning_rate": 8.902911186641053e-05, "loss": 5066.939, "step": 7424 }, { "epoch": 0.57, "learning_rate": 8.865080537904538e-05, "loss": 4955.6113, "step": 7680 }, { "epoch": 0.57, "eval_loss": 3752.7509765625, "eval_runtime": 51.0919, "eval_samples_per_second": 66.508, "eval_steps_per_second": 66.508, "step": 7680 }, { "epoch": 0.59, "learning_rate": 8.827249889168022e-05, "loss": 4972.3188, "step": 7936 }, { "epoch": 0.61, "learning_rate": 8.789419240431506e-05, "loss": 5409.0205, "step": 8192 }, { "epoch": 0.61, "eval_loss": 4419.3115234375, "eval_runtime": 56.0194, "eval_samples_per_second": 60.658, "eval_steps_per_second": 60.658, "step": 8192 }, { "epoch": 0.62, "learning_rate": 8.751588591694991e-05, "loss": 4755.2881, "step": 8448 }, { "epoch": 0.64, "learning_rate": 8.713757942958476e-05, "loss": 4503.3687, "step": 8704 }, { "epoch": 0.64, "eval_loss": 4440.9599609375, "eval_runtime": 50.1462, "eval_samples_per_second": 67.762, "eval_steps_per_second": 67.762, "step": 8704 }, { "epoch": 0.66, "learning_rate": 8.67592729422196e-05, "loss": 4803.3394, "step": 8960 }, { "epoch": 0.68, "learning_rate": 8.638096645485444e-05, "loss": 5031.4937, "step": 9216 }, { "epoch": 0.68, "eval_loss": 5361.60546875, "eval_runtime": 49.6714, "eval_samples_per_second": 68.41, "eval_steps_per_second": 68.41, "step": 9216 }, { "epoch": 0.7, "learning_rate": 8.600265996748929e-05, "loss": 4789.9038, "step": 9472 }, { "epoch": 0.72, "learning_rate": 8.562435348012414e-05, "loss": 5079.5186, "step": 9728 }, { "epoch": 0.72, "eval_loss": 4070.673828125, "eval_runtime": 49.4243, "eval_samples_per_second": 68.752, "eval_steps_per_second": 68.752, "step": 9728 }, { "epoch": 0.74, "learning_rate": 8.524604699275897e-05, "loss": 5474.73, "step": 9984 }, { "epoch": 0.76, "learning_rate": 8.486774050539382e-05, "loss": 4787.0361, "step": 10240 }, { "epoch": 0.76, "eval_loss": 4277.46337890625, "eval_runtime": 49.554, "eval_samples_per_second": 68.572, "eval_steps_per_second": 68.572, "step": 10240 } ], "logging_steps": 256, "max_steps": 67670, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2560, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }