{ "best_metric": 0.00030898803379386663, "best_model_checkpoint": "AlexWang99/byt5_add/checkpoint-1275", "epoch": 51.0, "eval_steps": 500, "global_step": 1275, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 1.747314691543579, "eval_runtime": 11.1213, "eval_samples_per_second": 899.172, "eval_steps_per_second": 1.169, "step": 25 }, { "epoch": 2.0, "eval_loss": 1.6477937698364258, "eval_runtime": 10.7967, "eval_samples_per_second": 926.209, "eval_steps_per_second": 1.204, "step": 50 }, { "epoch": 3.0, "eval_loss": 1.5999796390533447, "eval_runtime": 10.8098, "eval_samples_per_second": 925.089, "eval_steps_per_second": 1.203, "step": 75 }, { "epoch": 4.0, "eval_loss": 1.4885144233703613, "eval_runtime": 10.8417, "eval_samples_per_second": 922.367, "eval_steps_per_second": 1.199, "step": 100 }, { "epoch": 5.0, "eval_loss": 1.3953396081924438, "eval_runtime": 10.8419, "eval_samples_per_second": 922.345, "eval_steps_per_second": 1.199, "step": 125 }, { "epoch": 6.0, "eval_loss": 1.2306207418441772, "eval_runtime": 10.8327, "eval_samples_per_second": 923.131, "eval_steps_per_second": 1.2, "step": 150 }, { "epoch": 7.0, "eval_loss": 1.0172127485275269, "eval_runtime": 10.8404, "eval_samples_per_second": 922.478, "eval_steps_per_second": 1.199, "step": 175 }, { "epoch": 8.0, "eval_loss": 0.7508996725082397, "eval_runtime": 10.867, "eval_samples_per_second": 920.221, "eval_steps_per_second": 1.196, "step": 200 }, { "epoch": 9.0, "eval_loss": 0.5204245448112488, "eval_runtime": 10.837, "eval_samples_per_second": 922.761, "eval_steps_per_second": 1.2, "step": 225 }, { "epoch": 10.0, "eval_loss": 0.3563512861728668, "eval_runtime": 11.004, "eval_samples_per_second": 908.763, "eval_steps_per_second": 1.181, "step": 250 }, { "epoch": 11.0, "eval_loss": 0.3062296211719513, "eval_runtime": 10.8369, "eval_samples_per_second": 922.772, "eval_steps_per_second": 1.2, "step": 275 }, { "epoch": 12.0, "eval_loss": 0.23057429492473602, "eval_runtime": 10.799, "eval_samples_per_second": 926.015, "eval_steps_per_second": 1.204, "step": 300 }, { "epoch": 13.0, "eval_loss": 0.17026692628860474, "eval_runtime": 10.8267, "eval_samples_per_second": 923.643, "eval_steps_per_second": 1.201, "step": 325 }, { "epoch": 14.0, "eval_loss": 0.14094401895999908, "eval_runtime": 10.8171, "eval_samples_per_second": 924.461, "eval_steps_per_second": 1.202, "step": 350 }, { "epoch": 15.0, "eval_loss": 0.11562483012676239, "eval_runtime": 10.8544, "eval_samples_per_second": 921.286, "eval_steps_per_second": 1.198, "step": 375 }, { "epoch": 16.0, "eval_loss": 0.1076672226190567, "eval_runtime": 10.7997, "eval_samples_per_second": 925.947, "eval_steps_per_second": 1.204, "step": 400 }, { "epoch": 17.0, "eval_loss": 0.07891710102558136, "eval_runtime": 10.8355, "eval_samples_per_second": 922.895, "eval_steps_per_second": 1.2, "step": 425 }, { "epoch": 18.0, "eval_loss": 0.07825633883476257, "eval_runtime": 10.9577, "eval_samples_per_second": 912.598, "eval_steps_per_second": 1.186, "step": 450 }, { "epoch": 19.0, "eval_loss": 0.053240709006786346, "eval_runtime": 10.8407, "eval_samples_per_second": 922.45, "eval_steps_per_second": 1.199, "step": 475 }, { "epoch": 20.0, "learning_rate": 4.5e-05, "loss": 0.8626, "step": 500 }, { "epoch": 20.0, "eval_loss": 0.03896724432706833, "eval_runtime": 10.8919, "eval_samples_per_second": 918.117, "eval_steps_per_second": 1.194, "step": 500 }, { "epoch": 21.0, "eval_loss": 0.0326126404106617, "eval_runtime": 10.9808, "eval_samples_per_second": 910.682, "eval_steps_per_second": 1.184, "step": 525 }, { "epoch": 22.0, "eval_loss": 0.026844095438718796, "eval_runtime": 10.8647, "eval_samples_per_second": 920.415, "eval_steps_per_second": 1.197, "step": 550 }, { "epoch": 23.0, "eval_loss": 0.022708676755428314, "eval_runtime": 11.1211, "eval_samples_per_second": 899.191, "eval_steps_per_second": 1.169, "step": 575 }, { "epoch": 24.0, "eval_loss": 0.020555635914206505, "eval_runtime": 10.8169, "eval_samples_per_second": 924.48, "eval_steps_per_second": 1.202, "step": 600 }, { "epoch": 25.0, "eval_loss": 0.016072452068328857, "eval_runtime": 10.8195, "eval_samples_per_second": 924.261, "eval_steps_per_second": 1.202, "step": 625 }, { "epoch": 26.0, "eval_loss": 0.015775442123413086, "eval_runtime": 11.0521, "eval_samples_per_second": 904.809, "eval_steps_per_second": 1.176, "step": 650 }, { "epoch": 27.0, "eval_loss": 0.010050756856799126, "eval_runtime": 10.96, "eval_samples_per_second": 912.407, "eval_steps_per_second": 1.186, "step": 675 }, { "epoch": 28.0, "eval_loss": 0.009800990112125874, "eval_runtime": 10.8085, "eval_samples_per_second": 925.196, "eval_steps_per_second": 1.203, "step": 700 }, { "epoch": 29.0, "eval_loss": 0.0077048842795193195, "eval_runtime": 10.9528, "eval_samples_per_second": 913.005, "eval_steps_per_second": 1.187, "step": 725 }, { "epoch": 30.0, "eval_loss": 0.005685885436832905, "eval_runtime": 10.9631, "eval_samples_per_second": 912.147, "eval_steps_per_second": 1.186, "step": 750 }, { "epoch": 31.0, "eval_loss": 0.006655455566942692, "eval_runtime": 10.8367, "eval_samples_per_second": 922.788, "eval_steps_per_second": 1.2, "step": 775 }, { "epoch": 32.0, "eval_loss": 0.004621443338692188, "eval_runtime": 10.8165, "eval_samples_per_second": 924.51, "eval_steps_per_second": 1.202, "step": 800 }, { "epoch": 33.0, "eval_loss": 0.0033882376737892628, "eval_runtime": 10.9293, "eval_samples_per_second": 914.976, "eval_steps_per_second": 1.189, "step": 825 }, { "epoch": 34.0, "eval_loss": 0.0038037376943975687, "eval_runtime": 10.7973, "eval_samples_per_second": 926.155, "eval_steps_per_second": 1.204, "step": 850 }, { "epoch": 35.0, "eval_loss": 0.003371346276253462, "eval_runtime": 10.834, "eval_samples_per_second": 923.021, "eval_steps_per_second": 1.2, "step": 875 }, { "epoch": 36.0, "eval_loss": 0.0024659824557602406, "eval_runtime": 10.7902, "eval_samples_per_second": 926.766, "eval_steps_per_second": 1.205, "step": 900 }, { "epoch": 37.0, "eval_loss": 0.0022366114426404238, "eval_runtime": 10.8096, "eval_samples_per_second": 925.1, "eval_steps_per_second": 1.203, "step": 925 }, { "epoch": 38.0, "eval_loss": 0.0022026619408279657, "eval_runtime": 10.8109, "eval_samples_per_second": 924.992, "eval_steps_per_second": 1.202, "step": 950 }, { "epoch": 39.0, "eval_loss": 0.0024010157212615013, "eval_runtime": 11.1034, "eval_samples_per_second": 900.623, "eval_steps_per_second": 1.171, "step": 975 }, { "epoch": 40.0, "learning_rate": 4e-05, "loss": 0.0919, "step": 1000 }, { "epoch": 40.0, "eval_loss": 0.0013342766324058175, "eval_runtime": 10.7511, "eval_samples_per_second": 930.139, "eval_steps_per_second": 1.209, "step": 1000 }, { "epoch": 41.0, "eval_loss": 0.0016493805451318622, "eval_runtime": 10.7987, "eval_samples_per_second": 926.034, "eval_steps_per_second": 1.204, "step": 1025 }, { "epoch": 42.0, "eval_loss": 0.001088765449821949, "eval_runtime": 10.8106, "eval_samples_per_second": 925.017, "eval_steps_per_second": 1.203, "step": 1050 }, { "epoch": 43.0, "eval_loss": 0.0009081660537049174, "eval_runtime": 10.7945, "eval_samples_per_second": 926.398, "eval_steps_per_second": 1.204, "step": 1075 }, { "epoch": 44.0, "eval_loss": 0.0007170450408011675, "eval_runtime": 10.9388, "eval_samples_per_second": 914.174, "eval_steps_per_second": 1.188, "step": 1100 }, { "epoch": 45.0, "eval_loss": 0.0006850157515145838, "eval_runtime": 10.8231, "eval_samples_per_second": 923.948, "eval_steps_per_second": 1.201, "step": 1125 }, { "epoch": 46.0, "eval_loss": 0.0007588361040689051, "eval_runtime": 10.9442, "eval_samples_per_second": 913.729, "eval_steps_per_second": 1.188, "step": 1150 }, { "epoch": 47.0, "eval_loss": 0.0007894792361184955, "eval_runtime": 10.9394, "eval_samples_per_second": 914.125, "eval_steps_per_second": 1.188, "step": 1175 }, { "epoch": 48.0, "eval_loss": 0.0004850537225138396, "eval_runtime": 10.8141, "eval_samples_per_second": 924.722, "eval_steps_per_second": 1.202, "step": 1200 }, { "epoch": 49.0, "eval_loss": 0.0003986251540482044, "eval_runtime": 10.7964, "eval_samples_per_second": 926.231, "eval_steps_per_second": 1.204, "step": 1225 }, { "epoch": 50.0, "eval_loss": 0.0005350292194634676, "eval_runtime": 10.9488, "eval_samples_per_second": 913.343, "eval_steps_per_second": 1.187, "step": 1250 }, { "epoch": 51.0, "eval_loss": 0.00030898803379386663, "eval_runtime": 10.8156, "eval_samples_per_second": 924.594, "eval_steps_per_second": 1.202, "step": 1275 } ], "logging_steps": 500, "max_steps": 5000, "num_train_epochs": 200, "save_steps": 500, "total_flos": 2.928514277376e+16, "trial_name": null, "trial_params": null }