{ "config": { "epochs": 10, "lr": 0.0003, "warmup_steps": 300, "grad_accum_steps": 8, "grad_clip": 1.0 }, "train": [ { "epoch": 1, "avg_loss": 7.070409, "lr": 0.00027, "step": 270, "time_s": 971.0 }, { "epoch": 2, "avg_loss": 6.138723, "lr": 0.0002986039232296601, "step": 540, "time_s": 969.5 }, { "epoch": 3, "avg_loss": 5.511719, "lr": 0.00029373201732051774, "step": 810, "time_s": 970.5 }, { "epoch": 4, "avg_loss": 5.033876, "lr": 0.00028548319801432657, "step": 1080, "time_s": 966.4 }, { "epoch": 5, "avg_loss": 4.711938, "lr": 0.00027406193668993577, "step": 1350, "time_s": 977.1 }, { "epoch": 6, "avg_loss": 4.464801, "lr": 0.00027245653264858043, "step": 1620, "time_s": 973.7 }, { "epoch": 7, "avg_loss": 4.292098, "lr": 0.0002676801152109725, "step": 1890, "time_s": 977.7 }, { "epoch": 8, "avg_loss": 4.150779, "lr": 0.00025985108200750904, "step": 2160, "time_s": 975.7 }, { "epoch": 9, "avg_loss": 3.803133, "lr": 0.0002587006831839486, "step": 2170, "time_s": 34.3 }, { "epoch": 10, "avg_loss": 3.725935, "lr": 0.00025557782178568903, "step": 2177, "time_s": 33.6 }, { "epoch": 11, "avg_loss": 3.637672, "lr": 0.00025043245687112674, "step": 2184, "time_s": 34.3 }, { "epoch": 12, "avg_loss": 3.560756, "lr": 0.00024335262706051453, "step": 2191, "time_s": 34.0 }, { "epoch": 13, "avg_loss": 3.493701, "lr": 0.00023445947020481162, "step": 2198, "time_s": 34.2 } ], "val": [ { "epoch": 1, "avg_loss": 6.459693 }, { "epoch": 2, "avg_loss": 5.804065 }, { "epoch": 3, "avg_loss": 5.179338 }, { "epoch": 4, "avg_loss": 4.774045 }, { "epoch": 5, "avg_loss": 4.511194 }, { "epoch": 6, "avg_loss": 4.305547 }, { "epoch": 7, "avg_loss": 4.154681 }, { "epoch": 8, "avg_loss": 4.037178 }, { "epoch": 9, "avg_loss": 3.911244 }, { "epoch": 10, "avg_loss": 3.914251 }, { "epoch": 11, "avg_loss": 3.909345 }, { "epoch": 12, "avg_loss": 3.932855 }, { "epoch": 13, "avg_loss": 3.951809 } ], "sanity": [] }