{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 1225, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04081632653061224, "grad_norm": 16.731555938720703, "learning_rate": 9.918367346938776e-06, "loss": 0.2616, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 10, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08163265306122448, "grad_norm": 7.408310890197754, "learning_rate": 9.836734693877552e-06, "loss": 0.1553, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 20, "total_memory_available (GB)": 94.62 }, { "epoch": 0.12244897959183673, "grad_norm": 5.734114170074463, "learning_rate": 9.755102040816327e-06, "loss": 0.1248, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 30, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16326530612244897, "grad_norm": 4.232840061187744, "learning_rate": 9.673469387755103e-06, "loss": 0.1106, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 40, "total_memory_available (GB)": 94.62 }, { "epoch": 0.20408163265306123, "grad_norm": 4.877067565917969, "learning_rate": 9.591836734693878e-06, "loss": 0.1233, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 50, "total_memory_available (GB)": 94.62 }, { "epoch": 0.24489795918367346, "grad_norm": 2.0087852478027344, "learning_rate": 9.510204081632653e-06, "loss": 0.0799, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 60, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2857142857142857, "grad_norm": 4.425936698913574, "learning_rate": 9.42857142857143e-06, "loss": 0.1015, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 70, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32653061224489793, "grad_norm": 1.8348758220672607, "learning_rate": 9.346938775510204e-06, "loss": 0.1094, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 80, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3673469387755102, "grad_norm": 5.009132385253906, "learning_rate": 9.26530612244898e-06, "loss": 0.0837, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 90, "total_memory_available (GB)": 94.62 }, { "epoch": 0.40816326530612246, "grad_norm": 6.888875484466553, "learning_rate": 9.183673469387756e-06, "loss": 0.075, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4489795918367347, "grad_norm": 3.038123369216919, "learning_rate": 9.102040816326532e-06, "loss": 0.0709, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4897959183673469, "grad_norm": 2.6441116333007812, "learning_rate": 9.020408163265307e-06, "loss": 0.0706, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5306122448979592, "grad_norm": 3.5273971557617188, "learning_rate": 8.938775510204082e-06, "loss": 0.0597, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5714285714285714, "grad_norm": 1.2711573839187622, "learning_rate": 8.857142857142858e-06, "loss": 0.0499, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6122448979591837, "grad_norm": 2.2461705207824707, "learning_rate": 8.775510204081633e-06, "loss": 0.0636, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6530612244897959, "grad_norm": 2.0061120986938477, "learning_rate": 8.69387755102041e-06, "loss": 0.0708, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6938775510204082, "grad_norm": 1.7886881828308105, "learning_rate": 8.612244897959184e-06, "loss": 0.0753, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7346938775510204, "grad_norm": 1.8707109689712524, "learning_rate": 8.530612244897961e-06, "loss": 0.0612, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7755102040816326, "grad_norm": 2.3418948650360107, "learning_rate": 8.448979591836736e-06, "loss": 0.0586, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 190, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8163265306122449, "grad_norm": 0.7759063839912415, "learning_rate": 8.36734693877551e-06, "loss": 0.063, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 200, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8571428571428571, "grad_norm": 0.9075489044189453, "learning_rate": 8.285714285714287e-06, "loss": 0.063, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 210, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8979591836734694, "grad_norm": 1.0522433519363403, "learning_rate": 8.204081632653062e-06, "loss": 0.0605, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 220, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9387755102040817, "grad_norm": 0.7449356913566589, "learning_rate": 8.122448979591837e-06, "loss": 0.0564, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 230, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9795918367346939, "grad_norm": 0.878478467464447, "learning_rate": 8.040816326530613e-06, "loss": 0.0496, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 240, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0204081632653061, "grad_norm": 1.1043357849121094, "learning_rate": 7.959183673469388e-06, "loss": 0.0593, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 250, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0612244897959184, "grad_norm": 1.5091803073883057, "learning_rate": 7.877551020408164e-06, "loss": 0.0542, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 260, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1020408163265305, "grad_norm": 0.7768255472183228, "learning_rate": 7.79591836734694e-06, "loss": 0.0636, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 270, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1428571428571428, "grad_norm": 1.0384677648544312, "learning_rate": 7.714285714285716e-06, "loss": 0.0585, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 280, "total_memory_available (GB)": 94.62 }, { "epoch": 1.183673469387755, "grad_norm": 1.9693238735198975, "learning_rate": 7.63265306122449e-06, "loss": 0.0512, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 290, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2244897959183674, "grad_norm": 1.0379549264907837, "learning_rate": 7.551020408163265e-06, "loss": 0.0647, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 300, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2653061224489797, "grad_norm": 1.01348078250885, "learning_rate": 7.469387755102041e-06, "loss": 0.0668, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 310, "total_memory_available (GB)": 94.62 }, { "epoch": 1.306122448979592, "grad_norm": 0.9114493727684021, "learning_rate": 7.387755102040817e-06, "loss": 0.0509, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 320, "total_memory_available (GB)": 94.62 }, { "epoch": 1.346938775510204, "grad_norm": 0.5102170705795288, "learning_rate": 7.306122448979592e-06, "loss": 0.0612, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 330, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3877551020408163, "grad_norm": 1.097835659980774, "learning_rate": 7.224489795918368e-06, "loss": 0.055, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 340, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4285714285714286, "grad_norm": 0.5733208656311035, "learning_rate": 7.1428571428571436e-06, "loss": 0.05, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 350, "total_memory_available (GB)": 94.62 }, { "epoch": 1.469387755102041, "grad_norm": 2.428065538406372, "learning_rate": 7.061224489795919e-06, "loss": 0.0458, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 360, "total_memory_available (GB)": 94.62 }, { "epoch": 1.510204081632653, "grad_norm": 0.9827563166618347, "learning_rate": 6.979591836734695e-06, "loss": 0.0588, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 370, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5510204081632653, "grad_norm": 0.5563956499099731, "learning_rate": 6.8979591836734705e-06, "loss": 0.0473, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 380, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5918367346938775, "grad_norm": 0.9855480194091797, "learning_rate": 6.816326530612245e-06, "loss": 0.0653, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 390, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6326530612244898, "grad_norm": 0.7717217803001404, "learning_rate": 6.734693877551021e-06, "loss": 0.0618, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 400, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6734693877551021, "grad_norm": 0.5391837358474731, "learning_rate": 6.653061224489797e-06, "loss": 0.061, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 410, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7142857142857144, "grad_norm": 1.2298884391784668, "learning_rate": 6.571428571428572e-06, "loss": 0.0534, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 420, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7551020408163265, "grad_norm": 0.22150574624538422, "learning_rate": 6.489795918367348e-06, "loss": 0.0681, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 430, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7959183673469388, "grad_norm": 0.8992190957069397, "learning_rate": 6.408163265306124e-06, "loss": 0.0606, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 440, "total_memory_available (GB)": 94.62 }, { "epoch": 1.836734693877551, "grad_norm": 0.9748144149780273, "learning_rate": 6.326530612244899e-06, "loss": 0.0436, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 450, "total_memory_available (GB)": 94.62 }, { "epoch": 1.8775510204081631, "grad_norm": 0.3272276520729065, "learning_rate": 6.244897959183675e-06, "loss": 0.0545, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 460, "total_memory_available (GB)": 94.62 }, { "epoch": 1.9183673469387754, "grad_norm": 0.7515161633491516, "learning_rate": 6.163265306122449e-06, "loss": 0.0427, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 470, "total_memory_available (GB)": 94.62 }, { "epoch": 1.9591836734693877, "grad_norm": 0.35405218601226807, "learning_rate": 6.0816326530612245e-06, "loss": 0.0501, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 480, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0, "grad_norm": 0.9796536564826965, "learning_rate": 6e-06, "loss": 0.0641, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 490, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0408163265306123, "grad_norm": 0.639869213104248, "learning_rate": 5.918367346938776e-06, "loss": 0.0592, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0816326530612246, "grad_norm": 0.8377614617347717, "learning_rate": 5.8367346938775515e-06, "loss": 0.0497, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 510, "total_memory_available (GB)": 94.62 }, { "epoch": 2.122448979591837, "grad_norm": 1.07124662399292, "learning_rate": 5.755102040816327e-06, "loss": 0.047, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 520, "total_memory_available (GB)": 94.62 }, { "epoch": 2.163265306122449, "grad_norm": 1.0354053974151611, "learning_rate": 5.673469387755103e-06, "loss": 0.0636, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 530, "total_memory_available (GB)": 94.62 }, { "epoch": 2.204081632653061, "grad_norm": 0.5396034717559814, "learning_rate": 5.591836734693878e-06, "loss": 0.0602, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 540, "total_memory_available (GB)": 94.62 }, { "epoch": 2.2448979591836733, "grad_norm": 1.1461529731750488, "learning_rate": 5.510204081632653e-06, "loss": 0.0629, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 550, "total_memory_available (GB)": 94.62 }, { "epoch": 2.2857142857142856, "grad_norm": 1.732596516609192, "learning_rate": 5.428571428571429e-06, "loss": 0.0575, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 560, "total_memory_available (GB)": 94.62 }, { "epoch": 2.326530612244898, "grad_norm": 0.6468518376350403, "learning_rate": 5.3469387755102045e-06, "loss": 0.0581, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 570, "total_memory_available (GB)": 94.62 }, { "epoch": 2.36734693877551, "grad_norm": 1.7525995969772339, "learning_rate": 5.26530612244898e-06, "loss": 0.0678, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 580, "total_memory_available (GB)": 94.62 }, { "epoch": 2.4081632653061225, "grad_norm": 0.5057170391082764, "learning_rate": 5.183673469387756e-06, "loss": 0.0533, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 590, "total_memory_available (GB)": 94.62 }, { "epoch": 2.4489795918367347, "grad_norm": 1.2042871713638306, "learning_rate": 5.1020408163265315e-06, "loss": 0.0444, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 600, "total_memory_available (GB)": 94.62 }, { "epoch": 2.489795918367347, "grad_norm": 0.20784775912761688, "learning_rate": 5.020408163265307e-06, "loss": 0.0413, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 610, "total_memory_available (GB)": 94.62 }, { "epoch": 2.5306122448979593, "grad_norm": 5.924367427825928, "learning_rate": 4.938775510204082e-06, "loss": 0.0424, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 620, "total_memory_available (GB)": 94.62 }, { "epoch": 2.571428571428571, "grad_norm": 0.40434470772743225, "learning_rate": 4.857142857142858e-06, "loss": 0.0815, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 630, "total_memory_available (GB)": 94.62 }, { "epoch": 2.612244897959184, "grad_norm": 0.7878668904304504, "learning_rate": 4.775510204081633e-06, "loss": 0.0495, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 640, "total_memory_available (GB)": 94.62 }, { "epoch": 2.6530612244897958, "grad_norm": 0.5937454700469971, "learning_rate": 4.693877551020409e-06, "loss": 0.0471, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 650, "total_memory_available (GB)": 94.62 }, { "epoch": 2.693877551020408, "grad_norm": 0.9869357347488403, "learning_rate": 4.612244897959184e-06, "loss": 0.0472, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 660, "total_memory_available (GB)": 94.62 }, { "epoch": 2.7346938775510203, "grad_norm": 0.7606731653213501, "learning_rate": 4.530612244897959e-06, "loss": 0.0864, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 670, "total_memory_available (GB)": 94.62 }, { "epoch": 2.7755102040816326, "grad_norm": 1.5359723567962646, "learning_rate": 4.448979591836735e-06, "loss": 0.0482, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 680, "total_memory_available (GB)": 94.62 }, { "epoch": 2.816326530612245, "grad_norm": 0.370749294757843, "learning_rate": 4.367346938775511e-06, "loss": 0.0447, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 690, "total_memory_available (GB)": 94.62 }, { "epoch": 2.857142857142857, "grad_norm": 0.633791983127594, "learning_rate": 4.2857142857142855e-06, "loss": 0.044, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 700, "total_memory_available (GB)": 94.62 }, { "epoch": 2.8979591836734695, "grad_norm": 1.1335562467575073, "learning_rate": 4.204081632653061e-06, "loss": 0.0552, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 710, "total_memory_available (GB)": 94.62 }, { "epoch": 2.938775510204082, "grad_norm": 1.5698113441467285, "learning_rate": 4.122448979591837e-06, "loss": 0.0576, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 720, "total_memory_available (GB)": 94.62 }, { "epoch": 2.979591836734694, "grad_norm": 0.20579344034194946, "learning_rate": 4.040816326530612e-06, "loss": 0.0336, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 730, "total_memory_available (GB)": 94.62 }, { "epoch": 3.020408163265306, "grad_norm": 1.3172132968902588, "learning_rate": 3.959183673469388e-06, "loss": 0.0534, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 740, "total_memory_available (GB)": 94.62 }, { "epoch": 3.061224489795918, "grad_norm": 1.5193818807601929, "learning_rate": 3.877551020408164e-06, "loss": 0.0605, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 750, "total_memory_available (GB)": 94.62 }, { "epoch": 3.1020408163265305, "grad_norm": 0.7448021769523621, "learning_rate": 3.795918367346939e-06, "loss": 0.0524, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 760, "total_memory_available (GB)": 94.62 }, { "epoch": 3.142857142857143, "grad_norm": 0.6830514073371887, "learning_rate": 3.7142857142857146e-06, "loss": 0.0687, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 770, "total_memory_available (GB)": 94.62 }, { "epoch": 3.183673469387755, "grad_norm": 0.4188525378704071, "learning_rate": 3.6326530612244903e-06, "loss": 0.0646, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 780, "total_memory_available (GB)": 94.62 }, { "epoch": 3.2244897959183674, "grad_norm": 0.562319278717041, "learning_rate": 3.5510204081632655e-06, "loss": 0.0523, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 790, "total_memory_available (GB)": 94.62 }, { "epoch": 3.2653061224489797, "grad_norm": 0.383428156375885, "learning_rate": 3.469387755102041e-06, "loss": 0.0425, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 800, "total_memory_available (GB)": 94.62 }, { "epoch": 3.306122448979592, "grad_norm": 0.6743464469909668, "learning_rate": 3.3877551020408168e-06, "loss": 0.0546, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 810, "total_memory_available (GB)": 94.62 }, { "epoch": 3.3469387755102042, "grad_norm": 0.7104524970054626, "learning_rate": 3.3061224489795924e-06, "loss": 0.0448, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 820, "total_memory_available (GB)": 94.62 }, { "epoch": 3.387755102040816, "grad_norm": 0.5083824396133423, "learning_rate": 3.2244897959183672e-06, "loss": 0.048, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 830, "total_memory_available (GB)": 94.62 }, { "epoch": 3.4285714285714284, "grad_norm": 0.49159926176071167, "learning_rate": 3.142857142857143e-06, "loss": 0.0521, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 840, "total_memory_available (GB)": 94.62 }, { "epoch": 3.4693877551020407, "grad_norm": 0.635971188545227, "learning_rate": 3.0612244897959185e-06, "loss": 0.081, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 850, "total_memory_available (GB)": 94.62 }, { "epoch": 3.510204081632653, "grad_norm": 5.446665287017822, "learning_rate": 2.979591836734694e-06, "loss": 0.0449, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 860, "total_memory_available (GB)": 94.62 }, { "epoch": 3.5510204081632653, "grad_norm": 0.3760354816913605, "learning_rate": 2.8979591836734694e-06, "loss": 0.064, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 870, "total_memory_available (GB)": 94.62 }, { "epoch": 3.5918367346938775, "grad_norm": 1.0110430717468262, "learning_rate": 2.816326530612245e-06, "loss": 0.0433, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 880, "total_memory_available (GB)": 94.62 }, { "epoch": 3.63265306122449, "grad_norm": 0.1704891324043274, "learning_rate": 2.7346938775510207e-06, "loss": 0.0282, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 890, "total_memory_available (GB)": 94.62 }, { "epoch": 3.673469387755102, "grad_norm": 0.8289852142333984, "learning_rate": 2.6530612244897964e-06, "loss": 0.0518, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 900, "total_memory_available (GB)": 94.62 }, { "epoch": 3.7142857142857144, "grad_norm": 0.812251091003418, "learning_rate": 2.571428571428571e-06, "loss": 0.0598, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 910, "total_memory_available (GB)": 94.62 }, { "epoch": 3.7551020408163263, "grad_norm": 0.14789150655269623, "learning_rate": 2.489795918367347e-06, "loss": 0.0542, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 920, "total_memory_available (GB)": 94.62 }, { "epoch": 3.795918367346939, "grad_norm": 0.8937461972236633, "learning_rate": 2.4081632653061225e-06, "loss": 0.0499, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 930, "total_memory_available (GB)": 94.62 }, { "epoch": 3.836734693877551, "grad_norm": 6.832267761230469, "learning_rate": 2.326530612244898e-06, "loss": 0.0489, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 940, "total_memory_available (GB)": 94.62 }, { "epoch": 3.877551020408163, "grad_norm": 0.7444981336593628, "learning_rate": 2.244897959183674e-06, "loss": 0.0591, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 950, "total_memory_available (GB)": 94.62 }, { "epoch": 3.9183673469387754, "grad_norm": 0.38545939326286316, "learning_rate": 2.1632653061224495e-06, "loss": 0.0499, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 960, "total_memory_available (GB)": 94.62 }, { "epoch": 3.9591836734693877, "grad_norm": 1.0209099054336548, "learning_rate": 2.0816326530612247e-06, "loss": 0.0736, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 970, "total_memory_available (GB)": 94.62 }, { "epoch": 4.0, "grad_norm": 0.6384561657905579, "learning_rate": 2.0000000000000003e-06, "loss": 0.0705, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 980, "total_memory_available (GB)": 94.62 }, { "epoch": 4.040816326530612, "grad_norm": 0.6284443736076355, "learning_rate": 1.9183673469387756e-06, "loss": 0.0483, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 990, "total_memory_available (GB)": 94.62 }, { "epoch": 4.081632653061225, "grad_norm": 1.0421397686004639, "learning_rate": 1.8367346938775512e-06, "loss": 0.0627, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1000, "total_memory_available (GB)": 94.62 }, { "epoch": 4.122448979591836, "grad_norm": 0.6968585252761841, "learning_rate": 1.7551020408163267e-06, "loss": 0.0632, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1010, "total_memory_available (GB)": 94.62 }, { "epoch": 4.163265306122449, "grad_norm": 0.35346081852912903, "learning_rate": 1.6734693877551023e-06, "loss": 0.0402, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1020, "total_memory_available (GB)": 94.62 }, { "epoch": 4.204081632653061, "grad_norm": 0.8111013174057007, "learning_rate": 1.5918367346938775e-06, "loss": 0.0528, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1030, "total_memory_available (GB)": 94.62 }, { "epoch": 4.244897959183674, "grad_norm": 0.5146018862724304, "learning_rate": 1.5102040816326532e-06, "loss": 0.0396, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1040, "total_memory_available (GB)": 94.62 }, { "epoch": 4.285714285714286, "grad_norm": 0.6711630821228027, "learning_rate": 1.4285714285714286e-06, "loss": 0.0493, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1050, "total_memory_available (GB)": 94.62 }, { "epoch": 4.326530612244898, "grad_norm": 0.3221682012081146, "learning_rate": 1.3469387755102043e-06, "loss": 0.0454, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1060, "total_memory_available (GB)": 94.62 }, { "epoch": 4.36734693877551, "grad_norm": 0.9966460466384888, "learning_rate": 1.2653061224489795e-06, "loss": 0.0485, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1070, "total_memory_available (GB)": 94.62 }, { "epoch": 4.408163265306122, "grad_norm": 0.45073866844177246, "learning_rate": 1.1836734693877552e-06, "loss": 0.052, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1080, "total_memory_available (GB)": 94.62 }, { "epoch": 4.448979591836735, "grad_norm": 0.7129877805709839, "learning_rate": 1.1020408163265308e-06, "loss": 0.0511, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1090, "total_memory_available (GB)": 94.62 }, { "epoch": 4.489795918367347, "grad_norm": 0.333344966173172, "learning_rate": 1.0204081632653063e-06, "loss": 0.0352, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1100, "total_memory_available (GB)": 94.62 }, { "epoch": 4.530612244897959, "grad_norm": 0.11522499471902847, "learning_rate": 9.387755102040817e-07, "loss": 0.0457, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1110, "total_memory_available (GB)": 94.62 }, { "epoch": 4.571428571428571, "grad_norm": 0.3113747239112854, "learning_rate": 8.571428571428572e-07, "loss": 0.0402, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1120, "total_memory_available (GB)": 94.62 }, { "epoch": 4.612244897959184, "grad_norm": 0.8939142823219299, "learning_rate": 7.755102040816327e-07, "loss": 0.0536, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1130, "total_memory_available (GB)": 94.62 }, { "epoch": 4.653061224489796, "grad_norm": 0.7175144553184509, "learning_rate": 6.938775510204082e-07, "loss": 0.0724, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1140, "total_memory_available (GB)": 94.62 }, { "epoch": 4.6938775510204085, "grad_norm": 0.6586973071098328, "learning_rate": 6.122448979591837e-07, "loss": 0.0514, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1150, "total_memory_available (GB)": 94.62 }, { "epoch": 4.73469387755102, "grad_norm": 1.190521478652954, "learning_rate": 5.306122448979592e-07, "loss": 0.0608, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1160, "total_memory_available (GB)": 94.62 }, { "epoch": 4.775510204081632, "grad_norm": 0.47001194953918457, "learning_rate": 4.489795918367347e-07, "loss": 0.0548, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1170, "total_memory_available (GB)": 94.62 }, { "epoch": 4.816326530612245, "grad_norm": 0.9076634049415588, "learning_rate": 3.6734693877551025e-07, "loss": 0.0438, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1180, "total_memory_available (GB)": 94.62 }, { "epoch": 4.857142857142857, "grad_norm": 1.4096022844314575, "learning_rate": 2.8571428571428575e-07, "loss": 0.0542, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1190, "total_memory_available (GB)": 94.62 }, { "epoch": 4.8979591836734695, "grad_norm": 0.7559374570846558, "learning_rate": 2.0408163265306121e-07, "loss": 0.0715, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1200, "total_memory_available (GB)": 94.62 }, { "epoch": 4.938775510204081, "grad_norm": 0.6551031470298767, "learning_rate": 1.2244897959183673e-07, "loss": 0.0647, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1210, "total_memory_available (GB)": 94.62 }, { "epoch": 4.979591836734694, "grad_norm": 0.9617031812667847, "learning_rate": 4.0816326530612253e-08, "loss": 0.0418, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1220, "total_memory_available (GB)": 94.62 }, { "epoch": 5.0, "max_memory_allocated (GB)": 60.52, "memory_allocated (GB)": 50.57, "step": 1225, "total_flos": 3.0598946525952e+16, "total_memory_available (GB)": 94.62, "train_loss": 0.06082177159737567, "train_runtime": 1079.1005, "train_samples_per_second": 52.128, "train_steps_per_second": 1.304 } ], "logging_steps": 10, "max_steps": 1225, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.0598946525952e+16, "train_batch_size": 40, "trial_name": null, "trial_params": null }