|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 1225, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04081632653061224, |
|
"grad_norm": 16.731555938720703, |
|
"learning_rate": 9.918367346938776e-06, |
|
"loss": 0.2616, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 10, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.08163265306122448, |
|
"grad_norm": 7.408310890197754, |
|
"learning_rate": 9.836734693877552e-06, |
|
"loss": 0.1553, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 20, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.12244897959183673, |
|
"grad_norm": 5.734114170074463, |
|
"learning_rate": 9.755102040816327e-06, |
|
"loss": 0.1248, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 30, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.16326530612244897, |
|
"grad_norm": 4.232840061187744, |
|
"learning_rate": 9.673469387755103e-06, |
|
"loss": 0.1106, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 40, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 4.877067565917969, |
|
"learning_rate": 9.591836734693878e-06, |
|
"loss": 0.1233, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 50, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.24489795918367346, |
|
"grad_norm": 2.0087852478027344, |
|
"learning_rate": 9.510204081632653e-06, |
|
"loss": 0.0799, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 60, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 4.425936698913574, |
|
"learning_rate": 9.42857142857143e-06, |
|
"loss": 0.1015, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 70, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.32653061224489793, |
|
"grad_norm": 1.8348758220672607, |
|
"learning_rate": 9.346938775510204e-06, |
|
"loss": 0.1094, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 80, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.3673469387755102, |
|
"grad_norm": 5.009132385253906, |
|
"learning_rate": 9.26530612244898e-06, |
|
"loss": 0.0837, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 90, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 6.888875484466553, |
|
"learning_rate": 9.183673469387756e-06, |
|
"loss": 0.075, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 100, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.4489795918367347, |
|
"grad_norm": 3.038123369216919, |
|
"learning_rate": 9.102040816326532e-06, |
|
"loss": 0.0709, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 110, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.4897959183673469, |
|
"grad_norm": 2.6441116333007812, |
|
"learning_rate": 9.020408163265307e-06, |
|
"loss": 0.0706, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 120, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.5306122448979592, |
|
"grad_norm": 3.5273971557617188, |
|
"learning_rate": 8.938775510204082e-06, |
|
"loss": 0.0597, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 130, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 1.2711573839187622, |
|
"learning_rate": 8.857142857142858e-06, |
|
"loss": 0.0499, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 2.2461705207824707, |
|
"learning_rate": 8.775510204081633e-06, |
|
"loss": 0.0636, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 150, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6530612244897959, |
|
"grad_norm": 2.0061120986938477, |
|
"learning_rate": 8.69387755102041e-06, |
|
"loss": 0.0708, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 160, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6938775510204082, |
|
"grad_norm": 1.7886881828308105, |
|
"learning_rate": 8.612244897959184e-06, |
|
"loss": 0.0753, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 170, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.7346938775510204, |
|
"grad_norm": 1.8707109689712524, |
|
"learning_rate": 8.530612244897961e-06, |
|
"loss": 0.0612, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 180, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.7755102040816326, |
|
"grad_norm": 2.3418948650360107, |
|
"learning_rate": 8.448979591836736e-06, |
|
"loss": 0.0586, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 190, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 0.7759063839912415, |
|
"learning_rate": 8.36734693877551e-06, |
|
"loss": 0.063, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 200, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 0.9075489044189453, |
|
"learning_rate": 8.285714285714287e-06, |
|
"loss": 0.063, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 210, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8979591836734694, |
|
"grad_norm": 1.0522433519363403, |
|
"learning_rate": 8.204081632653062e-06, |
|
"loss": 0.0605, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 220, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.9387755102040817, |
|
"grad_norm": 0.7449356913566589, |
|
"learning_rate": 8.122448979591837e-06, |
|
"loss": 0.0564, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 230, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.9795918367346939, |
|
"grad_norm": 0.878478467464447, |
|
"learning_rate": 8.040816326530613e-06, |
|
"loss": 0.0496, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 240, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.0204081632653061, |
|
"grad_norm": 1.1043357849121094, |
|
"learning_rate": 7.959183673469388e-06, |
|
"loss": 0.0593, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 250, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.0612244897959184, |
|
"grad_norm": 1.5091803073883057, |
|
"learning_rate": 7.877551020408164e-06, |
|
"loss": 0.0542, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 260, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.1020408163265305, |
|
"grad_norm": 0.7768255472183228, |
|
"learning_rate": 7.79591836734694e-06, |
|
"loss": 0.0636, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 270, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 1.0384677648544312, |
|
"learning_rate": 7.714285714285716e-06, |
|
"loss": 0.0585, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 280, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.183673469387755, |
|
"grad_norm": 1.9693238735198975, |
|
"learning_rate": 7.63265306122449e-06, |
|
"loss": 0.0512, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 290, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.2244897959183674, |
|
"grad_norm": 1.0379549264907837, |
|
"learning_rate": 7.551020408163265e-06, |
|
"loss": 0.0647, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 300, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.2653061224489797, |
|
"grad_norm": 1.01348078250885, |
|
"learning_rate": 7.469387755102041e-06, |
|
"loss": 0.0668, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 310, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.306122448979592, |
|
"grad_norm": 0.9114493727684021, |
|
"learning_rate": 7.387755102040817e-06, |
|
"loss": 0.0509, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 320, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.346938775510204, |
|
"grad_norm": 0.5102170705795288, |
|
"learning_rate": 7.306122448979592e-06, |
|
"loss": 0.0612, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 330, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.3877551020408163, |
|
"grad_norm": 1.097835659980774, |
|
"learning_rate": 7.224489795918368e-06, |
|
"loss": 0.055, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 340, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 0.5733208656311035, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.05, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 350, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.469387755102041, |
|
"grad_norm": 2.428065538406372, |
|
"learning_rate": 7.061224489795919e-06, |
|
"loss": 0.0458, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 360, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.510204081632653, |
|
"grad_norm": 0.9827563166618347, |
|
"learning_rate": 6.979591836734695e-06, |
|
"loss": 0.0588, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 370, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.5510204081632653, |
|
"grad_norm": 0.5563956499099731, |
|
"learning_rate": 6.8979591836734705e-06, |
|
"loss": 0.0473, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 380, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.5918367346938775, |
|
"grad_norm": 0.9855480194091797, |
|
"learning_rate": 6.816326530612245e-06, |
|
"loss": 0.0653, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 390, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.6326530612244898, |
|
"grad_norm": 0.7717217803001404, |
|
"learning_rate": 6.734693877551021e-06, |
|
"loss": 0.0618, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 400, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.6734693877551021, |
|
"grad_norm": 0.5391837358474731, |
|
"learning_rate": 6.653061224489797e-06, |
|
"loss": 0.061, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 410, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 1.2298884391784668, |
|
"learning_rate": 6.571428571428572e-06, |
|
"loss": 0.0534, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 420, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.7551020408163265, |
|
"grad_norm": 0.22150574624538422, |
|
"learning_rate": 6.489795918367348e-06, |
|
"loss": 0.0681, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 430, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.7959183673469388, |
|
"grad_norm": 0.8992190957069397, |
|
"learning_rate": 6.408163265306124e-06, |
|
"loss": 0.0606, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 440, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.836734693877551, |
|
"grad_norm": 0.9748144149780273, |
|
"learning_rate": 6.326530612244899e-06, |
|
"loss": 0.0436, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 450, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.8775510204081631, |
|
"grad_norm": 0.3272276520729065, |
|
"learning_rate": 6.244897959183675e-06, |
|
"loss": 0.0545, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 460, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.9183673469387754, |
|
"grad_norm": 0.7515161633491516, |
|
"learning_rate": 6.163265306122449e-06, |
|
"loss": 0.0427, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 470, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.9591836734693877, |
|
"grad_norm": 0.35405218601226807, |
|
"learning_rate": 6.0816326530612245e-06, |
|
"loss": 0.0501, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 480, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.9796536564826965, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0641, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 490, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.0408163265306123, |
|
"grad_norm": 0.639869213104248, |
|
"learning_rate": 5.918367346938776e-06, |
|
"loss": 0.0592, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.0816326530612246, |
|
"grad_norm": 0.8377614617347717, |
|
"learning_rate": 5.8367346938775515e-06, |
|
"loss": 0.0497, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 510, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.122448979591837, |
|
"grad_norm": 1.07124662399292, |
|
"learning_rate": 5.755102040816327e-06, |
|
"loss": 0.047, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 520, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.163265306122449, |
|
"grad_norm": 1.0354053974151611, |
|
"learning_rate": 5.673469387755103e-06, |
|
"loss": 0.0636, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 530, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.204081632653061, |
|
"grad_norm": 0.5396034717559814, |
|
"learning_rate": 5.591836734693878e-06, |
|
"loss": 0.0602, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 540, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.2448979591836733, |
|
"grad_norm": 1.1461529731750488, |
|
"learning_rate": 5.510204081632653e-06, |
|
"loss": 0.0629, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 550, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.2857142857142856, |
|
"grad_norm": 1.732596516609192, |
|
"learning_rate": 5.428571428571429e-06, |
|
"loss": 0.0575, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 560, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.326530612244898, |
|
"grad_norm": 0.6468518376350403, |
|
"learning_rate": 5.3469387755102045e-06, |
|
"loss": 0.0581, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 570, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.36734693877551, |
|
"grad_norm": 1.7525995969772339, |
|
"learning_rate": 5.26530612244898e-06, |
|
"loss": 0.0678, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 580, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.4081632653061225, |
|
"grad_norm": 0.5057170391082764, |
|
"learning_rate": 5.183673469387756e-06, |
|
"loss": 0.0533, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 590, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.4489795918367347, |
|
"grad_norm": 1.2042871713638306, |
|
"learning_rate": 5.1020408163265315e-06, |
|
"loss": 0.0444, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 600, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.489795918367347, |
|
"grad_norm": 0.20784775912761688, |
|
"learning_rate": 5.020408163265307e-06, |
|
"loss": 0.0413, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 610, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.5306122448979593, |
|
"grad_norm": 5.924367427825928, |
|
"learning_rate": 4.938775510204082e-06, |
|
"loss": 0.0424, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 620, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.571428571428571, |
|
"grad_norm": 0.40434470772743225, |
|
"learning_rate": 4.857142857142858e-06, |
|
"loss": 0.0815, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 630, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.612244897959184, |
|
"grad_norm": 0.7878668904304504, |
|
"learning_rate": 4.775510204081633e-06, |
|
"loss": 0.0495, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 640, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.6530612244897958, |
|
"grad_norm": 0.5937454700469971, |
|
"learning_rate": 4.693877551020409e-06, |
|
"loss": 0.0471, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 650, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.693877551020408, |
|
"grad_norm": 0.9869357347488403, |
|
"learning_rate": 4.612244897959184e-06, |
|
"loss": 0.0472, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 660, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.7346938775510203, |
|
"grad_norm": 0.7606731653213501, |
|
"learning_rate": 4.530612244897959e-06, |
|
"loss": 0.0864, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 670, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.7755102040816326, |
|
"grad_norm": 1.5359723567962646, |
|
"learning_rate": 4.448979591836735e-06, |
|
"loss": 0.0482, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 680, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.816326530612245, |
|
"grad_norm": 0.370749294757843, |
|
"learning_rate": 4.367346938775511e-06, |
|
"loss": 0.0447, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 690, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 0.633791983127594, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.044, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 700, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.8979591836734695, |
|
"grad_norm": 1.1335562467575073, |
|
"learning_rate": 4.204081632653061e-06, |
|
"loss": 0.0552, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 710, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.938775510204082, |
|
"grad_norm": 1.5698113441467285, |
|
"learning_rate": 4.122448979591837e-06, |
|
"loss": 0.0576, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 720, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.979591836734694, |
|
"grad_norm": 0.20579344034194946, |
|
"learning_rate": 4.040816326530612e-06, |
|
"loss": 0.0336, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 730, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.020408163265306, |
|
"grad_norm": 1.3172132968902588, |
|
"learning_rate": 3.959183673469388e-06, |
|
"loss": 0.0534, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 740, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.061224489795918, |
|
"grad_norm": 1.5193818807601929, |
|
"learning_rate": 3.877551020408164e-06, |
|
"loss": 0.0605, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 750, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.1020408163265305, |
|
"grad_norm": 0.7448021769523621, |
|
"learning_rate": 3.795918367346939e-06, |
|
"loss": 0.0524, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 760, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.142857142857143, |
|
"grad_norm": 0.6830514073371887, |
|
"learning_rate": 3.7142857142857146e-06, |
|
"loss": 0.0687, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 770, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.183673469387755, |
|
"grad_norm": 0.4188525378704071, |
|
"learning_rate": 3.6326530612244903e-06, |
|
"loss": 0.0646, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 780, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.2244897959183674, |
|
"grad_norm": 0.562319278717041, |
|
"learning_rate": 3.5510204081632655e-06, |
|
"loss": 0.0523, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 790, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.2653061224489797, |
|
"grad_norm": 0.383428156375885, |
|
"learning_rate": 3.469387755102041e-06, |
|
"loss": 0.0425, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 800, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.306122448979592, |
|
"grad_norm": 0.6743464469909668, |
|
"learning_rate": 3.3877551020408168e-06, |
|
"loss": 0.0546, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 810, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.3469387755102042, |
|
"grad_norm": 0.7104524970054626, |
|
"learning_rate": 3.3061224489795924e-06, |
|
"loss": 0.0448, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 820, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.387755102040816, |
|
"grad_norm": 0.5083824396133423, |
|
"learning_rate": 3.2244897959183672e-06, |
|
"loss": 0.048, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 830, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.4285714285714284, |
|
"grad_norm": 0.49159926176071167, |
|
"learning_rate": 3.142857142857143e-06, |
|
"loss": 0.0521, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 840, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.4693877551020407, |
|
"grad_norm": 0.635971188545227, |
|
"learning_rate": 3.0612244897959185e-06, |
|
"loss": 0.081, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 850, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.510204081632653, |
|
"grad_norm": 5.446665287017822, |
|
"learning_rate": 2.979591836734694e-06, |
|
"loss": 0.0449, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 860, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.5510204081632653, |
|
"grad_norm": 0.3760354816913605, |
|
"learning_rate": 2.8979591836734694e-06, |
|
"loss": 0.064, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 870, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.5918367346938775, |
|
"grad_norm": 1.0110430717468262, |
|
"learning_rate": 2.816326530612245e-06, |
|
"loss": 0.0433, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 880, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.63265306122449, |
|
"grad_norm": 0.1704891324043274, |
|
"learning_rate": 2.7346938775510207e-06, |
|
"loss": 0.0282, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 890, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.673469387755102, |
|
"grad_norm": 0.8289852142333984, |
|
"learning_rate": 2.6530612244897964e-06, |
|
"loss": 0.0518, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 900, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.7142857142857144, |
|
"grad_norm": 0.812251091003418, |
|
"learning_rate": 2.571428571428571e-06, |
|
"loss": 0.0598, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 910, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.7551020408163263, |
|
"grad_norm": 0.14789150655269623, |
|
"learning_rate": 2.489795918367347e-06, |
|
"loss": 0.0542, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 920, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.795918367346939, |
|
"grad_norm": 0.8937461972236633, |
|
"learning_rate": 2.4081632653061225e-06, |
|
"loss": 0.0499, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 930, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.836734693877551, |
|
"grad_norm": 6.832267761230469, |
|
"learning_rate": 2.326530612244898e-06, |
|
"loss": 0.0489, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 940, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.877551020408163, |
|
"grad_norm": 0.7444981336593628, |
|
"learning_rate": 2.244897959183674e-06, |
|
"loss": 0.0591, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 950, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.9183673469387754, |
|
"grad_norm": 0.38545939326286316, |
|
"learning_rate": 2.1632653061224495e-06, |
|
"loss": 0.0499, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 960, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.9591836734693877, |
|
"grad_norm": 1.0209099054336548, |
|
"learning_rate": 2.0816326530612247e-06, |
|
"loss": 0.0736, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 970, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.6384561657905579, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.0705, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 980, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.040816326530612, |
|
"grad_norm": 0.6284443736076355, |
|
"learning_rate": 1.9183673469387756e-06, |
|
"loss": 0.0483, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 990, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.081632653061225, |
|
"grad_norm": 1.0421397686004639, |
|
"learning_rate": 1.8367346938775512e-06, |
|
"loss": 0.0627, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.122448979591836, |
|
"grad_norm": 0.6968585252761841, |
|
"learning_rate": 1.7551020408163267e-06, |
|
"loss": 0.0632, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1010, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.163265306122449, |
|
"grad_norm": 0.35346081852912903, |
|
"learning_rate": 1.6734693877551023e-06, |
|
"loss": 0.0402, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1020, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.204081632653061, |
|
"grad_norm": 0.8111013174057007, |
|
"learning_rate": 1.5918367346938775e-06, |
|
"loss": 0.0528, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1030, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.244897959183674, |
|
"grad_norm": 0.5146018862724304, |
|
"learning_rate": 1.5102040816326532e-06, |
|
"loss": 0.0396, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1040, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.285714285714286, |
|
"grad_norm": 0.6711630821228027, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.0493, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1050, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.326530612244898, |
|
"grad_norm": 0.3221682012081146, |
|
"learning_rate": 1.3469387755102043e-06, |
|
"loss": 0.0454, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1060, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.36734693877551, |
|
"grad_norm": 0.9966460466384888, |
|
"learning_rate": 1.2653061224489795e-06, |
|
"loss": 0.0485, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1070, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.408163265306122, |
|
"grad_norm": 0.45073866844177246, |
|
"learning_rate": 1.1836734693877552e-06, |
|
"loss": 0.052, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1080, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.448979591836735, |
|
"grad_norm": 0.7129877805709839, |
|
"learning_rate": 1.1020408163265308e-06, |
|
"loss": 0.0511, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1090, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.489795918367347, |
|
"grad_norm": 0.333344966173172, |
|
"learning_rate": 1.0204081632653063e-06, |
|
"loss": 0.0352, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1100, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.530612244897959, |
|
"grad_norm": 0.11522499471902847, |
|
"learning_rate": 9.387755102040817e-07, |
|
"loss": 0.0457, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1110, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.571428571428571, |
|
"grad_norm": 0.3113747239112854, |
|
"learning_rate": 8.571428571428572e-07, |
|
"loss": 0.0402, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1120, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.612244897959184, |
|
"grad_norm": 0.8939142823219299, |
|
"learning_rate": 7.755102040816327e-07, |
|
"loss": 0.0536, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1130, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.653061224489796, |
|
"grad_norm": 0.7175144553184509, |
|
"learning_rate": 6.938775510204082e-07, |
|
"loss": 0.0724, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.6938775510204085, |
|
"grad_norm": 0.6586973071098328, |
|
"learning_rate": 6.122448979591837e-07, |
|
"loss": 0.0514, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1150, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.73469387755102, |
|
"grad_norm": 1.190521478652954, |
|
"learning_rate": 5.306122448979592e-07, |
|
"loss": 0.0608, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1160, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.775510204081632, |
|
"grad_norm": 0.47001194953918457, |
|
"learning_rate": 4.489795918367347e-07, |
|
"loss": 0.0548, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1170, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.816326530612245, |
|
"grad_norm": 0.9076634049415588, |
|
"learning_rate": 3.6734693877551025e-07, |
|
"loss": 0.0438, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1180, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.857142857142857, |
|
"grad_norm": 1.4096022844314575, |
|
"learning_rate": 2.8571428571428575e-07, |
|
"loss": 0.0542, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1190, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.8979591836734695, |
|
"grad_norm": 0.7559374570846558, |
|
"learning_rate": 2.0408163265306121e-07, |
|
"loss": 0.0715, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1200, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.938775510204081, |
|
"grad_norm": 0.6551031470298767, |
|
"learning_rate": 1.2244897959183673e-07, |
|
"loss": 0.0647, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1210, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.979591836734694, |
|
"grad_norm": 0.9617031812667847, |
|
"learning_rate": 4.0816326530612253e-08, |
|
"loss": 0.0418, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1220, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1225, |
|
"total_flos": 3.0598946525952e+16, |
|
"total_memory_available (GB)": 94.62, |
|
"train_loss": 0.06082177159737567, |
|
"train_runtime": 1079.1005, |
|
"train_samples_per_second": 52.128, |
|
"train_steps_per_second": 1.304 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1225, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.0598946525952e+16, |
|
"train_batch_size": 40, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|