hp_ablations_gemma_bsz512 / trainer_log.jsonl
sedrickkeh's picture
Training in progress, epoch 0
ee3a7f4 verified
raw
history blame
8.78 kB
{"current_steps": 10, "total_steps": 1329, "loss": 0.7438, "learning_rate": 5e-06, "epoch": 0.022566995768688293, "percentage": 0.75, "elapsed_time": "0:15:00", "remaining_time": "1 day, 8:59:53"}
{"current_steps": 20, "total_steps": 1329, "loss": 0.6802, "learning_rate": 5e-06, "epoch": 0.045133991537376586, "percentage": 1.5, "elapsed_time": "0:29:59", "remaining_time": "1 day, 8:42:38"}
{"current_steps": 30, "total_steps": 1329, "loss": 0.6629, "learning_rate": 5e-06, "epoch": 0.06770098730606489, "percentage": 2.26, "elapsed_time": "0:44:56", "remaining_time": "1 day, 8:26:06"}
{"current_steps": 40, "total_steps": 1329, "loss": 0.6406, "learning_rate": 5e-06, "epoch": 0.09026798307475317, "percentage": 3.01, "elapsed_time": "0:59:50", "remaining_time": "1 day, 8:08:21"}
{"current_steps": 50, "total_steps": 1329, "loss": 0.634, "learning_rate": 5e-06, "epoch": 0.11283497884344147, "percentage": 3.76, "elapsed_time": "1:14:45", "remaining_time": "1 day, 7:52:25"}
{"current_steps": 60, "total_steps": 1329, "loss": 0.6332, "learning_rate": 5e-06, "epoch": 0.13540197461212977, "percentage": 4.51, "elapsed_time": "1:29:42", "remaining_time": "1 day, 7:37:19"}
{"current_steps": 70, "total_steps": 1329, "loss": 0.629, "learning_rate": 5e-06, "epoch": 0.15796897038081806, "percentage": 5.27, "elapsed_time": "1:44:39", "remaining_time": "1 day, 7:22:15"}
{"current_steps": 80, "total_steps": 1329, "loss": 0.6306, "learning_rate": 5e-06, "epoch": 0.18053596614950634, "percentage": 6.02, "elapsed_time": "1:59:36", "remaining_time": "1 day, 7:07:24"}
{"current_steps": 90, "total_steps": 1329, "loss": 0.6282, "learning_rate": 5e-06, "epoch": 0.20310296191819463, "percentage": 6.77, "elapsed_time": "2:14:34", "remaining_time": "1 day, 6:52:32"}
{"current_steps": 100, "total_steps": 1329, "loss": 0.6272, "learning_rate": 5e-06, "epoch": 0.22566995768688294, "percentage": 7.52, "elapsed_time": "2:29:29", "remaining_time": "1 day, 6:37:20"}
{"current_steps": 110, "total_steps": 1329, "loss": 0.618, "learning_rate": 5e-06, "epoch": 0.24823695345557123, "percentage": 8.28, "elapsed_time": "2:44:27", "remaining_time": "1 day, 6:22:26"}
{"current_steps": 120, "total_steps": 1329, "loss": 0.6173, "learning_rate": 5e-06, "epoch": 0.27080394922425954, "percentage": 9.03, "elapsed_time": "2:59:23", "remaining_time": "1 day, 6:07:18"}
{"current_steps": 130, "total_steps": 1329, "loss": 0.6185, "learning_rate": 5e-06, "epoch": 0.2933709449929478, "percentage": 9.78, "elapsed_time": "3:14:19", "remaining_time": "1 day, 5:52:15"}
{"current_steps": 140, "total_steps": 1329, "loss": 0.6135, "learning_rate": 5e-06, "epoch": 0.3159379407616361, "percentage": 10.53, "elapsed_time": "3:29:16", "remaining_time": "1 day, 5:37:24"}
{"current_steps": 150, "total_steps": 1329, "loss": 0.614, "learning_rate": 5e-06, "epoch": 0.3385049365303244, "percentage": 11.29, "elapsed_time": "3:44:10", "remaining_time": "1 day, 5:22:03"}
{"current_steps": 160, "total_steps": 1329, "loss": 0.6157, "learning_rate": 5e-06, "epoch": 0.3610719322990127, "percentage": 12.04, "elapsed_time": "3:59:07", "remaining_time": "1 day, 5:07:05"}
{"current_steps": 170, "total_steps": 1329, "loss": 0.6118, "learning_rate": 5e-06, "epoch": 0.383638928067701, "percentage": 12.79, "elapsed_time": "4:14:03", "remaining_time": "1 day, 4:52:04"}
{"current_steps": 180, "total_steps": 1329, "loss": 0.6087, "learning_rate": 5e-06, "epoch": 0.40620592383638926, "percentage": 13.54, "elapsed_time": "4:28:59", "remaining_time": "1 day, 4:37:06"}
{"current_steps": 190, "total_steps": 1329, "loss": 0.6032, "learning_rate": 5e-06, "epoch": 0.4287729196050776, "percentage": 14.3, "elapsed_time": "4:43:57", "remaining_time": "1 day, 4:22:13"}
{"current_steps": 200, "total_steps": 1329, "loss": 0.6099, "learning_rate": 5e-06, "epoch": 0.4513399153737659, "percentage": 15.05, "elapsed_time": "4:58:53", "remaining_time": "1 day, 4:07:16"}
{"current_steps": 210, "total_steps": 1329, "loss": 0.6063, "learning_rate": 5e-06, "epoch": 0.47390691114245415, "percentage": 15.8, "elapsed_time": "5:13:48", "remaining_time": "1 day, 3:52:07"}
{"current_steps": 220, "total_steps": 1329, "loss": 0.598, "learning_rate": 5e-06, "epoch": 0.49647390691114246, "percentage": 16.55, "elapsed_time": "5:28:43", "remaining_time": "1 day, 3:37:06"}
{"current_steps": 230, "total_steps": 1329, "loss": 0.6068, "learning_rate": 5e-06, "epoch": 0.5190409026798307, "percentage": 17.31, "elapsed_time": "5:43:39", "remaining_time": "1 day, 3:22:05"}
{"current_steps": 240, "total_steps": 1329, "loss": 0.6057, "learning_rate": 5e-06, "epoch": 0.5416078984485191, "percentage": 18.06, "elapsed_time": "5:58:36", "remaining_time": "1 day, 3:07:12"}
{"current_steps": 250, "total_steps": 1329, "loss": 0.5998, "learning_rate": 5e-06, "epoch": 0.5641748942172073, "percentage": 18.81, "elapsed_time": "6:13:31", "remaining_time": "1 day, 2:52:06"}
{"current_steps": 260, "total_steps": 1329, "loss": 0.6025, "learning_rate": 5e-06, "epoch": 0.5867418899858956, "percentage": 19.56, "elapsed_time": "6:28:27", "remaining_time": "1 day, 2:37:11"}
{"current_steps": 270, "total_steps": 1329, "loss": 0.5979, "learning_rate": 5e-06, "epoch": 0.609308885754584, "percentage": 20.32, "elapsed_time": "6:43:25", "remaining_time": "1 day, 2:22:19"}
{"current_steps": 280, "total_steps": 1329, "loss": 0.6045, "learning_rate": 5e-06, "epoch": 0.6318758815232722, "percentage": 21.07, "elapsed_time": "6:58:23", "remaining_time": "1 day, 2:07:28"}
{"current_steps": 290, "total_steps": 1329, "loss": 0.5979, "learning_rate": 5e-06, "epoch": 0.6544428772919605, "percentage": 21.82, "elapsed_time": "7:13:21", "remaining_time": "1 day, 1:52:36"}
{"current_steps": 300, "total_steps": 1329, "loss": 0.604, "learning_rate": 5e-06, "epoch": 0.6770098730606487, "percentage": 22.57, "elapsed_time": "7:28:18", "remaining_time": "1 day, 1:37:40"}
{"current_steps": 310, "total_steps": 1329, "loss": 0.5972, "learning_rate": 5e-06, "epoch": 0.6995768688293371, "percentage": 23.33, "elapsed_time": "7:43:13", "remaining_time": "1 day, 1:22:39"}
{"current_steps": 320, "total_steps": 1329, "loss": 0.6004, "learning_rate": 5e-06, "epoch": 0.7221438645980254, "percentage": 24.08, "elapsed_time": "7:58:10", "remaining_time": "1 day, 1:07:45"}
{"current_steps": 330, "total_steps": 1329, "loss": 0.596, "learning_rate": 5e-06, "epoch": 0.7447108603667136, "percentage": 24.83, "elapsed_time": "8:13:05", "remaining_time": "1 day, 0:52:44"}
{"current_steps": 340, "total_steps": 1329, "loss": 0.5955, "learning_rate": 5e-06, "epoch": 0.767277856135402, "percentage": 25.58, "elapsed_time": "8:28:03", "remaining_time": "1 day, 0:37:50"}
{"current_steps": 350, "total_steps": 1329, "loss": 0.597, "learning_rate": 5e-06, "epoch": 0.7898448519040903, "percentage": 26.34, "elapsed_time": "8:43:00", "remaining_time": "1 day, 0:22:54"}
{"current_steps": 360, "total_steps": 1329, "loss": 0.5959, "learning_rate": 5e-06, "epoch": 0.8124118476727785, "percentage": 27.09, "elapsed_time": "8:57:57", "remaining_time": "1 day, 0:07:59"}
{"current_steps": 370, "total_steps": 1329, "loss": 0.603, "learning_rate": 5e-06, "epoch": 0.8349788434414669, "percentage": 27.84, "elapsed_time": "9:12:54", "remaining_time": "23:53:04"}
{"current_steps": 380, "total_steps": 1329, "loss": 0.5997, "learning_rate": 5e-06, "epoch": 0.8575458392101551, "percentage": 28.59, "elapsed_time": "9:27:51", "remaining_time": "23:38:07"}
{"current_steps": 390, "total_steps": 1329, "loss": 0.5937, "learning_rate": 5e-06, "epoch": 0.8801128349788434, "percentage": 29.35, "elapsed_time": "9:42:45", "remaining_time": "23:23:06"}
{"current_steps": 400, "total_steps": 1329, "loss": 0.596, "learning_rate": 5e-06, "epoch": 0.9026798307475318, "percentage": 30.1, "elapsed_time": "9:57:42", "remaining_time": "23:08:11"}
{"current_steps": 410, "total_steps": 1329, "loss": 0.594, "learning_rate": 5e-06, "epoch": 0.92524682651622, "percentage": 30.85, "elapsed_time": "10:12:37", "remaining_time": "22:53:11"}
{"current_steps": 420, "total_steps": 1329, "loss": 0.5946, "learning_rate": 5e-06, "epoch": 0.9478138222849083, "percentage": 31.6, "elapsed_time": "10:27:33", "remaining_time": "22:38:13"}
{"current_steps": 430, "total_steps": 1329, "loss": 0.5886, "learning_rate": 5e-06, "epoch": 0.9703808180535967, "percentage": 32.36, "elapsed_time": "10:42:31", "remaining_time": "22:23:19"}
{"current_steps": 440, "total_steps": 1329, "loss": 0.5934, "learning_rate": 5e-06, "epoch": 0.9929478138222849, "percentage": 33.11, "elapsed_time": "10:57:26", "remaining_time": "22:08:20"}
{"current_steps": 443, "total_steps": 1329, "eval_loss": 0.5912777781486511, "epoch": 0.9997179125528914, "percentage": 33.33, "elapsed_time": "11:13:37", "remaining_time": "22:27:15"}