inflaton's picture
fine-tuned checkpoints
e1e71f5
raw
history blame
11.8 kB
{"current_steps": 5, "total_steps": 210, "loss": 2.4055, "learning_rate": 2.380952380952381e-05, "epoch": 0.1423487544483986, "percentage": 2.38, "elapsed_time": "0:01:07", "remaining_time": "0:46:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 10, "total_steps": 210, "loss": 2.2398, "learning_rate": 4.761904761904762e-05, "epoch": 0.2846975088967972, "percentage": 4.76, "elapsed_time": "0:02:18", "remaining_time": "0:46:12", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 15, "total_steps": 210, "loss": 1.9249, "learning_rate": 7.142857142857143e-05, "epoch": 0.42704626334519574, "percentage": 7.14, "elapsed_time": "0:03:24", "remaining_time": "0:44:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 20, "total_steps": 210, "loss": 1.7843, "learning_rate": 9.523809523809524e-05, "epoch": 0.5693950177935944, "percentage": 9.52, "elapsed_time": "0:04:29", "remaining_time": "0:42:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 25, "total_steps": 210, "loss": 1.7326, "learning_rate": 9.988952191691925e-05, "epoch": 0.7117437722419929, "percentage": 11.9, "elapsed_time": "0:05:42", "remaining_time": "0:42:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 30, "total_steps": 210, "loss": 1.6923, "learning_rate": 9.944154131125642e-05, "epoch": 0.8540925266903915, "percentage": 14.29, "elapsed_time": "0:06:49", "remaining_time": "0:40:59", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 35, "total_steps": 210, "loss": 1.715, "learning_rate": 9.865224352899119e-05, "epoch": 0.99644128113879, "percentage": 16.67, "elapsed_time": "0:08:01", "remaining_time": "0:40:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 35, "total_steps": 210, "eval_loss": 1.582451343536377, "epoch": 0.99644128113879, "percentage": 16.67, "elapsed_time": "0:08:03", "remaining_time": "0:40:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 40, "total_steps": 210, "loss": 1.6396, "learning_rate": 9.752707744739145e-05, "epoch": 1.1387900355871885, "percentage": 19.05, "elapsed_time": "0:09:12", "remaining_time": "0:39:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 45, "total_steps": 210, "loss": 1.6017, "learning_rate": 9.607381059352038e-05, "epoch": 1.281138790035587, "percentage": 21.43, "elapsed_time": "0:10:25", "remaining_time": "0:38:12", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 50, "total_steps": 210, "loss": 1.5959, "learning_rate": 9.430247552150673e-05, "epoch": 1.4234875444839858, "percentage": 23.81, "elapsed_time": "0:11:35", "remaining_time": "0:37:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 55, "total_steps": 210, "loss": 1.5805, "learning_rate": 9.22253005533154e-05, "epoch": 1.5658362989323842, "percentage": 26.19, "elapsed_time": "0:12:43", "remaining_time": "0:35:52", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 60, "total_steps": 210, "loss": 1.579, "learning_rate": 8.985662536114613e-05, "epoch": 1.708185053380783, "percentage": 28.57, "elapsed_time": "0:13:49", "remaining_time": "0:34:33", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 65, "total_steps": 210, "loss": 1.5391, "learning_rate": 8.721280197423258e-05, "epoch": 1.8505338078291815, "percentage": 30.95, "elapsed_time": "0:15:00", "remaining_time": "0:33:28", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 70, "total_steps": 210, "loss": 1.5599, "learning_rate": 8.43120818934367e-05, "epoch": 1.99288256227758, "percentage": 33.33, "elapsed_time": "0:16:08", "remaining_time": "0:32:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 70, "total_steps": 210, "eval_loss": 1.508989691734314, "epoch": 1.99288256227758, "percentage": 33.33, "elapsed_time": "0:16:10", "remaining_time": "0:32:21", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 75, "total_steps": 210, "loss": 1.4277, "learning_rate": 8.117449009293668e-05, "epoch": 2.135231316725979, "percentage": 35.71, "elapsed_time": "0:17:18", "remaining_time": "0:31:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 80, "total_steps": 210, "loss": 1.4388, "learning_rate": 7.782168677883206e-05, "epoch": 2.277580071174377, "percentage": 38.1, "elapsed_time": "0:18:26", "remaining_time": "0:29:57", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 85, "total_steps": 210, "loss": 1.4763, "learning_rate": 7.427681785900761e-05, "epoch": 2.419928825622776, "percentage": 40.48, "elapsed_time": "0:19:34", "remaining_time": "0:28:46", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 90, "total_steps": 210, "loss": 1.4306, "learning_rate": 7.056435515653059e-05, "epoch": 2.562277580071174, "percentage": 42.86, "elapsed_time": "0:20:42", "remaining_time": "0:27:36", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 95, "total_steps": 210, "loss": 1.4139, "learning_rate": 6.670992746965938e-05, "epoch": 2.704626334519573, "percentage": 45.24, "elapsed_time": "0:21:52", "remaining_time": "0:26:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 100, "total_steps": 210, "loss": 1.4257, "learning_rate": 6.274014364473274e-05, "epoch": 2.8469750889679717, "percentage": 47.62, "elapsed_time": "0:23:00", "remaining_time": "0:25:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 105, "total_steps": 210, "loss": 1.4363, "learning_rate": 5.868240888334653e-05, "epoch": 2.9893238434163703, "percentage": 50.0, "elapsed_time": "0:24:09", "remaining_time": "0:24:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 105, "total_steps": 210, "eval_loss": 1.5067311525344849, "epoch": 2.9893238434163703, "percentage": 50.0, "elapsed_time": "0:24:11", "remaining_time": "0:24:11", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 110, "total_steps": 210, "loss": 1.3086, "learning_rate": 5.456473555193242e-05, "epoch": 3.131672597864769, "percentage": 52.38, "elapsed_time": "0:25:18", "remaining_time": "0:23:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 115, "total_steps": 210, "loss": 1.2949, "learning_rate": 5.041554979980486e-05, "epoch": 3.2740213523131674, "percentage": 54.76, "elapsed_time": "0:26:26", "remaining_time": "0:21:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 120, "total_steps": 210, "loss": 1.2955, "learning_rate": 4.626349532067879e-05, "epoch": 3.416370106761566, "percentage": 57.14, "elapsed_time": "0:27:35", "remaining_time": "0:20:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 125, "total_steps": 210, "loss": 1.2972, "learning_rate": 4.213723561238074e-05, "epoch": 3.5587188612099645, "percentage": 59.52, "elapsed_time": "0:28:45", "remaining_time": "0:19:33", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 130, "total_steps": 210, "loss": 1.2759, "learning_rate": 3.806525609984312e-05, "epoch": 3.701067615658363, "percentage": 61.9, "elapsed_time": "0:29:59", "remaining_time": "0:18:27", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 135, "total_steps": 210, "loss": 1.282, "learning_rate": 3.4075667487415785e-05, "epoch": 3.8434163701067616, "percentage": 64.29, "elapsed_time": "0:31:07", "remaining_time": "0:17:17", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 140, "total_steps": 210, "loss": 1.2383, "learning_rate": 3.019601169804216e-05, "epoch": 3.98576512455516, "percentage": 66.67, "elapsed_time": "0:32:13", "remaining_time": "0:16:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 140, "total_steps": 210, "eval_loss": 1.5461146831512451, "epoch": 3.98576512455516, "percentage": 66.67, "elapsed_time": "0:32:15", "remaining_time": "0:16:07", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 145, "total_steps": 210, "loss": 1.1755, "learning_rate": 2.645307173898901e-05, "epoch": 4.128113879003559, "percentage": 69.05, "elapsed_time": "0:33:23", "remaining_time": "0:14:57", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 150, "total_steps": 210, "loss": 1.1929, "learning_rate": 2.2872686806712035e-05, "epoch": 4.270462633451958, "percentage": 71.43, "elapsed_time": "0:34:33", "remaining_time": "0:13:49", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 155, "total_steps": 210, "loss": 1.1538, "learning_rate": 1.947957390727185e-05, "epoch": 4.412811387900356, "percentage": 73.81, "elapsed_time": "0:35:42", "remaining_time": "0:12:40", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 160, "total_steps": 210, "loss": 1.1815, "learning_rate": 1.629715722373423e-05, "epoch": 4.555160142348754, "percentage": 76.19, "elapsed_time": "0:36:50", "remaining_time": "0:11:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 165, "total_steps": 210, "loss": 1.1403, "learning_rate": 1.3347406408508695e-05, "epoch": 4.697508896797153, "percentage": 78.57, "elapsed_time": "0:37:59", "remaining_time": "0:10:21", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 170, "total_steps": 210, "loss": 1.1399, "learning_rate": 1.0650684916965559e-05, "epoch": 4.839857651245552, "percentage": 80.95, "elapsed_time": "0:39:04", "remaining_time": "0:09:11", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 175, "total_steps": 210, "loss": 1.1633, "learning_rate": 8.225609429353187e-06, "epoch": 4.98220640569395, "percentage": 83.33, "elapsed_time": "0:40:18", "remaining_time": "0:08:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 175, "total_steps": 210, "eval_loss": 1.6059898138046265, "epoch": 4.98220640569395, "percentage": 83.33, "elapsed_time": "0:40:20", "remaining_time": "0:08:04", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 180, "total_steps": 210, "loss": 1.1043, "learning_rate": 6.088921331488568e-06, "epoch": 5.124555160142349, "percentage": 85.71, "elapsed_time": "0:41:29", "remaining_time": "0:06:54", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 185, "total_steps": 210, "loss": 1.1141, "learning_rate": 4.255371141448272e-06, "epoch": 5.266903914590747, "percentage": 88.1, "elapsed_time": "0:42:38", "remaining_time": "0:05:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 190, "total_steps": 210, "loss": 1.1081, "learning_rate": 2.737616680113758e-06, "epoch": 5.409252669039146, "percentage": 90.48, "elapsed_time": "0:43:44", "remaining_time": "0:04:36", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 195, "total_steps": 210, "loss": 1.1185, "learning_rate": 1.5461356885461075e-06, "epoch": 5.551601423487544, "percentage": 92.86, "elapsed_time": "0:44:58", "remaining_time": "0:03:27", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 200, "total_steps": 210, "loss": 1.0964, "learning_rate": 6.891534954310885e-07, "epoch": 5.693950177935943, "percentage": 95.24, "elapsed_time": "0:46:08", "remaining_time": "0:02:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 205, "total_steps": 210, "loss": 1.0836, "learning_rate": 1.725862339392259e-07, "epoch": 5.8362989323843415, "percentage": 97.62, "elapsed_time": "0:47:14", "remaining_time": "0:01:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 210, "total_steps": 210, "loss": 1.1257, "learning_rate": 0.0, "epoch": 5.9786476868327405, "percentage": 100.0, "elapsed_time": "0:48:24", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 210, "total_steps": 210, "eval_loss": 1.6233642101287842, "epoch": 5.9786476868327405, "percentage": 100.0, "elapsed_time": "0:48:27", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 210, "total_steps": 210, "epoch": 5.9786476868327405, "percentage": 100.0, "elapsed_time": "0:48:28", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}