File size: 9,033 Bytes
671c258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
{"current_steps": 10, "total_steps": 366, "loss": 1.8972, "learning_rate": 2.702702702702703e-05, "epoch": 0.08154943934760449, "percentage": 2.73, "elapsed_time": "0:00:17", "remaining_time": "0:10:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 20, "total_steps": 366, "loss": 1.7883, "learning_rate": 5.405405405405406e-05, "epoch": 0.16309887869520898, "percentage": 5.46, "elapsed_time": "0:00:34", "remaining_time": "0:09:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 30, "total_steps": 366, "loss": 1.3865, "learning_rate": 8.108108108108109e-05, "epoch": 0.24464831804281345, "percentage": 8.2, "elapsed_time": "0:00:51", "remaining_time": "0:09:33", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 40, "total_steps": 366, "loss": 1.3647, "learning_rate": 9.999088210158001e-05, "epoch": 0.32619775739041795, "percentage": 10.93, "elapsed_time": "0:01:08", "remaining_time": "0:09:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 50, "total_steps": 366, "loss": 1.2114, "learning_rate": 9.967210469256656e-05, "epoch": 0.4077471967380224, "percentage": 13.66, "elapsed_time": "0:01:25", "remaining_time": "0:08:59", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 60, "total_steps": 366, "loss": 1.2255, "learning_rate": 9.890075235781779e-05, "epoch": 0.4892966360856269, "percentage": 16.39, "elapsed_time": "0:01:42", "remaining_time": "0:08:42", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 70, "total_steps": 366, "loss": 1.3242, "learning_rate": 9.768385308070138e-05, "epoch": 0.5708460754332314, "percentage": 19.13, "elapsed_time": "0:02:00", "remaining_time": "0:08:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 80, "total_steps": 366, "loss": 1.106, "learning_rate": 9.603249433382144e-05, "epoch": 0.6523955147808359, "percentage": 21.86, "elapsed_time": "0:02:18", "remaining_time": "0:08:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 90, "total_steps": 366, "loss": 1.1686, "learning_rate": 9.396172205829234e-05, "epoch": 0.7339449541284404, "percentage": 24.59, "elapsed_time": "0:02:36", "remaining_time": "0:08:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 100, "total_steps": 366, "loss": 1.148, "learning_rate": 9.149040357641929e-05, "epoch": 0.8154943934760448, "percentage": 27.32, "elapsed_time": "0:02:54", "remaining_time": "0:07:44", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 110, "total_steps": 366, "loss": 1.3969, "learning_rate": 8.864105568682244e-05, "epoch": 0.8970438328236493, "percentage": 30.05, "elapsed_time": "0:03:11", "remaining_time": "0:07:26", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 120, "total_steps": 366, "loss": 1.1595, "learning_rate": 8.543963950827279e-05, "epoch": 0.9785932721712538, "percentage": 32.79, "elapsed_time": "0:03:29", "remaining_time": "0:07:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 130, "total_steps": 366, "loss": 1.091, "learning_rate": 8.191532394146865e-05, "epoch": 1.0601427115188584, "percentage": 35.52, "elapsed_time": "0:03:47", "remaining_time": "0:06:52", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 140, "total_steps": 366, "loss": 1.0718, "learning_rate": 7.810021990391164e-05, "epoch": 1.1416921508664628, "percentage": 38.25, "elapsed_time": "0:04:05", "remaining_time": "0:06:36", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 150, "total_steps": 366, "loss": 1.0833, "learning_rate": 7.402908775933419e-05, "epoch": 1.2232415902140672, "percentage": 40.98, "elapsed_time": "0:04:23", "remaining_time": "0:06:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 160, "total_steps": 366, "loss": 1.073, "learning_rate": 6.973902060736226e-05, "epoch": 1.3047910295616718, "percentage": 43.72, "elapsed_time": "0:04:41", "remaining_time": "0:06:02", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 170, "total_steps": 366, "loss": 1.0823, "learning_rate": 6.526910631903973e-05, "epoch": 1.3863404689092762, "percentage": 46.45, "elapsed_time": "0:04:59", "remaining_time": "0:05:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 180, "total_steps": 366, "loss": 0.9893, "learning_rate": 6.0660071397493514e-05, "epoch": 1.4678899082568808, "percentage": 49.18, "elapsed_time": "0:05:16", "remaining_time": "0:05:27", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 190, "total_steps": 366, "loss": 0.9235, "learning_rate": 5.5953909908613114e-05, "epoch": 1.5494393476044852, "percentage": 51.91, "elapsed_time": "0:05:34", "remaining_time": "0:05:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 200, "total_steps": 366, "loss": 1.012, "learning_rate": 5.119350086265004e-05, "epoch": 1.6309887869520896, "percentage": 54.64, "elapsed_time": "0:05:51", "remaining_time": "0:04:51", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 210, "total_steps": 366, "loss": 1.0746, "learning_rate": 4.64222175328687e-05, "epoch": 1.7125382262996942, "percentage": 57.38, "elapsed_time": "0:06:09", "remaining_time": "0:04:34", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 220, "total_steps": 366, "loss": 1.0924, "learning_rate": 4.1683532270843504e-05, "epoch": 1.7940876656472988, "percentage": 60.11, "elapsed_time": "0:06:27", "remaining_time": "0:04:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 230, "total_steps": 366, "loss": 1.0545, "learning_rate": 3.7020620419029094e-05, "epoch": 1.8756371049949032, "percentage": 62.84, "elapsed_time": "0:06:44", "remaining_time": "0:03:59", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 240, "total_steps": 366, "loss": 1.0335, "learning_rate": 3.2475966929454504e-05, "epoch": 1.9571865443425076, "percentage": 65.57, "elapsed_time": "0:07:04", "remaining_time": "0:03:42", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 250, "total_steps": 366, "loss": 0.9929, "learning_rate": 2.8090979272736662e-05, "epoch": 2.038735983690112, "percentage": 68.31, "elapsed_time": "0:07:24", "remaining_time": "0:03:26", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 260, "total_steps": 366, "loss": 0.9195, "learning_rate": 2.3905610164295394e-05, "epoch": 2.120285423037717, "percentage": 71.04, "elapsed_time": "0:07:45", "remaining_time": "0:03:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 270, "total_steps": 366, "loss": 0.8906, "learning_rate": 1.995799354520598e-05, "epoch": 2.2018348623853212, "percentage": 73.77, "elapsed_time": "0:08:07", "remaining_time": "0:02:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 280, "total_steps": 366, "loss": 0.8852, "learning_rate": 1.6284097134357536e-05, "epoch": 2.2833843017329256, "percentage": 76.5, "elapsed_time": "0:08:26", "remaining_time": "0:02:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 290, "total_steps": 366, "loss": 0.8899, "learning_rate": 1.2917394717602121e-05, "epoch": 2.36493374108053, "percentage": 79.23, "elapsed_time": "0:08:48", "remaining_time": "0:02:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 300, "total_steps": 366, "loss": 0.9932, "learning_rate": 9.888561159748993e-06, "epoch": 2.4464831804281344, "percentage": 81.97, "elapsed_time": "0:09:08", "remaining_time": "0:02:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 310, "total_steps": 366, "loss": 0.8445, "learning_rate": 7.225192918226214e-06, "epoch": 2.528032619775739, "percentage": 84.7, "elapsed_time": "0:09:30", "remaining_time": "0:01:43", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 320, "total_steps": 366, "loss": 0.8961, "learning_rate": 4.951556604879048e-06, "epoch": 2.6095820591233436, "percentage": 87.43, "elapsed_time": "0:09:50", "remaining_time": "0:01:24", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 330, "total_steps": 366, "loss": 0.9571, "learning_rate": 3.0883678868214806e-06, "epoch": 2.691131498470948, "percentage": 90.16, "elapsed_time": "0:10:11", "remaining_time": "0:01:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 340, "total_steps": 366, "loss": 0.9048, "learning_rate": 1.6526027408301226e-06, "epoch": 2.7726809378185524, "percentage": 92.9, "elapsed_time": "0:10:32", "remaining_time": "0:00:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 350, "total_steps": 366, "loss": 0.8868, "learning_rate": 6.573427809888067e-07, "epoch": 2.8542303771661572, "percentage": 95.63, "elapsed_time": "0:10:52", "remaining_time": "0:00:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 360, "total_steps": 366, "loss": 0.9579, "learning_rate": 1.1165606884234181e-07, "epoch": 2.9357798165137616, "percentage": 98.36, "elapsed_time": "0:11:12", "remaining_time": "0:00:11", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 366, "total_steps": 366, "epoch": 2.984709480122324, "percentage": 100.0, "elapsed_time": "0:11:24", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}