qwen2-cangjie-lora / .ipynb_checkpoints /trainer_log-checkpoint.jsonl
Zxilly's picture
Upload folder using huggingface_hub
81ecfc5 verified
{"current_steps": 2, "total_steps": 384, "loss": 0.831, "learning_rate": 0.0001, "epoch": 0.015518913676042677, "percentage": 0.52, "elapsed_time": "0:00:43", "remaining_time": "2:18:17", "throughput": "12068.40", "total_tokens": 524288}
{"current_steps": 4, "total_steps": 384, "loss": 0.7398, "learning_rate": 9.999323662872997e-05, "epoch": 0.031037827352085354, "percentage": 1.04, "elapsed_time": "0:01:25", "remaining_time": "2:15:43", "throughput": "12232.95", "total_tokens": 1048576}
{"current_steps": 6, "total_steps": 384, "loss": 0.6438, "learning_rate": 9.99729483446475e-05, "epoch": 0.04655674102812803, "percentage": 1.56, "elapsed_time": "0:02:08", "remaining_time": "2:14:25", "throughput": "12285.60", "total_tokens": 1572864}
{"current_steps": 8, "total_steps": 384, "loss": 0.6032, "learning_rate": 9.993914063644052e-05, "epoch": 0.06207565470417071, "percentage": 2.08, "elapsed_time": "0:02:50", "remaining_time": "2:13:28", "throughput": "12307.51", "total_tokens": 2097152}
{"current_steps": 10, "total_steps": 384, "loss": 0.5433, "learning_rate": 9.989182265027232e-05, "epoch": 0.07759456838021339, "percentage": 2.6, "elapsed_time": "0:03:32", "remaining_time": "2:12:35", "throughput": "12323.95", "total_tokens": 2621440}
{"current_steps": 12, "total_steps": 384, "loss": 0.5228, "learning_rate": 9.98310071873072e-05, "epoch": 0.09311348205625607, "percentage": 3.12, "elapsed_time": "0:04:15", "remaining_time": "2:11:47", "throughput": "12332.99", "total_tokens": 3145728}
{"current_steps": 14, "total_steps": 384, "loss": 0.4702, "learning_rate": 9.97567107002474e-05, "epoch": 0.10863239573229874, "percentage": 3.65, "elapsed_time": "0:04:57", "remaining_time": "2:10:58", "throughput": "12341.83", "total_tokens": 3670016}
{"current_steps": 16, "total_steps": 384, "loss": 0.4574, "learning_rate": 9.966895328888194e-05, "epoch": 0.12415130940834142, "percentage": 4.17, "elapsed_time": "0:05:39", "remaining_time": "2:10:13", "throughput": "12347.21", "total_tokens": 4194304}
{"current_steps": 18, "total_steps": 384, "loss": 0.5093, "learning_rate": 9.956775869464901e-05, "epoch": 0.1396702230843841, "percentage": 4.69, "elapsed_time": "0:06:22", "remaining_time": "2:09:28", "throughput": "12350.26", "total_tokens": 4718592}
{"current_steps": 20, "total_steps": 384, "loss": 0.4771, "learning_rate": 9.945315429421306e-05, "epoch": 0.15518913676042678, "percentage": 5.21, "elapsed_time": "0:07:04", "remaining_time": "2:08:44", "throughput": "12352.92", "total_tokens": 5242880}
{"current_steps": 22, "total_steps": 384, "loss": 0.4343, "learning_rate": 9.932517109205849e-05, "epoch": 0.17070805043646944, "percentage": 5.73, "elapsed_time": "0:07:46", "remaining_time": "2:08:00", "throughput": "12355.21", "total_tokens": 5767168}
{"current_steps": 24, "total_steps": 384, "loss": 0.4455, "learning_rate": 9.918384371210176e-05, "epoch": 0.18622696411251213, "percentage": 6.25, "elapsed_time": "0:08:29", "remaining_time": "2:07:17", "throughput": "12356.26", "total_tokens": 6291456}
{"current_steps": 26, "total_steps": 384, "loss": 0.4669, "learning_rate": 9.902921038832455e-05, "epoch": 0.2017458777885548, "percentage": 6.77, "elapsed_time": "0:09:11", "remaining_time": "2:06:35", "throughput": "12356.01", "total_tokens": 6815744}
{"current_steps": 28, "total_steps": 384, "loss": 0.4723, "learning_rate": 9.886131295443003e-05, "epoch": 0.21726479146459748, "percentage": 7.29, "elapsed_time": "0:09:53", "remaining_time": "2:05:51", "throughput": "12357.59", "total_tokens": 7340032}
{"current_steps": 30, "total_steps": 384, "loss": 0.4364, "learning_rate": 9.868019683252543e-05, "epoch": 0.23278370514064015, "percentage": 7.81, "elapsed_time": "0:10:36", "remaining_time": "2:05:09", "throughput": "12358.33", "total_tokens": 7864320}
{"current_steps": 32, "total_steps": 384, "loss": 0.4013, "learning_rate": 9.848591102083375e-05, "epoch": 0.24830261881668284, "percentage": 8.33, "elapsed_time": "0:11:18", "remaining_time": "2:04:25", "throughput": "12360.25", "total_tokens": 8388608}
{"current_steps": 34, "total_steps": 384, "loss": 0.3875, "learning_rate": 9.82785080804381e-05, "epoch": 0.2638215324927255, "percentage": 8.85, "elapsed_time": "0:12:01", "remaining_time": "2:03:42", "throughput": "12360.97", "total_tokens": 8912896}
{"current_steps": 36, "total_steps": 384, "loss": 0.4187, "learning_rate": 9.805804412106198e-05, "epoch": 0.2793404461687682, "percentage": 9.38, "elapsed_time": "0:12:43", "remaining_time": "2:02:59", "throughput": "12361.99", "total_tokens": 9437184}
{"current_steps": 38, "total_steps": 384, "loss": 0.3981, "learning_rate": 9.782457878588977e-05, "epoch": 0.2948593598448109, "percentage": 9.9, "elapsed_time": "0:13:25", "remaining_time": "2:02:16", "throughput": "12363.00", "total_tokens": 9961472}
{"current_steps": 40, "total_steps": 384, "loss": 0.4121, "learning_rate": 9.757817523543109e-05, "epoch": 0.31037827352085356, "percentage": 10.42, "elapsed_time": "0:14:08", "remaining_time": "2:01:34", "throughput": "12363.06", "total_tokens": 10485760}
{"current_steps": 42, "total_steps": 384, "loss": 0.392, "learning_rate": 9.731890013043368e-05, "epoch": 0.3258971871968962, "percentage": 10.94, "elapsed_time": "0:14:50", "remaining_time": "2:00:51", "throughput": "12363.66", "total_tokens": 11010048}
{"current_steps": 44, "total_steps": 384, "loss": 0.3845, "learning_rate": 9.704682361384941e-05, "epoch": 0.3414161008729389, "percentage": 11.46, "elapsed_time": "0:15:32", "remaining_time": "2:00:08", "throughput": "12365.26", "total_tokens": 11534336}
{"current_steps": 46, "total_steps": 384, "loss": 0.397, "learning_rate": 9.676201929185809e-05, "epoch": 0.3569350145489816, "percentage": 11.98, "elapsed_time": "0:16:15", "remaining_time": "1:59:25", "throughput": "12365.45", "total_tokens": 12058624}
{"current_steps": 48, "total_steps": 384, "loss": 0.3753, "learning_rate": 9.646456421395446e-05, "epoch": 0.37245392822502427, "percentage": 12.5, "elapsed_time": "0:16:57", "remaining_time": "1:58:42", "throughput": "12365.80", "total_tokens": 12582912}
{"current_steps": 50, "total_steps": 384, "loss": 0.387, "learning_rate": 9.615453885210369e-05, "epoch": 0.3879728419010669, "percentage": 13.02, "elapsed_time": "0:17:39", "remaining_time": "1:58:00", "throughput": "12366.58", "total_tokens": 13107200}
{"current_steps": 52, "total_steps": 384, "loss": 0.3724, "learning_rate": 9.583202707897074e-05, "epoch": 0.4034917555771096, "percentage": 13.54, "elapsed_time": "0:18:22", "remaining_time": "1:57:17", "throughput": "12367.11", "total_tokens": 13631488}
{"current_steps": 54, "total_steps": 384, "loss": 0.4394, "learning_rate": 9.549711614523007e-05, "epoch": 0.4190106692531523, "percentage": 14.06, "elapsed_time": "0:19:04", "remaining_time": "1:56:34", "throughput": "12367.54", "total_tokens": 14155776}
{"current_steps": 56, "total_steps": 384, "loss": 0.4177, "learning_rate": 9.514989665596114e-05, "epoch": 0.43452958292919497, "percentage": 14.58, "elapsed_time": "0:19:46", "remaining_time": "1:55:52", "throughput": "12368.03", "total_tokens": 14680064}
{"current_steps": 58, "total_steps": 384, "loss": 0.3939, "learning_rate": 9.479046254613673e-05, "epoch": 0.45004849660523766, "percentage": 15.1, "elapsed_time": "0:20:29", "remaining_time": "1:55:09", "throughput": "12368.21", "total_tokens": 15204352}
{"current_steps": 60, "total_steps": 384, "loss": 0.4207, "learning_rate": 9.441891105521006e-05, "epoch": 0.4655674102812803, "percentage": 15.62, "elapsed_time": "0:21:11", "remaining_time": "1:54:26", "throughput": "12368.89", "total_tokens": 15728640}
{"current_steps": 62, "total_steps": 384, "loss": 0.3653, "learning_rate": 9.403534270080829e-05, "epoch": 0.481086323957323, "percentage": 16.15, "elapsed_time": "0:21:53", "remaining_time": "1:53:44", "throughput": "12369.17", "total_tokens": 16252928}
{"current_steps": 64, "total_steps": 384, "loss": 0.3925, "learning_rate": 9.3639861251539e-05, "epoch": 0.49660523763336567, "percentage": 16.67, "elapsed_time": "0:22:36", "remaining_time": "1:53:01", "throughput": "12369.62", "total_tokens": 16777216}
{"current_steps": 66, "total_steps": 384, "loss": 0.3982, "learning_rate": 9.323257369891703e-05, "epoch": 0.5121241513094084, "percentage": 17.19, "elapsed_time": "0:23:18", "remaining_time": "1:52:19", "throughput": "12369.99", "total_tokens": 17301504}
{"current_steps": 68, "total_steps": 384, "loss": 0.3709, "learning_rate": 9.281359022841965e-05, "epoch": 0.527643064985451, "percentage": 17.71, "elapsed_time": "0:24:01", "remaining_time": "1:51:36", "throughput": "12370.31", "total_tokens": 17825792}
{"current_steps": 70, "total_steps": 384, "loss": 0.3744, "learning_rate": 9.238302418967756e-05, "epoch": 0.5431619786614937, "percentage": 18.23, "elapsed_time": "0:24:43", "remaining_time": "1:50:54", "throughput": "12370.44", "total_tokens": 18350080}
{"current_steps": 72, "total_steps": 384, "loss": 0.3929, "learning_rate": 9.194099206580982e-05, "epoch": 0.5586808923375364, "percentage": 18.75, "elapsed_time": "0:25:25", "remaining_time": "1:50:11", "throughput": "12370.64", "total_tokens": 18874368}
{"current_steps": 74, "total_steps": 384, "loss": 0.3716, "learning_rate": 9.148761344191109e-05, "epoch": 0.574199806013579, "percentage": 19.27, "elapsed_time": "0:26:08", "remaining_time": "1:49:29", "throughput": "12370.73", "total_tokens": 19398656}
{"current_steps": 76, "total_steps": 384, "loss": 0.3959, "learning_rate": 9.102301097269974e-05, "epoch": 0.5897187196896218, "percentage": 19.79, "elapsed_time": "0:26:50", "remaining_time": "1:48:46", "throughput": "12371.11", "total_tokens": 19922944}
{"current_steps": 78, "total_steps": 384, "loss": 0.3514, "learning_rate": 9.054731034933549e-05, "epoch": 0.6052376333656644, "percentage": 20.31, "elapsed_time": "0:27:32", "remaining_time": "1:48:04", "throughput": "12371.15", "total_tokens": 20447232}
{"current_steps": 80, "total_steps": 384, "loss": 0.3767, "learning_rate": 9.006064026541548e-05, "epoch": 0.6207565470417071, "percentage": 20.83, "elapsed_time": "0:28:15", "remaining_time": "1:47:21", "throughput": "12371.38", "total_tokens": 20971520}
{"current_steps": 82, "total_steps": 384, "loss": 0.371, "learning_rate": 8.956313238215824e-05, "epoch": 0.6362754607177498, "percentage": 21.35, "elapsed_time": "0:28:57", "remaining_time": "1:46:39", "throughput": "12371.34", "total_tokens": 21495808}
{"current_steps": 84, "total_steps": 384, "loss": 0.3529, "learning_rate": 8.905492129278478e-05, "epoch": 0.6517943743937924, "percentage": 21.88, "elapsed_time": "0:29:39", "remaining_time": "1:45:56", "throughput": "12371.72", "total_tokens": 22020096}
{"current_steps": 86, "total_steps": 384, "loss": 0.3044, "learning_rate": 8.853614448610631e-05, "epoch": 0.6673132880698351, "percentage": 22.4, "elapsed_time": "0:30:22", "remaining_time": "1:45:14", "throughput": "12371.87", "total_tokens": 22544384}
{"current_steps": 88, "total_steps": 384, "loss": 0.3532, "learning_rate": 8.800694230932884e-05, "epoch": 0.6828322017458778, "percentage": 22.92, "elapsed_time": "0:31:04", "remaining_time": "1:44:31", "throughput": "12372.26", "total_tokens": 23068672}
{"current_steps": 90, "total_steps": 384, "loss": 0.3461, "learning_rate": 8.74674579300843e-05, "epoch": 0.6983511154219205, "percentage": 23.44, "elapsed_time": "0:31:46", "remaining_time": "1:43:49", "throughput": "12371.97", "total_tokens": 23592960}
{"current_steps": 92, "total_steps": 384, "loss": 0.3513, "learning_rate": 8.691783729769874e-05, "epoch": 0.7138700290979632, "percentage": 23.96, "elapsed_time": "0:32:29", "remaining_time": "1:43:07", "throughput": "12371.69", "total_tokens": 24117248}
{"current_steps": 94, "total_steps": 384, "loss": 0.3842, "learning_rate": 8.635822910370792e-05, "epoch": 0.7293889427740058, "percentage": 24.48, "elapsed_time": "0:33:11", "remaining_time": "1:42:24", "throughput": "12371.92", "total_tokens": 24641536}
{"current_steps": 96, "total_steps": 384, "loss": 0.363, "learning_rate": 8.578878474163115e-05, "epoch": 0.7449078564500485, "percentage": 25.0, "elapsed_time": "0:33:54", "remaining_time": "1:41:42", "throughput": "12372.14", "total_tokens": 25165824}
{"current_steps": 98, "total_steps": 384, "loss": 0.3079, "learning_rate": 8.520965826601394e-05, "epoch": 0.7604267701260912, "percentage": 25.52, "elapsed_time": "0:34:36", "remaining_time": "1:40:59", "throughput": "12372.67", "total_tokens": 25690112}
{"current_steps": 100, "total_steps": 384, "loss": 0.3769, "learning_rate": 8.462100635075097e-05, "epoch": 0.7759456838021338, "percentage": 26.04, "elapsed_time": "0:35:18", "remaining_time": "1:40:17", "throughput": "12372.59", "total_tokens": 26214400}
{"current_steps": 102, "total_steps": 384, "loss": 0.3907, "learning_rate": 8.40229882467003e-05, "epoch": 0.7914645974781765, "percentage": 26.56, "elapsed_time": "0:36:01", "remaining_time": "1:39:34", "throughput": "12372.64", "total_tokens": 26738688}
{"current_steps": 104, "total_steps": 384, "loss": 0.3457, "learning_rate": 8.341576573860048e-05, "epoch": 0.8069835111542192, "percentage": 27.08, "elapsed_time": "0:36:43", "remaining_time": "1:38:52", "throughput": "12372.69", "total_tokens": 27262976}
{"current_steps": 106, "total_steps": 384, "loss": 0.3889, "learning_rate": 8.279950310130217e-05, "epoch": 0.8225024248302619, "percentage": 27.6, "elapsed_time": "0:37:25", "remaining_time": "1:38:09", "throughput": "12373.02", "total_tokens": 27787264}
{"current_steps": 108, "total_steps": 384, "loss": 0.3142, "learning_rate": 8.2174367055326e-05, "epoch": 0.8380213385063046, "percentage": 28.12, "elapsed_time": "0:38:08", "remaining_time": "1:37:27", "throughput": "12373.10", "total_tokens": 28311552}
{"current_steps": 110, "total_steps": 384, "loss": 0.3299, "learning_rate": 8.154052672175887e-05, "epoch": 0.8535402521823472, "percentage": 28.65, "elapsed_time": "0:38:50", "remaining_time": "1:36:45", "throughput": "12373.25", "total_tokens": 28835840}
{"current_steps": 112, "total_steps": 384, "loss": 0.3425, "learning_rate": 8.089815357650089e-05, "epoch": 0.8690591658583899, "percentage": 29.17, "elapsed_time": "0:39:32", "remaining_time": "1:36:02", "throughput": "12373.26", "total_tokens": 29360128}
{"current_steps": 114, "total_steps": 384, "loss": 0.3363, "learning_rate": 8.024742140387506e-05, "epoch": 0.8845780795344326, "percentage": 29.69, "elapsed_time": "0:40:15", "remaining_time": "1:35:20", "throughput": "12373.27", "total_tokens": 29884416}
{"current_steps": 116, "total_steps": 384, "loss": 0.3725, "learning_rate": 7.95885062496126e-05, "epoch": 0.9000969932104753, "percentage": 30.21, "elapsed_time": "0:40:57", "remaining_time": "1:34:37", "throughput": "12373.41", "total_tokens": 30408704}
{"current_steps": 118, "total_steps": 384, "loss": 0.3397, "learning_rate": 7.892158637322646e-05, "epoch": 0.915615906886518, "percentage": 30.73, "elapsed_time": "0:41:39", "remaining_time": "1:33:55", "throughput": "12373.69", "total_tokens": 30932992}
{"current_steps": 120, "total_steps": 384, "loss": 0.2812, "learning_rate": 7.824684219978591e-05, "epoch": 0.9311348205625606, "percentage": 31.25, "elapsed_time": "0:42:22", "remaining_time": "1:33:12", "throughput": "12373.85", "total_tokens": 31457280}
{"current_steps": 122, "total_steps": 384, "loss": 0.3555, "learning_rate": 7.756445627110523e-05, "epoch": 0.9466537342386033, "percentage": 31.77, "elapsed_time": "0:43:04", "remaining_time": "1:32:30", "throughput": "12373.80", "total_tokens": 31981568}
{"current_steps": 124, "total_steps": 384, "loss": 0.3362, "learning_rate": 7.687461319635981e-05, "epoch": 0.962172647914646, "percentage": 32.29, "elapsed_time": "0:43:46", "remaining_time": "1:31:48", "throughput": "12373.83", "total_tokens": 32505856}
{"current_steps": 126, "total_steps": 384, "loss": 0.3133, "learning_rate": 7.6177499602143e-05, "epoch": 0.9776915615906887, "percentage": 32.81, "elapsed_time": "0:44:29", "remaining_time": "1:31:05", "throughput": "12374.00", "total_tokens": 33030144}
{"current_steps": 128, "total_steps": 384, "loss": 0.3119, "learning_rate": 7.547330408197695e-05, "epoch": 0.9932104752667313, "percentage": 33.33, "elapsed_time": "0:45:11", "remaining_time": "1:30:23", "throughput": "12374.26", "total_tokens": 33554432}
{"current_steps": 130, "total_steps": 384, "loss": 0.3117, "learning_rate": 7.476221714529167e-05, "epoch": 1.008729388942774, "percentage": 33.85, "elapsed_time": "0:45:54", "remaining_time": "1:29:40", "throughput": "12374.25", "total_tokens": 34078720}
{"current_steps": 132, "total_steps": 384, "loss": 0.329, "learning_rate": 7.404443116588548e-05, "epoch": 1.0242483026188167, "percentage": 34.38, "elapsed_time": "0:46:36", "remaining_time": "1:28:58", "throughput": "12374.33", "total_tokens": 34603008}
{"current_steps": 134, "total_steps": 384, "loss": 0.279, "learning_rate": 7.332014032988123e-05, "epoch": 1.0397672162948595, "percentage": 34.9, "elapsed_time": "0:47:18", "remaining_time": "1:28:16", "throughput": "12374.33", "total_tokens": 35127296}
{"current_steps": 136, "total_steps": 384, "loss": 0.2682, "learning_rate": 7.258954058319216e-05, "epoch": 1.055286129970902, "percentage": 35.42, "elapsed_time": "0:48:01", "remaining_time": "1:27:33", "throughput": "12374.30", "total_tokens": 35651584}
{"current_steps": 138, "total_steps": 384, "loss": 0.293, "learning_rate": 7.185282957851175e-05, "epoch": 1.0708050436469447, "percentage": 35.94, "elapsed_time": "0:48:43", "remaining_time": "1:26:51", "throughput": "12374.23", "total_tokens": 36175872}
{"current_steps": 140, "total_steps": 384, "loss": 0.315, "learning_rate": 7.111020662184174e-05, "epoch": 1.0863239573229875, "percentage": 36.46, "elapsed_time": "0:49:25", "remaining_time": "1:26:08", "throughput": "12374.40", "total_tokens": 36700160}
{"current_steps": 142, "total_steps": 384, "loss": 0.289, "learning_rate": 7.036187261857289e-05, "epoch": 1.10184287099903, "percentage": 36.98, "elapsed_time": "0:50:08", "remaining_time": "1:25:26", "throughput": "12374.16", "total_tokens": 37224448}
{"current_steps": 144, "total_steps": 384, "loss": 0.2808, "learning_rate": 6.960803001913314e-05, "epoch": 1.1173617846750727, "percentage": 37.5, "elapsed_time": "0:50:50", "remaining_time": "1:24:44", "throughput": "12374.30", "total_tokens": 37748736}
{"current_steps": 146, "total_steps": 384, "loss": 0.318, "learning_rate": 6.884888276421766e-05, "epoch": 1.1328806983511155, "percentage": 38.02, "elapsed_time": "0:51:32", "remaining_time": "1:24:01", "throughput": "12374.33", "total_tokens": 38273024}
{"current_steps": 148, "total_steps": 384, "loss": 0.2685, "learning_rate": 6.808463622961578e-05, "epoch": 1.148399612027158, "percentage": 38.54, "elapsed_time": "0:52:15", "remaining_time": "1:23:19", "throughput": "12374.43", "total_tokens": 38797312}
{"current_steps": 150, "total_steps": 384, "loss": 0.3121, "learning_rate": 6.731549717064974e-05, "epoch": 1.1639185257032008, "percentage": 39.06, "elapsed_time": "0:52:57", "remaining_time": "1:22:37", "throughput": "12374.58", "total_tokens": 39321600}
{"current_steps": 152, "total_steps": 384, "loss": 0.2835, "learning_rate": 6.654167366624009e-05, "epoch": 1.1794374393792435, "percentage": 39.58, "elapsed_time": "0:53:39", "remaining_time": "1:21:54", "throughput": "12374.63", "total_tokens": 39845888}
{"current_steps": 154, "total_steps": 384, "loss": 0.2905, "learning_rate": 6.576337506261314e-05, "epoch": 1.1949563530552862, "percentage": 40.1, "elapsed_time": "0:54:22", "remaining_time": "1:21:12", "throughput": "12375.02", "total_tokens": 40370176}
{"current_steps": 156, "total_steps": 384, "loss": 0.3277, "learning_rate": 6.498081191666548e-05, "epoch": 1.2104752667313288, "percentage": 40.62, "elapsed_time": "0:55:04", "remaining_time": "1:20:29", "throughput": "12375.10", "total_tokens": 40894464}
{"current_steps": 158, "total_steps": 384, "loss": 0.2788, "learning_rate": 6.419419593900108e-05, "epoch": 1.2259941804073715, "percentage": 41.15, "elapsed_time": "0:55:46", "remaining_time": "1:19:47", "throughput": "12375.10", "total_tokens": 41418752}
{"current_steps": 160, "total_steps": 384, "loss": 0.2971, "learning_rate": 6.340373993665607e-05, "epoch": 1.2415130940834143, "percentage": 41.67, "elapsed_time": "0:56:29", "remaining_time": "1:19:05", "throughput": "12375.12", "total_tokens": 41943040}
{"current_steps": 162, "total_steps": 384, "loss": 0.287, "learning_rate": 6.260965775552712e-05, "epoch": 1.2570320077594568, "percentage": 42.19, "elapsed_time": "0:57:11", "remaining_time": "1:18:22", "throughput": "12374.98", "total_tokens": 42467328}
{"current_steps": 164, "total_steps": 384, "loss": 0.3196, "learning_rate": 6.181216422251862e-05, "epoch": 1.2725509214354995, "percentage": 42.71, "elapsed_time": "0:57:54", "remaining_time": "1:17:40", "throughput": "12375.00", "total_tokens": 42991616}
{"current_steps": 166, "total_steps": 384, "loss": 0.3021, "learning_rate": 6.101147508742455e-05, "epoch": 1.2880698351115423, "percentage": 43.23, "elapsed_time": "0:58:36", "remaining_time": "1:16:57", "throughput": "12375.01", "total_tokens": 43515904}
{"current_steps": 168, "total_steps": 384, "loss": 0.2329, "learning_rate": 6.0207806964560584e-05, "epoch": 1.3035887487875848, "percentage": 43.75, "elapsed_time": "0:59:18", "remaining_time": "1:16:15", "throughput": "12375.04", "total_tokens": 44040192}
{"current_steps": 170, "total_steps": 384, "loss": 0.2803, "learning_rate": 5.940137727416246e-05, "epoch": 1.3191076624636275, "percentage": 44.27, "elapsed_time": "1:00:01", "remaining_time": "1:15:33", "throughput": "12375.12", "total_tokens": 44564480}
{"current_steps": 172, "total_steps": 384, "loss": 0.2744, "learning_rate": 5.8592404183566144e-05, "epoch": 1.3346265761396703, "percentage": 44.79, "elapsed_time": "1:00:43", "remaining_time": "1:14:50", "throughput": "12375.21", "total_tokens": 45088768}
{"current_steps": 174, "total_steps": 384, "loss": 0.3332, "learning_rate": 5.778110654818601e-05, "epoch": 1.3501454898157128, "percentage": 45.31, "elapsed_time": "1:01:25", "remaining_time": "1:14:08", "throughput": "12375.49", "total_tokens": 45613056}
{"current_steps": 176, "total_steps": 384, "loss": 0.3223, "learning_rate": 5.6967703852306786e-05, "epoch": 1.3656644034917556, "percentage": 45.83, "elapsed_time": "1:02:08", "remaining_time": "1:13:25", "throughput": "12375.42", "total_tokens": 46137344}
{"current_steps": 178, "total_steps": 384, "loss": 0.3127, "learning_rate": 5.6152416149705455e-05, "epoch": 1.3811833171677983, "percentage": 46.35, "elapsed_time": "1:02:50", "remaining_time": "1:12:43", "throughput": "12375.48", "total_tokens": 46661632}
{"current_steps": 180, "total_steps": 384, "loss": 0.2908, "learning_rate": 5.5335464004118986e-05, "epoch": 1.3967022308438408, "percentage": 46.88, "elapsed_time": "1:03:32", "remaining_time": "1:12:01", "throughput": "12375.67", "total_tokens": 47185920}
{"current_steps": 182, "total_steps": 384, "loss": 0.2918, "learning_rate": 5.4517068429574215e-05, "epoch": 1.4122211445198836, "percentage": 47.4, "elapsed_time": "1:04:15", "remaining_time": "1:11:18", "throughput": "12375.66", "total_tokens": 47710208}
{"current_steps": 184, "total_steps": 384, "loss": 0.268, "learning_rate": 5.3697450830595774e-05, "epoch": 1.4277400581959263, "percentage": 47.92, "elapsed_time": "1:04:57", "remaining_time": "1:10:36", "throughput": "12375.75", "total_tokens": 48234496}
{"current_steps": 186, "total_steps": 384, "loss": 0.2862, "learning_rate": 5.287683294230855e-05, "epoch": 1.4432589718719688, "percentage": 48.44, "elapsed_time": "1:05:39", "remaining_time": "1:09:54", "throughput": "12375.71", "total_tokens": 48758784}
{"current_steps": 188, "total_steps": 384, "loss": 0.3054, "learning_rate": 5.205543677045049e-05, "epoch": 1.4587778855480116, "percentage": 48.96, "elapsed_time": "1:06:22", "remaining_time": "1:09:11", "throughput": "12375.67", "total_tokens": 49283072}
{"current_steps": 190, "total_steps": 384, "loss": 0.2814, "learning_rate": 5.1233484531312414e-05, "epoch": 1.4742967992240543, "percentage": 49.48, "elapsed_time": "1:07:04", "remaining_time": "1:08:29", "throughput": "12375.72", "total_tokens": 49807360}
{"current_steps": 192, "total_steps": 384, "loss": 0.2703, "learning_rate": 5.0411198591620676e-05, "epoch": 1.489815712900097, "percentage": 50.0, "elapsed_time": "1:07:46", "remaining_time": "1:07:46", "throughput": "12375.63", "total_tokens": 50331648}
{"current_steps": 194, "total_steps": 384, "loss": 0.2689, "learning_rate": 4.958880140837933e-05, "epoch": 1.5053346265761398, "percentage": 50.52, "elapsed_time": "1:08:29", "remaining_time": "1:07:04", "throughput": "12375.75", "total_tokens": 50855936}
{"current_steps": 196, "total_steps": 384, "loss": 0.3013, "learning_rate": 4.876651546868759e-05, "epoch": 1.5208535402521823, "percentage": 51.04, "elapsed_time": "1:09:11", "remaining_time": "1:06:22", "throughput": "12375.76", "total_tokens": 51380224}
{"current_steps": 198, "total_steps": 384, "loss": 0.2751, "learning_rate": 4.794456322954952e-05, "epoch": 1.536372453928225, "percentage": 51.56, "elapsed_time": "1:09:54", "remaining_time": "1:05:39", "throughput": "12375.72", "total_tokens": 51904512}
{"current_steps": 200, "total_steps": 384, "loss": 0.3178, "learning_rate": 4.712316705769145e-05, "epoch": 1.5518913676042678, "percentage": 52.08, "elapsed_time": "1:10:36", "remaining_time": "1:04:57", "throughput": "12375.77", "total_tokens": 52428800}
{"current_steps": 202, "total_steps": 384, "loss": 0.2742, "learning_rate": 4.630254916940424e-05, "epoch": 1.5674102812803103, "percentage": 52.6, "elapsed_time": "1:11:18", "remaining_time": "1:04:15", "throughput": "12375.80", "total_tokens": 52953088}
{"current_steps": 204, "total_steps": 384, "loss": 0.2751, "learning_rate": 4.548293157042581e-05, "epoch": 1.582929194956353, "percentage": 53.12, "elapsed_time": "1:12:01", "remaining_time": "1:03:32", "throughput": "12375.84", "total_tokens": 53477376}
{"current_steps": 206, "total_steps": 384, "loss": 0.3256, "learning_rate": 4.466453599588103e-05, "epoch": 1.5984481086323958, "percentage": 53.65, "elapsed_time": "1:12:43", "remaining_time": "1:02:50", "throughput": "12375.76", "total_tokens": 54001664}
{"current_steps": 208, "total_steps": 384, "loss": 0.2603, "learning_rate": 4.384758385029457e-05, "epoch": 1.6139670223084384, "percentage": 54.17, "elapsed_time": "1:13:25", "remaining_time": "1:02:08", "throughput": "12375.86", "total_tokens": 54525952}
{"current_steps": 210, "total_steps": 384, "loss": 0.2598, "learning_rate": 4.3032296147693225e-05, "epoch": 1.629485935984481, "percentage": 54.69, "elapsed_time": "1:14:08", "remaining_time": "1:01:25", "throughput": "12375.97", "total_tokens": 55050240}
{"current_steps": 212, "total_steps": 384, "loss": 0.2811, "learning_rate": 4.2218893451814005e-05, "epoch": 1.6450048496605238, "percentage": 55.21, "elapsed_time": "1:14:50", "remaining_time": "1:00:43", "throughput": "12376.02", "total_tokens": 55574528}
{"current_steps": 214, "total_steps": 384, "loss": 0.2386, "learning_rate": 4.140759581643386e-05, "epoch": 1.6605237633365664, "percentage": 55.73, "elapsed_time": "1:15:32", "remaining_time": "1:00:00", "throughput": "12375.99", "total_tokens": 56098816}
{"current_steps": 216, "total_steps": 384, "loss": 0.2999, "learning_rate": 4.059862272583755e-05, "epoch": 1.6760426770126091, "percentage": 56.25, "elapsed_time": "1:16:15", "remaining_time": "0:59:18", "throughput": "12375.93", "total_tokens": 56623104}
{"current_steps": 218, "total_steps": 384, "loss": 0.2857, "learning_rate": 3.979219303543942e-05, "epoch": 1.6915615906886519, "percentage": 56.77, "elapsed_time": "1:16:57", "remaining_time": "0:58:36", "throughput": "12375.82", "total_tokens": 57147392}
{"current_steps": 220, "total_steps": 384, "loss": 0.2533, "learning_rate": 3.898852491257546e-05, "epoch": 1.7070805043646944, "percentage": 57.29, "elapsed_time": "1:17:40", "remaining_time": "0:57:53", "throughput": "12375.79", "total_tokens": 57671680}
{"current_steps": 222, "total_steps": 384, "loss": 0.306, "learning_rate": 3.818783577748138e-05, "epoch": 1.7225994180407371, "percentage": 57.81, "elapsed_time": "1:18:22", "remaining_time": "0:57:11", "throughput": "12375.96", "total_tokens": 58195968}
{"current_steps": 224, "total_steps": 384, "loss": 0.2594, "learning_rate": 3.739034224447289e-05, "epoch": 1.7381183317167799, "percentage": 58.33, "elapsed_time": "1:19:04", "remaining_time": "0:56:29", "throughput": "12376.04", "total_tokens": 58720256}
{"current_steps": 226, "total_steps": 384, "loss": 0.284, "learning_rate": 3.659626006334395e-05, "epoch": 1.7536372453928224, "percentage": 58.85, "elapsed_time": "1:19:47", "remaining_time": "0:55:46", "throughput": "12376.05", "total_tokens": 59244544}
{"current_steps": 228, "total_steps": 384, "loss": 0.33, "learning_rate": 3.580580406099893e-05, "epoch": 1.7691561590688651, "percentage": 59.38, "elapsed_time": "1:20:29", "remaining_time": "0:55:04", "throughput": "12376.03", "total_tokens": 59768832}
{"current_steps": 230, "total_steps": 384, "loss": 0.2968, "learning_rate": 3.501918808333453e-05, "epoch": 1.7846750727449079, "percentage": 59.9, "elapsed_time": "1:21:11", "remaining_time": "0:54:21", "throughput": "12375.98", "total_tokens": 60293120}
{"current_steps": 232, "total_steps": 384, "loss": 0.2836, "learning_rate": 3.4236624937386876e-05, "epoch": 1.8001939864209504, "percentage": 60.42, "elapsed_time": "1:21:54", "remaining_time": "0:53:39", "throughput": "12376.10", "total_tokens": 60817408}
{"current_steps": 234, "total_steps": 384, "loss": 0.2452, "learning_rate": 3.3458326333759925e-05, "epoch": 1.8157129000969934, "percentage": 60.94, "elapsed_time": "1:22:36", "remaining_time": "0:52:57", "throughput": "12376.14", "total_tokens": 61341696}
{"current_steps": 236, "total_steps": 384, "loss": 0.2526, "learning_rate": 3.268450282935026e-05, "epoch": 1.831231813773036, "percentage": 61.46, "elapsed_time": "1:23:18", "remaining_time": "0:52:14", "throughput": "12376.09", "total_tokens": 61865984}
{"current_steps": 238, "total_steps": 384, "loss": 0.2578, "learning_rate": 3.191536377038422e-05, "epoch": 1.8467507274490784, "percentage": 61.98, "elapsed_time": "1:24:01", "remaining_time": "0:51:32", "throughput": "12376.15", "total_tokens": 62390272}
{"current_steps": 240, "total_steps": 384, "loss": 0.2895, "learning_rate": 3.115111723578235e-05, "epoch": 1.8622696411251214, "percentage": 62.5, "elapsed_time": "1:24:43", "remaining_time": "0:50:50", "throughput": "12376.22", "total_tokens": 62914560}
{"current_steps": 242, "total_steps": 384, "loss": 0.3047, "learning_rate": 3.0391969980866875e-05, "epoch": 1.877788554801164, "percentage": 63.02, "elapsed_time": "1:25:25", "remaining_time": "0:50:07", "throughput": "12376.20", "total_tokens": 63438848}
{"current_steps": 244, "total_steps": 384, "loss": 0.2958, "learning_rate": 2.963812738142713e-05, "epoch": 1.8933074684772064, "percentage": 63.54, "elapsed_time": "1:26:08", "remaining_time": "0:49:25", "throughput": "12376.13", "total_tokens": 63963136}
{"current_steps": 246, "total_steps": 384, "loss": 0.2598, "learning_rate": 2.888979337815828e-05, "epoch": 1.9088263821532494, "percentage": 64.06, "elapsed_time": "1:26:50", "remaining_time": "0:48:43", "throughput": "12376.10", "total_tokens": 64487424}
{"current_steps": 248, "total_steps": 384, "loss": 0.2699, "learning_rate": 2.8147170421488272e-05, "epoch": 1.924345295829292, "percentage": 64.58, "elapsed_time": "1:27:33", "remaining_time": "0:48:00", "throughput": "12376.09", "total_tokens": 65011712}
{"current_steps": 250, "total_steps": 384, "loss": 0.2827, "learning_rate": 2.7410459416807853e-05, "epoch": 1.9398642095053347, "percentage": 65.1, "elapsed_time": "1:28:15", "remaining_time": "0:47:18", "throughput": "12376.20", "total_tokens": 65536000}
{"current_steps": 252, "total_steps": 384, "loss": 0.3119, "learning_rate": 2.6679859670118783e-05, "epoch": 1.9553831231813774, "percentage": 65.62, "elapsed_time": "1:28:57", "remaining_time": "0:46:35", "throughput": "12376.24", "total_tokens": 66060288}
{"current_steps": 254, "total_steps": 384, "loss": 0.2837, "learning_rate": 2.5955568834114524e-05, "epoch": 1.97090203685742, "percentage": 66.15, "elapsed_time": "1:29:40", "remaining_time": "0:45:53", "throughput": "12376.19", "total_tokens": 66584576}
{"current_steps": 256, "total_steps": 384, "loss": 0.2511, "learning_rate": 2.5237782854708348e-05, "epoch": 1.9864209505334627, "percentage": 66.67, "elapsed_time": "1:30:22", "remaining_time": "0:45:11", "throughput": "12376.12", "total_tokens": 67108864}
{"current_steps": 258, "total_steps": 384, "loss": 0.2501, "learning_rate": 2.452669591802307e-05, "epoch": 2.0019398642095054, "percentage": 67.19, "elapsed_time": "1:31:04", "remaining_time": "0:44:28", "throughput": "12376.06", "total_tokens": 67633152}
{"current_steps": 260, "total_steps": 384, "loss": 0.2296, "learning_rate": 2.3822500397857018e-05, "epoch": 2.017458777885548, "percentage": 67.71, "elapsed_time": "1:31:47", "remaining_time": "0:43:46", "throughput": "12376.11", "total_tokens": 68157440}
{"current_steps": 262, "total_steps": 384, "loss": 0.2333, "learning_rate": 2.3125386803640187e-05, "epoch": 2.0329776915615905, "percentage": 68.23, "elapsed_time": "1:32:29", "remaining_time": "0:43:04", "throughput": "12376.14", "total_tokens": 68681728}
{"current_steps": 264, "total_steps": 384, "loss": 0.2119, "learning_rate": 2.2435543728894792e-05, "epoch": 2.0484966052376334, "percentage": 68.75, "elapsed_time": "1:33:11", "remaining_time": "0:42:21", "throughput": "12376.08", "total_tokens": 69206016}
{"current_steps": 266, "total_steps": 384, "loss": 0.2676, "learning_rate": 2.175315780021411e-05, "epoch": 2.064015518913676, "percentage": 69.27, "elapsed_time": "1:33:54", "remaining_time": "0:41:39", "throughput": "12376.02", "total_tokens": 69730304}
{"current_steps": 268, "total_steps": 384, "loss": 0.2285, "learning_rate": 2.1078413626773546e-05, "epoch": 2.079534432589719, "percentage": 69.79, "elapsed_time": "1:34:36", "remaining_time": "0:40:57", "throughput": "12376.08", "total_tokens": 70254592}
{"current_steps": 270, "total_steps": 384, "loss": 0.2281, "learning_rate": 2.0411493750387423e-05, "epoch": 2.0950533462657615, "percentage": 70.31, "elapsed_time": "1:35:18", "remaining_time": "0:40:14", "throughput": "12376.12", "total_tokens": 70778880}
{"current_steps": 272, "total_steps": 384, "loss": 0.2701, "learning_rate": 1.9752578596124954e-05, "epoch": 2.110572259941804, "percentage": 70.83, "elapsed_time": "1:36:01", "remaining_time": "0:39:32", "throughput": "12376.07", "total_tokens": 71303168}
{"current_steps": 274, "total_steps": 384, "loss": 0.2033, "learning_rate": 1.9101846423499116e-05, "epoch": 2.126091173617847, "percentage": 71.35, "elapsed_time": "1:36:43", "remaining_time": "0:38:49", "throughput": "12375.95", "total_tokens": 71827456}
{"current_steps": 276, "total_steps": 384, "loss": 0.2489, "learning_rate": 1.8459473278241126e-05, "epoch": 2.1416100872938895, "percentage": 71.88, "elapsed_time": "1:37:26", "remaining_time": "0:38:07", "throughput": "12375.99", "total_tokens": 72351744}
{"current_steps": 278, "total_steps": 384, "loss": 0.2294, "learning_rate": 1.7825632944674015e-05, "epoch": 2.157129000969932, "percentage": 72.4, "elapsed_time": "1:38:08", "remaining_time": "0:37:25", "throughput": "12375.97", "total_tokens": 72876032}
{"current_steps": 280, "total_steps": 384, "loss": 0.2452, "learning_rate": 1.7200496898697832e-05, "epoch": 2.172647914645975, "percentage": 72.92, "elapsed_time": "1:38:50", "remaining_time": "0:36:42", "throughput": "12375.92", "total_tokens": 73400320}
{"current_steps": 282, "total_steps": 384, "loss": 0.242, "learning_rate": 1.6584234261399534e-05, "epoch": 2.1881668283220175, "percentage": 73.44, "elapsed_time": "1:39:33", "remaining_time": "0:36:00", "throughput": "12375.92", "total_tokens": 73924608}
{"current_steps": 284, "total_steps": 384, "loss": 0.2894, "learning_rate": 1.5977011753299725e-05, "epoch": 2.20368574199806, "percentage": 73.96, "elapsed_time": "1:40:15", "remaining_time": "0:35:18", "throughput": "12376.01", "total_tokens": 74448896}
{"current_steps": 286, "total_steps": 384, "loss": 0.231, "learning_rate": 1.537899364924905e-05, "epoch": 2.219204655674103, "percentage": 74.48, "elapsed_time": "1:40:57", "remaining_time": "0:34:35", "throughput": "12376.10", "total_tokens": 74973184}
{"current_steps": 288, "total_steps": 384, "loss": 0.2412, "learning_rate": 1.4790341733986085e-05, "epoch": 2.2347235693501455, "percentage": 75.0, "elapsed_time": "1:41:40", "remaining_time": "0:33:53", "throughput": "12376.22", "total_tokens": 75497472}
{"current_steps": 290, "total_steps": 384, "loss": 0.2464, "learning_rate": 1.4211215258368866e-05, "epoch": 2.250242483026188, "percentage": 75.52, "elapsed_time": "1:42:22", "remaining_time": "0:33:11", "throughput": "12376.23", "total_tokens": 76021760}
{"current_steps": 292, "total_steps": 384, "loss": 0.2231, "learning_rate": 1.3641770896292084e-05, "epoch": 2.265761396702231, "percentage": 76.04, "elapsed_time": "1:43:04", "remaining_time": "0:32:28", "throughput": "12376.27", "total_tokens": 76546048}
{"current_steps": 294, "total_steps": 384, "loss": 0.2432, "learning_rate": 1.3082162702301276e-05, "epoch": 2.2812803103782735, "percentage": 76.56, "elapsed_time": "1:43:47", "remaining_time": "0:31:46", "throughput": "12376.31", "total_tokens": 77070336}
{"current_steps": 296, "total_steps": 384, "loss": 0.2147, "learning_rate": 1.253254206991572e-05, "epoch": 2.296799224054316, "percentage": 77.08, "elapsed_time": "1:44:29", "remaining_time": "0:31:03", "throughput": "12376.31", "total_tokens": 77594624}
{"current_steps": 298, "total_steps": 384, "loss": 0.249, "learning_rate": 1.1993057690671173e-05, "epoch": 2.312318137730359, "percentage": 77.6, "elapsed_time": "1:45:11", "remaining_time": "0:30:21", "throughput": "12376.43", "total_tokens": 78118912}
{"current_steps": 300, "total_steps": 384, "loss": 0.2362, "learning_rate": 1.1463855513893695e-05, "epoch": 2.3278370514064015, "percentage": 78.12, "elapsed_time": "1:45:54", "remaining_time": "0:29:39", "throughput": "12376.44", "total_tokens": 78643200}
{"current_steps": 302, "total_steps": 384, "loss": 0.2232, "learning_rate": 1.0945078707215222e-05, "epoch": 2.343355965082444, "percentage": 78.65, "elapsed_time": "1:46:36", "remaining_time": "0:28:56", "throughput": "12376.47", "total_tokens": 79167488}
{"current_steps": 304, "total_steps": 384, "loss": 0.2569, "learning_rate": 1.0436867617841768e-05, "epoch": 2.358874878758487, "percentage": 79.17, "elapsed_time": "1:47:18", "remaining_time": "0:28:14", "throughput": "12376.49", "total_tokens": 79691776}
{"current_steps": 306, "total_steps": 384, "loss": 0.214, "learning_rate": 9.939359734584553e-06, "epoch": 2.3743937924345295, "percentage": 79.69, "elapsed_time": "1:48:01", "remaining_time": "0:27:32", "throughput": "12376.52", "total_tokens": 80216064}
{"current_steps": 308, "total_steps": 384, "loss": 0.2451, "learning_rate": 9.452689650664515e-06, "epoch": 2.3899127061105725, "percentage": 80.21, "elapsed_time": "1:48:43", "remaining_time": "0:26:49", "throughput": "12376.53", "total_tokens": 80740352}
{"current_steps": 310, "total_steps": 384, "loss": 0.2288, "learning_rate": 8.976989027300264e-06, "epoch": 2.405431619786615, "percentage": 80.73, "elapsed_time": "1:49:25", "remaining_time": "0:26:07", "throughput": "12376.63", "total_tokens": 81264640}
{"current_steps": 312, "total_steps": 384, "loss": 0.2332, "learning_rate": 8.51238655808892e-06, "epoch": 2.4209505334626575, "percentage": 81.25, "elapsed_time": "1:50:08", "remaining_time": "0:25:25", "throughput": "12376.58", "total_tokens": 81788928}
{"current_steps": 314, "total_steps": 384, "loss": 0.202, "learning_rate": 8.059007934190194e-06, "epoch": 2.4364694471387, "percentage": 81.77, "elapsed_time": "1:50:50", "remaining_time": "0:24:42", "throughput": "12376.66", "total_tokens": 82313216}
{"current_steps": 316, "total_steps": 384, "loss": 0.227, "learning_rate": 7.61697581032243e-06, "epoch": 2.451988360814743, "percentage": 82.29, "elapsed_time": "1:51:33", "remaining_time": "0:24:00", "throughput": "12376.73", "total_tokens": 82837504}
{"current_steps": 318, "total_steps": 384, "loss": 0.2429, "learning_rate": 7.186409771580354e-06, "epoch": 2.4675072744907856, "percentage": 82.81, "elapsed_time": "1:52:15", "remaining_time": "0:23:17", "throughput": "12376.77", "total_tokens": 83361792}
{"current_steps": 320, "total_steps": 384, "loss": 0.2147, "learning_rate": 6.76742630108298e-06, "epoch": 2.4830261881668285, "percentage": 83.33, "elapsed_time": "1:52:57", "remaining_time": "0:22:35", "throughput": "12376.72", "total_tokens": 83886080}
{"current_steps": 322, "total_steps": 384, "loss": 0.2423, "learning_rate": 6.3601387484610145e-06, "epoch": 2.498545101842871, "percentage": 83.85, "elapsed_time": "1:53:40", "remaining_time": "0:21:53", "throughput": "12376.78", "total_tokens": 84410368}
{"current_steps": 324, "total_steps": 384, "loss": 0.2828, "learning_rate": 5.9646572991917116e-06, "epoch": 2.5140640155189136, "percentage": 84.38, "elapsed_time": "1:54:22", "remaining_time": "0:21:10", "throughput": "12376.78", "total_tokens": 84934656}
{"current_steps": 326, "total_steps": 384, "loss": 0.2461, "learning_rate": 5.581088944789953e-06, "epoch": 2.529582929194956, "percentage": 84.9, "elapsed_time": "1:55:04", "remaining_time": "0:20:28", "throughput": "12376.78", "total_tokens": 85458944}
{"current_steps": 328, "total_steps": 384, "loss": 0.296, "learning_rate": 5.209537453863289e-06, "epoch": 2.545101842870999, "percentage": 85.42, "elapsed_time": "1:55:47", "remaining_time": "0:19:46", "throughput": "12376.71", "total_tokens": 85983232}
{"current_steps": 330, "total_steps": 384, "loss": 0.2061, "learning_rate": 4.850103344038853e-06, "epoch": 2.5606207565470416, "percentage": 85.94, "elapsed_time": "1:56:29", "remaining_time": "0:19:03", "throughput": "12376.82", "total_tokens": 86507520}
{"current_steps": 332, "total_steps": 384, "loss": 0.2323, "learning_rate": 4.502883854769935e-06, "epoch": 2.5761396702230845, "percentage": 86.46, "elapsed_time": "1:57:11", "remaining_time": "0:18:21", "throughput": "12376.90", "total_tokens": 87031808}
{"current_steps": 334, "total_steps": 384, "loss": 0.2156, "learning_rate": 4.167972921029262e-06, "epoch": 2.591658583899127, "percentage": 86.98, "elapsed_time": "1:57:54", "remaining_time": "0:17:39", "throughput": "12376.92", "total_tokens": 87556096}
{"current_steps": 336, "total_steps": 384, "loss": 0.2393, "learning_rate": 3.845461147896323e-06, "epoch": 2.6071774975751696, "percentage": 87.5, "elapsed_time": "1:58:36", "remaining_time": "0:16:56", "throughput": "12376.97", "total_tokens": 88080384}
{"current_steps": 338, "total_steps": 384, "loss": 0.2165, "learning_rate": 3.535435786045538e-06, "epoch": 2.6226964112512126, "percentage": 88.02, "elapsed_time": "1:59:18", "remaining_time": "0:16:14", "throughput": "12377.00", "total_tokens": 88604672}
{"current_steps": 340, "total_steps": 384, "loss": 0.2313, "learning_rate": 3.2379807081419187e-06, "epoch": 2.638215324927255, "percentage": 88.54, "elapsed_time": "2:00:01", "remaining_time": "0:15:31", "throughput": "12377.04", "total_tokens": 89128960}
{"current_steps": 342, "total_steps": 384, "loss": 0.2336, "learning_rate": 2.9531763861505966e-06, "epoch": 2.653734238603298, "percentage": 89.06, "elapsed_time": "2:00:43", "remaining_time": "0:14:49", "throughput": "12377.01", "total_tokens": 89653248}
{"current_steps": 344, "total_steps": 384, "loss": 0.2311, "learning_rate": 2.6810998695663282e-06, "epoch": 2.6692531522793406, "percentage": 89.58, "elapsed_time": "2:01:25", "remaining_time": "0:14:07", "throughput": "12376.99", "total_tokens": 90177536}
{"current_steps": 346, "total_steps": 384, "loss": 0.213, "learning_rate": 2.4218247645689307e-06, "epoch": 2.684772065955383, "percentage": 90.1, "elapsed_time": "2:02:08", "remaining_time": "0:13:24", "throughput": "12376.99", "total_tokens": 90701824}
{"current_steps": 348, "total_steps": 384, "loss": 0.2364, "learning_rate": 2.1754212141102346e-06, "epoch": 2.7002909796314256, "percentage": 90.62, "elapsed_time": "2:02:50", "remaining_time": "0:12:42", "throughput": "12377.02", "total_tokens": 91226112}
{"current_steps": 350, "total_steps": 384, "loss": 0.2147, "learning_rate": 1.941955878938029e-06, "epoch": 2.7158098933074686, "percentage": 91.15, "elapsed_time": "2:03:32", "remaining_time": "0:12:00", "throughput": "12376.97", "total_tokens": 91750400}
{"current_steps": 352, "total_steps": 384, "loss": 0.2316, "learning_rate": 1.7214919195619127e-06, "epoch": 2.731328806983511, "percentage": 91.67, "elapsed_time": "2:04:15", "remaining_time": "0:11:17", "throughput": "12377.00", "total_tokens": 92274688}
{"current_steps": 354, "total_steps": 384, "loss": 0.2263, "learning_rate": 1.514088979166256e-06, "epoch": 2.746847720659554, "percentage": 92.19, "elapsed_time": "2:04:57", "remaining_time": "0:10:35", "throughput": "12376.97", "total_tokens": 92798976}
{"current_steps": 356, "total_steps": 384, "loss": 0.2323, "learning_rate": 1.3198031674745813e-06, "epoch": 2.7623666343355966, "percentage": 92.71, "elapsed_time": "2:05:40", "remaining_time": "0:09:53", "throughput": "12376.99", "total_tokens": 93323264}
{"current_steps": 358, "total_steps": 384, "loss": 0.2246, "learning_rate": 1.138687045569975e-06, "epoch": 2.777885548011639, "percentage": 93.23, "elapsed_time": "2:06:22", "remaining_time": "0:09:10", "throughput": "12376.92", "total_tokens": 93847552}
{"current_steps": 360, "total_steps": 384, "loss": 0.2287, "learning_rate": 9.707896116754488e-07, "epoch": 2.7934044616876816, "percentage": 93.75, "elapsed_time": "2:07:04", "remaining_time": "0:08:28", "throughput": "12376.95", "total_tokens": 94371840}
{"current_steps": 362, "total_steps": 384, "loss": 0.2081, "learning_rate": 8.161562878982398e-07, "epoch": 2.8089233753637246, "percentage": 94.27, "elapsed_time": "2:07:47", "remaining_time": "0:07:45", "throughput": "12376.94", "total_tokens": 94896128}
{"current_steps": 364, "total_steps": 384, "loss": 0.226, "learning_rate": 6.74828907941516e-07, "epoch": 2.824442289039767, "percentage": 94.79, "elapsed_time": "2:08:29", "remaining_time": "0:07:03", "throughput": "12377.00", "total_tokens": 95420416}
{"current_steps": 366, "total_steps": 384, "loss": 0.273, "learning_rate": 5.468457057869358e-07, "epoch": 2.83996120271581, "percentage": 95.31, "elapsed_time": "2:09:11", "remaining_time": "0:06:21", "throughput": "12377.06", "total_tokens": 95944704}
{"current_steps": 368, "total_steps": 384, "loss": 0.2634, "learning_rate": 4.322413053509944e-07, "epoch": 2.8554801163918526, "percentage": 95.83, "elapsed_time": "2:09:54", "remaining_time": "0:05:38", "throughput": "12377.12", "total_tokens": 96468992}
{"current_steps": 370, "total_steps": 384, "loss": 0.2592, "learning_rate": 3.3104671111806593e-07, "epoch": 2.870999030067895, "percentage": 96.35, "elapsed_time": "2:10:36", "remaining_time": "0:04:56", "throughput": "12377.13", "total_tokens": 96993280}
{"current_steps": 372, "total_steps": 384, "loss": 0.2566, "learning_rate": 2.432892997526026e-07, "epoch": 2.8865179437439377, "percentage": 96.88, "elapsed_time": "2:11:18", "remaining_time": "0:04:14", "throughput": "12377.08", "total_tokens": 97517568}
{"current_steps": 374, "total_steps": 384, "loss": 0.2575, "learning_rate": 1.6899281269279755e-07, "epoch": 2.9020368574199806, "percentage": 97.4, "elapsed_time": "2:12:01", "remaining_time": "0:03:31", "throughput": "12377.14", "total_tokens": 98041856}
{"current_steps": 376, "total_steps": 384, "loss": 0.2482, "learning_rate": 1.0817734972768944e-07, "epoch": 2.917555771096023, "percentage": 97.92, "elapsed_time": "2:12:43", "remaining_time": "0:02:49", "throughput": "12377.10", "total_tokens": 98566144}
{"current_steps": 378, "total_steps": 384, "loss": 0.2483, "learning_rate": 6.085936355947897e-08, "epoch": 2.933074684772066, "percentage": 98.44, "elapsed_time": "2:13:25", "remaining_time": "0:02:07", "throughput": "12377.12", "total_tokens": 99090432}
{"current_steps": 380, "total_steps": 384, "loss": 0.2359, "learning_rate": 2.7051655352494652e-08, "epoch": 2.9485935984481086, "percentage": 98.96, "elapsed_time": "2:14:08", "remaining_time": "0:01:24", "throughput": "12377.19", "total_tokens": 99614720}
{"current_steps": 382, "total_steps": 384, "loss": 0.2434, "learning_rate": 6.763371270035457e-09, "epoch": 2.964112512124151, "percentage": 99.48, "elapsed_time": "2:14:50", "remaining_time": "0:00:42", "throughput": "12377.19", "total_tokens": 100139008}
{"current_steps": 384, "total_steps": 384, "loss": 0.2042, "learning_rate": 0.0, "epoch": 2.979631425800194, "percentage": 100.0, "elapsed_time": "2:15:33", "remaining_time": "0:00:00", "throughput": "12377.13", "total_tokens": 100663296}
{"current_steps": 384, "total_steps": 384, "epoch": 2.979631425800194, "percentage": 100.0, "elapsed_time": "2:15:33", "remaining_time": "0:00:00", "throughput": "12375.80", "total_tokens": 100663296}