Phi-3-medium-128K-LoRA / trainer_log.jsonl
alsokit's picture
Upload 17 files
56e5777 verified
{"current_steps": 10, "total_steps": 462, "loss": 0.5099, "learning_rate": 1e-05, "epoch": 0.06472491909385113, "percentage": 2.16, "elapsed_time": "0:08:24", "remaining_time": "6:19:55", "throughput": "2571.26", "total_tokens": 1296736}
{"current_steps": 20, "total_steps": 462, "loss": 0.5115, "learning_rate": 2e-05, "epoch": 0.12944983818770225, "percentage": 4.33, "elapsed_time": "0:17:06", "remaining_time": "6:18:14", "throughput": "2534.92", "total_tokens": 2603088}
{"current_steps": 30, "total_steps": 462, "loss": 0.4846, "learning_rate": 3e-05, "epoch": 0.1941747572815534, "percentage": 6.49, "elapsed_time": "0:25:32", "remaining_time": "6:07:42", "throughput": "2535.35", "total_tokens": 3884384}
{"current_steps": 40, "total_steps": 462, "loss": 0.4076, "learning_rate": 4e-05, "epoch": 0.2588996763754045, "percentage": 8.66, "elapsed_time": "0:34:25", "remaining_time": "6:03:11", "throughput": "2553.97", "total_tokens": 5275424}
{"current_steps": 50, "total_steps": 462, "loss": 0.3073, "learning_rate": 5e-05, "epoch": 0.32362459546925565, "percentage": 10.82, "elapsed_time": "0:43:30", "remaining_time": "5:58:30", "throughput": "2556.06", "total_tokens": 6672576}
{"current_steps": 60, "total_steps": 462, "loss": 0.2516, "learning_rate": 4.992735514089577e-05, "epoch": 0.3883495145631068, "percentage": 12.99, "elapsed_time": "0:51:43", "remaining_time": "5:46:34", "throughput": "2559.12", "total_tokens": 7942496}
{"current_steps": 70, "total_steps": 462, "loss": 0.2256, "learning_rate": 4.970984274562741e-05, "epoch": 0.45307443365695793, "percentage": 15.15, "elapsed_time": "1:00:47", "remaining_time": "5:40:24", "throughput": "2549.95", "total_tokens": 9300240}
{"current_steps": 80, "total_steps": 462, "loss": 0.2146, "learning_rate": 4.934872690677953e-05, "epoch": 0.517799352750809, "percentage": 17.32, "elapsed_time": "1:09:49", "remaining_time": "5:33:26", "throughput": "2547.79", "total_tokens": 10674880}
{"current_steps": 90, "total_steps": 462, "loss": 0.2018, "learning_rate": 4.884610628109082e-05, "epoch": 0.5825242718446602, "percentage": 19.48, "elapsed_time": "1:18:41", "remaining_time": "5:25:16", "throughput": "2552.59", "total_tokens": 12052928}
{"current_steps": 100, "total_steps": 462, "loss": 0.1958, "learning_rate": 4.820490189292415e-05, "epoch": 0.6472491909385113, "percentage": 21.65, "elapsed_time": "1:27:15", "remaining_time": "5:15:53", "throughput": "2556.94", "total_tokens": 13387616}
{"current_steps": 110, "total_steps": 462, "loss": 0.1912, "learning_rate": 4.742884015847436e-05, "epoch": 0.7119741100323624, "percentage": 23.81, "elapsed_time": "1:35:39", "remaining_time": "5:06:06", "throughput": "2557.17", "total_tokens": 14676976}
{"current_steps": 120, "total_steps": 462, "loss": 0.1876, "learning_rate": 4.652243122936986e-05, "epoch": 0.7766990291262136, "percentage": 25.97, "elapsed_time": "1:44:09", "remaining_time": "4:56:51", "throughput": "2558.65", "total_tokens": 15991008}
{"current_steps": 130, "total_steps": 462, "loss": 0.1802, "learning_rate": 4.5490942781526316e-05, "epoch": 0.8414239482200647, "percentage": 28.14, "elapsed_time": "1:52:27", "remaining_time": "4:47:12", "throughput": "2561.14", "total_tokens": 17281840}
{"current_steps": 140, "total_steps": 462, "loss": 0.1793, "learning_rate": 4.434036940158062e-05, "epoch": 0.9061488673139159, "percentage": 30.3, "elapsed_time": "2:01:06", "remaining_time": "4:38:33", "throughput": "2560.01", "total_tokens": 18603200}
{"current_steps": 150, "total_steps": 462, "loss": 0.1759, "learning_rate": 4.307739774881878e-05, "epoch": 0.970873786407767, "percentage": 32.47, "elapsed_time": "2:08:53", "remaining_time": "4:28:06", "throughput": "2565.87", "total_tokens": 19844160}
{"current_steps": 160, "total_steps": 462, "loss": 0.1746, "learning_rate": 4.170936769506222e-05, "epoch": 1.035598705501618, "percentage": 34.63, "elapsed_time": "2:17:10", "remaining_time": "4:18:55", "throughput": "2564.86", "total_tokens": 21111168}
{"current_steps": 170, "total_steps": 462, "loss": 0.1699, "learning_rate": 4.024422966835136e-05, "epoch": 1.1003236245954693, "percentage": 36.8, "elapsed_time": "2:25:52", "remaining_time": "4:10:34", "throughput": "2564.04", "total_tokens": 22442800}
{"current_steps": 180, "total_steps": 462, "loss": 0.168, "learning_rate": 3.86904984483277e-05, "epoch": 1.1650485436893203, "percentage": 38.96, "elapsed_time": "2:35:02", "remaining_time": "4:02:54", "throughput": "2560.03", "total_tokens": 23815072}
{"current_steps": 190, "total_steps": 462, "loss": 0.1646, "learning_rate": 3.7057203681836406e-05, "epoch": 1.2297734627831716, "percentage": 41.13, "elapsed_time": "2:42:57", "remaining_time": "3:53:17", "throughput": "2563.38", "total_tokens": 25064672}
{"current_steps": 200, "total_steps": 462, "loss": 0.1667, "learning_rate": 3.535383740633246e-05, "epoch": 1.2944983818770226, "percentage": 43.29, "elapsed_time": "2:51:22", "remaining_time": "3:44:30", "throughput": "2565.53", "total_tokens": 26380864}
{"current_steps": 210, "total_steps": 462, "loss": 0.1664, "learning_rate": 3.3590298886062833e-05, "epoch": 1.3592233009708738, "percentage": 45.45, "elapsed_time": "3:00:09", "remaining_time": "3:36:11", "throughput": "2565.84", "total_tokens": 27735952}
{"current_steps": 220, "total_steps": 462, "loss": 0.1622, "learning_rate": 3.177683708161389e-05, "epoch": 1.4239482200647249, "percentage": 47.62, "elapsed_time": "3:08:20", "remaining_time": "3:27:10", "throughput": "2565.58", "total_tokens": 28991248}
{"current_steps": 230, "total_steps": 462, "loss": 0.1623, "learning_rate": 2.9923991087167658e-05, "epoch": 1.4886731391585761, "percentage": 49.78, "elapsed_time": "3:16:56", "remaining_time": "3:18:39", "throughput": "2565.43", "total_tokens": 30313776}
{"current_steps": 240, "total_steps": 462, "loss": 0.1616, "learning_rate": 2.804252888162079e-05, "epoch": 1.5533980582524272, "percentage": 51.95, "elapsed_time": "3:25:43", "remaining_time": "3:10:17", "throughput": "2565.51", "total_tokens": 31667088}
{"current_steps": 250, "total_steps": 462, "loss": 0.159, "learning_rate": 2.6143384749519866e-05, "epoch": 1.6181229773462782, "percentage": 54.11, "elapsed_time": "3:33:56", "remaining_time": "3:01:25", "throughput": "2566.31", "total_tokens": 32942832}
{"current_steps": 260, "total_steps": 462, "loss": 0.1619, "learning_rate": 2.423759573549647e-05, "epoch": 1.6828478964401294, "percentage": 56.28, "elapsed_time": "3:42:42", "remaining_time": "2:53:01", "throughput": "2564.17", "total_tokens": 34263264}
{"current_steps": 270, "total_steps": 462, "loss": 0.1616, "learning_rate": 2.23362375015031e-05, "epoch": 1.7475728155339807, "percentage": 58.44, "elapsed_time": "3:51:17", "remaining_time": "2:44:28", "throughput": "2565.53", "total_tokens": 35603952}
{"current_steps": 280, "total_steps": 462, "loss": 0.1604, "learning_rate": 2.0450359959620967e-05, "epoch": 1.8122977346278317, "percentage": 60.61, "elapsed_time": "3:59:33", "remaining_time": "2:35:42", "throughput": "2566.61", "total_tokens": 36890768}
{"current_steps": 290, "total_steps": 462, "loss": 0.1563, "learning_rate": 1.8590923054515503e-05, "epoch": 1.8770226537216828, "percentage": 62.77, "elapsed_time": "4:08:43", "remaining_time": "2:27:31", "throughput": "2564.70", "total_tokens": 38275536}
{"current_steps": 300, "total_steps": 462, "loss": 0.1548, "learning_rate": 1.676873306874547e-05, "epoch": 1.941747572815534, "percentage": 64.94, "elapsed_time": "4:17:35", "remaining_time": "2:19:05", "throughput": "2565.36", "total_tokens": 39648800}
{"current_steps": 310, "total_steps": 462, "loss": 0.1555, "learning_rate": 1.4994379821093049e-05, "epoch": 2.0064724919093853, "percentage": 67.1, "elapsed_time": "4:26:19", "remaining_time": "2:10:35", "throughput": "2565.22", "total_tokens": 40991008}
{"current_steps": 320, "total_steps": 462, "loss": 0.1526, "learning_rate": 1.3278175122892416e-05, "epoch": 2.071197411003236, "percentage": 69.26, "elapsed_time": "4:35:09", "remaining_time": "2:02:05", "throughput": "2564.82", "total_tokens": 42342880}
{"current_steps": 330, "total_steps": 462, "loss": 0.1526, "learning_rate": 1.1630092850023147e-05, "epoch": 2.1359223300970873, "percentage": 71.43, "elapsed_time": "4:43:48", "remaining_time": "1:53:31", "throughput": "2563.60", "total_tokens": 43654784}
{"current_steps": 340, "total_steps": 462, "loss": 0.1516, "learning_rate": 1.005971097884561e-05, "epoch": 2.2006472491909386, "percentage": 73.59, "elapsed_time": "4:52:41", "remaining_time": "1:45:01", "throughput": "2564.75", "total_tokens": 45039744}
{"current_steps": 350, "total_steps": 462, "loss": 0.1524, "learning_rate": 8.576155922941548e-06, "epoch": 2.26537216828479, "percentage": 75.76, "elapsed_time": "5:00:59", "remaining_time": "1:36:18", "throughput": "2565.56", "total_tokens": 46331568}
{"current_steps": 360, "total_steps": 462, "loss": 0.1502, "learning_rate": 7.1880494941517026e-06, "epoch": 2.3300970873786406, "percentage": 77.92, "elapsed_time": "5:09:57", "remaining_time": "1:27:49", "throughput": "2564.08", "total_tokens": 47685808}
{"current_steps": 370, "total_steps": 462, "loss": 0.1506, "learning_rate": 5.903458796151381e-06, "epoch": 2.394822006472492, "percentage": 80.09, "elapsed_time": "5:18:40", "remaining_time": "1:19:14", "throughput": "2563.27", "total_tokens": 49011728}
{"current_steps": 380, "total_steps": 462, "loss": 0.1479, "learning_rate": 4.729849341761602e-06, "epoch": 2.459546925566343, "percentage": 82.25, "elapsed_time": "5:27:36", "remaining_time": "1:10:41", "throughput": "2560.31", "total_tokens": 50327504}
{"current_steps": 390, "total_steps": 462, "loss": 0.1501, "learning_rate": 3.674041666458963e-06, "epoch": 2.524271844660194, "percentage": 84.42, "elapsed_time": "5:35:57", "remaining_time": "1:02:01", "throughput": "2560.79", "total_tokens": 51620256}
{"current_steps": 400, "total_steps": 462, "loss": 0.1504, "learning_rate": 2.7421716902285623e-06, "epoch": 2.588996763754045, "percentage": 86.58, "elapsed_time": "5:44:30", "remaining_time": "0:53:24", "throughput": "2561.70", "total_tokens": 52952880}
{"current_steps": 410, "total_steps": 462, "loss": 0.1524, "learning_rate": 1.939655058120521e-06, "epoch": 2.6537216828478964, "percentage": 88.74, "elapsed_time": "5:52:47", "remaining_time": "0:44:44", "throughput": "2561.78", "total_tokens": 54225632}
{"current_steps": 420, "total_steps": 462, "loss": 0.1494, "learning_rate": 1.271155666748311e-06, "epoch": 2.7184466019417477, "percentage": 90.91, "elapsed_time": "6:01:44", "remaining_time": "0:36:10", "throughput": "2559.88", "total_tokens": 55561888}
{"current_steps": 430, "total_steps": 462, "loss": 0.1515, "learning_rate": 7.405585596397313e-07, "epoch": 2.783171521035599, "percentage": 93.07, "elapsed_time": "6:10:28", "remaining_time": "0:27:34", "throughput": "2559.75", "total_tokens": 56900512}
{"current_steps": 440, "total_steps": 462, "loss": 0.1504, "learning_rate": 3.5094734896174987e-07, "epoch": 2.8478964401294498, "percentage": 95.24, "elapsed_time": "6:18:53", "remaining_time": "0:18:56", "throughput": "2559.97", "total_tokens": 58197632}
{"current_steps": 450, "total_steps": 462, "loss": 0.1498, "learning_rate": 1.0458629483476867e-07, "epoch": 2.912621359223301, "percentage": 97.4, "elapsed_time": "6:27:36", "remaining_time": "0:10:20", "throughput": "2559.45", "total_tokens": 59523808}
{"current_steps": 460, "total_steps": 462, "loss": 0.1469, "learning_rate": 2.9071463840540935e-09, "epoch": 2.9773462783171523, "percentage": 99.57, "elapsed_time": "6:35:45", "remaining_time": "0:01:43", "throughput": "2559.91", "total_tokens": 60787344}
{"current_steps": 462, "total_steps": 462, "epoch": 2.9902912621359223, "percentage": 100.0, "elapsed_time": "6:37:32", "remaining_time": "0:00:00", "throughput": "2560.36", "total_tokens": 61072080}