Upcycled-Qwen1.5-MoE2.7B-LoRA / trainer_log.jsonl
gabrielmbmb's picture
gabrielmbmb HF staff
Upload folder using huggingface_hub
3999164 verified
{"current_steps": 10, "total_steps": 480, "loss": 8.9503, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.994647308096509e-05, "epoch": 0.06, "percentage": 2.08, "elapsed_time": "0:01:45", "remaining_time": "1:22:28"}
{"current_steps": 20, "total_steps": 480, "loss": 8.0851, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9786121534345265e-05, "epoch": 0.12, "percentage": 4.17, "elapsed_time": "0:03:35", "remaining_time": "1:22:28"}
{"current_steps": 30, "total_steps": 480, "loss": 7.5574, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.951963201008076e-05, "epoch": 0.19, "percentage": 6.25, "elapsed_time": "0:05:25", "remaining_time": "1:21:16"}
{"current_steps": 40, "total_steps": 480, "loss": 6.9461, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.914814565722671e-05, "epoch": 0.25, "percentage": 8.33, "elapsed_time": "0:07:16", "remaining_time": "1:20:00"}
{"current_steps": 50, "total_steps": 480, "loss": 6.1526, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.867325323737765e-05, "epoch": 0.31, "percentage": 10.42, "elapsed_time": "0:09:07", "remaining_time": "1:18:27"}
{"current_steps": 60, "total_steps": 480, "loss": 5.4116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8096988312782174e-05, "epoch": 0.38, "percentage": 12.5, "elapsed_time": "0:10:55", "remaining_time": "1:16:25"}
{"current_steps": 70, "total_steps": 480, "loss": 5.0721, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.742181853831721e-05, "epoch": 0.44, "percentage": 14.58, "elapsed_time": "0:12:44", "remaining_time": "1:14:36"}
{"current_steps": 80, "total_steps": 480, "loss": 4.7695, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.665063509461097e-05, "epoch": 0.5, "percentage": 16.67, "elapsed_time": "0:14:35", "remaining_time": "1:12:58"}
{"current_steps": 90, "total_steps": 480, "loss": 4.6179, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5786740307563636e-05, "epoch": 0.56, "percentage": 18.75, "elapsed_time": "0:16:28", "remaining_time": "1:11:21"}
{"current_steps": 100, "total_steps": 480, "loss": 4.4848, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4833833507280884e-05, "epoch": 0.62, "percentage": 20.83, "elapsed_time": "0:18:21", "remaining_time": "1:09:43"}
{"current_steps": 110, "total_steps": 480, "loss": 4.3627, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.379599518697444e-05, "epoch": 0.69, "percentage": 22.92, "elapsed_time": "0:20:14", "remaining_time": "1:08:05"}
{"current_steps": 120, "total_steps": 480, "loss": 4.3319, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.267766952966369e-05, "epoch": 0.75, "percentage": 25.0, "elapsed_time": "0:22:08", "remaining_time": "1:06:26"}
{"current_steps": 130, "total_steps": 480, "loss": 4.269, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.148364537750172e-05, "epoch": 0.81, "percentage": 27.08, "elapsed_time": "0:24:02", "remaining_time": "1:04:44"}
{"current_steps": 140, "total_steps": 480, "loss": 4.2557, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.021903572521802e-05, "epoch": 0.88, "percentage": 29.17, "elapsed_time": "0:25:56", "remaining_time": "1:03:00"}
{"current_steps": 150, "total_steps": 480, "loss": 4.1534, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.888925582549006e-05, "epoch": 0.94, "percentage": 31.25, "elapsed_time": "0:27:51", "remaining_time": "1:01:17"}
{"current_steps": 160, "total_steps": 480, "loss": 4.2017, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7500000000000003e-05, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:29:45", "remaining_time": "0:59:30"}
{"current_steps": 170, "total_steps": 480, "loss": 4.1732, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6057217255475034e-05, "epoch": 1.06, "percentage": 35.42, "elapsed_time": "0:31:39", "remaining_time": "0:57:44"}
{"current_steps": 180, "total_steps": 480, "loss": 4.1358, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.456708580912725e-05, "epoch": 1.12, "percentage": 37.5, "elapsed_time": "0:33:33", "remaining_time": "0:55:56"}
{"current_steps": 190, "total_steps": 480, "loss": 4.1514, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.303598663257904e-05, "epoch": 1.19, "percentage": 39.58, "elapsed_time": "0:35:29", "remaining_time": "0:54:10"}
{"current_steps": 200, "total_steps": 480, "loss": 4.0925, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.147047612756302e-05, "epoch": 1.25, "percentage": 41.67, "elapsed_time": "0:37:25", "remaining_time": "0:52:23"}
{"current_steps": 210, "total_steps": 480, "loss": 4.0496, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9877258050403212e-05, "epoch": 1.31, "percentage": 43.75, "elapsed_time": "0:39:20", "remaining_time": "0:50:35"}
{"current_steps": 220, "total_steps": 480, "loss": 4.0853, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8263154805501297e-05, "epoch": 1.38, "percentage": 45.83, "elapsed_time": "0:41:16", "remaining_time": "0:48:46"}
{"current_steps": 230, "total_steps": 480, "loss": 4.1283, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.663507823075358e-05, "epoch": 1.44, "percentage": 47.92, "elapsed_time": "0:43:11", "remaining_time": "0:46:56"}
{"current_steps": 240, "total_steps": 480, "loss": 4.0567, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5e-05, "epoch": 1.5, "percentage": 50.0, "elapsed_time": "0:45:07", "remaining_time": "0:45:07"}
{"current_steps": 250, "total_steps": 480, "loss": 4.0141, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3364921769246423e-05, "epoch": 1.56, "percentage": 52.08, "elapsed_time": "0:47:03", "remaining_time": "0:43:17"}
{"current_steps": 260, "total_steps": 480, "loss": 4.0356, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.173684519449872e-05, "epoch": 1.62, "percentage": 54.17, "elapsed_time": "0:48:59", "remaining_time": "0:41:27"}
{"current_steps": 270, "total_steps": 480, "loss": 4.082, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0122741949596797e-05, "epoch": 1.69, "percentage": 56.25, "elapsed_time": "0:50:56", "remaining_time": "0:39:36"}
{"current_steps": 280, "total_steps": 480, "loss": 4.0767, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.852952387243698e-05, "epoch": 1.75, "percentage": 58.33, "elapsed_time": "0:52:52", "remaining_time": "0:37:46"}
{"current_steps": 290, "total_steps": 480, "loss": 3.9817, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6964013367420966e-05, "epoch": 1.81, "percentage": 60.42, "elapsed_time": "0:54:49", "remaining_time": "0:35:55"}
{"current_steps": 300, "total_steps": 480, "loss": 4.0618, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5432914190872757e-05, "epoch": 1.88, "percentage": 62.5, "elapsed_time": "0:56:46", "remaining_time": "0:34:03"}
{"current_steps": 310, "total_steps": 480, "loss": 4.0106, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3942782744524973e-05, "epoch": 1.94, "percentage": 64.58, "elapsed_time": "0:58:43", "remaining_time": "0:32:12"}
{"current_steps": 320, "total_steps": 480, "loss": 4.0549, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2500000000000006e-05, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "1:00:39", "remaining_time": "0:30:19"}
{"current_steps": 330, "total_steps": 480, "loss": 3.919, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1110744174509952e-05, "epoch": 2.06, "percentage": 68.75, "elapsed_time": "1:02:36", "remaining_time": "0:28:27"}
{"current_steps": 340, "total_steps": 480, "loss": 4.053, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.780964274781984e-06, "epoch": 2.12, "percentage": 70.83, "elapsed_time": "1:04:33", "remaining_time": "0:26:34"}
{"current_steps": 350, "total_steps": 480, "loss": 4.0032, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.51635462249828e-06, "epoch": 2.19, "percentage": 72.92, "elapsed_time": "1:06:30", "remaining_time": "0:24:42"}
{"current_steps": 360, "total_steps": 480, "loss": 3.991, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.3223304703363135e-06, "epoch": 2.25, "percentage": 75.0, "elapsed_time": "1:08:27", "remaining_time": "0:22:49"}
{"current_steps": 370, "total_steps": 480, "loss": 3.9941, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.204004813025568e-06, "epoch": 2.31, "percentage": 77.08, "elapsed_time": "1:10:25", "remaining_time": "0:20:56"}
{"current_steps": 380, "total_steps": 480, "loss": 4.0388, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.166166492719124e-06, "epoch": 2.38, "percentage": 79.17, "elapsed_time": "1:12:22", "remaining_time": "0:19:02"}
{"current_steps": 390, "total_steps": 480, "loss": 4.0767, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.213259692436367e-06, "epoch": 2.44, "percentage": 81.25, "elapsed_time": "1:14:20", "remaining_time": "0:17:09"}
{"current_steps": 400, "total_steps": 480, "loss": 3.9709, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3493649053890326e-06, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "1:16:16", "remaining_time": "0:15:15"}
{"current_steps": 410, "total_steps": 480, "loss": 3.9714, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.578181461682794e-06, "epoch": 2.56, "percentage": 85.42, "elapsed_time": "1:18:12", "remaining_time": "0:13:21"}
{"current_steps": 420, "total_steps": 480, "loss": 4.0366, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9030116872178316e-06, "epoch": 2.62, "percentage": 87.5, "elapsed_time": "1:20:10", "remaining_time": "0:11:27"}
{"current_steps": 430, "total_steps": 480, "loss": 3.9307, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3267467626223606e-06, "epoch": 2.69, "percentage": 89.58, "elapsed_time": "1:22:07", "remaining_time": "0:09:33"}
{"current_steps": 440, "total_steps": 480, "loss": 4.0751, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.51854342773295e-07, "epoch": 2.75, "percentage": 91.67, "elapsed_time": "1:24:05", "remaining_time": "0:07:38"}
{"current_steps": 450, "total_steps": 480, "loss": 4.0141, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.803679899192392e-07, "epoch": 2.81, "percentage": 93.75, "elapsed_time": "1:26:02", "remaining_time": "0:05:44"}
{"current_steps": 460, "total_steps": 480, "loss": 3.9676, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1387846565474045e-07, "epoch": 2.88, "percentage": 95.83, "elapsed_time": "1:27:59", "remaining_time": "0:03:49"}
{"current_steps": 470, "total_steps": 480, "loss": 4.0041, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.352691903491303e-08, "epoch": 2.94, "percentage": 97.92, "elapsed_time": "1:29:56", "remaining_time": "0:01:54"}
{"current_steps": 480, "total_steps": 480, "loss": 3.909, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 0.0, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:31:52", "remaining_time": "0:00:00"}
{"current_steps": 480, "total_steps": 480, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:31:52", "remaining_time": "0:00:00"}