prosecalign
/

phi3m0128-cds-0.8-kendall-onof-decrease-corr-max-2-simpo-max1500-default

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:caccd1d257c08975a4bdce849e481458c3fa7cb875e557131170383c57d0ae00
 size 25200088

 version https://git-lfs.github.com/spec/v1
+oid sha256:50c7e2c3e6fdab2bc4329583f4f85e857cd58124735b41aec54a4fe333a5cb8d
 size 25200088

trainer_log.jsonl CHANGED Viewed

@@ -148,3 +148,9 @@
 {"current_steps": 1240, "total_steps": 1500, "loss": 0.1833, "accuracy": 0.9624999761581421, "learning_rate": 3.615893495987335e-07, "epoch": 1.0653201547056295, "percentage": 82.67, "elapsed_time": "2:43:05", "remaining_time": "0:34:11", "throughput": "0.00", "total_tokens": 0}
 {"current_steps": 1250, "total_steps": 1500, "loss": 0.1952, "accuracy": 0.9750000238418579, "learning_rate": 3.3493649053890325e-07, "epoch": 1.0739149119037388, "percentage": 83.33, "elapsed_time": "2:44:19", "remaining_time": "0:32:51", "throughput": "0.00", "total_tokens": 0}
 {"current_steps": 1250, "total_steps": 1500, "eval_loss": 0.21290886402130127, "epoch": 1.0739149119037388, "percentage": 83.33, "elapsed_time": "2:44:45", "remaining_time": "0:32:57", "throughput": "0.00", "total_tokens": 0}

 {"current_steps": 1240, "total_steps": 1500, "loss": 0.1833, "accuracy": 0.9624999761581421, "learning_rate": 3.615893495987335e-07, "epoch": 1.0653201547056295, "percentage": 82.67, "elapsed_time": "2:43:05", "remaining_time": "0:34:11", "throughput": "0.00", "total_tokens": 0}
 {"current_steps": 1250, "total_steps": 1500, "loss": 0.1952, "accuracy": 0.9750000238418579, "learning_rate": 3.3493649053890325e-07, "epoch": 1.0739149119037388, "percentage": 83.33, "elapsed_time": "2:44:19", "remaining_time": "0:32:51", "throughput": "0.00", "total_tokens": 0}
 {"current_steps": 1250, "total_steps": 1500, "eval_loss": 0.21290886402130127, "epoch": 1.0739149119037388, "percentage": 83.33, "elapsed_time": "2:44:45", "remaining_time": "0:32:57", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1260, "total_steps": 1500, "loss": 0.1889, "accuracy": 0.9375, "learning_rate": 3.092332998903416e-07, "epoch": 1.0825096691018479, "percentage": 84.0, "elapsed_time": "2:46:06", "remaining_time": "0:31:38", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1270, "total_steps": 1500, "loss": 0.1759, "accuracy": 0.9125000238418579, "learning_rate": 2.844910519219632e-07, "epoch": 1.091104426299957, "percentage": 84.67, "elapsed_time": "2:47:20", "remaining_time": "0:30:18", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1280, "total_steps": 1500, "loss": 0.1962, "accuracy": 0.9624999761581421, "learning_rate": 2.6072059940146775e-07, "epoch": 1.0996991834980663, "percentage": 85.33, "elapsed_time": "2:48:36", "remaining_time": "0:28:58", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1290, "total_steps": 1500, "loss": 0.1807, "accuracy": 0.925000011920929, "learning_rate": 2.3793236883495164e-07, "epoch": 1.1082939406961754, "percentage": 86.0, "elapsed_time": "2:49:50", "remaining_time": "0:27:38", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1300, "total_steps": 1500, "loss": 0.2061, "accuracy": 0.949999988079071, "learning_rate": 2.1613635589349756e-07, "epoch": 1.1168886978942845, "percentage": 86.67, "elapsed_time": "2:51:00", "remaining_time": "0:26:18", "throughput": "0.00", "total_tokens": 0}
+{"current_steps": 1300, "total_steps": 1500, "eval_loss": 0.21187786757946014, "epoch": 1.1168886978942845, "percentage": 86.67, "elapsed_time": "2:51:26", "remaining_time": "0:26:22", "throughput": "0.00", "total_tokens": 0}