rbelanec commited on
Commit
77e6497
verified
1 Parent(s): cad1d0b

Training in progress, step 39400

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +41 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd43b691acd8093caca4884390fe6ea1227fe0595b216975e280e465d819cf1e
3
  size 460928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b33c2e0d57dc724d86cdd27a525c80e21278244ee4d1369e41eb0300906ec592
3
  size 460928
trainer_log.jsonl CHANGED
@@ -8034,3 +8034,44 @@
8034
  {"current_steps": 39195, "total_steps": 40000, "loss": 0.0, "lr": 0.0003004457579719011, "epoch": 277.98395721925135, "percentage": 97.99, "elapsed_time": "4:46:31", "remaining_time": "0:05:53", "throughput": 5820.81, "total_tokens": 100067176}
8035
  {"current_steps": 39200, "total_steps": 40000, "loss": 0.0, "lr": 0.00029673093086405867, "epoch": 278.01426024955435, "percentage": 98.0, "elapsed_time": "4:46:33", "remaining_time": "0:05:50", "throughput": 5820.69, "total_tokens": 100077240}
8036
  {"current_steps": 39200, "total_steps": 40000, "eval_loss": 0.7423530220985413, "epoch": 278.01426024955435, "percentage": 98.0, "elapsed_time": "4:46:36", "remaining_time": "0:05:50", "throughput": 5819.58, "total_tokens": 100077240}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8034
  {"current_steps": 39195, "total_steps": 40000, "loss": 0.0, "lr": 0.0003004457579719011, "epoch": 277.98395721925135, "percentage": 97.99, "elapsed_time": "4:46:31", "remaining_time": "0:05:53", "throughput": 5820.81, "total_tokens": 100067176}
8035
  {"current_steps": 39200, "total_steps": 40000, "loss": 0.0, "lr": 0.00029673093086405867, "epoch": 278.01426024955435, "percentage": 98.0, "elapsed_time": "4:46:33", "remaining_time": "0:05:50", "throughput": 5820.69, "total_tokens": 100077240}
8036
  {"current_steps": 39200, "total_steps": 40000, "eval_loss": 0.7423530220985413, "epoch": 278.01426024955435, "percentage": 98.0, "elapsed_time": "4:46:36", "remaining_time": "0:05:50", "throughput": 5819.58, "total_tokens": 100077240}
8037
+ {"current_steps": 39205, "total_steps": 40000, "loss": 0.0, "lr": 0.00029303918988159426, "epoch": 278.0499108734403, "percentage": 98.01, "elapsed_time": "4:46:40", "remaining_time": "0:05:48", "throughput": 5819.1, "total_tokens": 100089848}
8038
+ {"current_steps": 39210, "total_steps": 40000, "loss": 0.0, "lr": 0.0002893705355938192, "epoch": 278.0855614973262, "percentage": 98.02, "elapsed_time": "4:46:42", "remaining_time": "0:05:46", "throughput": 5819.11, "total_tokens": 100101784}
8039
+ {"current_steps": 39215, "total_steps": 40000, "loss": 0.0, "lr": 0.0002857249685664975, "epoch": 278.1212121212121, "percentage": 98.04, "elapsed_time": "4:46:44", "remaining_time": "0:05:44", "throughput": 5819.1, "total_tokens": 100113624}
8040
+ {"current_steps": 39220, "total_steps": 40000, "loss": 0.0, "lr": 0.0002821024893618129, "epoch": 278.15686274509807, "percentage": 98.05, "elapsed_time": "4:46:46", "remaining_time": "0:05:42", "throughput": 5819.08, "total_tokens": 100124984}
8041
+ {"current_steps": 39225, "total_steps": 40000, "loss": 0.0, "lr": 0.0002785030985383852, "epoch": 278.19251336898395, "percentage": 98.06, "elapsed_time": "4:46:48", "remaining_time": "0:05:39", "throughput": 5819.08, "total_tokens": 100136888}
8042
+ {"current_steps": 39230, "total_steps": 40000, "loss": 0.0, "lr": 0.00027492679665130356, "epoch": 278.2281639928699, "percentage": 98.08, "elapsed_time": "4:46:50", "remaining_time": "0:05:37", "throughput": 5819.18, "total_tokens": 100150936}
8043
+ {"current_steps": 39235, "total_steps": 40000, "loss": 0.0, "lr": 0.000271373584252077, "epoch": 278.2638146167558, "percentage": 98.09, "elapsed_time": "4:46:52", "remaining_time": "0:05:35", "throughput": 5819.23, "total_tokens": 100163768}
8044
+ {"current_steps": 39240, "total_steps": 40000, "loss": 0.0, "lr": 0.00026784346188865046, "epoch": 278.2994652406417, "percentage": 98.1, "elapsed_time": "4:46:54", "remaining_time": "0:05:33", "throughput": 5819.28, "total_tokens": 100176792}
8045
+ {"current_steps": 39245, "total_steps": 40000, "loss": 0.0, "lr": 0.0002643364301054218, "epoch": 278.3351158645276, "percentage": 98.11, "elapsed_time": "4:46:56", "remaining_time": "0:05:31", "throughput": 5819.37, "total_tokens": 100190648}
8046
+ {"current_steps": 39250, "total_steps": 40000, "loss": 0.0, "lr": 0.0002608524894431918, "epoch": 278.37076648841355, "percentage": 98.12, "elapsed_time": "4:46:58", "remaining_time": "0:05:29", "throughput": 5819.46, "total_tokens": 100204376}
8047
+ {"current_steps": 39255, "total_steps": 40000, "loss": 0.0, "lr": 0.000257391640439264, "epoch": 278.4064171122995, "percentage": 98.14, "elapsed_time": "4:47:00", "remaining_time": "0:05:26", "throughput": 5819.53, "total_tokens": 100217688}
8048
+ {"current_steps": 39260, "total_steps": 40000, "loss": 0.0, "lr": 0.00025395388362732806, "epoch": 278.4420677361854, "percentage": 98.15, "elapsed_time": "4:47:02", "remaining_time": "0:05:24", "throughput": 5819.59, "total_tokens": 100230680}
8049
+ {"current_steps": 39265, "total_steps": 40000, "loss": 0.0, "lr": 0.00025053921953751, "epoch": 278.4777183600713, "percentage": 98.16, "elapsed_time": "4:47:05", "remaining_time": "0:05:22", "throughput": 5819.63, "total_tokens": 100243448}
8050
+ {"current_steps": 39270, "total_steps": 40000, "loss": 0.0, "lr": 0.00024714764869643855, "epoch": 278.5133689839572, "percentage": 98.17, "elapsed_time": "4:47:07", "remaining_time": "0:05:20", "throughput": 5819.67, "total_tokens": 100256024}
8051
+ {"current_steps": 39275, "total_steps": 40000, "loss": 0.0, "lr": 0.0002437791716270954, "epoch": 278.54901960784315, "percentage": 98.19, "elapsed_time": "4:47:09", "remaining_time": "0:05:18", "throughput": 5819.67, "total_tokens": 100267832}
8052
+ {"current_steps": 39280, "total_steps": 40000, "loss": 0.0, "lr": 0.00024043378884896493, "epoch": 278.58467023172904, "percentage": 98.2, "elapsed_time": "4:47:11", "remaining_time": "0:05:15", "throughput": 5819.66, "total_tokens": 100279320}
8053
+ {"current_steps": 39285, "total_steps": 40000, "loss": 0.0, "lr": 0.00023711150087793453, "epoch": 278.620320855615, "percentage": 98.21, "elapsed_time": "4:47:13", "remaining_time": "0:05:13", "throughput": 5819.78, "total_tokens": 100293944}
8054
+ {"current_steps": 39290, "total_steps": 40000, "loss": 0.0, "lr": 0.000233812308226361, "epoch": 278.65597147950086, "percentage": 98.22, "elapsed_time": "4:47:15", "remaining_time": "0:05:11", "throughput": 5819.88, "total_tokens": 100308024}
8055
+ {"current_steps": 39295, "total_steps": 40000, "loss": 0.0, "lr": 0.00023053621140300406, "epoch": 278.6916221033868, "percentage": 98.24, "elapsed_time": "4:47:17", "remaining_time": "0:05:09", "throughput": 5819.9, "total_tokens": 100320312}
8056
+ {"current_steps": 39300, "total_steps": 40000, "loss": 0.0, "lr": 0.00022728321091307623, "epoch": 278.72727272727275, "percentage": 98.25, "elapsed_time": "4:47:19", "remaining_time": "0:05:07", "throughput": 5819.94, "total_tokens": 100333016}
8057
+ {"current_steps": 39305, "total_steps": 40000, "loss": 0.0, "lr": 0.0002240533072582429, "epoch": 278.76292335115863, "percentage": 98.26, "elapsed_time": "4:47:21", "remaining_time": "0:05:04", "throughput": 5819.98, "total_tokens": 100345624}
8058
+ {"current_steps": 39310, "total_steps": 40000, "loss": 0.0, "lr": 0.00022084650093658897, "epoch": 278.7985739750446, "percentage": 98.28, "elapsed_time": "4:47:23", "remaining_time": "0:05:02", "throughput": 5819.95, "total_tokens": 100356824}
8059
+ {"current_steps": 39315, "total_steps": 40000, "loss": 0.0, "lr": 0.0002176627924426522, "epoch": 278.83422459893046, "percentage": 98.29, "elapsed_time": "4:47:25", "remaining_time": "0:05:00", "throughput": 5820.03, "total_tokens": 100370328}
8060
+ {"current_steps": 39320, "total_steps": 40000, "loss": 0.0, "lr": 0.0002145021822673898, "epoch": 278.8698752228164, "percentage": 98.3, "elapsed_time": "4:47:27", "remaining_time": "0:04:58", "throughput": 5820.11, "total_tokens": 100383864}
8061
+ {"current_steps": 39325, "total_steps": 40000, "loss": 0.0, "lr": 0.00021136467089822862, "epoch": 278.9055258467023, "percentage": 98.31, "elapsed_time": "4:47:29", "remaining_time": "0:04:56", "throughput": 5820.19, "total_tokens": 100397464}
8062
+ {"current_steps": 39330, "total_steps": 40000, "loss": 0.0, "lr": 0.00020825025881898162, "epoch": 278.94117647058823, "percentage": 98.32, "elapsed_time": "4:47:31", "remaining_time": "0:04:53", "throughput": 5820.19, "total_tokens": 100409432}
8063
+ {"current_steps": 39335, "total_steps": 40000, "loss": 0.0, "lr": 0.0002051589465099479, "epoch": 278.9768270944742, "percentage": 98.34, "elapsed_time": "4:47:33", "remaining_time": "0:04:51", "throughput": 5820.23, "total_tokens": 100422008}
8064
+ {"current_steps": 39340, "total_steps": 40000, "loss": 0.0, "lr": 0.0002020907344478462, "epoch": 279.0071301247772, "percentage": 98.35, "elapsed_time": "4:47:36", "remaining_time": "0:04:49", "throughput": 5820.13, "total_tokens": 100432304}
8065
+ {"current_steps": 39345, "total_steps": 40000, "loss": 0.0, "lr": 0.0001990456231058313, "epoch": 279.0427807486631, "percentage": 98.36, "elapsed_time": "4:47:38", "remaining_time": "0:04:47", "throughput": 5820.18, "total_tokens": 100445232}
8066
+ {"current_steps": 39350, "total_steps": 40000, "loss": 0.0, "lr": 0.00019602361295349423, "epoch": 279.078431372549, "percentage": 98.38, "elapsed_time": "4:47:40", "remaining_time": "0:04:45", "throughput": 5820.22, "total_tokens": 100458096}
8067
+ {"current_steps": 39355, "total_steps": 40000, "loss": 0.0, "lr": 0.0001930247044568789, "epoch": 279.11408199643495, "percentage": 98.39, "elapsed_time": "4:47:42", "remaining_time": "0:04:42", "throughput": 5820.28, "total_tokens": 100471152}
8068
+ {"current_steps": 39360, "total_steps": 40000, "loss": 0.0, "lr": 0.00019004889807843205, "epoch": 279.14973262032083, "percentage": 98.4, "elapsed_time": "4:47:44", "remaining_time": "0:04:40", "throughput": 5820.28, "total_tokens": 100483088}
8069
+ {"current_steps": 39365, "total_steps": 40000, "loss": 0.0, "lr": 0.00018709619427708656, "epoch": 279.1853832442068, "percentage": 98.41, "elapsed_time": "4:47:46", "remaining_time": "0:04:38", "throughput": 5820.34, "total_tokens": 100496240}
8070
+ {"current_steps": 39370, "total_steps": 40000, "loss": 0.0, "lr": 0.00018416659350817822, "epoch": 279.2210338680927, "percentage": 98.42, "elapsed_time": "4:47:48", "remaining_time": "0:04:36", "throughput": 5820.39, "total_tokens": 100509200}
8071
+ {"current_steps": 39375, "total_steps": 40000, "loss": 0.0, "lr": 0.00018126009622346229, "epoch": 279.2566844919786, "percentage": 98.44, "elapsed_time": "4:47:50", "remaining_time": "0:04:34", "throughput": 5820.32, "total_tokens": 100521136}
8072
+ {"current_steps": 39380, "total_steps": 40000, "loss": 0.0, "lr": 0.00017837670287119687, "epoch": 279.29233511586455, "percentage": 98.45, "elapsed_time": "4:47:52", "remaining_time": "0:04:31", "throughput": 5820.35, "total_tokens": 100533616}
8073
+ {"current_steps": 39385, "total_steps": 40000, "loss": 0.0, "lr": 0.00017551641389602633, "epoch": 279.32798573975043, "percentage": 98.46, "elapsed_time": "4:47:54", "remaining_time": "0:04:29", "throughput": 5820.42, "total_tokens": 100546864}
8074
+ {"current_steps": 39390, "total_steps": 40000, "loss": 0.0, "lr": 0.00017267922973903115, "epoch": 279.3636363636364, "percentage": 98.47, "elapsed_time": "4:47:56", "remaining_time": "0:04:27", "throughput": 5820.42, "total_tokens": 100558800}
8075
+ {"current_steps": 39395, "total_steps": 40000, "loss": 0.0, "lr": 0.00016986515083774467, "epoch": 279.39928698752226, "percentage": 98.49, "elapsed_time": "4:47:58", "remaining_time": "0:04:25", "throughput": 5820.45, "total_tokens": 100571184}
8076
+ {"current_steps": 39400, "total_steps": 40000, "loss": 0.0, "lr": 0.00016707417762611975, "epoch": 279.4349376114082, "percentage": 98.5, "elapsed_time": "4:48:01", "remaining_time": "0:04:23", "throughput": 5820.55, "total_tokens": 100585296}
8077
+ {"current_steps": 39400, "total_steps": 40000, "eval_loss": 0.7336431741714478, "epoch": 279.4349376114082, "percentage": 98.5, "elapsed_time": "4:48:04", "remaining_time": "0:04:23", "throughput": 5819.45, "total_tokens": 100585296}