rbelanec commited on
Commit
d91a13a
verified
1 Parent(s): 71d2830

Training in progress, step 39800

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +41 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a60f7d4f447da7e141444485085146501db2197068a3da5e4bced8b61e3bad0
3
  size 18124968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe4d2c2a2ad4d5be81b2bc0e82aadf15b2a926ad2d302d5c5e344877ed7f02a8
3
  size 18124968
trainer_log.jsonl CHANGED
@@ -8116,3 +8116,44 @@
8116
  {"current_steps": 39595, "total_steps": 40000, "loss": 0.0, "lr": 1.2708814586862016e-08, "epoch": 10.450046192424443, "percentage": 98.99, "elapsed_time": "14:07:36", "remaining_time": "0:08:40", "throughput": 704.21, "total_tokens": 35813504}
8117
  {"current_steps": 39600, "total_steps": 40000, "loss": 0.0002, "lr": 1.2397742806111168e-08, "epoch": 10.45136597597994, "percentage": 99.0, "elapsed_time": "14:07:40", "remaining_time": "0:08:33", "throughput": 704.24, "total_tokens": 35817824}
8118
  {"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.2110472470521927, "epoch": 10.45136597597994, "percentage": 99.0, "elapsed_time": "14:09:09", "remaining_time": "0:08:34", "throughput": 703.0, "total_tokens": 35817824}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8116
  {"current_steps": 39595, "total_steps": 40000, "loss": 0.0, "lr": 1.2708814586862016e-08, "epoch": 10.450046192424443, "percentage": 98.99, "elapsed_time": "14:07:36", "remaining_time": "0:08:40", "throughput": 704.21, "total_tokens": 35813504}
8117
  {"current_steps": 39600, "total_steps": 40000, "loss": 0.0002, "lr": 1.2397742806111168e-08, "epoch": 10.45136597597994, "percentage": 99.0, "elapsed_time": "14:07:40", "remaining_time": "0:08:33", "throughput": 704.24, "total_tokens": 35817824}
8118
  {"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.2110472470521927, "epoch": 10.45136597597994, "percentage": 99.0, "elapsed_time": "14:09:09", "remaining_time": "0:08:34", "throughput": 703.0, "total_tokens": 35817824}
8119
+ {"current_steps": 39605, "total_steps": 40000, "loss": 0.0, "lr": 1.209052442764369e-08, "epoch": 10.452685759535436, "percentage": 99.01, "elapsed_time": "14:09:15", "remaining_time": "0:08:28", "throughput": 703.01, "total_tokens": 35822336}
8120
+ {"current_steps": 39610, "total_steps": 40000, "loss": 0.0, "lr": 1.17871594988328e-08, "epoch": 10.454005543090933, "percentage": 99.02, "elapsed_time": "14:09:19", "remaining_time": "0:08:21", "throughput": 703.03, "total_tokens": 35826560}
8121
+ {"current_steps": 39615, "total_steps": 40000, "loss": 0.0, "lr": 1.1487648066466072e-08, "epoch": 10.45532532664643, "percentage": 99.04, "elapsed_time": "14:09:24", "remaining_time": "0:08:15", "throughput": 703.07, "total_tokens": 35831104}
8122
+ {"current_steps": 39620, "total_steps": 40000, "loss": 0.0001, "lr": 1.1191990176728784e-08, "epoch": 10.456645110201928, "percentage": 99.05, "elapsed_time": "14:09:28", "remaining_time": "0:08:08", "throughput": 703.11, "total_tokens": 35836032}
8123
+ {"current_steps": 39625, "total_steps": 40000, "loss": 0.0013, "lr": 1.0900185875215018e-08, "epoch": 10.457964893757424, "percentage": 99.06, "elapsed_time": "14:09:32", "remaining_time": "0:08:02", "throughput": 703.14, "total_tokens": 35840608}
8124
+ {"current_steps": 39630, "total_steps": 40000, "loss": 0.0, "lr": 1.0612235206924891e-08, "epoch": 10.45928467731292, "percentage": 99.08, "elapsed_time": "14:09:36", "remaining_time": "0:07:55", "throughput": 703.17, "total_tokens": 35845280}
8125
+ {"current_steps": 39635, "total_steps": 40000, "loss": 0.0054, "lr": 1.0328138216264549e-08, "epoch": 10.460604460868417, "percentage": 99.09, "elapsed_time": "14:09:40", "remaining_time": "0:07:49", "throughput": 703.21, "total_tokens": 35849984}
8126
+ {"current_steps": 39640, "total_steps": 40000, "loss": 0.0, "lr": 1.004789494704339e-08, "epoch": 10.461924244423914, "percentage": 99.1, "elapsed_time": "14:09:44", "remaining_time": "0:07:43", "throughput": 703.24, "total_tokens": 35854464}
8127
+ {"current_steps": 39645, "total_steps": 40000, "loss": 0.0, "lr": 9.771505442482397e-09, "epoch": 10.463244027979412, "percentage": 99.11, "elapsed_time": "14:09:48", "remaining_time": "0:07:36", "throughput": 703.27, "total_tokens": 35858848}
8128
+ {"current_steps": 39650, "total_steps": 40000, "loss": 0.0003, "lr": 9.498969745200259e-09, "epoch": 10.464563811534909, "percentage": 99.12, "elapsed_time": "14:09:53", "remaining_time": "0:07:30", "throughput": 703.3, "total_tokens": 35863488}
8129
+ {"current_steps": 39655, "total_steps": 40000, "loss": 0.0, "lr": 9.230287897230017e-09, "epoch": 10.465883595090405, "percentage": 99.14, "elapsed_time": "14:09:57", "remaining_time": "0:07:23", "throughput": 703.34, "total_tokens": 35868288}
8130
+ {"current_steps": 39660, "total_steps": 40000, "loss": 0.0, "lr": 8.965459940002419e-09, "epoch": 10.467203378645902, "percentage": 99.15, "elapsed_time": "14:10:01", "remaining_time": "0:07:17", "throughput": 703.37, "total_tokens": 35872832}
8131
+ {"current_steps": 39665, "total_steps": 40000, "loss": 0.0001, "lr": 8.704485914357019e-09, "epoch": 10.468523162201398, "percentage": 99.16, "elapsed_time": "14:10:05", "remaining_time": "0:07:10", "throughput": 703.4, "total_tokens": 35877216}
8132
+ {"current_steps": 39670, "total_steps": 40000, "loss": 0.0, "lr": 8.447365860539402e-09, "epoch": 10.469842945756897, "percentage": 99.17, "elapsed_time": "14:10:09", "remaining_time": "0:07:04", "throughput": 703.43, "total_tokens": 35881824}
8133
+ {"current_steps": 39675, "total_steps": 40000, "loss": 0.0, "lr": 8.194099818201184e-09, "epoch": 10.471162729312393, "percentage": 99.19, "elapsed_time": "14:10:13", "remaining_time": "0:06:57", "throughput": 703.47, "total_tokens": 35886432}
8134
+ {"current_steps": 39680, "total_steps": 40000, "loss": 0.0, "lr": 7.944687826400011e-09, "epoch": 10.47248251286789, "percentage": 99.2, "elapsed_time": "14:10:17", "remaining_time": "0:06:51", "throughput": 703.5, "total_tokens": 35890912}
8135
+ {"current_steps": 39685, "total_steps": 40000, "loss": 0.0, "lr": 7.699129923599557e-09, "epoch": 10.473802296423386, "percentage": 99.21, "elapsed_time": "14:10:22", "remaining_time": "0:06:44", "throughput": 703.53, "total_tokens": 35895648}
8136
+ {"current_steps": 39690, "total_steps": 40000, "loss": 0.0284, "lr": 7.457426147663982e-09, "epoch": 10.475122079978883, "percentage": 99.22, "elapsed_time": "14:10:26", "remaining_time": "0:06:38", "throughput": 703.56, "total_tokens": 35900160}
8137
+ {"current_steps": 39695, "total_steps": 40000, "loss": 0.0, "lr": 7.219576535871797e-09, "epoch": 10.47644186353438, "percentage": 99.24, "elapsed_time": "14:10:30", "remaining_time": "0:06:32", "throughput": 703.59, "total_tokens": 35904576}
8138
+ {"current_steps": 39700, "total_steps": 40000, "loss": 0.0, "lr": 6.985581124896445e-09, "epoch": 10.477761647089878, "percentage": 99.25, "elapsed_time": "14:10:34", "remaining_time": "0:06:25", "throughput": 703.62, "total_tokens": 35909056}
8139
+ {"current_steps": 39705, "total_steps": 40000, "loss": 0.0, "lr": 6.755439950828501e-09, "epoch": 10.479081430645374, "percentage": 99.26, "elapsed_time": "14:10:38", "remaining_time": "0:06:19", "throughput": 703.65, "total_tokens": 35913312}
8140
+ {"current_steps": 39710, "total_steps": 40000, "loss": 0.0027, "lr": 6.5291530491562444e-09, "epoch": 10.480401214200871, "percentage": 99.28, "elapsed_time": "14:10:42", "remaining_time": "0:06:12", "throughput": 703.68, "total_tokens": 35917632}
8141
+ {"current_steps": 39715, "total_steps": 40000, "loss": 0.0, "lr": 6.3067204547739845e-09, "epoch": 10.481720997756367, "percentage": 99.29, "elapsed_time": "14:10:46", "remaining_time": "0:06:06", "throughput": 703.71, "total_tokens": 35922432}
8142
+ {"current_steps": 39720, "total_steps": 40000, "loss": 0.0, "lr": 6.088142201987612e-09, "epoch": 10.483040781311864, "percentage": 99.3, "elapsed_time": "14:10:51", "remaining_time": "0:05:59", "throughput": 703.74, "total_tokens": 35926752}
8143
+ {"current_steps": 39725, "total_steps": 40000, "loss": 0.0, "lr": 5.873418324503499e-09, "epoch": 10.484360564867362, "percentage": 99.31, "elapsed_time": "14:10:55", "remaining_time": "0:05:53", "throughput": 703.77, "total_tokens": 35931328}
8144
+ {"current_steps": 39730, "total_steps": 40000, "loss": 0.0, "lr": 5.6625488554340465e-09, "epoch": 10.485680348422859, "percentage": 99.33, "elapsed_time": "14:10:59", "remaining_time": "0:05:46", "throughput": 703.81, "total_tokens": 35936032}
8145
+ {"current_steps": 39735, "total_steps": 40000, "loss": 0.0, "lr": 5.455533827297688e-09, "epoch": 10.487000131978355, "percentage": 99.34, "elapsed_time": "14:11:03", "remaining_time": "0:05:40", "throughput": 703.84, "total_tokens": 35940768}
8146
+ {"current_steps": 39740, "total_steps": 40000, "loss": 0.0, "lr": 5.252373272018885e-09, "epoch": 10.488319915533852, "percentage": 99.35, "elapsed_time": "14:11:07", "remaining_time": "0:05:34", "throughput": 703.88, "total_tokens": 35945536}
8147
+ {"current_steps": 39745, "total_steps": 40000, "loss": 0.0001, "lr": 5.053067220925356e-09, "epoch": 10.489639699089349, "percentage": 99.36, "elapsed_time": "14:11:11", "remaining_time": "0:05:27", "throughput": 703.91, "total_tokens": 35950016}
8148
+ {"current_steps": 39750, "total_steps": 40000, "loss": 0.002, "lr": 4.857615704759177e-09, "epoch": 10.490959482644847, "percentage": 99.38, "elapsed_time": "14:11:15", "remaining_time": "0:05:21", "throughput": 703.94, "total_tokens": 35954304}
8149
+ {"current_steps": 39755, "total_steps": 40000, "loss": 0.0, "lr": 4.666018753654577e-09, "epoch": 10.492279266200343, "percentage": 99.39, "elapsed_time": "14:11:20", "remaining_time": "0:05:14", "throughput": 703.97, "total_tokens": 35958720}
8150
+ {"current_steps": 39760, "total_steps": 40000, "loss": 0.0, "lr": 4.478276397162917e-09, "epoch": 10.49359904975584, "percentage": 99.4, "elapsed_time": "14:11:24", "remaining_time": "0:05:08", "throughput": 704.0, "total_tokens": 35963328}
8151
+ {"current_steps": 39765, "total_steps": 40000, "loss": 0.0, "lr": 4.294388664233262e-09, "epoch": 10.494918833311337, "percentage": 99.41, "elapsed_time": "14:11:28", "remaining_time": "0:05:01", "throughput": 704.03, "total_tokens": 35967744}
8152
+ {"current_steps": 39770, "total_steps": 40000, "loss": 0.0, "lr": 4.114355583223484e-09, "epoch": 10.496238616866833, "percentage": 99.42, "elapsed_time": "14:11:32", "remaining_time": "0:04:55", "throughput": 704.06, "total_tokens": 35972320}
8153
+ {"current_steps": 39775, "total_steps": 40000, "loss": 0.0, "lr": 3.9381771818974845e-09, "epoch": 10.497558400422331, "percentage": 99.44, "elapsed_time": "14:11:36", "remaining_time": "0:04:49", "throughput": 704.1, "total_tokens": 35976960}
8154
+ {"current_steps": 39780, "total_steps": 40000, "loss": 0.0, "lr": 3.765853487427973e-09, "epoch": 10.498878183977828, "percentage": 99.45, "elapsed_time": "14:11:40", "remaining_time": "0:04:42", "throughput": 704.13, "total_tokens": 35981472}
8155
+ {"current_steps": 39785, "total_steps": 40000, "loss": 0.0, "lr": 3.5973845263825857e-09, "epoch": 10.500197967533325, "percentage": 99.46, "elapsed_time": "14:11:44", "remaining_time": "0:04:36", "throughput": 704.16, "total_tokens": 35985984}
8156
+ {"current_steps": 39790, "total_steps": 40000, "loss": 0.0003, "lr": 3.4327703247488684e-09, "epoch": 10.501517751088821, "percentage": 99.48, "elapsed_time": "14:11:49", "remaining_time": "0:04:29", "throughput": 704.19, "total_tokens": 35990592}
8157
+ {"current_steps": 39795, "total_steps": 40000, "loss": 0.0, "lr": 3.2720109079037443e-09, "epoch": 10.502837534644318, "percentage": 99.49, "elapsed_time": "14:11:53", "remaining_time": "0:04:23", "throughput": 704.22, "total_tokens": 35994880}
8158
+ {"current_steps": 39800, "total_steps": 40000, "loss": 0.0, "lr": 3.1151063006468193e-09, "epoch": 10.504157318199816, "percentage": 99.5, "elapsed_time": "14:11:57", "remaining_time": "0:04:16", "throughput": 704.26, "total_tokens": 35999840}
8159
+ {"current_steps": 39800, "total_steps": 40000, "eval_loss": 0.21051521599292755, "epoch": 10.504157318199816, "percentage": 99.5, "elapsed_time": "14:13:26", "remaining_time": "0:04:17", "throughput": 703.03, "total_tokens": 35999840}