ben81828 commited on
Commit
a488eaf
1 Parent(s): 6e7604d

Training in progress, step 14000

Browse files
adapter_config.json CHANGED
@@ -19,7 +19,7 @@
19
  "r": 8,
20
  "rank_pattern": {},
21
  "revision": null,
22
- "target_modules": "^(?!.*patch_embed).*(?:fc1|proj|down_proj|v_proj|k_proj|gate_proj|fc2|up_proj|q_proj|qkv|o_proj).*",
23
  "task_type": "CAUSAL_LM",
24
  "use_dora": false,
25
  "use_rslora": false
 
19
  "r": 8,
20
  "rank_pattern": {},
21
  "revision": null,
22
+ "target_modules": "^(?!.*patch_embed).*(?:q_proj|down_proj|v_proj|k_proj|proj|gate_proj|fc2|qkv|fc1|up_proj|o_proj).*",
23
  "task_type": "CAUSAL_LM",
24
  "use_dora": false,
25
  "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:842d19408675a89febd630114553cd9b4d98a4002c752e3bccbc5e07a6b917e4
3
  size 29034840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82daaadabd3d76d3fe195abd053ac405d68cd4f117d0902ce6fb7c2a017220ff
3
  size 29034840
trainer_log.jsonl CHANGED
@@ -3099,3 +3099,19 @@
3099
  {"current_steps": 13945, "total_steps": 16324, "loss": 0.1866, "lr": 5.6957356636411606e-06, "epoch": 1.7084622216777434, "percentage": 85.43, "elapsed_time": "20:07:39", "remaining_time": "3:26:01", "throughput": 1287.56, "total_tokens": 93295648}
3100
  {"current_steps": 13950, "total_steps": 16324, "loss": 0.2211, "lr": 5.67228193452185e-06, "epoch": 1.7090747603442469, "percentage": 85.46, "elapsed_time": "20:08:39", "remaining_time": "3:25:41", "throughput": 1286.94, "total_tokens": 93328616}
3101
  {"current_steps": 13950, "total_steps": 16324, "eval_loss": 0.10547046363353729, "epoch": 1.7090747603442469, "percentage": 85.46, "elapsed_time": "20:08:59", "remaining_time": "3:25:44", "throughput": 1286.59, "total_tokens": 93328616}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3099
  {"current_steps": 13945, "total_steps": 16324, "loss": 0.1866, "lr": 5.6957356636411606e-06, "epoch": 1.7084622216777434, "percentage": 85.43, "elapsed_time": "20:07:39", "remaining_time": "3:26:01", "throughput": 1287.56, "total_tokens": 93295648}
3100
  {"current_steps": 13950, "total_steps": 16324, "loss": 0.2211, "lr": 5.67228193452185e-06, "epoch": 1.7090747603442469, "percentage": 85.46, "elapsed_time": "20:08:39", "remaining_time": "3:25:41", "throughput": 1286.94, "total_tokens": 93328616}
3101
  {"current_steps": 13950, "total_steps": 16324, "eval_loss": 0.10547046363353729, "epoch": 1.7090747603442469, "percentage": 85.46, "elapsed_time": "20:08:59", "remaining_time": "3:25:44", "throughput": 1286.59, "total_tokens": 93328616}
3102
+ {"current_steps": 13955, "total_steps": 16324, "loss": 0.195, "lr": 5.648873689493367e-06, "epoch": 1.7096872990107501, "percentage": 85.49, "elapsed_time": "20:10:04", "remaining_time": "3:25:25", "throughput": 1285.9, "total_tokens": 93362448}
3103
+ {"current_steps": 13960, "total_steps": 16324, "loss": 0.19, "lr": 5.625510952574614e-06, "epoch": 1.7102998376772534, "percentage": 85.52, "elapsed_time": "20:11:04", "remaining_time": "3:25:05", "throughput": 1285.31, "total_tokens": 93396232}
3104
+ {"current_steps": 13965, "total_steps": 16324, "loss": 0.2068, "lr": 5.602193747737766e-06, "epoch": 1.7109123763437566, "percentage": 85.55, "elapsed_time": "20:12:05", "remaining_time": "3:24:44", "throughput": 1284.69, "total_tokens": 93429472}
3105
+ {"current_steps": 13970, "total_steps": 16324, "loss": 0.2117, "lr": 5.578922098908313e-06, "epoch": 1.71152491501026, "percentage": 85.58, "elapsed_time": "20:13:04", "remaining_time": "3:24:24", "throughput": 1284.1, "total_tokens": 93463024}
3106
+ {"current_steps": 13975, "total_steps": 16324, "loss": 0.2151, "lr": 5.555696029965008e-06, "epoch": 1.7121374536767635, "percentage": 85.61, "elapsed_time": "20:14:04", "remaining_time": "3:24:04", "throughput": 1283.52, "total_tokens": 93496712}
3107
+ {"current_steps": 13955, "total_steps": 16324, "loss": 0.1962, "lr": 5.648873689493367e-06, "epoch": 1.7096872990107501, "percentage": 85.49, "elapsed_time": "0:02:15", "remaining_time": "0:00:23", "throughput": 687931.8, "total_tokens": 93362448}
3108
+ {"current_steps": 13960, "total_steps": 16324, "loss": 0.1886, "lr": 5.625510952574614e-06, "epoch": 1.7102998376772534, "percentage": 85.52, "elapsed_time": "0:03:14", "remaining_time": "0:00:32", "throughput": 479315.28, "total_tokens": 93396232}
3109
+ {"current_steps": 13965, "total_steps": 16324, "loss": 0.2069, "lr": 5.602193747737766e-06, "epoch": 1.7109123763437566, "percentage": 85.55, "elapsed_time": "0:04:13", "remaining_time": "0:00:42", "throughput": 368796.47, "total_tokens": 93429472}
3110
+ {"current_steps": 13970, "total_steps": 16324, "loss": 0.2115, "lr": 5.578922098908313e-06, "epoch": 1.71152491501026, "percentage": 85.58, "elapsed_time": "0:05:12", "remaining_time": "0:00:52", "throughput": 299499.76, "total_tokens": 93463024}
3111
+ {"current_steps": 13975, "total_steps": 16324, "loss": 0.2147, "lr": 5.555696029965008e-06, "epoch": 1.7121374536767635, "percentage": 85.61, "elapsed_time": "0:06:12", "remaining_time": "0:01:02", "throughput": 251246.74, "total_tokens": 93496712}
3112
+ {"current_steps": 13980, "total_steps": 16324, "loss": 0.1828, "lr": 5.532515564739782e-06, "epoch": 1.7127499923432667, "percentage": 85.64, "elapsed_time": "0:07:10", "remaining_time": "0:01:12", "throughput": 217164.88, "total_tokens": 93530696}
3113
+ {"current_steps": 13985, "total_steps": 16324, "loss": 0.2053, "lr": 5.509380727017838e-06, "epoch": 1.71336253100977, "percentage": 85.67, "elapsed_time": "0:08:08", "remaining_time": "0:01:21", "throughput": 191538.07, "total_tokens": 93564600}
3114
+ {"current_steps": 13990, "total_steps": 16324, "loss": 0.2141, "lr": 5.486291540537502e-06, "epoch": 1.7139750696762732, "percentage": 85.7, "elapsed_time": "0:09:07", "remaining_time": "0:01:31", "throughput": 170921.4, "total_tokens": 93598104}
3115
+ {"current_steps": 13995, "total_steps": 16324, "loss": 0.209, "lr": 5.463248028990314e-06, "epoch": 1.7145876083427767, "percentage": 85.73, "elapsed_time": "0:10:05", "remaining_time": "0:01:40", "throughput": 154669.51, "total_tokens": 93632056}
3116
+ {"current_steps": 14000, "total_steps": 16324, "loss": 0.1834, "lr": 5.440250216020892e-06, "epoch": 1.71520014700928, "percentage": 85.76, "elapsed_time": "0:11:02", "remaining_time": "0:01:50", "throughput": 141297.07, "total_tokens": 93666120}
3117
+ {"current_steps": 14000, "total_steps": 16324, "eval_loss": 0.11137460172176361, "epoch": 1.71520014700928, "percentage": 85.76, "elapsed_time": "0:11:49", "remaining_time": "0:01:57", "throughput": 132003.38, "total_tokens": 93666120}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd6b407c58d92cb06e29ce14f60edd12858669cee4d66ecc849960861e23ddfc
3
  size 7480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d51c48e0ccbe31be89b570cbf6ae244b8dae4eca073bac488dbc23b6892eb7c1
3
  size 7480