rbelanec committed (verified)
Commit bc77551 · 1 Parent(s): 36c8034

Training in progress, step 7

adapter_config.json CHANGED
@@ -2,17 +2,15 @@
  "auto_mapping": null,
  "base_model_name_or_path": "meta-llama/Llama-3.2-1B-Instruct",
  "inference_mode": true,
- "modules_to_save": null,
- "num_attention_heads": 32,
- "num_layers": 16,
- "num_transformer_submodules": 1,
- "num_virtual_tokens": 100,
- "peft_type": "PROMPT_TUNING",
- "prompt_tuning_init": "RANDOM",
- "prompt_tuning_init_text": null,
+ "peft_type": "BITFIT",
  "revision": null,
- "task_type": "CAUSAL_LM",
- "token_dim": 2048,
- "tokenizer_kwargs": null,
- "tokenizer_name_or_path": null
+ "target_modules": [
+ "o_proj",
+ "v_proj",
+ "up_proj",
+ "down_proj",
+ "q_proj",
+ "k_proj"
+ ],
+ "task_type": "CAUSAL_LM"
  }
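This change swaps the prompt-tuning adapter for a BitFit-style one applied to the attention and MLP projection modules. Below is a minimal sketch of how such an adapter is typically attached to the base model with PEFT; the adapter repo id is a placeholder, and the "BITFIT" peft_type is assumed to come from a PEFT build that registers it (mainline PEFT releases do not ship that adapter type).

# Sketch only: attach the adapter from this repo to the base model.
# Assumptions: "rbelanec/llama-3.2-1b-adapter" is a placeholder repo id, and
# the installed PEFT build recognizes the "BITFIT" adapter type.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "meta-llama/Llama-3.2-1B-Instruct"   # from adapter_config.json
adapter_id = "rbelanec/llama-3.2-1b-adapter"   # placeholder, not the real repo id

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id)

# PeftModel reads adapter_config.json (peft_type, target_modules, task_type)
# and loads the trained weights from adapter_model.safetensors.
model = PeftModel.from_pretrained(base_model, adapter_id)
model.eval()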
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:381ac486feb832b85b5ae58370baf8911395fc6fd8877adb7d2c163c189028ff
- size 409728
+ oid sha256:3600bb7753b69736bac46476f75e761b3238d11469128d2998224056aa029f5b
+ size 2818586248
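The checkpoint is stored through Git LFS, and the pointer's oid is the SHA-256 of the actual file (now about 2.8 GB), so a downloaded copy can be checked against it. A small sketch, assuming the file has been pulled into the current directory:

# Sketch only: verify a locally downloaded adapter_model.safetensors against
# the oid recorded in the Git LFS pointer above. The local path is assumed.
import hashlib

expected = "3600bb7753b69736bac46476f75e761b3238d11469128d2998224056aa029f5b"

def sha256_of(path, chunk_size=1 << 20):
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

assert sha256_of("adapter_model.safetensors") == expected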
trainer_log.jsonl CHANGED
@@ -1,36 +1,9 @@
- {"current_steps": 5, "total_steps": 125, "loss": 0.6336, "lr": 0.009230769230769232, "epoch": 0.04, "percentage": 4.0, "elapsed_time": "0:00:01", "remaining_time": "0:00:29", "throughput": 1768.9, "total_tokens": 2144}
- {"current_steps": 7, "total_steps": 125, "eval_loss": 17.433813095092773, "epoch": 0.056, "percentage": 5.6, "elapsed_time": "0:00:02", "remaining_time": "0:00:33", "throughput": 1434.46, "total_tokens": 2880}
- {"current_steps": 10, "total_steps": 125, "loss": 14.0966, "lr": 0.02076923076923077, "epoch": 0.08, "percentage": 8.0, "elapsed_time": "0:00:03", "remaining_time": "0:00:39", "throughput": 1194.47, "total_tokens": 4128}
- {"current_steps": 14, "total_steps": 125, "eval_loss": 12.298744201660156, "epoch": 0.112, "percentage": 11.2, "elapsed_time": "0:00:05", "remaining_time": "0:00:43", "throughput": 1087.69, "total_tokens": 5920}
- {"current_steps": 15, "total_steps": 125, "loss": 13.2344, "lr": 0.029994099395745794, "epoch": 0.12, "percentage": 12.0, "elapsed_time": "0:00:06", "remaining_time": "0:00:46", "throughput": 992.81, "total_tokens": 6240}
- {"current_steps": 20, "total_steps": 125, "loss": 11.3325, "lr": 0.029788065277773536, "epoch": 0.16, "percentage": 16.0, "elapsed_time": "0:00:06", "remaining_time": "0:00:35", "throughput": 1201.54, "total_tokens": 8096}
- {"current_steps": 21, "total_steps": 125, "eval_loss": 10.65202808380127, "epoch": 0.168, "percentage": 16.8, "elapsed_time": "0:00:07", "remaining_time": "0:00:36", "throughput": 1127.76, "total_tokens": 8416}
- {"current_steps": 25, "total_steps": 125, "loss": 9.9997, "lr": 0.02929162684084344, "epoch": 0.2, "percentage": 20.0, "elapsed_time": "0:00:08", "remaining_time": "0:00:34", "throughput": 1187.38, "total_tokens": 10112}
- {"current_steps": 28, "total_steps": 125, "eval_loss": 7.229786396026611, "epoch": 0.224, "percentage": 22.4, "elapsed_time": "0:00:09", "remaining_time": "0:00:32", "throughput": 1195.87, "total_tokens": 11264}
- {"current_steps": 30, "total_steps": 125, "loss": 7.8597, "lr": 0.028514533018536285, "epoch": 0.24, "percentage": 24.0, "elapsed_time": "0:00:10", "remaining_time": "0:00:32", "throughput": 1169.54, "total_tokens": 12032}
- {"current_steps": 35, "total_steps": 125, "loss": 5.6722, "lr": 0.02747204418453818, "epoch": 0.28, "percentage": 28.0, "elapsed_time": "0:00:10", "remaining_time": "0:00:27", "throughput": 1290.01, "total_tokens": 13824}
- {"current_steps": 35, "total_steps": 125, "eval_loss": 4.140727519989014, "epoch": 0.28, "percentage": 28.0, "elapsed_time": "0:00:11", "remaining_time": "0:00:29", "throughput": 1216.38, "total_tokens": 13824}
- {"current_steps": 40, "total_steps": 125, "loss": 3.6164, "lr": 0.026184632473247482, "epoch": 0.32, "percentage": 32.0, "elapsed_time": "0:00:12", "remaining_time": "0:00:26", "throughput": 1267.08, "total_tokens": 15840}
- {"current_steps": 42, "total_steps": 125, "eval_loss": 1.8772838115692139, "epoch": 0.336, "percentage": 33.6, "elapsed_time": "0:00:13", "remaining_time": "0:00:26", "throughput": 1249.48, "total_tokens": 16672}
- {"current_steps": 45, "total_steps": 125, "loss": 2.2571, "lr": 0.024677579753131316, "epoch": 0.36, "percentage": 36.0, "elapsed_time": "0:00:14", "remaining_time": "0:00:25", "throughput": 1255.32, "total_tokens": 17920}
- {"current_steps": 49, "total_steps": 125, "eval_loss": 1.421391487121582, "epoch": 0.392, "percentage": 39.2, "elapsed_time": "0:00:15", "remaining_time": "0:00:23", "throughput": 1261.46, "total_tokens": 19296}
- {"current_steps": 50, "total_steps": 125, "loss": 1.2803, "lr": 0.022980481147730047, "epoch": 0.4, "percentage": 40.0, "elapsed_time": "0:00:16", "remaining_time": "0:00:24", "throughput": 1226.28, "total_tokens": 19712}
- {"current_steps": 55, "total_steps": 125, "loss": 1.0613, "lr": 0.021126663854039943, "epoch": 0.44, "percentage": 44.0, "elapsed_time": "0:00:16", "remaining_time": "0:00:21", "throughput": 1326.01, "total_tokens": 21952}
- {"current_steps": 56, "total_steps": 125, "eval_loss": 0.8889466524124146, "epoch": 0.448, "percentage": 44.8, "elapsed_time": "0:00:17", "remaining_time": "0:00:21", "throughput": 1289.81, "total_tokens": 22432}
- {"current_steps": 60, "total_steps": 125, "loss": 0.8296, "lr": 0.01915253267137274, "epoch": 0.48, "percentage": 48.0, "elapsed_time": "0:00:18", "remaining_time": "0:00:19", "throughput": 1309.09, "total_tokens": 24160}
- {"current_steps": 63, "total_steps": 125, "eval_loss": 0.7238854169845581, "epoch": 0.504, "percentage": 50.4, "elapsed_time": "0:00:19", "remaining_time": "0:00:19", "throughput": 1314.93, "total_tokens": 25504}
- {"current_steps": 65, "total_steps": 125, "loss": 0.6751, "lr": 0.017096855093032493, "epoch": 0.52, "percentage": 52.0, "elapsed_time": "0:00:20", "remaining_time": "0:00:18", "throughput": 1282.75, "total_tokens": 26112}
- {"current_steps": 70, "total_steps": 125, "loss": 0.516, "lr": 0.015, "epoch": 0.56, "percentage": 56.0, "elapsed_time": "0:00:20", "remaining_time": "0:00:16", "throughput": 1347.62, "total_tokens": 28064}
- {"current_steps": 70, "total_steps": 125, "eval_loss": 0.5987562537193298, "epoch": 0.56, "percentage": 56.0, "elapsed_time": "0:00:21", "remaining_time": "0:00:16", "throughput": 1305.71, "total_tokens": 28064}
- {"current_steps": 75, "total_steps": 125, "loss": 0.5519, "lr": 0.012903144906967513, "epoch": 0.6, "percentage": 60.0, "elapsed_time": "0:00:23", "remaining_time": "0:00:15", "throughput": 1293.03, "total_tokens": 29824}
- {"current_steps": 77, "total_steps": 125, "eval_loss": 0.447248637676239, "epoch": 0.616, "percentage": 61.6, "elapsed_time": "0:00:23", "remaining_time": "0:00:14", "throughput": 1282.51, "total_tokens": 30720}
- {"current_steps": 80, "total_steps": 125, "loss": 0.471, "lr": 0.01084746732862726, "epoch": 0.64, "percentage": 64.0, "elapsed_time": "0:00:25", "remaining_time": "0:00:14", "throughput": 1266.24, "total_tokens": 31904}
- {"current_steps": 84, "total_steps": 125, "eval_loss": 0.42409372329711914, "epoch": 0.672, "percentage": 67.2, "elapsed_time": "0:00:26", "remaining_time": "0:00:12", "throughput": 1276.88, "total_tokens": 33504}
- {"current_steps": 85, "total_steps": 125, "loss": 0.44, "lr": 0.008873336145960059, "epoch": 0.68, "percentage": 68.0, "elapsed_time": "0:00:27", "remaining_time": "0:00:12", "throughput": 1233.02, "total_tokens": 33984}
- {"current_steps": 90, "total_steps": 125, "loss": 0.407, "lr": 0.007019518852269953, "epoch": 0.72, "percentage": 72.0, "elapsed_time": "0:00:28", "remaining_time": "0:00:10", "throughput": 1275.28, "total_tokens": 35776}
- {"current_steps": 91, "total_steps": 125, "eval_loss": 0.4211791157722473, "epoch": 0.728, "percentage": 72.8, "elapsed_time": "0:00:28", "remaining_time": "0:00:10", "throughput": 1252.36, "total_tokens": 36128}
- {"current_steps": 95, "total_steps": 125, "loss": 0.4525, "lr": 0.005322420246868689, "epoch": 0.76, "percentage": 76.0, "elapsed_time": "0:00:30", "remaining_time": "0:00:09", "throughput": 1247.46, "total_tokens": 37472}
- {"current_steps": 98, "total_steps": 125, "eval_loss": 0.4424297511577606, "epoch": 0.784, "percentage": 78.4, "elapsed_time": "0:00:31", "remaining_time": "0:00:08", "throughput": 1244.27, "total_tokens": 38592}
- {"current_steps": 100, "total_steps": 125, "loss": 0.419, "lr": 0.003815367526752516, "epoch": 0.8, "percentage": 80.0, "elapsed_time": "0:00:32", "remaining_time": "0:00:08", "throughput": 1221.95, "total_tokens": 39328}
- {"current_steps": 105, "total_steps": 125, "loss": 0.3686, "lr": 0.0025279558154618197, "epoch": 0.84, "percentage": 84.0, "elapsed_time": "0:00:32", "remaining_time": "0:00:06", "throughput": 1263.56, "total_tokens": 41280}
- {"current_steps": 105, "total_steps": 125, "eval_loss": 0.4465864300727844, "epoch": 0.84, "percentage": 84.0, "elapsed_time": "0:00:33", "remaining_time": "0:00:06", "throughput": 1236.42, "total_tokens": 41280}
+ {"current_steps": 5, "total_steps": 125, "loss": 1.0409, "lr": 1.5384615384615387e-05, "epoch": 0.04, "percentage": 4.0, "elapsed_time": "0:00:04", "remaining_time": "0:01:57", "throughput": 436.96, "total_tokens": 2144}
+ {"current_steps": 7, "total_steps": 125, "eval_loss": 0.3513108789920807, "epoch": 0.056, "percentage": 5.6, "elapsed_time": "0:00:05", "remaining_time": "0:01:39", "throughput": 487.23, "total_tokens": 2880}
+ {"current_steps": 10, "total_steps": 125, "loss": 0.4086, "lr": 3.461538461538462e-05, "epoch": 0.08, "percentage": 8.0, "elapsed_time": "0:00:44", "remaining_time": "0:08:27", "throughput": 93.46, "total_tokens": 4128}
+ {"current_steps": 14, "total_steps": 125, "eval_loss": 1.1120651960372925, "epoch": 0.112, "percentage": 11.2, "elapsed_time": "0:00:45", "remaining_time": "0:06:01", "throughput": 129.93, "total_tokens": 5920}
+ {"current_steps": 15, "total_steps": 125, "loss": 0.8807, "lr": 4.999016565957633e-05, "epoch": 0.12, "percentage": 12.0, "elapsed_time": "0:01:12", "remaining_time": "0:08:53", "throughput": 85.84, "total_tokens": 6240}
+ {"current_steps": 20, "total_steps": 125, "loss": 0.9267, "lr": 4.96467754629559e-05, "epoch": 0.16, "percentage": 16.0, "elapsed_time": "0:01:13", "remaining_time": "0:06:26", "throughput": 109.92, "total_tokens": 8096}
+ {"current_steps": 21, "total_steps": 125, "eval_loss": 0.35109928250312805, "epoch": 0.168, "percentage": 16.8, "elapsed_time": "0:01:14", "remaining_time": "0:06:09", "throughput": 112.93, "total_tokens": 8416}
+ {"current_steps": 25, "total_steps": 125, "loss": 0.7142, "lr": 4.881937806807241e-05, "epoch": 0.2, "percentage": 20.0, "elapsed_time": "0:01:35", "remaining_time": "0:06:22", "throughput": 105.64, "total_tokens": 10112}
+ {"current_steps": 28, "total_steps": 125, "eval_loss": 0.38589543104171753, "epoch": 0.224, "percentage": 22.4, "elapsed_time": "0:01:36", "remaining_time": "0:05:35", "throughput": 116.17, "total_tokens": 11264}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a278118e6fbd86fea05c581f414086048fa3eb0ac46a84c1900dc7fd26cd5ff7
+ oid sha256:c15b67a3611dd4d5558e3b4704da8db313ee1413361e088cac2394f243a8d3e4
  size 6097
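training_args.bin is the pickled TrainingArguments object that the Trainer saves alongside the checkpoint, so the run's hyperparameters can be inspected directly. A sketch, assuming transformers is installed and the file is available locally (recent PyTorch versions need weights_only=False for pickled objects):

# Sketch only: inspect the pickled TrainingArguments. transformers must be
# importable for unpickling; weights_only=False because this is not a plain
# tensor file. The local path is assumed.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)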