Training in progress, step 7
Browse files- adapter_config.json +10 -12
- adapter_model.safetensors +2 -2
- trainer_log.jsonl +9 -36
- training_args.bin +1 -1
adapter_config.json
CHANGED
|
@@ -2,17 +2,15 @@
|
|
| 2 |
"auto_mapping": null,
|
| 3 |
"base_model_name_or_path": "meta-llama/Llama-3.2-1B-Instruct",
|
| 4 |
"inference_mode": true,
|
| 5 |
-
"
|
| 6 |
-
"num_attention_heads": 32,
|
| 7 |
-
"num_layers": 16,
|
| 8 |
-
"num_transformer_submodules": 1,
|
| 9 |
-
"num_virtual_tokens": 100,
|
| 10 |
-
"peft_type": "PROMPT_TUNING",
|
| 11 |
-
"prompt_tuning_init": "RANDOM",
|
| 12 |
-
"prompt_tuning_init_text": null,
|
| 13 |
"revision": null,
|
| 14 |
-
"
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
}
|
|
|
|
| 2 |
"auto_mapping": null,
|
| 3 |
"base_model_name_or_path": "meta-llama/Llama-3.2-1B-Instruct",
|
| 4 |
"inference_mode": true,
|
| 5 |
+
"peft_type": "BITFIT",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
"revision": null,
|
| 7 |
+
"target_modules": [
|
| 8 |
+
"o_proj",
|
| 9 |
+
"v_proj",
|
| 10 |
+
"up_proj",
|
| 11 |
+
"down_proj",
|
| 12 |
+
"q_proj",
|
| 13 |
+
"k_proj"
|
| 14 |
+
],
|
| 15 |
+
"task_type": "CAUSAL_LM"
|
| 16 |
}
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3600bb7753b69736bac46476f75e761b3238d11469128d2998224056aa029f5b
|
| 3 |
+
size 2818586248
|
trainer_log.jsonl
CHANGED
|
@@ -1,36 +1,9 @@
|
|
| 1 |
-
{"current_steps": 5, "total_steps": 125, "loss":
|
| 2 |
-
{"current_steps": 7, "total_steps": 125, "eval_loss":
|
| 3 |
-
{"current_steps": 10, "total_steps": 125, "loss":
|
| 4 |
-
{"current_steps": 14, "total_steps": 125, "eval_loss":
|
| 5 |
-
{"current_steps": 15, "total_steps": 125, "loss":
|
| 6 |
-
{"current_steps": 20, "total_steps": 125, "loss":
|
| 7 |
-
{"current_steps": 21, "total_steps": 125, "eval_loss":
|
| 8 |
-
{"current_steps": 25, "total_steps": 125, "loss":
|
| 9 |
-
{"current_steps": 28, "total_steps": 125, "eval_loss":
|
| 10 |
-
{"current_steps": 30, "total_steps": 125, "loss": 7.8597, "lr": 0.028514533018536285, "epoch": 0.24, "percentage": 24.0, "elapsed_time": "0:00:10", "remaining_time": "0:00:32", "throughput": 1169.54, "total_tokens": 12032}
|
| 11 |
-
{"current_steps": 35, "total_steps": 125, "loss": 5.6722, "lr": 0.02747204418453818, "epoch": 0.28, "percentage": 28.0, "elapsed_time": "0:00:10", "remaining_time": "0:00:27", "throughput": 1290.01, "total_tokens": 13824}
|
| 12 |
-
{"current_steps": 35, "total_steps": 125, "eval_loss": 4.140727519989014, "epoch": 0.28, "percentage": 28.0, "elapsed_time": "0:00:11", "remaining_time": "0:00:29", "throughput": 1216.38, "total_tokens": 13824}
|
| 13 |
-
{"current_steps": 40, "total_steps": 125, "loss": 3.6164, "lr": 0.026184632473247482, "epoch": 0.32, "percentage": 32.0, "elapsed_time": "0:00:12", "remaining_time": "0:00:26", "throughput": 1267.08, "total_tokens": 15840}
|
| 14 |
-
{"current_steps": 42, "total_steps": 125, "eval_loss": 1.8772838115692139, "epoch": 0.336, "percentage": 33.6, "elapsed_time": "0:00:13", "remaining_time": "0:00:26", "throughput": 1249.48, "total_tokens": 16672}
|
| 15 |
-
{"current_steps": 45, "total_steps": 125, "loss": 2.2571, "lr": 0.024677579753131316, "epoch": 0.36, "percentage": 36.0, "elapsed_time": "0:00:14", "remaining_time": "0:00:25", "throughput": 1255.32, "total_tokens": 17920}
|
| 16 |
-
{"current_steps": 49, "total_steps": 125, "eval_loss": 1.421391487121582, "epoch": 0.392, "percentage": 39.2, "elapsed_time": "0:00:15", "remaining_time": "0:00:23", "throughput": 1261.46, "total_tokens": 19296}
|
| 17 |
-
{"current_steps": 50, "total_steps": 125, "loss": 1.2803, "lr": 0.022980481147730047, "epoch": 0.4, "percentage": 40.0, "elapsed_time": "0:00:16", "remaining_time": "0:00:24", "throughput": 1226.28, "total_tokens": 19712}
|
| 18 |
-
{"current_steps": 55, "total_steps": 125, "loss": 1.0613, "lr": 0.021126663854039943, "epoch": 0.44, "percentage": 44.0, "elapsed_time": "0:00:16", "remaining_time": "0:00:21", "throughput": 1326.01, "total_tokens": 21952}
|
| 19 |
-
{"current_steps": 56, "total_steps": 125, "eval_loss": 0.8889466524124146, "epoch": 0.448, "percentage": 44.8, "elapsed_time": "0:00:17", "remaining_time": "0:00:21", "throughput": 1289.81, "total_tokens": 22432}
|
| 20 |
-
{"current_steps": 60, "total_steps": 125, "loss": 0.8296, "lr": 0.01915253267137274, "epoch": 0.48, "percentage": 48.0, "elapsed_time": "0:00:18", "remaining_time": "0:00:19", "throughput": 1309.09, "total_tokens": 24160}
|
| 21 |
-
{"current_steps": 63, "total_steps": 125, "eval_loss": 0.7238854169845581, "epoch": 0.504, "percentage": 50.4, "elapsed_time": "0:00:19", "remaining_time": "0:00:19", "throughput": 1314.93, "total_tokens": 25504}
|
| 22 |
-
{"current_steps": 65, "total_steps": 125, "loss": 0.6751, "lr": 0.017096855093032493, "epoch": 0.52, "percentage": 52.0, "elapsed_time": "0:00:20", "remaining_time": "0:00:18", "throughput": 1282.75, "total_tokens": 26112}
|
| 23 |
-
{"current_steps": 70, "total_steps": 125, "loss": 0.516, "lr": 0.015, "epoch": 0.56, "percentage": 56.0, "elapsed_time": "0:00:20", "remaining_time": "0:00:16", "throughput": 1347.62, "total_tokens": 28064}
|
| 24 |
-
{"current_steps": 70, "total_steps": 125, "eval_loss": 0.5987562537193298, "epoch": 0.56, "percentage": 56.0, "elapsed_time": "0:00:21", "remaining_time": "0:00:16", "throughput": 1305.71, "total_tokens": 28064}
|
| 25 |
-
{"current_steps": 75, "total_steps": 125, "loss": 0.5519, "lr": 0.012903144906967513, "epoch": 0.6, "percentage": 60.0, "elapsed_time": "0:00:23", "remaining_time": "0:00:15", "throughput": 1293.03, "total_tokens": 29824}
|
| 26 |
-
{"current_steps": 77, "total_steps": 125, "eval_loss": 0.447248637676239, "epoch": 0.616, "percentage": 61.6, "elapsed_time": "0:00:23", "remaining_time": "0:00:14", "throughput": 1282.51, "total_tokens": 30720}
|
| 27 |
-
{"current_steps": 80, "total_steps": 125, "loss": 0.471, "lr": 0.01084746732862726, "epoch": 0.64, "percentage": 64.0, "elapsed_time": "0:00:25", "remaining_time": "0:00:14", "throughput": 1266.24, "total_tokens": 31904}
|
| 28 |
-
{"current_steps": 84, "total_steps": 125, "eval_loss": 0.42409372329711914, "epoch": 0.672, "percentage": 67.2, "elapsed_time": "0:00:26", "remaining_time": "0:00:12", "throughput": 1276.88, "total_tokens": 33504}
|
| 29 |
-
{"current_steps": 85, "total_steps": 125, "loss": 0.44, "lr": 0.008873336145960059, "epoch": 0.68, "percentage": 68.0, "elapsed_time": "0:00:27", "remaining_time": "0:00:12", "throughput": 1233.02, "total_tokens": 33984}
|
| 30 |
-
{"current_steps": 90, "total_steps": 125, "loss": 0.407, "lr": 0.007019518852269953, "epoch": 0.72, "percentage": 72.0, "elapsed_time": "0:00:28", "remaining_time": "0:00:10", "throughput": 1275.28, "total_tokens": 35776}
|
| 31 |
-
{"current_steps": 91, "total_steps": 125, "eval_loss": 0.4211791157722473, "epoch": 0.728, "percentage": 72.8, "elapsed_time": "0:00:28", "remaining_time": "0:00:10", "throughput": 1252.36, "total_tokens": 36128}
|
| 32 |
-
{"current_steps": 95, "total_steps": 125, "loss": 0.4525, "lr": 0.005322420246868689, "epoch": 0.76, "percentage": 76.0, "elapsed_time": "0:00:30", "remaining_time": "0:00:09", "throughput": 1247.46, "total_tokens": 37472}
|
| 33 |
-
{"current_steps": 98, "total_steps": 125, "eval_loss": 0.4424297511577606, "epoch": 0.784, "percentage": 78.4, "elapsed_time": "0:00:31", "remaining_time": "0:00:08", "throughput": 1244.27, "total_tokens": 38592}
|
| 34 |
-
{"current_steps": 100, "total_steps": 125, "loss": 0.419, "lr": 0.003815367526752516, "epoch": 0.8, "percentage": 80.0, "elapsed_time": "0:00:32", "remaining_time": "0:00:08", "throughput": 1221.95, "total_tokens": 39328}
|
| 35 |
-
{"current_steps": 105, "total_steps": 125, "loss": 0.3686, "lr": 0.0025279558154618197, "epoch": 0.84, "percentage": 84.0, "elapsed_time": "0:00:32", "remaining_time": "0:00:06", "throughput": 1263.56, "total_tokens": 41280}
|
| 36 |
-
{"current_steps": 105, "total_steps": 125, "eval_loss": 0.4465864300727844, "epoch": 0.84, "percentage": 84.0, "elapsed_time": "0:00:33", "remaining_time": "0:00:06", "throughput": 1236.42, "total_tokens": 41280}
|
|
|
|
| 1 |
+
{"current_steps": 5, "total_steps": 125, "loss": 1.0409, "lr": 1.5384615384615387e-05, "epoch": 0.04, "percentage": 4.0, "elapsed_time": "0:00:04", "remaining_time": "0:01:57", "throughput": 436.96, "total_tokens": 2144}
|
| 2 |
+
{"current_steps": 7, "total_steps": 125, "eval_loss": 0.3513108789920807, "epoch": 0.056, "percentage": 5.6, "elapsed_time": "0:00:05", "remaining_time": "0:01:39", "throughput": 487.23, "total_tokens": 2880}
|
| 3 |
+
{"current_steps": 10, "total_steps": 125, "loss": 0.4086, "lr": 3.461538461538462e-05, "epoch": 0.08, "percentage": 8.0, "elapsed_time": "0:00:44", "remaining_time": "0:08:27", "throughput": 93.46, "total_tokens": 4128}
|
| 4 |
+
{"current_steps": 14, "total_steps": 125, "eval_loss": 1.1120651960372925, "epoch": 0.112, "percentage": 11.2, "elapsed_time": "0:00:45", "remaining_time": "0:06:01", "throughput": 129.93, "total_tokens": 5920}
|
| 5 |
+
{"current_steps": 15, "total_steps": 125, "loss": 0.8807, "lr": 4.999016565957633e-05, "epoch": 0.12, "percentage": 12.0, "elapsed_time": "0:01:12", "remaining_time": "0:08:53", "throughput": 85.84, "total_tokens": 6240}
|
| 6 |
+
{"current_steps": 20, "total_steps": 125, "loss": 0.9267, "lr": 4.96467754629559e-05, "epoch": 0.16, "percentage": 16.0, "elapsed_time": "0:01:13", "remaining_time": "0:06:26", "throughput": 109.92, "total_tokens": 8096}
|
| 7 |
+
{"current_steps": 21, "total_steps": 125, "eval_loss": 0.35109928250312805, "epoch": 0.168, "percentage": 16.8, "elapsed_time": "0:01:14", "remaining_time": "0:06:09", "throughput": 112.93, "total_tokens": 8416}
|
| 8 |
+
{"current_steps": 25, "total_steps": 125, "loss": 0.7142, "lr": 4.881937806807241e-05, "epoch": 0.2, "percentage": 20.0, "elapsed_time": "0:01:35", "remaining_time": "0:06:22", "throughput": 105.64, "total_tokens": 10112}
|
| 9 |
+
{"current_steps": 28, "total_steps": 125, "eval_loss": 0.38589543104171753, "epoch": 0.224, "percentage": 22.4, "elapsed_time": "0:01:36", "remaining_time": "0:05:35", "throughput": 116.17, "total_tokens": 11264}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6097
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c15b67a3611dd4d5558e3b4704da8db313ee1413361e088cac2394f243a8d3e4
|
| 3 |
size 6097
|