rbelanec committed (verified)
Commit bc77551 · 1 Parent(s): 36c8034

Training in progress, step 7

adapter_config.json CHANGED
@@ -2,17 +2,15 @@
  "auto_mapping": null,
  "base_model_name_or_path": "meta-llama/Llama-3.2-1B-Instruct",
  "inference_mode": true,
- "modules_to_save": null,
- "num_attention_heads": 32,
- "num_layers": 16,
- "num_transformer_submodules": 1,
- "num_virtual_tokens": 100,
- "peft_type": "PROMPT_TUNING",
- "prompt_tuning_init": "RANDOM",
- "prompt_tuning_init_text": null,
+ "peft_type": "BITFIT",
  "revision": null,
- "task_type": "CAUSAL_LM",
- "token_dim": 2048,
- "tokenizer_kwargs": null,
- "tokenizer_name_or_path": null
+ "target_modules": [
+ "o_proj",
+ "v_proj",
+ "up_proj",
+ "down_proj",
+ "q_proj",
+ "k_proj"
+ ],
+ "task_type": "CAUSAL_LM"
  }
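This change swaps the prompt-tuning adapter for a BitFit-style one applied to the attention and MLP projection modules. Below is a minimal sketch of how such an adapter is typically attached to the base model with PEFT; the adapter repo id is a placeholder, and the "BITFIT" peft_type is assumed to come from a PEFT build that registers it (mainline PEFT releases do not ship that adapter type).

# Sketch only: attach the adapter from this repo to the base model.
# Assumptions: "rbelanec/llama-3.2-1b-adapter" is a placeholder repo id, and
# the installed PEFT build recognizes the "BITFIT" adapter type.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "meta-llama/Llama-3.2-1B-Instruct"   # from adapter_config.json
adapter_id = "rbelanec/llama-3.2-1b-adapter"   # placeholder, not the real repo id

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id)

# PeftModel reads adapter_config.json (peft_type, target_modules, task_type)
# and loads the trained weights from adapter_model.safetensors.
model = PeftModel.from_pretrained(base_model, adapter_id)
model.eval()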
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:381ac486feb832b85b5ae58370baf8911395fc6fd8877adb7d2c163c189028ff
- size 409728
+ oid sha256:3600bb7753b69736bac46476f75e761b3238d11469128d2998224056aa029f5b
+ size 2818586248
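The checkpoint is stored through Git LFS, and the pointer's oid is the SHA-256 of the actual file (now about 2.8 GB), so a downloaded copy can be checked against it. A small sketch, assuming the file has been pulled into the current directory:

# Sketch only: verify a locally downloaded adapter_model.safetensors against
# the oid recorded in the Git LFS pointer above. The local path is assumed.
import hashlib

expected = "3600bb7753b69736bac46476f75e761b3238d11469128d2998224056aa029f5b"

def sha256_of(path, chunk_size=1 << 20):
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

assert sha256_of("adapter_model.safetensors") == expected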
trainer_log.jsonl CHANGED
@@ -1,36 +1,9 @@
- {"current_steps": 5, "total_steps": 125, "loss": 0.6336, "lr": 0.009230769230769232, "epoch": 0.04, "percentage": 4.0, "elapsed_time": "0:00:01", "remaining_time": "0:00:29", "throughput": 1768.9, "total_tokens": 2144}
- {"current_steps": 7, "total_steps": 125, "eval_loss": 17.433813095092773, "epoch": 0.056, "percentage": 5.6, "elapsed_time": "0:00:02", "remaining_time": "0:00:33", "throughput": 1434.46, "total_tokens": 2880}
- {"current_steps": 10, "total_steps": 125, "loss": 14.0966, "lr": 0.02076923076923077, "epoch": 0.08, "percentage": 8.0, "elapsed_time": "0:00:03", "remaining_time": "0:00:39", "throughput": 1194.47, "total_tokens": 4128}
- {"current_steps": 14, "total_steps": 125, "eval_loss": 12.298744201660156, "epoch": 0.112, "percentage": 11.2, "elapsed_time": "0:00:05", "remaining_time": "0:00:43", "throughput": 1087.69, "total_tokens": 5920}
- {"current_steps": 15, "total_steps": 125, "loss": 13.2344, "lr": 0.029994099395745794, "epoch": 0.12, "percentage": 12.0, "elapsed_time": "0:00:06", "remaining_time": "0:00:46", "throughput": 992.81, "total_tokens": 6240}
- {"current_steps": 20, "total_steps": 125, "loss": 11.3325, "lr": 0.029788065277773536, "epoch": 0.16, "percentage": 16.0, "elapsed_time": "0:00:06", "remaining_time": "0:00:35", "throughput": 1201.54, "total_tokens": 8096}
- {"current_steps": 21, "total_steps": 125, "eval_loss": 10.65202808380127, "epoch": 0.168, "percentage": 16.8, "elapsed_time": "0:00:07", "remaining_time": "0:00:36", "throughput": 1127.76, "total_tokens": 8416}
- {"current_steps": 25, "total_steps": 125, "loss": 9.9997, "lr": 0.02929162684084344, "epoch": 0.2, "percentage": 20.0, "elapsed_time": "0:00:08", "remaining_time": "0:00:34", "throughput": 1187.38, "total_tokens": 10112}
- {"current_steps": 28, "total_steps": 125, "eval_loss": 7.229786396026611, "epoch": 0.224, "percentage": 22.4, "elapsed_time": "0:00:09", "remaining_time": "0:00:32", "throughput": 1195.87, "total_tokens": 11264}
- {"current_steps": 30, "total_steps": 125, "loss": 7.8597, "lr": 0.028514533018536285, "epoch": 0.24, "percentage": 24.0, "elapsed_time": "0:00:10", "remaining_time": "0:00:32", "throughput": 1169.54, "total_tokens": 12032}
- {"current_steps": 35, "total_steps": 125, "loss": 5.6722, "lr": 0.02747204418453818, "epoch": 0.28, "percentage": 28.0, "elapsed_time": "0:00:10", "remaining_time": "0:00:27", "throughput": 1290.01, "total_tokens": 13824}
- {"current_steps": 35, "total_steps": 125, "eval_loss": 4.140727519989014, "epoch": 0.28, "percentage": 28.0, "elapsed_time": "0:00:11", "remaining_time": "0:00:29", "throughput": 1216.38, "total_tokens": 13824}
- {"current_steps": 40, "total_steps": 125, "loss": 3.6164, "lr": 0.026184632473247482, "epoch": 0.32, "percentage": 32.0, "elapsed_time": "0:00:12", "remaining_time": "0:00:26", "throughput": 1267.08, "total_tokens": 15840}
- {"current_steps": 42, "total_steps": 125, "eval_loss": 1.8772838115692139, "epoch": 0.336, "percentage": 33.6, "elapsed_time": "0:00:13", "remaining_time": "0:00:26", "throughput": 1249.48, "total_tokens": 16672}
- {"current_steps": 45, "total_steps": 125, "loss": 2.2571, "lr": 0.024677579753131316, "epoch": 0.36, "percentage": 36.0, "elapsed_time": "0:00:14", "remaining_time": "0:00:25", "throughput": 1255.32, "total_tokens": 17920}
- {"current_steps": 49, "total_steps": 125, "eval_loss": 1.421391487121582, "epoch": 0.392, "percentage": 39.2, "elapsed_time": "0:00:15", "remaining_time": "0:00:23", "throughput": 1261.46, "total_tokens": 19296}
- {"current_steps": 50, "total_steps": 125, "loss": 1.2803, "lr": 0.022980481147730047, "epoch": 0.4, "percentage": 40.0, "elapsed_time": "0:00:16", "remaining_time": "0:00:24", "throughput": 1226.28, "total_tokens": 19712}
- {"current_steps": 55, "total_steps": 125, "loss": 1.0613, "lr": 0.021126663854039943, "epoch": 0.44, "percentage": 44.0, "elapsed_time": "0:00:16", "remaining_time": "0:00:21", "throughput": 1326.01, "total_tokens": 21952}
- {"current_steps": 56, "total_steps": 125, "eval_loss": 0.8889466524124146, "epoch": 0.448, "percentage": 44.8, "elapsed_time": "0:00:17", "remaining_time": "0:00:21", "throughput": 1289.81, "total_tokens": 22432}
- {"current_steps": 60, "total_steps": 125, "loss": 0.8296, "lr": 0.01915253267137274, "epoch": 0.48, "percentage": 48.0, "elapsed_time": "0:00:18", "remaining_time": "0:00:19", "throughput": 1309.09, "total_tokens": 24160}
- {"current_steps": 63, "total_steps": 125, "eval_loss": 0.7238854169845581, "epoch": 0.504, "percentage": 50.4, "elapsed_time": "0:00:19", "remaining_time": "0:00:19", "throughput": 1314.93, "total_tokens": 25504}
- {"current_steps": 65, "total_steps": 125, "loss": 0.6751, "lr": 0.017096855093032493, "epoch": 0.52, "percentage": 52.0, "elapsed_time": "0:00:20", "remaining_time": "0:00:18", "throughput": 1282.75, "total_tokens": 26112}
- {"current_steps": 70, "total_steps": 125, "loss": 0.516, "lr": 0.015, "epoch": 0.56, "percentage": 56.0, "elapsed_time": "0:00:20", "remaining_time": "0:00:16", "throughput": 1347.62, "total_tokens": 28064}
- {"current_steps": 70, "total_steps": 125, "eval_loss": 0.5987562537193298, "epoch": 0.56, "percentage": 56.0, "elapsed_time": "0:00:21", "remaining_time": "0:00:16", "throughput": 1305.71, "total_tokens": 28064}
- {"current_steps": 75, "total_steps": 125, "loss": 0.5519, "lr": 0.012903144906967513, "epoch": 0.6, "percentage": 60.0, "elapsed_time": "0:00:23", "remaining_time": "0:00:15", "throughput": 1293.03, "total_tokens": 29824}
- {"current_steps": 77, "total_steps": 125, "eval_loss": 0.447248637676239, "epoch": 0.616, "percentage": 61.6, "elapsed_time": "0:00:23", "remaining_time": "0:00:14", "throughput": 1282.51, "total_tokens": 30720}
- {"current_steps": 80, "total_steps": 125, "loss": 0.471, "lr": 0.01084746732862726, "epoch": 0.64, "percentage": 64.0, "elapsed_time": "0:00:25", "remaining_time": "0:00:14", "throughput": 1266.24, "total_tokens": 31904}
- {"current_steps": 84, "total_steps": 125, "eval_loss": 0.42409372329711914, "epoch": 0.672, "percentage": 67.2, "elapsed_time": "0:00:26", "remaining_time": "0:00:12", "throughput": 1276.88, "total_tokens": 33504}
- {"current_steps": 85, "total_steps": 125, "loss": 0.44, "lr": 0.008873336145960059, "epoch": 0.68, "percentage": 68.0, "elapsed_time": "0:00:27", "remaining_time": "0:00:12", "throughput": 1233.02, "total_tokens": 33984}
- {"current_steps": 90, "total_steps": 125, "loss": 0.407, "lr": 0.007019518852269953, "epoch": 0.72, "percentage": 72.0, "elapsed_time": "0:00:28", "remaining_time": "0:00:10", "throughput": 1275.28, "total_tokens": 35776}
- {"current_steps": 91, "total_steps": 125, "eval_loss": 0.4211791157722473, "epoch": 0.728, "percentage": 72.8, "elapsed_time": "0:00:28", "remaining_time": "0:00:10", "throughput": 1252.36, "total_tokens": 36128}
- {"current_steps": 95, "total_steps": 125, "loss": 0.4525, "lr": 0.005322420246868689, "epoch": 0.76, "percentage": 76.0, "elapsed_time": "0:00:30", "remaining_time": "0:00:09", "throughput": 1247.46, "total_tokens": 37472}
- {"current_steps": 98, "total_steps": 125, "eval_loss": 0.4424297511577606, "epoch": 0.784, "percentage": 78.4, "elapsed_time": "0:00:31", "remaining_time": "0:00:08", "throughput": 1244.27, "total_tokens": 38592}
- {"current_steps": 100, "total_steps": 125, "loss": 0.419, "lr": 0.003815367526752516, "epoch": 0.8, "percentage": 80.0, "elapsed_time": "0:00:32", "remaining_time": "0:00:08", "throughput": 1221.95, "total_tokens": 39328}
- {"current_steps": 105, "total_steps": 125, "loss": 0.3686, "lr": 0.0025279558154618197, "epoch": 0.84, "percentage": 84.0, "elapsed_time": "0:00:32", "remaining_time": "0:00:06", "throughput": 1263.56, "total_tokens": 41280}
- {"current_steps": 105, "total_steps": 125, "eval_loss": 0.4465864300727844, "epoch": 0.84, "percentage": 84.0, "elapsed_time": "0:00:33", "remaining_time": "0:00:06", "throughput": 1236.42, "total_tokens": 41280}
+ {"current_steps": 5, "total_steps": 125, "loss": 1.0409, "lr": 1.5384615384615387e-05, "epoch": 0.04, "percentage": 4.0, "elapsed_time": "0:00:04", "remaining_time": "0:01:57", "throughput": 436.96, "total_tokens": 2144}
+ {"current_steps": 7, "total_steps": 125, "eval_loss": 0.3513108789920807, "epoch": 0.056, "percentage": 5.6, "elapsed_time": "0:00:05", "remaining_time": "0:01:39", "throughput": 487.23, "total_tokens": 2880}
+ {"current_steps": 10, "total_steps": 125, "loss": 0.4086, "lr": 3.461538461538462e-05, "epoch": 0.08, "percentage": 8.0, "elapsed_time": "0:00:44", "remaining_time": "0:08:27", "throughput": 93.46, "total_tokens": 4128}
+ {"current_steps": 14, "total_steps": 125, "eval_loss": 1.1120651960372925, "epoch": 0.112, "percentage": 11.2, "elapsed_time": "0:00:45", "remaining_time": "0:06:01", "throughput": 129.93, "total_tokens": 5920}
+ {"current_steps": 15, "total_steps": 125, "loss": 0.8807, "lr": 4.999016565957633e-05, "epoch": 0.12, "percentage": 12.0, "elapsed_time": "0:01:12", "remaining_time": "0:08:53", "throughput": 85.84, "total_tokens": 6240}
+ {"current_steps": 20, "total_steps": 125, "loss": 0.9267, "lr": 4.96467754629559e-05, "epoch": 0.16, "percentage": 16.0, "elapsed_time": "0:01:13", "remaining_time": "0:06:26", "throughput": 109.92, "total_tokens": 8096}
+ {"current_steps": 21, "total_steps": 125, "eval_loss": 0.35109928250312805, "epoch": 0.168, "percentage": 16.8, "elapsed_time": "0:01:14", "remaining_time": "0:06:09", "throughput": 112.93, "total_tokens": 8416}
+ {"current_steps": 25, "total_steps": 125, "loss": 0.7142, "lr": 4.881937806807241e-05, "epoch": 0.2, "percentage": 20.0, "elapsed_time": "0:01:35", "remaining_time": "0:06:22", "throughput": 105.64, "total_tokens": 10112}
+ {"current_steps": 28, "total_steps": 125, "eval_loss": 0.38589543104171753, "epoch": 0.224, "percentage": 22.4, "elapsed_time": "0:01:36", "remaining_time": "0:05:35", "throughput": 116.17, "total_tokens": 11264}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a278118e6fbd86fea05c581f414086048fa3eb0ac46a84c1900dc7fd26cd5ff7
+ oid sha256:c15b67a3611dd4d5558e3b4704da8db313ee1413361e088cac2394f243a8d3e4
  size 6097
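training_args.bin is the pickled TrainingArguments object that the Trainer saves alongside the checkpoint, so the run's hyperparameters can be inspected directly. A sketch, assuming transformers is installed and the file is available locally (recent PyTorch versions need weights_only=False for pickled objects):

# Sketch only: inspect the pickled TrainingArguments. transformers must be
# importable for unpickling; weights_only=False because this is not a plain
# tensor file. The local path is assumed.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)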