Training in progress, epoch 0
Browse files- config.json +1 -1
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +17 -28
- training_args.bin +1 -1
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "meta-llama/
|
3 |
"architectures": [
|
4 |
"LlamaForCausalLM"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "meta-llama/Llama-3.1-8B",
|
3 |
"architectures": [
|
4 |
"LlamaForCausalLM"
|
5 |
],
|
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4976698672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c74315e5a056e88ff103c3e86ada7db46535465923dce44b3b38e5ca8105d3e
|
3 |
size 4976698672
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999802720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4258a71a45181f681f31fc24d4b0a4b702d42cb409c36a3170d7a4f78026b8f2
|
3 |
size 4999802720
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4915916176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56bbaaa5827e2f6f55a63c60bf320c548e7b615bab65f579f19a3e7c8cbdc155
|
3 |
size 4915916176
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1168138808
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5de28addb512035861e5374d4c8cc657bc7d9572113b969dc149eb1c77d06a3f
|
3 |
size 1168138808
|
trainer_log.jsonl
CHANGED
@@ -1,28 +1,17 @@
|
|
1 |
-
{"current_steps": 10, "total_steps": 246, "loss": 0.
|
2 |
-
{"current_steps": 20, "total_steps": 246, "loss": 0.
|
3 |
-
{"current_steps": 30, "total_steps": 246, "loss": 0.
|
4 |
-
{"current_steps": 40, "total_steps": 246, "loss": 0.
|
5 |
-
{"current_steps": 50, "total_steps": 246, "loss": 0.
|
6 |
-
{"current_steps": 60, "total_steps": 246, "loss": 0.
|
7 |
-
{"current_steps": 70, "total_steps": 246, "loss": 0.
|
8 |
-
{"current_steps": 80, "total_steps": 246, "loss": 0.
|
9 |
-
{"current_steps": 82, "total_steps": 246, "eval_loss": 0.
|
10 |
-
{"current_steps": 90, "total_steps": 246, "loss": 0.
|
11 |
-
{"current_steps": 100, "total_steps": 246, "loss": 0.
|
12 |
-
{"current_steps": 110, "total_steps": 246, "loss": 0.
|
13 |
-
{"current_steps": 120, "total_steps": 246, "loss": 0.
|
14 |
-
{"current_steps": 130, "total_steps": 246, "loss": 0.
|
15 |
-
{"current_steps": 140, "total_steps": 246, "loss": 0.
|
16 |
-
{"current_steps": 150, "total_steps": 246, "loss": 0.
|
17 |
-
{"current_steps": 160, "total_steps": 246, "loss": 0.
|
18 |
-
{"current_steps": 164, "total_steps": 246, "eval_loss": 0.5244549512863159, "epoch": 1.9939301972685888, "percentage": 66.67, "elapsed_time": "1:08:30", "remaining_time": "0:34:15"}
|
19 |
-
{"current_steps": 170, "total_steps": 246, "loss": 0.5184, "lr": 5e-06, "epoch": 2.069802731411229, "percentage": 69.11, "elapsed_time": "1:12:33", "remaining_time": "0:32:26"}
|
20 |
-
{"current_steps": 180, "total_steps": 246, "loss": 0.4489, "lr": 5e-06, "epoch": 2.191198786039454, "percentage": 73.17, "elapsed_time": "1:16:36", "remaining_time": "0:28:05"}
|
21 |
-
{"current_steps": 190, "total_steps": 246, "loss": 0.4489, "lr": 5e-06, "epoch": 2.3125948406676784, "percentage": 77.24, "elapsed_time": "1:20:37", "remaining_time": "0:23:45"}
|
22 |
-
{"current_steps": 200, "total_steps": 246, "loss": 0.4513, "lr": 5e-06, "epoch": 2.433990895295903, "percentage": 81.3, "elapsed_time": "1:24:39", "remaining_time": "0:19:28"}
|
23 |
-
{"current_steps": 210, "total_steps": 246, "loss": 0.4488, "lr": 5e-06, "epoch": 2.5553869499241273, "percentage": 85.37, "elapsed_time": "1:28:42", "remaining_time": "0:15:12"}
|
24 |
-
{"current_steps": 220, "total_steps": 246, "loss": 0.4505, "lr": 5e-06, "epoch": 2.676783004552352, "percentage": 89.43, "elapsed_time": "1:32:44", "remaining_time": "0:10:57"}
|
25 |
-
{"current_steps": 230, "total_steps": 246, "loss": 0.4487, "lr": 5e-06, "epoch": 2.7981790591805766, "percentage": 93.5, "elapsed_time": "1:36:45", "remaining_time": "0:06:43"}
|
26 |
-
{"current_steps": 240, "total_steps": 246, "loss": 0.4516, "lr": 5e-06, "epoch": 2.919575113808801, "percentage": 97.56, "elapsed_time": "1:40:47", "remaining_time": "0:02:31"}
|
27 |
-
{"current_steps": 246, "total_steps": 246, "eval_loss": 0.5186759829521179, "epoch": 2.992412746585736, "percentage": 100.0, "elapsed_time": "1:45:28", "remaining_time": "0:00:00"}
|
28 |
-
{"current_steps": 246, "total_steps": 246, "epoch": 2.992412746585736, "percentage": 100.0, "elapsed_time": "1:47:03", "remaining_time": "0:00:00"}
|
|
|
1 |
+
{"current_steps": 10, "total_steps": 246, "loss": 0.7721, "lr": 5e-06, "epoch": 0.12139605462822459, "percentage": 4.07, "elapsed_time": "0:04:01", "remaining_time": "1:34:49"}
|
2 |
+
{"current_steps": 20, "total_steps": 246, "loss": 0.6638, "lr": 5e-06, "epoch": 0.24279210925644917, "percentage": 8.13, "elapsed_time": "0:08:02", "remaining_time": "1:30:52"}
|
3 |
+
{"current_steps": 30, "total_steps": 246, "loss": 0.6289, "lr": 5e-06, "epoch": 0.36418816388467373, "percentage": 12.2, "elapsed_time": "0:12:04", "remaining_time": "1:26:53"}
|
4 |
+
{"current_steps": 40, "total_steps": 246, "loss": 0.6062, "lr": 5e-06, "epoch": 0.48558421851289835, "percentage": 16.26, "elapsed_time": "0:16:05", "remaining_time": "1:22:54"}
|
5 |
+
{"current_steps": 50, "total_steps": 246, "loss": 0.5886, "lr": 5e-06, "epoch": 0.6069802731411229, "percentage": 20.33, "elapsed_time": "0:20:05", "remaining_time": "1:18:46"}
|
6 |
+
{"current_steps": 60, "total_steps": 246, "loss": 0.5763, "lr": 5e-06, "epoch": 0.7283763277693475, "percentage": 24.39, "elapsed_time": "0:24:07", "remaining_time": "1:14:48"}
|
7 |
+
{"current_steps": 70, "total_steps": 246, "loss": 0.5635, "lr": 5e-06, "epoch": 0.849772382397572, "percentage": 28.46, "elapsed_time": "0:28:08", "remaining_time": "1:10:45"}
|
8 |
+
{"current_steps": 80, "total_steps": 246, "loss": 0.5578, "lr": 5e-06, "epoch": 0.9711684370257967, "percentage": 32.52, "elapsed_time": "0:32:08", "remaining_time": "1:06:42"}
|
9 |
+
{"current_steps": 82, "total_steps": 246, "eval_loss": 0.5475569367408752, "epoch": 0.9954476479514416, "percentage": 33.33, "elapsed_time": "0:33:31", "remaining_time": "1:07:02"}
|
10 |
+
{"current_steps": 90, "total_steps": 246, "loss": 0.5739, "lr": 5e-06, "epoch": 1.095599393019727, "percentage": 36.59, "elapsed_time": "0:38:25", "remaining_time": "1:06:36"}
|
11 |
+
{"current_steps": 100, "total_steps": 246, "loss": 0.5098, "lr": 5e-06, "epoch": 1.2169954476479514, "percentage": 40.65, "elapsed_time": "0:42:26", "remaining_time": "1:01:58"}
|
12 |
+
{"current_steps": 110, "total_steps": 246, "loss": 0.5099, "lr": 5e-06, "epoch": 1.338391502276176, "percentage": 44.72, "elapsed_time": "0:46:27", "remaining_time": "0:57:26"}
|
13 |
+
{"current_steps": 120, "total_steps": 246, "loss": 0.5086, "lr": 5e-06, "epoch": 1.4597875569044005, "percentage": 48.78, "elapsed_time": "0:50:28", "remaining_time": "0:52:59"}
|
14 |
+
{"current_steps": 130, "total_steps": 246, "loss": 0.4988, "lr": 5e-06, "epoch": 1.5811836115326252, "percentage": 52.85, "elapsed_time": "0:54:28", "remaining_time": "0:48:36"}
|
15 |
+
{"current_steps": 140, "total_steps": 246, "loss": 0.4973, "lr": 5e-06, "epoch": 1.7025796661608497, "percentage": 56.91, "elapsed_time": "0:58:29", "remaining_time": "0:44:17"}
|
16 |
+
{"current_steps": 150, "total_steps": 246, "loss": 0.5006, "lr": 5e-06, "epoch": 1.8239757207890743, "percentage": 60.98, "elapsed_time": "1:02:31", "remaining_time": "0:40:00"}
|
17 |
+
{"current_steps": 160, "total_steps": 246, "loss": 0.4962, "lr": 5e-06, "epoch": 1.945371775417299, "percentage": 65.04, "elapsed_time": "1:06:30", "remaining_time": "0:35:44"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7224
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c132689d811b51e7252bdcc70f5db4823234c5598d83a9277894faa93faeb501
|
3 |
size 7224
|