mikeam committed on
Commit
1c0fa25
·
verified ·
1 Parent(s): a466a7f

Training in progress, epoch 0

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "meta-llama/Meta-Llama-3.1-8B",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "meta-llama/Llama-3.1-8B",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9947da4ca9bbaf4614728e57196e7bde53b1499682a8c4e8ff1244725ecc197
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c74315e5a056e88ff103c3e86ada7db46535465923dce44b3b38e5ca8105d3e
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8e6bb93244c976206a100e2cea05775158989e0cf7d6006a2015d462a0b9022
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4258a71a45181f681f31fc24d4b0a4b702d42cb409c36a3170d7a4f78026b8f2
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac395f5a45038bcea248c17a0921cc2a84ce58c52b3ccab3abe1dfa827efbf49
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56bbaaa5827e2f6f55a63c60bf320c548e7b615bab65f579f19a3e7c8cbdc155
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2add1a620622523d16cf38e76cff60719aeabda30e43f7792c40ea792faadf01
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5de28addb512035861e5374d4c8cc657bc7d9572113b969dc149eb1c77d06a3f
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -1,28 +1,17 @@
1
- {"current_steps": 10, "total_steps": 246, "loss": 0.775, "lr": 5e-06, "epoch": 0.12139605462822459, "percentage": 4.07, "elapsed_time": "0:04:01", "remaining_time": "1:34:54"}
2
- {"current_steps": 20, "total_steps": 246, "loss": 0.6633, "lr": 5e-06, "epoch": 0.24279210925644917, "percentage": 8.13, "elapsed_time": "0:08:01", "remaining_time": "1:30:37"}
3
- {"current_steps": 30, "total_steps": 246, "loss": 0.6218, "lr": 5e-06, "epoch": 0.36418816388467373, "percentage": 12.2, "elapsed_time": "0:12:02", "remaining_time": "1:26:41"}
4
- {"current_steps": 40, "total_steps": 246, "loss": 0.6021, "lr": 5e-06, "epoch": 0.48558421851289835, "percentage": 16.26, "elapsed_time": "0:16:03", "remaining_time": "1:22:40"}
5
- {"current_steps": 50, "total_steps": 246, "loss": 0.5861, "lr": 5e-06, "epoch": 0.6069802731411229, "percentage": 20.33, "elapsed_time": "0:20:03", "remaining_time": "1:18:37"}
6
- {"current_steps": 60, "total_steps": 246, "loss": 0.575, "lr": 5e-06, "epoch": 0.7283763277693475, "percentage": 24.39, "elapsed_time": "0:24:03", "remaining_time": "1:14:35"}
7
- {"current_steps": 70, "total_steps": 246, "loss": 0.5628, "lr": 5e-06, "epoch": 0.849772382397572, "percentage": 28.46, "elapsed_time": "0:28:04", "remaining_time": "1:10:34"}
8
- {"current_steps": 80, "total_steps": 246, "loss": 0.5574, "lr": 5e-06, "epoch": 0.9711684370257967, "percentage": 32.52, "elapsed_time": "0:32:03", "remaining_time": "1:06:31"}
9
- {"current_steps": 82, "total_steps": 246, "eval_loss": 0.5473940372467041, "epoch": 0.9954476479514416, "percentage": 33.33, "elapsed_time": "0:33:26", "remaining_time": "1:06:53"}
10
- {"current_steps": 90, "total_steps": 246, "loss": 0.5738, "lr": 5e-06, "epoch": 1.095599393019727, "percentage": 36.59, "elapsed_time": "0:38:18", "remaining_time": "1:06:24"}
11
- {"current_steps": 100, "total_steps": 246, "loss": 0.51, "lr": 5e-06, "epoch": 1.2169954476479514, "percentage": 40.65, "elapsed_time": "0:42:17", "remaining_time": "1:01:45"}
12
- {"current_steps": 110, "total_steps": 246, "loss": 0.5103, "lr": 5e-06, "epoch": 1.338391502276176, "percentage": 44.72, "elapsed_time": "0:46:18", "remaining_time": "0:57:15"}
13
- {"current_steps": 120, "total_steps": 246, "loss": 0.5091, "lr": 5e-06, "epoch": 1.4597875569044005, "percentage": 48.78, "elapsed_time": "0:50:18", "remaining_time": "0:52:49"}
14
- {"current_steps": 130, "total_steps": 246, "loss": 0.4993, "lr": 5e-06, "epoch": 1.5811836115326252, "percentage": 52.85, "elapsed_time": "0:54:18", "remaining_time": "0:48:27"}
15
- {"current_steps": 140, "total_steps": 246, "loss": 0.4977, "lr": 5e-06, "epoch": 1.7025796661608497, "percentage": 56.91, "elapsed_time": "0:58:17", "remaining_time": "0:44:08"}
16
- {"current_steps": 150, "total_steps": 246, "loss": 0.5013, "lr": 5e-06, "epoch": 1.8239757207890743, "percentage": 60.98, "elapsed_time": "1:02:16", "remaining_time": "0:39:51"}
17
- {"current_steps": 160, "total_steps": 246, "loss": 0.4967, "lr": 5e-06, "epoch": 1.945371775417299, "percentage": 65.04, "elapsed_time": "1:06:17", "remaining_time": "0:35:38"}
18
- {"current_steps": 164, "total_steps": 246, "eval_loss": 0.5244549512863159, "epoch": 1.9939301972685888, "percentage": 66.67, "elapsed_time": "1:08:30", "remaining_time": "0:34:15"}
19
- {"current_steps": 170, "total_steps": 246, "loss": 0.5184, "lr": 5e-06, "epoch": 2.069802731411229, "percentage": 69.11, "elapsed_time": "1:12:33", "remaining_time": "0:32:26"}
20
- {"current_steps": 180, "total_steps": 246, "loss": 0.4489, "lr": 5e-06, "epoch": 2.191198786039454, "percentage": 73.17, "elapsed_time": "1:16:36", "remaining_time": "0:28:05"}
21
- {"current_steps": 190, "total_steps": 246, "loss": 0.4489, "lr": 5e-06, "epoch": 2.3125948406676784, "percentage": 77.24, "elapsed_time": "1:20:37", "remaining_time": "0:23:45"}
22
- {"current_steps": 200, "total_steps": 246, "loss": 0.4513, "lr": 5e-06, "epoch": 2.433990895295903, "percentage": 81.3, "elapsed_time": "1:24:39", "remaining_time": "0:19:28"}
23
- {"current_steps": 210, "total_steps": 246, "loss": 0.4488, "lr": 5e-06, "epoch": 2.5553869499241273, "percentage": 85.37, "elapsed_time": "1:28:42", "remaining_time": "0:15:12"}
24
- {"current_steps": 220, "total_steps": 246, "loss": 0.4505, "lr": 5e-06, "epoch": 2.676783004552352, "percentage": 89.43, "elapsed_time": "1:32:44", "remaining_time": "0:10:57"}
25
- {"current_steps": 230, "total_steps": 246, "loss": 0.4487, "lr": 5e-06, "epoch": 2.7981790591805766, "percentage": 93.5, "elapsed_time": "1:36:45", "remaining_time": "0:06:43"}
26
- {"current_steps": 240, "total_steps": 246, "loss": 0.4516, "lr": 5e-06, "epoch": 2.919575113808801, "percentage": 97.56, "elapsed_time": "1:40:47", "remaining_time": "0:02:31"}
27
- {"current_steps": 246, "total_steps": 246, "eval_loss": 0.5186759829521179, "epoch": 2.992412746585736, "percentage": 100.0, "elapsed_time": "1:45:28", "remaining_time": "0:00:00"}
28
- {"current_steps": 246, "total_steps": 246, "epoch": 2.992412746585736, "percentage": 100.0, "elapsed_time": "1:47:03", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 10, "total_steps": 246, "loss": 0.7721, "lr": 5e-06, "epoch": 0.12139605462822459, "percentage": 4.07, "elapsed_time": "0:04:01", "remaining_time": "1:34:49"}
2
+ {"current_steps": 20, "total_steps": 246, "loss": 0.6638, "lr": 5e-06, "epoch": 0.24279210925644917, "percentage": 8.13, "elapsed_time": "0:08:02", "remaining_time": "1:30:52"}
3
+ {"current_steps": 30, "total_steps": 246, "loss": 0.6289, "lr": 5e-06, "epoch": 0.36418816388467373, "percentage": 12.2, "elapsed_time": "0:12:04", "remaining_time": "1:26:53"}
4
+ {"current_steps": 40, "total_steps": 246, "loss": 0.6062, "lr": 5e-06, "epoch": 0.48558421851289835, "percentage": 16.26, "elapsed_time": "0:16:05", "remaining_time": "1:22:54"}
5
+ {"current_steps": 50, "total_steps": 246, "loss": 0.5886, "lr": 5e-06, "epoch": 0.6069802731411229, "percentage": 20.33, "elapsed_time": "0:20:05", "remaining_time": "1:18:46"}
6
+ {"current_steps": 60, "total_steps": 246, "loss": 0.5763, "lr": 5e-06, "epoch": 0.7283763277693475, "percentage": 24.39, "elapsed_time": "0:24:07", "remaining_time": "1:14:48"}
7
+ {"current_steps": 70, "total_steps": 246, "loss": 0.5635, "lr": 5e-06, "epoch": 0.849772382397572, "percentage": 28.46, "elapsed_time": "0:28:08", "remaining_time": "1:10:45"}
8
+ {"current_steps": 80, "total_steps": 246, "loss": 0.5578, "lr": 5e-06, "epoch": 0.9711684370257967, "percentage": 32.52, "elapsed_time": "0:32:08", "remaining_time": "1:06:42"}
9
+ {"current_steps": 82, "total_steps": 246, "eval_loss": 0.5475569367408752, "epoch": 0.9954476479514416, "percentage": 33.33, "elapsed_time": "0:33:31", "remaining_time": "1:07:02"}
10
+ {"current_steps": 90, "total_steps": 246, "loss": 0.5739, "lr": 5e-06, "epoch": 1.095599393019727, "percentage": 36.59, "elapsed_time": "0:38:25", "remaining_time": "1:06:36"}
11
+ {"current_steps": 100, "total_steps": 246, "loss": 0.5098, "lr": 5e-06, "epoch": 1.2169954476479514, "percentage": 40.65, "elapsed_time": "0:42:26", "remaining_time": "1:01:58"}
12
+ {"current_steps": 110, "total_steps": 246, "loss": 0.5099, "lr": 5e-06, "epoch": 1.338391502276176, "percentage": 44.72, "elapsed_time": "0:46:27", "remaining_time": "0:57:26"}
13
+ {"current_steps": 120, "total_steps": 246, "loss": 0.5086, "lr": 5e-06, "epoch": 1.4597875569044005, "percentage": 48.78, "elapsed_time": "0:50:28", "remaining_time": "0:52:59"}
14
+ {"current_steps": 130, "total_steps": 246, "loss": 0.4988, "lr": 5e-06, "epoch": 1.5811836115326252, "percentage": 52.85, "elapsed_time": "0:54:28", "remaining_time": "0:48:36"}
15
+ {"current_steps": 140, "total_steps": 246, "loss": 0.4973, "lr": 5e-06, "epoch": 1.7025796661608497, "percentage": 56.91, "elapsed_time": "0:58:29", "remaining_time": "0:44:17"}
16
+ {"current_steps": 150, "total_steps": 246, "loss": 0.5006, "lr": 5e-06, "epoch": 1.8239757207890743, "percentage": 60.98, "elapsed_time": "1:02:31", "remaining_time": "0:40:00"}
17
+ {"current_steps": 160, "total_steps": 246, "loss": 0.4962, "lr": 5e-06, "epoch": 1.945371775417299, "percentage": 65.04, "elapsed_time": "1:06:30", "remaining_time": "0:35:44"}
 
 
 
 
 
 
 
 
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3bcdb0cafad7d25eda78003ba92ecc4fafa5b4bba0964f9c6f6a341b2cd5682
3
  size 7224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c132689d811b51e7252bdcc70f5db4823234c5598d83a9277894faa93faeb501
3
  size 7224