sedrickkeh commited on
Commit
963705b
·
verified ·
1 Parent(s): e8c865c

Training in progress, epoch 1

Browse files
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:508ea172c265d559b15d2fd9e2f5a2500241393c1f6e99a25abab57f291ad6cb
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02b9764647013fde9c3a69bebec5ebfc0293b3d85d923f03718f141916380ffc
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fdd688cc61e35b71346a3605cb982569326960c6aa1233c53597e93290e4d2e
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ae8ffa64dca13106d2ac401979adf36da8370e9d326f698f52058d6329d1fdb
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5a5c6944e5c1f2ba40c569fc54f997c50cb382fe6df8b444ab944d6cf4b8f26
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ada9d9c2d76440ec5e3d41951f087fd1787c589f3456d57a1758f4a448f1c41
3
  size 4540516344
trainer_log.jsonl CHANGED
@@ -1,54 +1,18 @@
1
- {"current_steps": 10, "total_steps": 504, "loss": 0.8829, "lr": 2.631578947368421e-07, "epoch": 0.05952380952380952, "percentage": 1.98, "elapsed_time": "0:05:36", "remaining_time": "4:37:07"}
2
- {"current_steps": 20, "total_steps": 504, "loss": 0.8087, "lr": 5.263157894736842e-07, "epoch": 0.11904761904761904, "percentage": 3.97, "elapsed_time": "0:11:05", "remaining_time": "4:28:33"}
3
- {"current_steps": 30, "total_steps": 504, "loss": 0.7296, "lr": 7.894736842105263e-07, "epoch": 0.17857142857142858, "percentage": 5.95, "elapsed_time": "0:16:34", "remaining_time": "4:21:51"}
4
- {"current_steps": 40, "total_steps": 504, "loss": 0.6712, "lr": 1.0526315789473683e-06, "epoch": 0.23809523809523808, "percentage": 7.94, "elapsed_time": "0:22:03", "remaining_time": "4:15:57"}
5
- {"current_steps": 50, "total_steps": 504, "loss": 0.6321, "lr": 1.3157894736842106e-06, "epoch": 0.2976190476190476, "percentage": 9.92, "elapsed_time": "0:27:34", "remaining_time": "4:10:26"}
6
- {"current_steps": 60, "total_steps": 504, "loss": 0.6026, "lr": 1.5789473684210526e-06, "epoch": 0.35714285714285715, "percentage": 11.9, "elapsed_time": "0:33:04", "remaining_time": "4:04:42"}
7
- {"current_steps": 70, "total_steps": 504, "loss": 0.5884, "lr": 1.8421052631578946e-06, "epoch": 0.4166666666666667, "percentage": 13.89, "elapsed_time": "0:38:33", "remaining_time": "3:59:05"}
8
- {"current_steps": 80, "total_steps": 504, "loss": 0.5783, "lr": 1.9996767546702485e-06, "epoch": 0.47619047619047616, "percentage": 15.87, "elapsed_time": "0:44:03", "remaining_time": "3:53:29"}
9
- {"current_steps": 90, "total_steps": 504, "loss": 0.5706, "lr": 1.996043443883064e-06, "epoch": 0.5357142857142857, "percentage": 17.86, "elapsed_time": "0:49:31", "remaining_time": "3:47:47"}
10
- {"current_steps": 100, "total_steps": 504, "loss": 0.5598, "lr": 1.988392397752233e-06, "epoch": 0.5952380952380952, "percentage": 19.84, "elapsed_time": "0:55:02", "remaining_time": "3:42:20"}
11
- {"current_steps": 110, "total_steps": 504, "loss": 0.5569, "lr": 1.9767648201496052e-06, "epoch": 0.6547619047619048, "percentage": 21.83, "elapsed_time": "1:00:32", "remaining_time": "3:36:51"}
12
- {"current_steps": 120, "total_steps": 504, "loss": 0.5477, "lr": 1.961223330122206e-06, "epoch": 0.7142857142857143, "percentage": 23.81, "elapsed_time": "1:06:02", "remaining_time": "3:31:19"}
13
- {"current_steps": 130, "total_steps": 504, "loss": 0.5459, "lr": 1.941851624664209e-06, "epoch": 0.7738095238095238, "percentage": 25.79, "elapsed_time": "1:11:32", "remaining_time": "3:25:49"}
14
- {"current_steps": 140, "total_steps": 504, "loss": 0.5383, "lr": 1.9187540279759314e-06, "epoch": 0.8333333333333334, "percentage": 27.78, "elapsed_time": "1:17:02", "remaining_time": "3:20:18"}
15
- {"current_steps": 150, "total_steps": 504, "loss": 0.5337, "lr": 1.8920549296372686e-06, "epoch": 0.8928571428571429, "percentage": 29.76, "elapsed_time": "1:22:30", "remaining_time": "3:14:43"}
16
- {"current_steps": 160, "total_steps": 504, "loss": 0.528, "lr": 1.861898114721218e-06, "epoch": 0.9523809523809523, "percentage": 31.75, "elapsed_time": "1:27:58", "remaining_time": "3:09:09"}
17
- {"current_steps": 168, "total_steps": 504, "eval_loss": 0.0656253919005394, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "1:34:17", "remaining_time": "3:08:34"}
18
- {"current_steps": 170, "total_steps": 504, "loss": 0.5223, "lr": 1.8284459894551025e-06, "epoch": 1.0119047619047619, "percentage": 33.73, "elapsed_time": "1:36:06", "remaining_time": "3:08:50"}
19
- {"current_steps": 180, "total_steps": 504, "loss": 0.5032, "lr": 1.7918787065996015e-06, "epoch": 1.0714285714285714, "percentage": 35.71, "elapsed_time": "1:41:34", "remaining_time": "3:02:50"}
20
- {"current_steps": 190, "total_steps": 504, "loss": 0.5004, "lr": 1.7523931952557666e-06, "epoch": 1.130952380952381, "percentage": 37.7, "elapsed_time": "1:47:02", "remaining_time": "2:56:54"}
21
- {"current_steps": 200, "total_steps": 504, "loss": 0.498, "lr": 1.7102021003248955e-06, "epoch": 1.1904761904761905, "percentage": 39.68, "elapsed_time": "1:52:31", "remaining_time": "2:51:02"}
22
- {"current_steps": 210, "total_steps": 504, "loss": 0.4987, "lr": 1.6655326373326793e-06, "epoch": 1.25, "percentage": 41.67, "elapsed_time": "1:58:00", "remaining_time": "2:45:12"}
23
- {"current_steps": 220, "total_steps": 504, "loss": 0.493, "lr": 1.6186253687848507e-06, "epoch": 1.3095238095238095, "percentage": 43.65, "elapsed_time": "2:03:29", "remaining_time": "2:39:25"}
24
- {"current_steps": 230, "total_steps": 504, "loss": 0.4923, "lr": 1.569732908644127e-06, "epoch": 1.369047619047619, "percentage": 45.63, "elapsed_time": "2:08:57", "remaining_time": "2:33:38"}
25
- {"current_steps": 240, "total_steps": 504, "loss": 0.4902, "lr": 1.5191185619053519e-06, "epoch": 1.4285714285714286, "percentage": 47.62, "elapsed_time": "2:14:26", "remaining_time": "2:27:52"}
26
- {"current_steps": 250, "total_steps": 504, "loss": 0.4863, "lr": 1.4670549065952552e-06, "epoch": 1.4880952380952381, "percentage": 49.6, "elapsed_time": "2:19:55", "remaining_time": "2:22:09"}
27
- {"current_steps": 260, "total_steps": 504, "loss": 0.4845, "lr": 1.4138223258333096e-06, "epoch": 1.5476190476190477, "percentage": 51.59, "elapsed_time": "2:25:24", "remaining_time": "2:16:27"}
28
- {"current_steps": 270, "total_steps": 504, "loss": 0.4823, "lr": 1.3597074978591206e-06, "epoch": 1.6071428571428572, "percentage": 53.57, "elapsed_time": "2:30:53", "remaining_time": "2:10:46"}
29
- {"current_steps": 280, "total_steps": 504, "loss": 0.4826, "lr": 1.3050018521581279e-06, "epoch": 1.6666666666666665, "percentage": 55.56, "elapsed_time": "2:36:21", "remaining_time": "2:05:05"}
30
- {"current_steps": 290, "total_steps": 504, "loss": 0.4817, "lr": 1.2499999999999999e-06, "epoch": 1.7261904761904763, "percentage": 57.54, "elapsed_time": "2:41:50", "remaining_time": "1:59:25"}
31
- {"current_steps": 300, "total_steps": 504, "loss": 0.4763, "lr": 1.1949981478418721e-06, "epoch": 1.7857142857142856, "percentage": 59.52, "elapsed_time": "2:47:19", "remaining_time": "1:53:46"}
32
- {"current_steps": 310, "total_steps": 504, "loss": 0.4751, "lr": 1.1402925021408796e-06, "epoch": 1.8452380952380953, "percentage": 61.51, "elapsed_time": "2:52:47", "remaining_time": "1:48:07"}
33
- {"current_steps": 320, "total_steps": 504, "loss": 0.4743, "lr": 1.0861776741666901e-06, "epoch": 1.9047619047619047, "percentage": 63.49, "elapsed_time": "2:58:16", "remaining_time": "1:42:30"}
34
- {"current_steps": 330, "total_steps": 504, "loss": 0.4738, "lr": 1.032945093404745e-06, "epoch": 1.9642857142857144, "percentage": 65.48, "elapsed_time": "3:03:45", "remaining_time": "1:36:53"}
35
- {"current_steps": 336, "total_steps": 504, "eval_loss": 0.061109066009521484, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "3:08:57", "remaining_time": "1:34:28"}
36
- {"current_steps": 340, "total_steps": 504, "loss": 0.4626, "lr": 9.80881438094648e-07, "epoch": 2.0238095238095237, "percentage": 67.46, "elapsed_time": "3:11:58", "remaining_time": "1:32:36"}
37
- {"current_steps": 350, "total_steps": 504, "loss": 0.4514, "lr": 9.302670913558731e-07, "epoch": 2.0833333333333335, "percentage": 69.44, "elapsed_time": "3:17:27", "remaining_time": "1:26:52"}
38
- {"current_steps": 360, "total_steps": 504, "loss": 0.4466, "lr": 8.813746312151494e-07, "epoch": 2.142857142857143, "percentage": 71.43, "elapsed_time": "3:22:55", "remaining_time": "1:21:10"}
39
- {"current_steps": 370, "total_steps": 504, "loss": 0.4461, "lr": 8.344673626673205e-07, "epoch": 2.2023809523809526, "percentage": 73.41, "elapsed_time": "3:28:24", "remaining_time": "1:15:28"}
40
- {"current_steps": 380, "total_steps": 504, "loss": 0.4491, "lr": 7.897978996751046e-07, "epoch": 2.261904761904762, "percentage": 75.4, "elapsed_time": "3:33:53", "remaining_time": "1:09:47"}
41
- {"current_steps": 390, "total_steps": 504, "loss": 0.4443, "lr": 7.476068047442332e-07, "epoch": 2.3214285714285716, "percentage": 77.38, "elapsed_time": "3:39:22", "remaining_time": "1:04:07"}
42
- {"current_steps": 400, "total_steps": 504, "loss": 0.4435, "lr": 7.081212934003984e-07, "epoch": 2.380952380952381, "percentage": 79.37, "elapsed_time": "3:44:49", "remaining_time": "0:58:27"}
43
- {"current_steps": 410, "total_steps": 504, "loss": 0.4428, "lr": 6.715540105448972e-07, "epoch": 2.4404761904761907, "percentage": 81.35, "elapsed_time": "3:50:17", "remaining_time": "0:52:47"}
44
- {"current_steps": 420, "total_steps": 504, "loss": 0.4427, "lr": 6.381018852787821e-07, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "3:55:46", "remaining_time": "0:47:09"}
45
- {"current_steps": 430, "total_steps": 504, "loss": 0.4443, "lr": 6.079450703627314e-07, "epoch": 2.5595238095238093, "percentage": 85.32, "elapsed_time": "4:01:14", "remaining_time": "0:41:30"}
46
- {"current_steps": 440, "total_steps": 504, "loss": 0.4448, "lr": 5.812459720240681e-07, "epoch": 2.619047619047619, "percentage": 87.3, "elapsed_time": "4:06:43", "remaining_time": "0:35:53"}
47
- {"current_steps": 450, "total_steps": 504, "loss": 0.4425, "lr": 5.581483753357905e-07, "epoch": 2.678571428571429, "percentage": 89.29, "elapsed_time": "4:12:12", "remaining_time": "0:30:15"}
48
- {"current_steps": 460, "total_steps": 504, "loss": 0.4438, "lr": 5.387766698777935e-07, "epoch": 2.738095238095238, "percentage": 91.27, "elapsed_time": "4:17:41", "remaining_time": "0:24:38"}
49
- {"current_steps": 470, "total_steps": 504, "loss": 0.4423, "lr": 5.232351798503945e-07, "epoch": 2.7976190476190474, "percentage": 93.25, "elapsed_time": "4:23:10", "remaining_time": "0:19:02"}
50
- {"current_steps": 480, "total_steps": 504, "loss": 0.4416, "lr": 5.116076022477671e-07, "epoch": 2.857142857142857, "percentage": 95.24, "elapsed_time": "4:28:39", "remaining_time": "0:13:25"}
51
- {"current_steps": 490, "total_steps": 504, "loss": 0.4385, "lr": 5.039565561169362e-07, "epoch": 2.9166666666666665, "percentage": 97.22, "elapsed_time": "4:34:08", "remaining_time": "0:07:49"}
52
- {"current_steps": 500, "total_steps": 504, "loss": 0.4405, "lr": 5.003232453297512e-07, "epoch": 2.9761904761904763, "percentage": 99.21, "elapsed_time": "4:39:37", "remaining_time": "0:02:14"}
53
- {"current_steps": 504, "total_steps": 504, "eval_loss": 0.05991922318935394, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "4:44:35", "remaining_time": "0:00:00"}
54
- {"current_steps": 504, "total_steps": 504, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "4:45:17", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 10, "total_steps": 504, "loss": 0.8828, "lr": 2.631578947368421e-07, "epoch": 0.05952380952380952, "percentage": 1.98, "elapsed_time": "0:05:39", "remaining_time": "4:39:47"}
2
+ {"current_steps": 20, "total_steps": 504, "loss": 0.8087, "lr": 5.263157894736842e-07, "epoch": 0.11904761904761904, "percentage": 3.97, "elapsed_time": "0:11:09", "remaining_time": "4:30:12"}
3
+ {"current_steps": 30, "total_steps": 504, "loss": 0.7295, "lr": 7.894736842105263e-07, "epoch": 0.17857142857142858, "percentage": 5.95, "elapsed_time": "0:16:38", "remaining_time": "4:22:50"}
4
+ {"current_steps": 40, "total_steps": 504, "loss": 0.6712, "lr": 1.0526315789473683e-06, "epoch": 0.23809523809523808, "percentage": 7.94, "elapsed_time": "0:22:07", "remaining_time": "4:16:44"}
5
+ {"current_steps": 50, "total_steps": 504, "loss": 0.6325, "lr": 1.3157894736842106e-06, "epoch": 0.2976190476190476, "percentage": 9.92, "elapsed_time": "0:27:36", "remaining_time": "4:10:43"}
6
+ {"current_steps": 60, "total_steps": 504, "loss": 0.6019, "lr": 1.5789473684210526e-06, "epoch": 0.35714285714285715, "percentage": 11.9, "elapsed_time": "0:33:05", "remaining_time": "4:04:50"}
7
+ {"current_steps": 70, "total_steps": 504, "loss": 0.5851, "lr": 1.8421052631578946e-06, "epoch": 0.4166666666666667, "percentage": 13.89, "elapsed_time": "0:38:34", "remaining_time": "3:59:07"}
8
+ {"current_steps": 80, "total_steps": 504, "loss": 0.5748, "lr": 1.9996767546702485e-06, "epoch": 0.47619047619047616, "percentage": 15.87, "elapsed_time": "0:44:02", "remaining_time": "3:53:23"}
9
+ {"current_steps": 90, "total_steps": 504, "loss": 0.5678, "lr": 1.996043443883064e-06, "epoch": 0.5357142857142857, "percentage": 17.86, "elapsed_time": "0:49:31", "remaining_time": "3:47:49"}
10
+ {"current_steps": 100, "total_steps": 504, "loss": 0.5575, "lr": 1.988392397752233e-06, "epoch": 0.5952380952380952, "percentage": 19.84, "elapsed_time": "0:55:00", "remaining_time": "3:42:15"}
11
+ {"current_steps": 110, "total_steps": 504, "loss": 0.5551, "lr": 1.9767648201496052e-06, "epoch": 0.6547619047619048, "percentage": 21.83, "elapsed_time": "1:00:30", "remaining_time": "3:36:42"}
12
+ {"current_steps": 120, "total_steps": 504, "loss": 0.5468, "lr": 1.961223330122206e-06, "epoch": 0.7142857142857143, "percentage": 23.81, "elapsed_time": "1:05:57", "remaining_time": "3:31:05"}
13
+ {"current_steps": 130, "total_steps": 504, "loss": 0.5452, "lr": 1.941851624664209e-06, "epoch": 0.7738095238095238, "percentage": 25.79, "elapsed_time": "1:11:27", "remaining_time": "3:25:34"}
14
+ {"current_steps": 140, "total_steps": 504, "loss": 0.5381, "lr": 1.9187540279759314e-06, "epoch": 0.8333333333333334, "percentage": 27.78, "elapsed_time": "1:16:56", "remaining_time": "3:20:01"}
15
+ {"current_steps": 150, "total_steps": 504, "loss": 0.5341, "lr": 1.8920549296372686e-06, "epoch": 0.8928571428571429, "percentage": 29.76, "elapsed_time": "1:22:26", "remaining_time": "3:14:33"}
16
+ {"current_steps": 160, "total_steps": 504, "loss": 0.5294, "lr": 1.861898114721218e-06, "epoch": 0.9523809523809523, "percentage": 31.75, "elapsed_time": "1:27:55", "remaining_time": "3:09:03"}
17
+ {"current_steps": 168, "total_steps": 504, "eval_loss": 0.06584873795509338, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "1:34:14", "remaining_time": "3:08:28"}
18
+ {"current_steps": 170, "total_steps": 504, "loss": 0.5242, "lr": 1.8284459894551025e-06, "epoch": 1.0119047619047619, "percentage": 33.73, "elapsed_time": "1:36:04", "remaining_time": "3:08:46"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:081d29460e3c00397f114a395be1a1582876ee72eb0ad4177baaffb8b3447791
3
  size 7288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f6f96eaf6461b1b291c755e7e3b767fac0e0f608e769d896ea52944015ebaf6
3
  size 7288