sedrickkeh commited on
Commit
5926430
1 Parent(s): ab8e5ce

Training in progress, epoch 1

Browse files
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edb96e1d96439883184a705d183e4727b2fa1ffaeb2cfe902ad47c9678db072b
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ceb83a822514418c85c85f11a2afcf83106708f1601b8d708d1489979762468
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbc1619336dbf832289dfa4c1dc5ad4c6c441a867be50ac31a67484c25a1bc28
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6943bf53c42c4c17f1d40a763d87ae3f4e6a413415d5ae62bda815cb6f864a52
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aaa0f019abfca79b870910b4e63efcb91487a12442e726054fa4ddc6d4c38ca1
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:653a97732bbf16a96518bca56c8daf90f3eb0cd0c43a5b170899a370169a6b6f
3
  size 4540516344
trainer_log.jsonl CHANGED
@@ -48,3 +48,54 @@
48
  {"current_steps": 480, "total_steps": 1479, "loss": 0.5568, "learning_rate": 8e-06, "epoch": 0.9721518987341772, "percentage": 32.45, "elapsed_time": "7:28:19", "remaining_time": "15:33:04"}
49
  {"current_steps": 490, "total_steps": 1479, "loss": 0.554, "learning_rate": 8e-06, "epoch": 0.9924050632911392, "percentage": 33.13, "elapsed_time": "7:37:41", "remaining_time": "15:23:46"}
50
  {"current_steps": 493, "total_steps": 1479, "eval_loss": 0.06964311003684998, "epoch": 0.9984810126582279, "percentage": 33.33, "elapsed_time": "7:49:38", "remaining_time": "15:39:17"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  {"current_steps": 480, "total_steps": 1479, "loss": 0.5568, "learning_rate": 8e-06, "epoch": 0.9721518987341772, "percentage": 32.45, "elapsed_time": "7:28:19", "remaining_time": "15:33:04"}
49
  {"current_steps": 490, "total_steps": 1479, "loss": 0.554, "learning_rate": 8e-06, "epoch": 0.9924050632911392, "percentage": 33.13, "elapsed_time": "7:37:41", "remaining_time": "15:23:46"}
50
  {"current_steps": 493, "total_steps": 1479, "eval_loss": 0.06964311003684998, "epoch": 0.9984810126582279, "percentage": 33.33, "elapsed_time": "7:49:38", "remaining_time": "15:39:17"}
51
+ {"current_steps": 500, "total_steps": 1479, "loss": 0.4992, "learning_rate": 8e-06, "epoch": 1.0126582278481013, "percentage": 33.81, "elapsed_time": "7:56:36", "remaining_time": "15:33:12"}
52
+ {"current_steps": 510, "total_steps": 1479, "loss": 0.4536, "learning_rate": 8e-06, "epoch": 1.0329113924050632, "percentage": 34.48, "elapsed_time": "8:05:58", "remaining_time": "15:23:21"}
53
+ {"current_steps": 520, "total_steps": 1479, "loss": 0.447, "learning_rate": 8e-06, "epoch": 1.0531645569620254, "percentage": 35.16, "elapsed_time": "8:15:20", "remaining_time": "15:13:31"}
54
+ {"current_steps": 530, "total_steps": 1479, "loss": 0.4475, "learning_rate": 8e-06, "epoch": 1.0734177215189873, "percentage": 35.84, "elapsed_time": "8:24:41", "remaining_time": "15:03:41"}
55
+ {"current_steps": 540, "total_steps": 1479, "loss": 0.4429, "learning_rate": 8e-06, "epoch": 1.0936708860759494, "percentage": 36.51, "elapsed_time": "8:34:01", "remaining_time": "14:53:50"}
56
+ {"current_steps": 550, "total_steps": 1479, "loss": 0.4466, "learning_rate": 8e-06, "epoch": 1.1139240506329113, "percentage": 37.19, "elapsed_time": "8:43:21", "remaining_time": "14:44:00"}
57
+ {"current_steps": 560, "total_steps": 1479, "loss": 0.4446, "learning_rate": 8e-06, "epoch": 1.1341772151898735, "percentage": 37.86, "elapsed_time": "8:52:43", "remaining_time": "14:34:14"}
58
+ {"current_steps": 570, "total_steps": 1479, "loss": 0.4521, "learning_rate": 8e-06, "epoch": 1.1544303797468354, "percentage": 38.54, "elapsed_time": "9:02:05", "remaining_time": "14:24:30"}
59
+ {"current_steps": 580, "total_steps": 1479, "loss": 0.4521, "learning_rate": 8e-06, "epoch": 1.1746835443037975, "percentage": 39.22, "elapsed_time": "9:11:27", "remaining_time": "14:14:46"}
60
+ {"current_steps": 590, "total_steps": 1479, "loss": 0.4475, "learning_rate": 8e-06, "epoch": 1.1949367088607594, "percentage": 39.89, "elapsed_time": "9:20:49", "remaining_time": "14:05:02"}
61
+ {"current_steps": 600, "total_steps": 1479, "loss": 0.4561, "learning_rate": 8e-06, "epoch": 1.2151898734177216, "percentage": 40.57, "elapsed_time": "9:30:10", "remaining_time": "13:55:18"}
62
+ {"current_steps": 610, "total_steps": 1479, "loss": 0.4527, "learning_rate": 8e-06, "epoch": 1.2354430379746835, "percentage": 41.24, "elapsed_time": "9:39:31", "remaining_time": "13:45:35"}
63
+ {"current_steps": 620, "total_steps": 1479, "loss": 0.4563, "learning_rate": 8e-06, "epoch": 1.2556962025316456, "percentage": 41.92, "elapsed_time": "9:48:53", "remaining_time": "13:35:54"}
64
+ {"current_steps": 630, "total_steps": 1479, "loss": 0.4562, "learning_rate": 8e-06, "epoch": 1.2759493670886077, "percentage": 42.6, "elapsed_time": "9:58:14", "remaining_time": "13:26:12"}
65
+ {"current_steps": 640, "total_steps": 1479, "loss": 0.4556, "learning_rate": 8e-06, "epoch": 1.2962025316455696, "percentage": 43.27, "elapsed_time": "10:07:35", "remaining_time": "13:16:31"}
66
+ {"current_steps": 650, "total_steps": 1479, "loss": 0.4594, "learning_rate": 8e-06, "epoch": 1.3164556962025316, "percentage": 43.95, "elapsed_time": "10:16:57", "remaining_time": "13:06:51"}
67
+ {"current_steps": 660, "total_steps": 1479, "loss": 0.4487, "learning_rate": 8e-06, "epoch": 1.3367088607594937, "percentage": 44.62, "elapsed_time": "10:26:18", "remaining_time": "12:57:11"}
68
+ {"current_steps": 670, "total_steps": 1479, "loss": 0.4633, "learning_rate": 8e-06, "epoch": 1.3569620253164558, "percentage": 45.3, "elapsed_time": "10:35:39", "remaining_time": "12:47:32"}
69
+ {"current_steps": 680, "total_steps": 1479, "loss": 0.4604, "learning_rate": 8e-06, "epoch": 1.3772151898734177, "percentage": 45.98, "elapsed_time": "10:45:01", "remaining_time": "12:37:54"}
70
+ {"current_steps": 690, "total_steps": 1479, "loss": 0.4593, "learning_rate": 8e-06, "epoch": 1.3974683544303796, "percentage": 46.65, "elapsed_time": "10:54:22", "remaining_time": "12:28:15"}
71
+ {"current_steps": 700, "total_steps": 1479, "loss": 0.4561, "learning_rate": 8e-06, "epoch": 1.4177215189873418, "percentage": 47.33, "elapsed_time": "11:03:43", "remaining_time": "12:18:38"}
72
+ {"current_steps": 710, "total_steps": 1479, "loss": 0.4616, "learning_rate": 8e-06, "epoch": 1.437974683544304, "percentage": 48.01, "elapsed_time": "11:13:06", "remaining_time": "12:09:02"}
73
+ {"current_steps": 720, "total_steps": 1479, "loss": 0.4623, "learning_rate": 8e-06, "epoch": 1.4582278481012658, "percentage": 48.68, "elapsed_time": "11:22:28", "remaining_time": "11:59:26"}
74
+ {"current_steps": 730, "total_steps": 1479, "loss": 0.4659, "learning_rate": 8e-06, "epoch": 1.4784810126582277, "percentage": 49.36, "elapsed_time": "11:31:49", "remaining_time": "11:49:49"}
75
+ {"current_steps": 740, "total_steps": 1479, "loss": 0.4584, "learning_rate": 8e-06, "epoch": 1.4987341772151899, "percentage": 50.03, "elapsed_time": "11:41:11", "remaining_time": "11:40:14"}
76
+ {"current_steps": 750, "total_steps": 1479, "loss": 0.461, "learning_rate": 8e-06, "epoch": 1.518987341772152, "percentage": 50.71, "elapsed_time": "11:50:31", "remaining_time": "11:30:38"}
77
+ {"current_steps": 760, "total_steps": 1479, "loss": 0.4614, "learning_rate": 8e-06, "epoch": 1.539240506329114, "percentage": 51.39, "elapsed_time": "11:59:54", "remaining_time": "11:21:03"}
78
+ {"current_steps": 770, "total_steps": 1479, "loss": 0.4605, "learning_rate": 8e-06, "epoch": 1.5594936708860758, "percentage": 52.06, "elapsed_time": "12:09:14", "remaining_time": "11:11:28"}
79
+ {"current_steps": 780, "total_steps": 1479, "loss": 0.461, "learning_rate": 8e-06, "epoch": 1.579746835443038, "percentage": 52.74, "elapsed_time": "12:18:34", "remaining_time": "11:01:52"}
80
+ {"current_steps": 790, "total_steps": 1479, "loss": 0.464, "learning_rate": 8e-06, "epoch": 1.6, "percentage": 53.41, "elapsed_time": "12:27:55", "remaining_time": "10:52:18"}
81
+ {"current_steps": 800, "total_steps": 1479, "loss": 0.4585, "learning_rate": 8e-06, "epoch": 1.620253164556962, "percentage": 54.09, "elapsed_time": "12:37:16", "remaining_time": "10:42:44"}
82
+ {"current_steps": 810, "total_steps": 1479, "loss": 0.4505, "learning_rate": 8e-06, "epoch": 1.640506329113924, "percentage": 54.77, "elapsed_time": "12:46:38", "remaining_time": "10:33:10"}
83
+ {"current_steps": 820, "total_steps": 1479, "loss": 0.4582, "learning_rate": 8e-06, "epoch": 1.660759493670886, "percentage": 55.44, "elapsed_time": "12:55:59", "remaining_time": "10:23:38"}
84
+ {"current_steps": 830, "total_steps": 1479, "loss": 0.4546, "learning_rate": 8e-06, "epoch": 1.6810126582278482, "percentage": 56.12, "elapsed_time": "13:05:21", "remaining_time": "10:14:05"}
85
+ {"current_steps": 840, "total_steps": 1479, "loss": 0.457, "learning_rate": 8e-06, "epoch": 1.70126582278481, "percentage": 56.8, "elapsed_time": "13:14:43", "remaining_time": "10:04:33"}
86
+ {"current_steps": 850, "total_steps": 1479, "loss": 0.46, "learning_rate": 8e-06, "epoch": 1.721518987341772, "percentage": 57.47, "elapsed_time": "13:24:04", "remaining_time": "9:55:00"}
87
+ {"current_steps": 860, "total_steps": 1479, "loss": 0.4609, "learning_rate": 8e-06, "epoch": 1.7417721518987341, "percentage": 58.15, "elapsed_time": "13:33:26", "remaining_time": "9:45:29"}
88
+ {"current_steps": 870, "total_steps": 1479, "loss": 0.4656, "learning_rate": 8e-06, "epoch": 1.7620253164556963, "percentage": 58.82, "elapsed_time": "13:42:49", "remaining_time": "9:35:58"}
89
+ {"current_steps": 880, "total_steps": 1479, "loss": 0.4674, "learning_rate": 8e-06, "epoch": 1.7822784810126582, "percentage": 59.5, "elapsed_time": "13:52:10", "remaining_time": "9:26:27"}
90
+ {"current_steps": 890, "total_steps": 1479, "loss": 0.4653, "learning_rate": 8e-06, "epoch": 1.80253164556962, "percentage": 60.18, "elapsed_time": "14:01:33", "remaining_time": "9:16:56"}
91
+ {"current_steps": 900, "total_steps": 1479, "loss": 0.4643, "learning_rate": 8e-06, "epoch": 1.8227848101265822, "percentage": 60.85, "elapsed_time": "14:10:56", "remaining_time": "9:07:26"}
92
+ {"current_steps": 910, "total_steps": 1479, "loss": 0.4668, "learning_rate": 8e-06, "epoch": 1.8430379746835444, "percentage": 61.53, "elapsed_time": "14:20:17", "remaining_time": "8:57:55"}
93
+ {"current_steps": 920, "total_steps": 1479, "loss": 0.4652, "learning_rate": 8e-06, "epoch": 1.8632911392405065, "percentage": 62.2, "elapsed_time": "14:29:40", "remaining_time": "8:48:25"}
94
+ {"current_steps": 930, "total_steps": 1479, "loss": 0.4623, "learning_rate": 8e-06, "epoch": 1.8835443037974684, "percentage": 62.88, "elapsed_time": "14:39:03", "remaining_time": "8:38:55"}
95
+ {"current_steps": 940, "total_steps": 1479, "loss": 0.4659, "learning_rate": 8e-06, "epoch": 1.9037974683544303, "percentage": 63.56, "elapsed_time": "14:48:26", "remaining_time": "8:29:25"}
96
+ {"current_steps": 950, "total_steps": 1479, "loss": 0.4669, "learning_rate": 8e-06, "epoch": 1.9240506329113924, "percentage": 64.23, "elapsed_time": "14:57:48", "remaining_time": "8:19:56"}
97
+ {"current_steps": 960, "total_steps": 1479, "loss": 0.4708, "learning_rate": 8e-06, "epoch": 1.9443037974683546, "percentage": 64.91, "elapsed_time": "15:07:09", "remaining_time": "8:10:26"}
98
+ {"current_steps": 970, "total_steps": 1479, "loss": 0.4691, "learning_rate": 8e-06, "epoch": 1.9645569620253165, "percentage": 65.58, "elapsed_time": "15:16:31", "remaining_time": "8:00:56"}
99
+ {"current_steps": 980, "total_steps": 1479, "loss": 0.4701, "learning_rate": 8e-06, "epoch": 1.9848101265822784, "percentage": 66.26, "elapsed_time": "15:25:53", "remaining_time": "7:51:26"}
100
+ {"current_steps": 987, "total_steps": 1479, "eval_loss": 0.07048599421977997, "epoch": 1.998987341772152, "percentage": 66.73, "elapsed_time": "15:41:21", "remaining_time": "7:49:15"}
101
+ {"current_steps": 990, "total_steps": 1479, "loss": 0.4369, "learning_rate": 8e-06, "epoch": 2.0050632911392405, "percentage": 66.94, "elapsed_time": "15:44:49", "remaining_time": "7:46:41"}