sedrickkeh commited on
Commit
8bb843c
1 Parent(s): a6f3686

Training in progress, epoch 2

Browse files
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f3a107c6e0ed1fbc7b3549332ebb23acf5adcaf57da83b02a5028d2d89a35af
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:577eff2dccce1fdd8c57437ed22f7c59d51919116fcabe7244f3aac41168a366
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9539d496e4ecc57d57646198b1bf89e75c324985231a43339485b45aeaf0451
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d1c1caebb7ccb1db823366f8e271ed35e4901c28267cc4d9e358a5808dd7937
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22df2dc13287015c1f94270ab9681fe415da739e2d8f7b9d9f6040023318e623
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73780b4fc568926cfa4e3f8207adbd23922720b4dbee3145c01be39cafe2ba44
3
  size 4540516344
trainer_log.jsonl CHANGED
@@ -134,3 +134,70 @@
134
  {"current_steps": 1330, "total_steps": 2013, "loss": 0.3556, "lr": 1.7742374487261275e-06, "epoch": 1.9804651162790696, "percentage": 66.07, "elapsed_time": "12:01:54", "remaining_time": "6:10:43"}
135
  {"current_steps": 1340, "total_steps": 2013, "loss": 0.3593, "lr": 1.7410584567235063e-06, "epoch": 1.9953488372093022, "percentage": 66.57, "elapsed_time": "12:07:16", "remaining_time": "6:05:15"}
136
  {"current_steps": 1342, "total_steps": 2013, "eval_loss": 0.05562544986605644, "epoch": 1.9983255813953489, "percentage": 66.67, "elapsed_time": "12:16:08", "remaining_time": "6:08:04"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  {"current_steps": 1330, "total_steps": 2013, "loss": 0.3556, "lr": 1.7742374487261275e-06, "epoch": 1.9804651162790696, "percentage": 66.07, "elapsed_time": "12:01:54", "remaining_time": "6:10:43"}
135
  {"current_steps": 1340, "total_steps": 2013, "loss": 0.3593, "lr": 1.7410584567235063e-06, "epoch": 1.9953488372093022, "percentage": 66.57, "elapsed_time": "12:07:16", "remaining_time": "6:05:15"}
136
  {"current_steps": 1342, "total_steps": 2013, "eval_loss": 0.05562544986605644, "epoch": 1.9983255813953489, "percentage": 66.67, "elapsed_time": "12:16:08", "remaining_time": "6:08:04"}
137
+ {"current_steps": 1350, "total_steps": 2013, "loss": 0.2974, "lr": 1.7081518478491024e-06, "epoch": 2.0111627906976746, "percentage": 67.06, "elapsed_time": "12:21:16", "remaining_time": "6:04:03"}
138
+ {"current_steps": 1360, "total_steps": 2013, "loss": 0.2705, "lr": 1.6755265058733625e-06, "epoch": 2.026046511627907, "percentage": 67.56, "elapsed_time": "12:26:36", "remaining_time": "5:58:28"}
139
+ {"current_steps": 1370, "total_steps": 2013, "loss": 0.2715, "lr": 1.6431912386333337e-06, "epoch": 2.0409302325581393, "percentage": 68.06, "elapsed_time": "12:31:58", "remaining_time": "5:52:56"}
140
+ {"current_steps": 1380, "total_steps": 2013, "loss": 0.2711, "lr": 1.61115477565483e-06, "epoch": 2.055813953488372, "percentage": 68.55, "elapsed_time": "12:37:21", "remaining_time": "5:47:23"}
141
+ {"current_steps": 1390, "total_steps": 2013, "loss": 0.2678, "lr": 1.5794257657957149e-06, "epoch": 2.0706976744186045, "percentage": 69.05, "elapsed_time": "12:42:42", "remaining_time": "5:41:50"}
142
+ {"current_steps": 1400, "total_steps": 2013, "loss": 0.2715, "lr": 1.5480127749109867e-06, "epoch": 2.085581395348837, "percentage": 69.55, "elapsed_time": "12:48:04", "remaining_time": "5:36:18"}
143
+ {"current_steps": 1410, "total_steps": 2013, "loss": 0.268, "lr": 1.516924283540257e-06, "epoch": 2.1004651162790697, "percentage": 70.04, "elapsed_time": "12:53:26", "remaining_time": "5:30:46"}
144
+ {"current_steps": 1420, "total_steps": 2013, "loss": 0.2666, "lr": 1.486168684618268e-06, "epoch": 2.1153488372093023, "percentage": 70.54, "elapsed_time": "12:58:49", "remaining_time": "5:25:14"}
145
+ {"current_steps": 1430, "total_steps": 2013, "loss": 0.2698, "lr": 1.4557542812090574e-06, "epoch": 2.130232558139535, "percentage": 71.04, "elapsed_time": "13:04:09", "remaining_time": "5:19:41"}
146
+ {"current_steps": 1440, "total_steps": 2013, "loss": 0.2675, "lr": 1.4256892842643893e-06, "epoch": 2.1451162790697675, "percentage": 71.54, "elapsed_time": "13:09:31", "remaining_time": "5:14:09"}
147
+ {"current_steps": 1450, "total_steps": 2013, "loss": 0.2687, "lr": 1.3959818104070452e-06, "epoch": 2.16, "percentage": 72.03, "elapsed_time": "13:14:54", "remaining_time": "5:08:38"}
148
+ {"current_steps": 1460, "total_steps": 2013, "loss": 0.2707, "lr": 1.3666398797395948e-06, "epoch": 2.1748837209302327, "percentage": 72.53, "elapsed_time": "13:20:14", "remaining_time": "5:03:06"}
149
+ {"current_steps": 1470, "total_steps": 2013, "loss": 0.2665, "lr": 1.3376714136792034e-06, "epoch": 2.1897674418604653, "percentage": 73.03, "elapsed_time": "13:25:34", "remaining_time": "4:57:34"}
150
+ {"current_steps": 1480, "total_steps": 2013, "loss": 0.2699, "lr": 1.3090842328191053e-06, "epoch": 2.2046511627906975, "percentage": 73.52, "elapsed_time": "13:30:57", "remaining_time": "4:52:03"}
151
+ {"current_steps": 1490, "total_steps": 2013, "loss": 0.2709, "lr": 1.280886054817277e-06, "epoch": 2.21953488372093, "percentage": 74.02, "elapsed_time": "13:36:19", "remaining_time": "4:46:32"}
152
+ {"current_steps": 1500, "total_steps": 2013, "loss": 0.2712, "lr": 1.2530844923129096e-06, "epoch": 2.2344186046511627, "percentage": 74.52, "elapsed_time": "13:41:41", "remaining_time": "4:41:00"}
153
+ {"current_steps": 1510, "total_steps": 2013, "loss": 0.2705, "lr": 1.225687050871231e-06, "epoch": 2.2493023255813953, "percentage": 75.01, "elapsed_time": "13:47:03", "remaining_time": "4:35:30"}
154
+ {"current_steps": 1520, "total_steps": 2013, "loss": 0.2701, "lr": 1.1987011269572357e-06, "epoch": 2.264186046511628, "percentage": 75.51, "elapsed_time": "13:52:25", "remaining_time": "4:29:59"}
155
+ {"current_steps": 1530, "total_steps": 2013, "loss": 0.2672, "lr": 1.1721340059388617e-06, "epoch": 2.2790697674418605, "percentage": 76.01, "elapsed_time": "13:57:47", "remaining_time": "4:24:28"}
156
+ {"current_steps": 1540, "total_steps": 2013, "loss": 0.2696, "lr": 1.1459928601201756e-06, "epoch": 2.293953488372093, "percentage": 76.5, "elapsed_time": "14:03:08", "remaining_time": "4:18:57"}
157
+ {"current_steps": 1550, "total_steps": 2013, "loss": 0.2689, "lr": 1.1202847468050597e-06, "epoch": 2.3088372093023257, "percentage": 77.0, "elapsed_time": "14:08:30", "remaining_time": "4:13:27"}
158
+ {"current_steps": 1560, "total_steps": 2013, "loss": 0.2701, "lr": 1.0950166063919694e-06, "epoch": 2.3237209302325583, "percentage": 77.5, "elapsed_time": "14:13:51", "remaining_time": "4:07:56"}
159
+ {"current_steps": 1570, "total_steps": 2013, "loss": 0.2676, "lr": 1.0701952605002275e-06, "epoch": 2.3386046511627905, "percentage": 77.99, "elapsed_time": "14:19:12", "remaining_time": "4:02:26"}
160
+ {"current_steps": 1580, "total_steps": 2013, "loss": 0.2661, "lr": 1.045827410128407e-06, "epoch": 2.353488372093023, "percentage": 78.49, "elapsed_time": "14:24:33", "remaining_time": "3:56:56"}
161
+ {"current_steps": 1590, "total_steps": 2013, "loss": 0.2689, "lr": 1.0219196338452623e-06, "epoch": 2.3683720930232557, "percentage": 78.99, "elapsed_time": "14:29:55", "remaining_time": "3:51:25"}
162
+ {"current_steps": 1600, "total_steps": 2013, "loss": 0.2676, "lr": 9.984783860137213e-07, "epoch": 2.3832558139534883, "percentage": 79.48, "elapsed_time": "14:35:15", "remaining_time": "3:45:55"}
163
+ {"current_steps": 1610, "total_steps": 2013, "loss": 0.2681, "lr": 9.75509995048404e-07, "epoch": 2.398139534883721, "percentage": 79.98, "elapsed_time": "14:40:37", "remaining_time": "3:40:25"}
164
+ {"current_steps": 1620, "total_steps": 2013, "loss": 0.2695, "lr": 9.53020661707148e-07, "epoch": 2.4130232558139535, "percentage": 80.48, "elapsed_time": "14:45:58", "remaining_time": "3:34:55"}
165
+ {"current_steps": 1630, "total_steps": 2013, "loss": 0.2661, "lr": 9.310164574169911e-07, "epoch": 2.427906976744186, "percentage": 80.97, "elapsed_time": "14:51:18", "remaining_time": "3:29:25"}
166
+ {"current_steps": 1640, "total_steps": 2013, "loss": 0.2682, "lr": 9.095033226350787e-07, "epoch": 2.4427906976744187, "percentage": 81.47, "elapsed_time": "14:56:39", "remaining_time": "3:23:56"}
167
+ {"current_steps": 1650, "total_steps": 2013, "loss": 0.2683, "lr": 8.884870652449176e-07, "epoch": 2.4576744186046513, "percentage": 81.97, "elapsed_time": "15:02:00", "remaining_time": "3:18:26"}
168
+ {"current_steps": 1660, "total_steps": 2013, "loss": 0.2676, "lr": 8.679733589884308e-07, "epoch": 2.472558139534884, "percentage": 82.46, "elapsed_time": "15:07:21", "remaining_time": "3:12:57"}
169
+ {"current_steps": 1670, "total_steps": 2013, "loss": 0.2675, "lr": 8.479677419342195e-07, "epoch": 2.4874418604651165, "percentage": 82.96, "elapsed_time": "15:12:43", "remaining_time": "3:07:27"}
170
+ {"current_steps": 1680, "total_steps": 2013, "loss": 0.2691, "lr": 8.284756149824561e-07, "epoch": 2.5023255813953487, "percentage": 83.46, "elapsed_time": "15:18:04", "remaining_time": "3:01:58"}
171
+ {"current_steps": 1690, "total_steps": 2013, "loss": 0.269, "lr": 8.095022404068078e-07, "epoch": 2.5172093023255813, "percentage": 83.95, "elapsed_time": "15:23:24", "remaining_time": "2:56:29"}
172
+ {"current_steps": 1700, "total_steps": 2013, "loss": 0.2687, "lr": 7.910527404337846e-07, "epoch": 2.532093023255814, "percentage": 84.45, "elapsed_time": "15:28:47", "remaining_time": "2:51:00"}
173
+ {"current_steps": 1710, "total_steps": 2013, "loss": 0.2687, "lr": 7.731320958598944e-07, "epoch": 2.5469767441860465, "percentage": 84.95, "elapsed_time": "15:34:09", "remaining_time": "2:45:31"}
174
+ {"current_steps": 1720, "total_steps": 2013, "loss": 0.2686, "lr": 7.557451447069862e-07, "epoch": 2.561860465116279, "percentage": 85.44, "elapsed_time": "15:39:30", "remaining_time": "2:40:02"}
175
+ {"current_steps": 1730, "total_steps": 2013, "loss": 0.273, "lr": 7.388965809161264e-07, "epoch": 2.5767441860465117, "percentage": 85.94, "elapsed_time": "15:44:51", "remaining_time": "2:34:33"}
176
+ {"current_steps": 1740, "total_steps": 2013, "loss": 0.2702, "lr": 7.225909530803849e-07, "epoch": 2.5916279069767443, "percentage": 86.44, "elapsed_time": "15:50:12", "remaining_time": "2:29:05"}
177
+ {"current_steps": 1750, "total_steps": 2013, "loss": 0.2682, "lr": 7.068326632168529e-07, "epoch": 2.606511627906977, "percentage": 86.93, "elapsed_time": "15:55:34", "remaining_time": "2:23:36"}
178
+ {"current_steps": 1760, "total_steps": 2013, "loss": 0.268, "lr": 6.91625965578234e-07, "epoch": 2.6213953488372095, "percentage": 87.43, "elapsed_time": "16:00:57", "remaining_time": "2:18:08"}
179
+ {"current_steps": 1770, "total_steps": 2013, "loss": 0.2678, "lr": 6.769749655043278e-07, "epoch": 2.6362790697674416, "percentage": 87.93, "elapsed_time": "16:06:18", "remaining_time": "2:12:39"}
180
+ {"current_steps": 1780, "total_steps": 2013, "loss": 0.2701, "lr": 6.628836183137136e-07, "epoch": 2.6511627906976747, "percentage": 88.43, "elapsed_time": "16:11:39", "remaining_time": "2:07:11"}
181
+ {"current_steps": 1790, "total_steps": 2013, "loss": 0.2687, "lr": 6.493557282359362e-07, "epoch": 2.666046511627907, "percentage": 88.92, "elapsed_time": "16:16:59", "remaining_time": "2:01:42"}
182
+ {"current_steps": 1800, "total_steps": 2013, "loss": 0.268, "lr": 6.363949473844831e-07, "epoch": 2.6809302325581394, "percentage": 89.42, "elapsed_time": "16:22:19", "remaining_time": "1:56:14"}
183
+ {"current_steps": 1810, "total_steps": 2013, "loss": 0.2677, "lr": 6.240047747708234e-07, "epoch": 2.695813953488372, "percentage": 89.92, "elapsed_time": "16:27:41", "remaining_time": "1:50:46"}
184
+ {"current_steps": 1820, "total_steps": 2013, "loss": 0.2681, "lr": 6.121885553597864e-07, "epoch": 2.7106976744186047, "percentage": 90.41, "elapsed_time": "16:33:02", "remaining_time": "1:45:18"}
185
+ {"current_steps": 1830, "total_steps": 2013, "loss": 0.2696, "lr": 6.009494791665193e-07, "epoch": 2.7255813953488373, "percentage": 90.91, "elapsed_time": "16:38:25", "remaining_time": "1:39:50"}
186
+ {"current_steps": 1840, "total_steps": 2013, "loss": 0.2707, "lr": 5.902905803952853e-07, "epoch": 2.74046511627907, "percentage": 91.41, "elapsed_time": "16:43:44", "remaining_time": "1:34:22"}
187
+ {"current_steps": 1850, "total_steps": 2013, "loss": 0.2682, "lr": 5.802147366203209e-07, "epoch": 2.7553488372093025, "percentage": 91.9, "elapsed_time": "16:49:05", "remaining_time": "1:28:54"}
188
+ {"current_steps": 1860, "total_steps": 2013, "loss": 0.2682, "lr": 5.707246680089786e-07, "epoch": 2.7702325581395346, "percentage": 92.4, "elapsed_time": "16:54:26", "remaining_time": "1:23:26"}
189
+ {"current_steps": 1870, "total_steps": 2013, "loss": 0.2679, "lr": 5.618229365873664e-07, "epoch": 2.7851162790697677, "percentage": 92.9, "elapsed_time": "16:59:45", "remaining_time": "1:17:58"}
190
+ {"current_steps": 1880, "total_steps": 2013, "loss": 0.2671, "lr": 5.535119455486798e-07, "epoch": 2.8, "percentage": 93.39, "elapsed_time": "17:05:06", "remaining_time": "1:12:31"}
191
+ {"current_steps": 1890, "total_steps": 2013, "loss": 0.2691, "lr": 5.457939386044124e-07, "epoch": 2.8148837209302324, "percentage": 93.89, "elapsed_time": "17:10:28", "remaining_time": "1:07:03"}
192
+ {"current_steps": 1900, "total_steps": 2013, "loss": 0.2691, "lr": 5.386709993786254e-07, "epoch": 2.829767441860465, "percentage": 94.39, "elapsed_time": "17:15:49", "remaining_time": "1:01:36"}
193
+ {"current_steps": 1910, "total_steps": 2013, "loss": 0.2678, "lr": 5.321450508454304e-07, "epoch": 2.8446511627906976, "percentage": 94.88, "elapsed_time": "17:21:09", "remaining_time": "0:56:08"}
194
+ {"current_steps": 1920, "total_steps": 2013, "loss": 0.2668, "lr": 5.262178548098479e-07, "epoch": 2.8595348837209302, "percentage": 95.38, "elapsed_time": "17:26:31", "remaining_time": "0:50:41"}
195
+ {"current_steps": 1930, "total_steps": 2013, "loss": 0.2662, "lr": 5.208910114321729e-07, "epoch": 2.874418604651163, "percentage": 95.88, "elapsed_time": "17:31:50", "remaining_time": "0:45:14"}
196
+ {"current_steps": 1940, "total_steps": 2013, "loss": 0.2669, "lr": 5.161659587959818e-07, "epoch": 2.8893023255813954, "percentage": 96.37, "elapsed_time": "17:37:09", "remaining_time": "0:39:46"}
197
+ {"current_steps": 1950, "total_steps": 2013, "loss": 0.2663, "lr": 5.120439725198932e-07, "epoch": 2.904186046511628, "percentage": 96.87, "elapsed_time": "17:42:31", "remaining_time": "0:34:19"}
198
+ {"current_steps": 1960, "total_steps": 2013, "loss": 0.2632, "lr": 5.085261654131918e-07, "epoch": 2.9190697674418606, "percentage": 97.37, "elapsed_time": "17:47:51", "remaining_time": "0:28:52"}
199
+ {"current_steps": 1970, "total_steps": 2013, "loss": 0.2705, "lr": 5.056134871754014e-07, "epoch": 2.933953488372093, "percentage": 97.86, "elapsed_time": "17:53:13", "remaining_time": "0:23:25"}
200
+ {"current_steps": 1980, "total_steps": 2013, "loss": 0.2675, "lr": 5.03306724139899e-07, "epoch": 2.948837209302326, "percentage": 98.36, "elapsed_time": "17:58:31", "remaining_time": "0:17:58"}
201
+ {"current_steps": 1990, "total_steps": 2013, "loss": 0.2652, "lr": 5.016064990616251e-07, "epoch": 2.963720930232558, "percentage": 98.86, "elapsed_time": "18:03:52", "remaining_time": "0:12:31"}
202
+ {"current_steps": 2000, "total_steps": 2013, "loss": 0.2666, "lr": 5.005132709489625e-07, "epoch": 2.9786046511627906, "percentage": 99.35, "elapsed_time": "18:09:14", "remaining_time": "0:07:04"}
203
+ {"current_steps": 2010, "total_steps": 2013, "loss": 0.2664, "lr": 5.000273349398159e-07, "epoch": 2.993488372093023, "percentage": 99.85, "elapsed_time": "18:14:34", "remaining_time": "0:01:38"}