Training in progress, step 700, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 289512208
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b9031cffc344e8840f4c7143987fa4e58be60af4e5110d63d50416a3f8b59f3
|
3 |
size 289512208
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 147781972
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3586c1b40b78d2d911170eb1a15bda1bb7e14d32d622befc2e331d34b42a7aff
|
3 |
size 147781972
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b885f396904d7214b2eeb837fb3989cd5db4deae210b67eca24ef3c766dfa24
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0aa4ab3cdb1a9e7e00fd89c904cf6ae8c19a72f37f60ac96d0d021814a6f0bd4
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4263,6 +4263,714 @@
|
|
4263 |
"eval_samples_per_second": 4.035,
|
4264 |
"eval_steps_per_second": 1.009,
|
4265 |
"step": 600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4266 |
}
|
4267 |
],
|
4268 |
"logging_steps": 1,
|
@@ -4291,7 +4999,7 @@
|
|
4291 |
"attributes": {}
|
4292 |
}
|
4293 |
},
|
4294 |
-
"total_flos": 3.
|
4295 |
"train_batch_size": 4,
|
4296 |
"trial_name": null,
|
4297 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.2046868801116943,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-700",
|
4 |
+
"epoch": 0.04449564975567121,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 700,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4263 |
"eval_samples_per_second": 4.035,
|
4264 |
"eval_steps_per_second": 1.009,
|
4265 |
"step": 600
|
4266 |
+
},
|
4267 |
+
{
|
4268 |
+
"epoch": 0.03820269357594057,
|
4269 |
+
"grad_norm": 0.2633255422115326,
|
4270 |
+
"learning_rate": 3.820213483066737e-05,
|
4271 |
+
"loss": 1.1605,
|
4272 |
+
"step": 601
|
4273 |
+
},
|
4274 |
+
{
|
4275 |
+
"epoch": 0.03826625878987724,
|
4276 |
+
"grad_norm": 0.24185825884342194,
|
4277 |
+
"learning_rate": 3.7904999416234864e-05,
|
4278 |
+
"loss": 1.2412,
|
4279 |
+
"step": 602
|
4280 |
+
},
|
4281 |
+
{
|
4282 |
+
"epoch": 0.03832982400381391,
|
4283 |
+
"grad_norm": 0.26332393288612366,
|
4284 |
+
"learning_rate": 3.7608753611846446e-05,
|
4285 |
+
"loss": 1.222,
|
4286 |
+
"step": 603
|
4287 |
+
},
|
4288 |
+
{
|
4289 |
+
"epoch": 0.03839338921775059,
|
4290 |
+
"grad_norm": 0.258789986371994,
|
4291 |
+
"learning_rate": 3.731340166169635e-05,
|
4292 |
+
"loss": 1.105,
|
4293 |
+
"step": 604
|
4294 |
+
},
|
4295 |
+
{
|
4296 |
+
"epoch": 0.03845695443168726,
|
4297 |
+
"grad_norm": 0.2542060315608978,
|
4298 |
+
"learning_rate": 3.701894779717286e-05,
|
4299 |
+
"loss": 1.149,
|
4300 |
+
"step": 605
|
4301 |
+
},
|
4302 |
+
{
|
4303 |
+
"epoch": 0.03852051964562393,
|
4304 |
+
"grad_norm": 0.2608526647090912,
|
4305 |
+
"learning_rate": 3.6725396236797935e-05,
|
4306 |
+
"loss": 1.1996,
|
4307 |
+
"step": 606
|
4308 |
+
},
|
4309 |
+
{
|
4310 |
+
"epoch": 0.03858408485956061,
|
4311 |
+
"grad_norm": 0.24727903306484222,
|
4312 |
+
"learning_rate": 3.64327511861663e-05,
|
4313 |
+
"loss": 1.1608,
|
4314 |
+
"step": 607
|
4315 |
+
},
|
4316 |
+
{
|
4317 |
+
"epoch": 0.038647650073497276,
|
4318 |
+
"grad_norm": 0.2504411041736603,
|
4319 |
+
"learning_rate": 3.614101683788575e-05,
|
4320 |
+
"loss": 1.1245,
|
4321 |
+
"step": 608
|
4322 |
+
},
|
4323 |
+
{
|
4324 |
+
"epoch": 0.03871121528743395,
|
4325 |
+
"grad_norm": 0.26074525713920593,
|
4326 |
+
"learning_rate": 3.585019737151669e-05,
|
4327 |
+
"loss": 1.1894,
|
4328 |
+
"step": 609
|
4329 |
+
},
|
4330 |
+
{
|
4331 |
+
"epoch": 0.03877478050137063,
|
4332 |
+
"grad_norm": 0.2569214105606079,
|
4333 |
+
"learning_rate": 3.5560296953512295e-05,
|
4334 |
+
"loss": 1.2079,
|
4335 |
+
"step": 610
|
4336 |
+
},
|
4337 |
+
{
|
4338 |
+
"epoch": 0.038838345715307296,
|
4339 |
+
"grad_norm": 0.2611664831638336,
|
4340 |
+
"learning_rate": 3.52713197371591e-05,
|
4341 |
+
"loss": 1.1224,
|
4342 |
+
"step": 611
|
4343 |
+
},
|
4344 |
+
{
|
4345 |
+
"epoch": 0.03890191092924397,
|
4346 |
+
"grad_norm": 0.25584879517555237,
|
4347 |
+
"learning_rate": 3.498326986251717e-05,
|
4348 |
+
"loss": 1.3047,
|
4349 |
+
"step": 612
|
4350 |
+
},
|
4351 |
+
{
|
4352 |
+
"epoch": 0.03896547614318065,
|
4353 |
+
"grad_norm": 0.24464906752109528,
|
4354 |
+
"learning_rate": 3.4696151456360956e-05,
|
4355 |
+
"loss": 1.0912,
|
4356 |
+
"step": 613
|
4357 |
+
},
|
4358 |
+
{
|
4359 |
+
"epoch": 0.039029041357117315,
|
4360 |
+
"grad_norm": 0.25548434257507324,
|
4361 |
+
"learning_rate": 3.4409968632120126e-05,
|
4362 |
+
"loss": 1.2166,
|
4363 |
+
"step": 614
|
4364 |
+
},
|
4365 |
+
{
|
4366 |
+
"epoch": 0.03909260657105399,
|
4367 |
+
"grad_norm": 0.25605612993240356,
|
4368 |
+
"learning_rate": 3.4124725489820645e-05,
|
4369 |
+
"loss": 1.2629,
|
4370 |
+
"step": 615
|
4371 |
+
},
|
4372 |
+
{
|
4373 |
+
"epoch": 0.039156171784990666,
|
4374 |
+
"grad_norm": 0.24373793601989746,
|
4375 |
+
"learning_rate": 3.3840426116026044e-05,
|
4376 |
+
"loss": 1.1917,
|
4377 |
+
"step": 616
|
4378 |
+
},
|
4379 |
+
{
|
4380 |
+
"epoch": 0.039219736998927335,
|
4381 |
+
"grad_norm": 0.24588941037654877,
|
4382 |
+
"learning_rate": 3.3557074583778814e-05,
|
4383 |
+
"loss": 1.286,
|
4384 |
+
"step": 617
|
4385 |
+
},
|
4386 |
+
{
|
4387 |
+
"epoch": 0.03928330221286401,
|
4388 |
+
"grad_norm": 0.25556549429893494,
|
4389 |
+
"learning_rate": 3.327467495254225e-05,
|
4390 |
+
"loss": 1.2295,
|
4391 |
+
"step": 618
|
4392 |
+
},
|
4393 |
+
{
|
4394 |
+
"epoch": 0.039346867426800686,
|
4395 |
+
"grad_norm": 0.2570589780807495,
|
4396 |
+
"learning_rate": 3.299323126814191e-05,
|
4397 |
+
"loss": 1.2417,
|
4398 |
+
"step": 619
|
4399 |
+
},
|
4400 |
+
{
|
4401 |
+
"epoch": 0.039410432640737354,
|
4402 |
+
"grad_norm": 0.24832259118556976,
|
4403 |
+
"learning_rate": 3.2712747562708115e-05,
|
4404 |
+
"loss": 1.2996,
|
4405 |
+
"step": 620
|
4406 |
+
},
|
4407 |
+
{
|
4408 |
+
"epoch": 0.03947399785467403,
|
4409 |
+
"grad_norm": 0.2418624311685562,
|
4410 |
+
"learning_rate": 3.243322785461781e-05,
|
4411 |
+
"loss": 1.2418,
|
4412 |
+
"step": 621
|
4413 |
+
},
|
4414 |
+
{
|
4415 |
+
"epoch": 0.039537563068610705,
|
4416 |
+
"grad_norm": 0.2648262083530426,
|
4417 |
+
"learning_rate": 3.215467614843719e-05,
|
4418 |
+
"loss": 1.2913,
|
4419 |
+
"step": 622
|
4420 |
+
},
|
4421 |
+
{
|
4422 |
+
"epoch": 0.039601128282547374,
|
4423 |
+
"grad_norm": 0.2682283818721771,
|
4424 |
+
"learning_rate": 3.187709643486427e-05,
|
4425 |
+
"loss": 1.2148,
|
4426 |
+
"step": 623
|
4427 |
+
},
|
4428 |
+
{
|
4429 |
+
"epoch": 0.03966469349648405,
|
4430 |
+
"grad_norm": 0.26762086153030396,
|
4431 |
+
"learning_rate": 3.160049269067174e-05,
|
4432 |
+
"loss": 1.2949,
|
4433 |
+
"step": 624
|
4434 |
+
},
|
4435 |
+
{
|
4436 |
+
"epoch": 0.039728258710420725,
|
4437 |
+
"grad_norm": 0.25577932596206665,
|
4438 |
+
"learning_rate": 3.132486887864992e-05,
|
4439 |
+
"loss": 1.12,
|
4440 |
+
"step": 625
|
4441 |
+
},
|
4442 |
+
{
|
4443 |
+
"epoch": 0.03979182392435739,
|
4444 |
+
"grad_norm": 0.2690037190914154,
|
4445 |
+
"learning_rate": 3.105022894755003e-05,
|
4446 |
+
"loss": 1.1813,
|
4447 |
+
"step": 626
|
4448 |
+
},
|
4449 |
+
{
|
4450 |
+
"epoch": 0.03985538913829407,
|
4451 |
+
"grad_norm": 0.25239890813827515,
|
4452 |
+
"learning_rate": 3.077657683202779e-05,
|
4453 |
+
"loss": 1.2678,
|
4454 |
+
"step": 627
|
4455 |
+
},
|
4456 |
+
{
|
4457 |
+
"epoch": 0.039918954352230744,
|
4458 |
+
"grad_norm": 0.2665114104747772,
|
4459 |
+
"learning_rate": 3.0503916452586612e-05,
|
4460 |
+
"loss": 1.0682,
|
4461 |
+
"step": 628
|
4462 |
+
},
|
4463 |
+
{
|
4464 |
+
"epoch": 0.03998251956616741,
|
4465 |
+
"grad_norm": 0.2506917715072632,
|
4466 |
+
"learning_rate": 3.0232251715521932e-05,
|
4467 |
+
"loss": 1.2247,
|
4468 |
+
"step": 629
|
4469 |
+
},
|
4470 |
+
{
|
4471 |
+
"epoch": 0.04004608478010409,
|
4472 |
+
"grad_norm": 0.24417519569396973,
|
4473 |
+
"learning_rate": 2.9961586512864947e-05,
|
4474 |
+
"loss": 1.2097,
|
4475 |
+
"step": 630
|
4476 |
+
},
|
4477 |
+
{
|
4478 |
+
"epoch": 0.040109649994040764,
|
4479 |
+
"grad_norm": 0.2546679973602295,
|
4480 |
+
"learning_rate": 2.9691924722326826e-05,
|
4481 |
+
"loss": 1.1986,
|
4482 |
+
"step": 631
|
4483 |
+
},
|
4484 |
+
{
|
4485 |
+
"epoch": 0.04017321520797743,
|
4486 |
+
"grad_norm": 0.25688743591308594,
|
4487 |
+
"learning_rate": 2.9423270207243437e-05,
|
4488 |
+
"loss": 1.2543,
|
4489 |
+
"step": 632
|
4490 |
+
},
|
4491 |
+
{
|
4492 |
+
"epoch": 0.04023678042191411,
|
4493 |
+
"grad_norm": 0.23989447951316833,
|
4494 |
+
"learning_rate": 2.9155626816519677e-05,
|
4495 |
+
"loss": 1.0773,
|
4496 |
+
"step": 633
|
4497 |
+
},
|
4498 |
+
{
|
4499 |
+
"epoch": 0.04030034563585078,
|
4500 |
+
"grad_norm": 0.25679922103881836,
|
4501 |
+
"learning_rate": 2.888899838457455e-05,
|
4502 |
+
"loss": 1.2843,
|
4503 |
+
"step": 634
|
4504 |
+
},
|
4505 |
+
{
|
4506 |
+
"epoch": 0.04036391084978745,
|
4507 |
+
"grad_norm": 0.2580190896987915,
|
4508 |
+
"learning_rate": 2.8623388731286093e-05,
|
4509 |
+
"loss": 1.1911,
|
4510 |
+
"step": 635
|
4511 |
+
},
|
4512 |
+
{
|
4513 |
+
"epoch": 0.04042747606372413,
|
4514 |
+
"grad_norm": 0.24526208639144897,
|
4515 |
+
"learning_rate": 2.835880166193683e-05,
|
4516 |
+
"loss": 1.2574,
|
4517 |
+
"step": 636
|
4518 |
+
},
|
4519 |
+
{
|
4520 |
+
"epoch": 0.0404910412776608,
|
4521 |
+
"grad_norm": 0.25860583782196045,
|
4522 |
+
"learning_rate": 2.8095240967158954e-05,
|
4523 |
+
"loss": 1.2943,
|
4524 |
+
"step": 637
|
4525 |
+
},
|
4526 |
+
{
|
4527 |
+
"epoch": 0.04055460649159747,
|
4528 |
+
"grad_norm": 0.26202407479286194,
|
4529 |
+
"learning_rate": 2.7832710422880328e-05,
|
4530 |
+
"loss": 1.1769,
|
4531 |
+
"step": 638
|
4532 |
+
},
|
4533 |
+
{
|
4534 |
+
"epoch": 0.040618171705534147,
|
4535 |
+
"grad_norm": 0.2458542138338089,
|
4536 |
+
"learning_rate": 2.75712137902703e-05,
|
4537 |
+
"loss": 1.1669,
|
4538 |
+
"step": 639
|
4539 |
+
},
|
4540 |
+
{
|
4541 |
+
"epoch": 0.04068173691947082,
|
4542 |
+
"grad_norm": 0.25534749031066895,
|
4543 |
+
"learning_rate": 2.7310754815685624e-05,
|
4544 |
+
"loss": 1.2057,
|
4545 |
+
"step": 640
|
4546 |
+
},
|
4547 |
+
{
|
4548 |
+
"epoch": 0.04074530213340749,
|
4549 |
+
"grad_norm": 0.2514583170413971,
|
4550 |
+
"learning_rate": 2.7051337230617125e-05,
|
4551 |
+
"loss": 1.2483,
|
4552 |
+
"step": 641
|
4553 |
+
},
|
4554 |
+
{
|
4555 |
+
"epoch": 0.040808867347344166,
|
4556 |
+
"grad_norm": 0.25142601132392883,
|
4557 |
+
"learning_rate": 2.679296475163595e-05,
|
4558 |
+
"loss": 1.1685,
|
4559 |
+
"step": 642
|
4560 |
+
},
|
4561 |
+
{
|
4562 |
+
"epoch": 0.04087243256128084,
|
4563 |
+
"grad_norm": 0.2746109962463379,
|
4564 |
+
"learning_rate": 2.6535641080340458e-05,
|
4565 |
+
"loss": 1.2658,
|
4566 |
+
"step": 643
|
4567 |
+
},
|
4568 |
+
{
|
4569 |
+
"epoch": 0.04093599777521751,
|
4570 |
+
"grad_norm": 0.26082682609558105,
|
4571 |
+
"learning_rate": 2.6279369903303175e-05,
|
4572 |
+
"loss": 1.2184,
|
4573 |
+
"step": 644
|
4574 |
+
},
|
4575 |
+
{
|
4576 |
+
"epoch": 0.040999562989154185,
|
4577 |
+
"grad_norm": 0.27172860503196716,
|
4578 |
+
"learning_rate": 2.6024154892017937e-05,
|
4579 |
+
"loss": 1.2417,
|
4580 |
+
"step": 645
|
4581 |
+
},
|
4582 |
+
{
|
4583 |
+
"epoch": 0.04106312820309086,
|
4584 |
+
"grad_norm": 0.26511403918266296,
|
4585 |
+
"learning_rate": 2.5769999702847346e-05,
|
4586 |
+
"loss": 1.2099,
|
4587 |
+
"step": 646
|
4588 |
+
},
|
4589 |
+
{
|
4590 |
+
"epoch": 0.04112669341702753,
|
4591 |
+
"grad_norm": 0.26414263248443604,
|
4592 |
+
"learning_rate": 2.5516907976970328e-05,
|
4593 |
+
"loss": 1.2562,
|
4594 |
+
"step": 647
|
4595 |
+
},
|
4596 |
+
{
|
4597 |
+
"epoch": 0.041190258630964205,
|
4598 |
+
"grad_norm": 0.25787821412086487,
|
4599 |
+
"learning_rate": 2.5264883340330113e-05,
|
4600 |
+
"loss": 1.2202,
|
4601 |
+
"step": 648
|
4602 |
+
},
|
4603 |
+
{
|
4604 |
+
"epoch": 0.04125382384490088,
|
4605 |
+
"grad_norm": 0.25424811244010925,
|
4606 |
+
"learning_rate": 2.501392940358197e-05,
|
4607 |
+
"loss": 1.2154,
|
4608 |
+
"step": 649
|
4609 |
+
},
|
4610 |
+
{
|
4611 |
+
"epoch": 0.04131738905883755,
|
4612 |
+
"grad_norm": 0.26234978437423706,
|
4613 |
+
"learning_rate": 2.4764049762041874e-05,
|
4614 |
+
"loss": 1.242,
|
4615 |
+
"step": 650
|
4616 |
+
},
|
4617 |
+
{
|
4618 |
+
"epoch": 0.041380954272774224,
|
4619 |
+
"grad_norm": 0.25528523325920105,
|
4620 |
+
"learning_rate": 2.4515247995634694e-05,
|
4621 |
+
"loss": 1.1873,
|
4622 |
+
"step": 651
|
4623 |
+
},
|
4624 |
+
{
|
4625 |
+
"epoch": 0.0414445194867109,
|
4626 |
+
"grad_norm": 0.2629062235355377,
|
4627 |
+
"learning_rate": 2.426752766884306e-05,
|
4628 |
+
"loss": 1.1596,
|
4629 |
+
"step": 652
|
4630 |
+
},
|
4631 |
+
{
|
4632 |
+
"epoch": 0.04150808470064757,
|
4633 |
+
"grad_norm": 0.24369929730892181,
|
4634 |
+
"learning_rate": 2.4020892330656252e-05,
|
4635 |
+
"loss": 1.069,
|
4636 |
+
"step": 653
|
4637 |
+
},
|
4638 |
+
{
|
4639 |
+
"epoch": 0.041571649914584244,
|
4640 |
+
"grad_norm": 0.2602699100971222,
|
4641 |
+
"learning_rate": 2.377534551451932e-05,
|
4642 |
+
"loss": 1.2132,
|
4643 |
+
"step": 654
|
4644 |
+
},
|
4645 |
+
{
|
4646 |
+
"epoch": 0.04163521512852092,
|
4647 |
+
"grad_norm": 0.24992002546787262,
|
4648 |
+
"learning_rate": 2.353089073828255e-05,
|
4649 |
+
"loss": 1.1259,
|
4650 |
+
"step": 655
|
4651 |
+
},
|
4652 |
+
{
|
4653 |
+
"epoch": 0.04169878034245759,
|
4654 |
+
"grad_norm": 0.2784167528152466,
|
4655 |
+
"learning_rate": 2.328753150415094e-05,
|
4656 |
+
"loss": 1.1997,
|
4657 |
+
"step": 656
|
4658 |
+
},
|
4659 |
+
{
|
4660 |
+
"epoch": 0.04176234555639426,
|
4661 |
+
"grad_norm": 0.2581193745136261,
|
4662 |
+
"learning_rate": 2.304527129863424e-05,
|
4663 |
+
"loss": 1.1832,
|
4664 |
+
"step": 657
|
4665 |
+
},
|
4666 |
+
{
|
4667 |
+
"epoch": 0.04182591077033094,
|
4668 |
+
"grad_norm": 0.25155678391456604,
|
4669 |
+
"learning_rate": 2.280411359249668e-05,
|
4670 |
+
"loss": 1.147,
|
4671 |
+
"step": 658
|
4672 |
+
},
|
4673 |
+
{
|
4674 |
+
"epoch": 0.04188947598426761,
|
4675 |
+
"grad_norm": 0.2618091106414795,
|
4676 |
+
"learning_rate": 2.2564061840707495e-05,
|
4677 |
+
"loss": 1.303,
|
4678 |
+
"step": 659
|
4679 |
+
},
|
4680 |
+
{
|
4681 |
+
"epoch": 0.04195304119820428,
|
4682 |
+
"grad_norm": 0.2630173861980438,
|
4683 |
+
"learning_rate": 2.2325119482391467e-05,
|
4684 |
+
"loss": 1.2555,
|
4685 |
+
"step": 660
|
4686 |
+
},
|
4687 |
+
{
|
4688 |
+
"epoch": 0.04201660641214096,
|
4689 |
+
"grad_norm": 0.25127795338630676,
|
4690 |
+
"learning_rate": 2.2087289940779343e-05,
|
4691 |
+
"loss": 1.1694,
|
4692 |
+
"step": 661
|
4693 |
+
},
|
4694 |
+
{
|
4695 |
+
"epoch": 0.04208017162607763,
|
4696 |
+
"grad_norm": 0.2526141107082367,
|
4697 |
+
"learning_rate": 2.185057662315918e-05,
|
4698 |
+
"loss": 1.0997,
|
4699 |
+
"step": 662
|
4700 |
+
},
|
4701 |
+
{
|
4702 |
+
"epoch": 0.0421437368400143,
|
4703 |
+
"grad_norm": 0.2466498613357544,
|
4704 |
+
"learning_rate": 2.1614982920827243e-05,
|
4705 |
+
"loss": 1.2093,
|
4706 |
+
"step": 663
|
4707 |
+
},
|
4708 |
+
{
|
4709 |
+
"epoch": 0.04220730205395098,
|
4710 |
+
"grad_norm": 0.2559715211391449,
|
4711 |
+
"learning_rate": 2.1380512209039528e-05,
|
4712 |
+
"loss": 1.239,
|
4713 |
+
"step": 664
|
4714 |
+
},
|
4715 |
+
{
|
4716 |
+
"epoch": 0.042270867267887646,
|
4717 |
+
"grad_norm": 0.24562884867191315,
|
4718 |
+
"learning_rate": 2.1147167846963422e-05,
|
4719 |
+
"loss": 1.1716,
|
4720 |
+
"step": 665
|
4721 |
+
},
|
4722 |
+
{
|
4723 |
+
"epoch": 0.04233443248182432,
|
4724 |
+
"grad_norm": 0.25966036319732666,
|
4725 |
+
"learning_rate": 2.0914953177629548e-05,
|
4726 |
+
"loss": 1.2553,
|
4727 |
+
"step": 666
|
4728 |
+
},
|
4729 |
+
{
|
4730 |
+
"epoch": 0.042397997695761,
|
4731 |
+
"grad_norm": 0.25772759318351746,
|
4732 |
+
"learning_rate": 2.068387152788387e-05,
|
4733 |
+
"loss": 1.1341,
|
4734 |
+
"step": 667
|
4735 |
+
},
|
4736 |
+
{
|
4737 |
+
"epoch": 0.042461562909697666,
|
4738 |
+
"grad_norm": 0.24900874495506287,
|
4739 |
+
"learning_rate": 2.0453926208340003e-05,
|
4740 |
+
"loss": 1.1742,
|
4741 |
+
"step": 668
|
4742 |
+
},
|
4743 |
+
{
|
4744 |
+
"epoch": 0.04252512812363434,
|
4745 |
+
"grad_norm": 0.2540144622325897,
|
4746 |
+
"learning_rate": 2.022512051333194e-05,
|
4747 |
+
"loss": 1.1856,
|
4748 |
+
"step": 669
|
4749 |
+
},
|
4750 |
+
{
|
4751 |
+
"epoch": 0.04258869333757102,
|
4752 |
+
"grad_norm": 0.26840710639953613,
|
4753 |
+
"learning_rate": 1.999745772086655e-05,
|
4754 |
+
"loss": 1.2104,
|
4755 |
+
"step": 670
|
4756 |
+
},
|
4757 |
+
{
|
4758 |
+
"epoch": 0.042652258551507685,
|
4759 |
+
"grad_norm": 0.2511826753616333,
|
4760 |
+
"learning_rate": 1.9770941092576957e-05,
|
4761 |
+
"loss": 1.2477,
|
4762 |
+
"step": 671
|
4763 |
+
},
|
4764 |
+
{
|
4765 |
+
"epoch": 0.04271582376544436,
|
4766 |
+
"grad_norm": 0.26480165123939514,
|
4767 |
+
"learning_rate": 1.954557387367557e-05,
|
4768 |
+
"loss": 1.1991,
|
4769 |
+
"step": 672
|
4770 |
+
},
|
4771 |
+
{
|
4772 |
+
"epoch": 0.042779388979381036,
|
4773 |
+
"grad_norm": 0.2562330663204193,
|
4774 |
+
"learning_rate": 1.9321359292907702e-05,
|
4775 |
+
"loss": 1.2336,
|
4776 |
+
"step": 673
|
4777 |
+
},
|
4778 |
+
{
|
4779 |
+
"epoch": 0.042842954193317705,
|
4780 |
+
"grad_norm": 0.25312507152557373,
|
4781 |
+
"learning_rate": 1.9098300562505266e-05,
|
4782 |
+
"loss": 1.1593,
|
4783 |
+
"step": 674
|
4784 |
+
},
|
4785 |
+
{
|
4786 |
+
"epoch": 0.04290651940725438,
|
4787 |
+
"grad_norm": 0.2678249180316925,
|
4788 |
+
"learning_rate": 1.8876400878140775e-05,
|
4789 |
+
"loss": 1.1886,
|
4790 |
+
"step": 675
|
4791 |
+
},
|
4792 |
+
{
|
4793 |
+
"epoch": 0.042970084621191056,
|
4794 |
+
"grad_norm": 0.25428783893585205,
|
4795 |
+
"learning_rate": 1.8655663418881584e-05,
|
4796 |
+
"loss": 1.2123,
|
4797 |
+
"step": 676
|
4798 |
+
},
|
4799 |
+
{
|
4800 |
+
"epoch": 0.043033649835127724,
|
4801 |
+
"grad_norm": 0.2611987292766571,
|
4802 |
+
"learning_rate": 1.8436091347144246e-05,
|
4803 |
+
"loss": 1.2407,
|
4804 |
+
"step": 677
|
4805 |
+
},
|
4806 |
+
{
|
4807 |
+
"epoch": 0.0430972150490644,
|
4808 |
+
"grad_norm": 0.2611881196498871,
|
4809 |
+
"learning_rate": 1.821768780864943e-05,
|
4810 |
+
"loss": 1.1918,
|
4811 |
+
"step": 678
|
4812 |
+
},
|
4813 |
+
{
|
4814 |
+
"epoch": 0.043160780263001075,
|
4815 |
+
"grad_norm": 0.2661250829696655,
|
4816 |
+
"learning_rate": 1.800045593237647e-05,
|
4817 |
+
"loss": 1.2046,
|
4818 |
+
"step": 679
|
4819 |
+
},
|
4820 |
+
{
|
4821 |
+
"epoch": 0.043224345476937744,
|
4822 |
+
"grad_norm": 0.2643533945083618,
|
4823 |
+
"learning_rate": 1.7784398830519e-05,
|
4824 |
+
"loss": 1.1827,
|
4825 |
+
"step": 680
|
4826 |
+
},
|
4827 |
+
{
|
4828 |
+
"epoch": 0.04328791069087442,
|
4829 |
+
"grad_norm": 0.25061362981796265,
|
4830 |
+
"learning_rate": 1.756951959844e-05,
|
4831 |
+
"loss": 1.2051,
|
4832 |
+
"step": 681
|
4833 |
+
},
|
4834 |
+
{
|
4835 |
+
"epoch": 0.043351475904811095,
|
4836 |
+
"grad_norm": 0.24832050502300262,
|
4837 |
+
"learning_rate": 1.7355821314627564e-05,
|
4838 |
+
"loss": 1.1704,
|
4839 |
+
"step": 682
|
4840 |
+
},
|
4841 |
+
{
|
4842 |
+
"epoch": 0.04341504111874776,
|
4843 |
+
"grad_norm": 0.26712068915367126,
|
4844 |
+
"learning_rate": 1.7143307040650925e-05,
|
4845 |
+
"loss": 1.2655,
|
4846 |
+
"step": 683
|
4847 |
+
},
|
4848 |
+
{
|
4849 |
+
"epoch": 0.04347860633268444,
|
4850 |
+
"grad_norm": 0.26257115602493286,
|
4851 |
+
"learning_rate": 1.6931979821116418e-05,
|
4852 |
+
"loss": 1.183,
|
4853 |
+
"step": 684
|
4854 |
+
},
|
4855 |
+
{
|
4856 |
+
"epoch": 0.043542171546621114,
|
4857 |
+
"grad_norm": 0.2578732371330261,
|
4858 |
+
"learning_rate": 1.672184268362391e-05,
|
4859 |
+
"loss": 1.1036,
|
4860 |
+
"step": 685
|
4861 |
+
},
|
4862 |
+
{
|
4863 |
+
"epoch": 0.04360573676055778,
|
4864 |
+
"grad_norm": 0.25747859477996826,
|
4865 |
+
"learning_rate": 1.6512898638723497e-05,
|
4866 |
+
"loss": 1.2769,
|
4867 |
+
"step": 686
|
4868 |
+
},
|
4869 |
+
{
|
4870 |
+
"epoch": 0.04366930197449446,
|
4871 |
+
"grad_norm": 0.26593005657196045,
|
4872 |
+
"learning_rate": 1.630515067987226e-05,
|
4873 |
+
"loss": 1.2707,
|
4874 |
+
"step": 687
|
4875 |
+
},
|
4876 |
+
{
|
4877 |
+
"epoch": 0.04373286718843113,
|
4878 |
+
"grad_norm": 0.2610760033130646,
|
4879 |
+
"learning_rate": 1.6098601783391487e-05,
|
4880 |
+
"loss": 1.2226,
|
4881 |
+
"step": 688
|
4882 |
+
},
|
4883 |
+
{
|
4884 |
+
"epoch": 0.0437964324023678,
|
4885 |
+
"grad_norm": 0.2636644244194031,
|
4886 |
+
"learning_rate": 1.5893254908423937e-05,
|
4887 |
+
"loss": 1.194,
|
4888 |
+
"step": 689
|
4889 |
+
},
|
4890 |
+
{
|
4891 |
+
"epoch": 0.04385999761630448,
|
4892 |
+
"grad_norm": 0.25099021196365356,
|
4893 |
+
"learning_rate": 1.5689112996891576e-05,
|
4894 |
+
"loss": 1.1853,
|
4895 |
+
"step": 690
|
4896 |
+
},
|
4897 |
+
{
|
4898 |
+
"epoch": 0.04392356283024115,
|
4899 |
+
"grad_norm": 0.26002123951911926,
|
4900 |
+
"learning_rate": 1.54861789734532e-05,
|
4901 |
+
"loss": 1.1705,
|
4902 |
+
"step": 691
|
4903 |
+
},
|
4904 |
+
{
|
4905 |
+
"epoch": 0.04398712804417782,
|
4906 |
+
"grad_norm": 0.25610899925231934,
|
4907 |
+
"learning_rate": 1.5284455745462834e-05,
|
4908 |
+
"loss": 1.173,
|
4909 |
+
"step": 692
|
4910 |
+
},
|
4911 |
+
{
|
4912 |
+
"epoch": 0.0440506932581145,
|
4913 |
+
"grad_norm": 0.2630417048931122,
|
4914 |
+
"learning_rate": 1.5083946202927824e-05,
|
4915 |
+
"loss": 1.183,
|
4916 |
+
"step": 693
|
4917 |
+
},
|
4918 |
+
{
|
4919 |
+
"epoch": 0.04411425847205117,
|
4920 |
+
"grad_norm": 0.26131799817085266,
|
4921 |
+
"learning_rate": 1.4884653218467571e-05,
|
4922 |
+
"loss": 1.2147,
|
4923 |
+
"step": 694
|
4924 |
+
},
|
4925 |
+
{
|
4926 |
+
"epoch": 0.04417782368598784,
|
4927 |
+
"grad_norm": 0.2511073052883148,
|
4928 |
+
"learning_rate": 1.4686579647272336e-05,
|
4929 |
+
"loss": 1.1362,
|
4930 |
+
"step": 695
|
4931 |
+
},
|
4932 |
+
{
|
4933 |
+
"epoch": 0.044241388899924516,
|
4934 |
+
"grad_norm": 0.2500525414943695,
|
4935 |
+
"learning_rate": 1.4489728327062324e-05,
|
4936 |
+
"loss": 1.1264,
|
4937 |
+
"step": 696
|
4938 |
+
},
|
4939 |
+
{
|
4940 |
+
"epoch": 0.04430495411386119,
|
4941 |
+
"grad_norm": 0.2648208439350128,
|
4942 |
+
"learning_rate": 1.4294102078047055e-05,
|
4943 |
+
"loss": 1.2098,
|
4944 |
+
"step": 697
|
4945 |
+
},
|
4946 |
+
{
|
4947 |
+
"epoch": 0.04436851932779786,
|
4948 |
+
"grad_norm": 0.2602032721042633,
|
4949 |
+
"learning_rate": 1.4099703702884936e-05,
|
4950 |
+
"loss": 1.2527,
|
4951 |
+
"step": 698
|
4952 |
+
},
|
4953 |
+
{
|
4954 |
+
"epoch": 0.044432084541734536,
|
4955 |
+
"grad_norm": 0.26263752579689026,
|
4956 |
+
"learning_rate": 1.3906535986643176e-05,
|
4957 |
+
"loss": 1.218,
|
4958 |
+
"step": 699
|
4959 |
+
},
|
4960 |
+
{
|
4961 |
+
"epoch": 0.04449564975567121,
|
4962 |
+
"grad_norm": 0.2635667622089386,
|
4963 |
+
"learning_rate": 1.3714601696757712e-05,
|
4964 |
+
"loss": 1.2896,
|
4965 |
+
"step": 700
|
4966 |
+
},
|
4967 |
+
{
|
4968 |
+
"epoch": 0.04449564975567121,
|
4969 |
+
"eval_loss": 1.2046868801116943,
|
4970 |
+
"eval_runtime": 1238.8537,
|
4971 |
+
"eval_samples_per_second": 4.036,
|
4972 |
+
"eval_steps_per_second": 1.009,
|
4973 |
+
"step": 700
|
4974 |
}
|
4975 |
],
|
4976 |
"logging_steps": 1,
|
|
|
4999 |
"attributes": {}
|
5000 |
}
|
5001 |
},
|
5002 |
+
"total_flos": 3.637357672660992e+18,
|
5003 |
"train_batch_size": 4,
|
5004 |
"trial_name": null,
|
5005 |
"trial_params": null
|