|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 513, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.917808219178082e-06, |
|
"loss": 0.0322, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_oasst_export_accuracy": 0.49051699074786653, |
|
"eval_oasst_export_loss": 3.484375, |
|
"eval_oasst_export_runtime": 6.0372, |
|
"eval_oasst_export_samples_per_second": 181.21, |
|
"eval_oasst_export_steps_per_second": 2.982, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5.800391389432485e-06, |
|
"loss": 0.0246, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_oasst_export_accuracy": 0.489678997143981, |
|
"eval_oasst_export_loss": 3.779296875, |
|
"eval_oasst_export_runtime": 6.056, |
|
"eval_oasst_export_samples_per_second": 180.647, |
|
"eval_oasst_export_steps_per_second": 2.972, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5.682974559686888e-06, |
|
"loss": 0.0203, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_oasst_export_accuracy": 0.4914062900826022, |
|
"eval_oasst_export_loss": 3.734375, |
|
"eval_oasst_export_runtime": 6.0349, |
|
"eval_oasst_export_samples_per_second": 181.279, |
|
"eval_oasst_export_steps_per_second": 2.983, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5.565557729941292e-06, |
|
"loss": 0.0187, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_oasst_export_accuracy": 0.4879859080259265, |
|
"eval_oasst_export_loss": 3.7109375, |
|
"eval_oasst_export_runtime": 6.0607, |
|
"eval_oasst_export_samples_per_second": 180.508, |
|
"eval_oasst_export_steps_per_second": 2.97, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5.448140900195694e-06, |
|
"loss": 0.0162, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_oasst_export_accuracy": 0.4880030099362099, |
|
"eval_oasst_export_loss": 3.783203125, |
|
"eval_oasst_export_runtime": 6.0304, |
|
"eval_oasst_export_samples_per_second": 181.415, |
|
"eval_oasst_export_steps_per_second": 2.985, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.330724070450098e-06, |
|
"loss": 0.0159, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_oasst_export_accuracy": 0.486292818907872, |
|
"eval_oasst_export_loss": 3.7890625, |
|
"eval_oasst_export_runtime": 6.0367, |
|
"eval_oasst_export_samples_per_second": 181.224, |
|
"eval_oasst_export_steps_per_second": 2.982, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.213307240704501e-06, |
|
"loss": 0.0172, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_oasst_export_accuracy": 0.48716501633232434, |
|
"eval_oasst_export_loss": 3.806640625, |
|
"eval_oasst_export_runtime": 6.0405, |
|
"eval_oasst_export_samples_per_second": 181.11, |
|
"eval_oasst_export_steps_per_second": 2.98, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.095890410958904e-06, |
|
"loss": 0.0157, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_oasst_export_accuracy": 0.48454842405896736, |
|
"eval_oasst_export_loss": 3.791015625, |
|
"eval_oasst_export_runtime": 6.0411, |
|
"eval_oasst_export_samples_per_second": 181.092, |
|
"eval_oasst_export_steps_per_second": 2.98, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.978473581213307e-06, |
|
"loss": 0.0163, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_oasst_export_accuracy": 0.49138918817231886, |
|
"eval_oasst_export_loss": 3.822265625, |
|
"eval_oasst_export_runtime": 5.6229, |
|
"eval_oasst_export_samples_per_second": 194.561, |
|
"eval_oasst_export_steps_per_second": 3.201, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.86105675146771e-06, |
|
"loss": 0.0139, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_oasst_export_accuracy": 0.4956817676534469, |
|
"eval_oasst_export_loss": 3.826171875, |
|
"eval_oasst_export_runtime": 6.0223, |
|
"eval_oasst_export_samples_per_second": 181.659, |
|
"eval_oasst_export_steps_per_second": 2.989, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.7436399217221134e-06, |
|
"loss": 0.0151, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_oasst_export_accuracy": 0.4845655259692508, |
|
"eval_oasst_export_loss": 3.904296875, |
|
"eval_oasst_export_runtime": 6.0257, |
|
"eval_oasst_export_samples_per_second": 181.554, |
|
"eval_oasst_export_steps_per_second": 2.987, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.626223091976516e-06, |
|
"loss": 0.0139, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_oasst_export_accuracy": 0.49049988883758316, |
|
"eval_oasst_export_loss": 3.958984375, |
|
"eval_oasst_export_runtime": 6.0136, |
|
"eval_oasst_export_samples_per_second": 181.92, |
|
"eval_oasst_export_steps_per_second": 2.993, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.50880626223092e-06, |
|
"loss": 0.0133, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_oasst_export_accuracy": 0.4803413541292562, |
|
"eval_oasst_export_loss": 3.845703125, |
|
"eval_oasst_export_runtime": 6.0221, |
|
"eval_oasst_export_samples_per_second": 181.663, |
|
"eval_oasst_export_steps_per_second": 2.989, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.3913894324853226e-06, |
|
"loss": 0.0142, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_oasst_export_accuracy": 0.4777076599456159, |
|
"eval_oasst_export_loss": 3.828125, |
|
"eval_oasst_export_runtime": 6.0368, |
|
"eval_oasst_export_samples_per_second": 181.223, |
|
"eval_oasst_export_steps_per_second": 2.982, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.273972602739726e-06, |
|
"loss": 0.0141, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_oasst_export_accuracy": 0.47088399774254786, |
|
"eval_oasst_export_loss": 3.998046875, |
|
"eval_oasst_export_runtime": 6.0502, |
|
"eval_oasst_export_samples_per_second": 180.82, |
|
"eval_oasst_export_steps_per_second": 2.975, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.156555772994129e-06, |
|
"loss": 0.0157, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_oasst_export_accuracy": 0.4726796983223026, |
|
"eval_oasst_export_loss": 3.8828125, |
|
"eval_oasst_export_runtime": 5.995, |
|
"eval_oasst_export_samples_per_second": 182.487, |
|
"eval_oasst_export_steps_per_second": 3.003, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.0391389432485325e-06, |
|
"loss": 0.0139, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_oasst_export_accuracy": 0.4744240931712072, |
|
"eval_oasst_export_loss": 3.9609375, |
|
"eval_oasst_export_runtime": 6.0696, |
|
"eval_oasst_export_samples_per_second": 180.244, |
|
"eval_oasst_export_steps_per_second": 2.966, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.921722113502935e-06, |
|
"loss": 0.0125, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_oasst_export_accuracy": 0.48210285088844423, |
|
"eval_oasst_export_loss": 4.03125, |
|
"eval_oasst_export_runtime": 5.616, |
|
"eval_oasst_export_samples_per_second": 194.8, |
|
"eval_oasst_export_steps_per_second": 3.205, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.8043052837573385e-06, |
|
"loss": 0.017, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_oasst_export_accuracy": 0.4897303028748311, |
|
"eval_oasst_export_loss": 3.970703125, |
|
"eval_oasst_export_runtime": 6.03, |
|
"eval_oasst_export_samples_per_second": 181.426, |
|
"eval_oasst_export_steps_per_second": 2.985, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.6868884540117416e-06, |
|
"loss": 0.0137, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_oasst_export_accuracy": 0.4794862586150873, |
|
"eval_oasst_export_loss": 3.986328125, |
|
"eval_oasst_export_runtime": 6.0192, |
|
"eval_oasst_export_samples_per_second": 181.751, |
|
"eval_oasst_export_steps_per_second": 2.99, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.569471624266145e-06, |
|
"loss": 0.0133, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_oasst_export_accuracy": 0.47693807398286386, |
|
"eval_oasst_export_loss": 4.140625, |
|
"eval_oasst_export_runtime": 6.0148, |
|
"eval_oasst_export_samples_per_second": 181.886, |
|
"eval_oasst_export_steps_per_second": 2.993, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.452054794520548e-06, |
|
"loss": 0.0133, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_oasst_export_accuracy": 0.48713081251175755, |
|
"eval_oasst_export_loss": 4.1484375, |
|
"eval_oasst_export_runtime": 6.0347, |
|
"eval_oasst_export_samples_per_second": 181.285, |
|
"eval_oasst_export_steps_per_second": 2.983, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.334637964774951e-06, |
|
"loss": 0.0154, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_oasst_export_accuracy": 0.4752620867750928, |
|
"eval_oasst_export_loss": 4.09375, |
|
"eval_oasst_export_runtime": 5.6373, |
|
"eval_oasst_export_samples_per_second": 194.065, |
|
"eval_oasst_export_steps_per_second": 3.193, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.217221135029354e-06, |
|
"loss": 0.0139, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_oasst_export_accuracy": 0.4760658765584116, |
|
"eval_oasst_export_loss": 4.09765625, |
|
"eval_oasst_export_runtime": 6.055, |
|
"eval_oasst_export_samples_per_second": 180.676, |
|
"eval_oasst_export_steps_per_second": 2.973, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.099804305283757e-06, |
|
"loss": 0.0136, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_oasst_export_accuracy": 0.47102081302481486, |
|
"eval_oasst_export_loss": 4.20703125, |
|
"eval_oasst_export_runtime": 6.0408, |
|
"eval_oasst_export_samples_per_second": 181.103, |
|
"eval_oasst_export_steps_per_second": 2.98, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.9823874755381603e-06, |
|
"loss": 0.0129, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_oasst_export_accuracy": 0.4641458450908967, |
|
"eval_oasst_export_loss": 4.2578125, |
|
"eval_oasst_export_runtime": 6.0349, |
|
"eval_oasst_export_samples_per_second": 181.279, |
|
"eval_oasst_export_steps_per_second": 2.983, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.8649706457925635e-06, |
|
"loss": 0.0128, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_oasst_export_accuracy": 0.4649838386947822, |
|
"eval_oasst_export_loss": 4.24609375, |
|
"eval_oasst_export_runtime": 6.0355, |
|
"eval_oasst_export_samples_per_second": 181.26, |
|
"eval_oasst_export_steps_per_second": 2.982, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.7475538160469666e-06, |
|
"loss": 0.0133, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_oasst_export_accuracy": 0.46412874318061326, |
|
"eval_oasst_export_loss": 4.2421875, |
|
"eval_oasst_export_runtime": 6.0367, |
|
"eval_oasst_export_samples_per_second": 181.224, |
|
"eval_oasst_export_steps_per_second": 2.982, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.63013698630137e-06, |
|
"loss": 0.013, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_oasst_export_accuracy": 0.47182460280813365, |
|
"eval_oasst_export_loss": 4.28125, |
|
"eval_oasst_export_runtime": 6.0187, |
|
"eval_oasst_export_samples_per_second": 181.768, |
|
"eval_oasst_export_steps_per_second": 2.991, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.512720156555773e-06, |
|
"loss": 0.0133, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_oasst_export_accuracy": 0.47093530347339796, |
|
"eval_oasst_export_loss": 4.30078125, |
|
"eval_oasst_export_runtime": 6.0452, |
|
"eval_oasst_export_samples_per_second": 180.969, |
|
"eval_oasst_export_steps_per_second": 2.978, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.395303326810176e-06, |
|
"loss": 0.0117, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_oasst_export_accuracy": 0.4726454945017358, |
|
"eval_oasst_export_loss": 4.41796875, |
|
"eval_oasst_export_runtime": 6.0234, |
|
"eval_oasst_export_samples_per_second": 181.624, |
|
"eval_oasst_export_steps_per_second": 2.988, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.2778864970645793e-06, |
|
"loss": 0.0121, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_oasst_export_accuracy": 0.47096950729396475, |
|
"eval_oasst_export_loss": 4.4375, |
|
"eval_oasst_export_runtime": 6.0349, |
|
"eval_oasst_export_samples_per_second": 181.279, |
|
"eval_oasst_export_steps_per_second": 2.983, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.1604696673189825e-06, |
|
"loss": 0.0121, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_oasst_export_accuracy": 0.480375557949823, |
|
"eval_oasst_export_loss": 4.421875, |
|
"eval_oasst_export_runtime": 6.0242, |
|
"eval_oasst_export_samples_per_second": 181.602, |
|
"eval_oasst_export_steps_per_second": 2.988, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.0430528375733853e-06, |
|
"loss": 0.0128, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_oasst_export_accuracy": 0.4811964496434252, |
|
"eval_oasst_export_loss": 4.40625, |
|
"eval_oasst_export_runtime": 6.0198, |
|
"eval_oasst_export_samples_per_second": 181.734, |
|
"eval_oasst_export_steps_per_second": 2.99, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.9256360078277885e-06, |
|
"loss": 0.0126, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_oasst_export_accuracy": 0.4785969592803516, |
|
"eval_oasst_export_loss": 4.34375, |
|
"eval_oasst_export_runtime": 5.9923, |
|
"eval_oasst_export_samples_per_second": 182.567, |
|
"eval_oasst_export_steps_per_second": 3.004, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.8082191780821916e-06, |
|
"loss": 0.0142, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_oasst_export_accuracy": 0.47438988935064047, |
|
"eval_oasst_export_loss": 4.3359375, |
|
"eval_oasst_export_runtime": 6.0319, |
|
"eval_oasst_export_samples_per_second": 181.368, |
|
"eval_oasst_export_steps_per_second": 2.984, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.6908023483365948e-06, |
|
"loss": 0.0119, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_oasst_export_accuracy": 0.47102081302481486, |
|
"eval_oasst_export_loss": 4.359375, |
|
"eval_oasst_export_runtime": 6.0592, |
|
"eval_oasst_export_samples_per_second": 180.553, |
|
"eval_oasst_export_steps_per_second": 2.971, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.573385518590998e-06, |
|
"loss": 0.0122, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_oasst_export_accuracy": 0.46927641817591026, |
|
"eval_oasst_export_loss": 4.37890625, |
|
"eval_oasst_export_runtime": 6.0252, |
|
"eval_oasst_export_samples_per_second": 181.571, |
|
"eval_oasst_export_steps_per_second": 2.987, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.4559686888454012e-06, |
|
"loss": 0.0115, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_oasst_export_accuracy": 0.471841704718417, |
|
"eval_oasst_export_loss": 4.40625, |
|
"eval_oasst_export_runtime": 6.0266, |
|
"eval_oasst_export_samples_per_second": 181.528, |
|
"eval_oasst_export_steps_per_second": 2.987, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.3385518590998044e-06, |
|
"loss": 0.0122, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_oasst_export_accuracy": 0.4727823097840029, |
|
"eval_oasst_export_loss": 4.4453125, |
|
"eval_oasst_export_runtime": 6.0313, |
|
"eval_oasst_export_samples_per_second": 181.387, |
|
"eval_oasst_export_steps_per_second": 2.984, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.2211350293542073e-06, |
|
"loss": 0.0128, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_oasst_export_accuracy": 0.4727310040531527, |
|
"eval_oasst_export_loss": 4.48828125, |
|
"eval_oasst_export_runtime": 6.0409, |
|
"eval_oasst_export_samples_per_second": 181.1, |
|
"eval_oasst_export_steps_per_second": 2.98, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.1037181996086105e-06, |
|
"loss": 0.0114, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_oasst_export_accuracy": 0.4734834881056214, |
|
"eval_oasst_export_loss": 4.48828125, |
|
"eval_oasst_export_runtime": 6.046, |
|
"eval_oasst_export_samples_per_second": 180.947, |
|
"eval_oasst_export_steps_per_second": 2.977, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.863013698630137e-07, |
|
"loss": 0.011, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_oasst_export_accuracy": 0.47777606758674945, |
|
"eval_oasst_export_loss": 4.4765625, |
|
"eval_oasst_export_runtime": 6.0324, |
|
"eval_oasst_export_samples_per_second": 181.355, |
|
"eval_oasst_export_steps_per_second": 2.984, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 8.688845401174168e-07, |
|
"loss": 0.0129, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_oasst_export_accuracy": 0.4744240931712072, |
|
"eval_oasst_export_loss": 4.45703125, |
|
"eval_oasst_export_runtime": 6.0468, |
|
"eval_oasst_export_samples_per_second": 180.921, |
|
"eval_oasst_export_steps_per_second": 2.977, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 7.514677103718199e-07, |
|
"loss": 0.0133, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_oasst_export_accuracy": 0.4726796983223026, |
|
"eval_oasst_export_loss": 4.45703125, |
|
"eval_oasst_export_runtime": 6.0371, |
|
"eval_oasst_export_samples_per_second": 181.213, |
|
"eval_oasst_export_steps_per_second": 2.982, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 6.340508806262231e-07, |
|
"loss": 0.0131, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_oasst_export_accuracy": 0.47782737331759956, |
|
"eval_oasst_export_loss": 4.48046875, |
|
"eval_oasst_export_runtime": 6.0367, |
|
"eval_oasst_export_samples_per_second": 181.226, |
|
"eval_oasst_export_steps_per_second": 2.982, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 5.166340508806262e-07, |
|
"loss": 0.0111, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_oasst_export_accuracy": 0.4752620867750928, |
|
"eval_oasst_export_loss": 4.484375, |
|
"eval_oasst_export_runtime": 6.0217, |
|
"eval_oasst_export_samples_per_second": 181.676, |
|
"eval_oasst_export_steps_per_second": 2.989, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.9921722113502936e-07, |
|
"loss": 0.012, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_oasst_export_accuracy": 0.47774186376618266, |
|
"eval_oasst_export_loss": 4.4765625, |
|
"eval_oasst_export_runtime": 6.018, |
|
"eval_oasst_export_samples_per_second": 181.788, |
|
"eval_oasst_export_steps_per_second": 2.991, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.818003913894325e-07, |
|
"loss": 0.0111, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_oasst_export_accuracy": 0.47351769192618814, |
|
"eval_oasst_export_loss": 4.484375, |
|
"eval_oasst_export_runtime": 6.0271, |
|
"eval_oasst_export_samples_per_second": 181.513, |
|
"eval_oasst_export_steps_per_second": 2.986, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.6438356164383561e-07, |
|
"loss": 0.0125, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_oasst_export_accuracy": 0.47608297846869496, |
|
"eval_oasst_export_loss": 4.484375, |
|
"eval_oasst_export_runtime": 6.026, |
|
"eval_oasst_export_samples_per_second": 181.546, |
|
"eval_oasst_export_steps_per_second": 2.987, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.6966731898238746e-08, |
|
"loss": 0.0124, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_oasst_export_accuracy": 0.475227882954526, |
|
"eval_oasst_export_loss": 4.4765625, |
|
"eval_oasst_export_runtime": 6.0338, |
|
"eval_oasst_export_samples_per_second": 181.313, |
|
"eval_oasst_export_steps_per_second": 2.983, |
|
"step": 510 |
|
} |
|
], |
|
"max_steps": 513, |
|
"num_train_epochs": 3, |
|
"total_flos": 3.6185460841979576e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|