{
"best_metric": 0.09464961290359497,
"best_model_checkpoint": "outputs/checkpoint-540",
"epoch": 9.840546697038725,
"eval_steps": 500,
"global_step": 540,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.36446469248291574,
"grad_norm": 3.396773099899292,
"learning_rate": 1.8000000000000001e-06,
"loss": 0.3727,
"step": 20
},
{
"epoch": 0.7289293849658315,
"grad_norm": 1.4876775741577148,
"learning_rate": 3.8000000000000005e-06,
"loss": 0.2861,
"step": 40
},
{
"epoch": 0.9840546697038725,
"eval_loss": 0.18749132752418518,
"eval_runtime": 49.826,
"eval_samples_per_second": 3.372,
"eval_steps_per_second": 0.421,
"step": 54
},
{
"epoch": 1.0933940774487472,
"grad_norm": 0.6102920770645142,
"learning_rate": 5.8e-06,
"loss": 0.1811,
"step": 60
},
{
"epoch": 1.4578587699316627,
"grad_norm": 0.4133767783641815,
"learning_rate": 7.800000000000002e-06,
"loss": 0.1346,
"step": 80
},
{
"epoch": 1.8223234624145785,
"grad_norm": 1.7365530729293823,
"learning_rate": 9.800000000000001e-06,
"loss": 0.1243,
"step": 100
},
{
"epoch": 1.9863325740318907,
"eval_loss": 0.13598798215389252,
"eval_runtime": 49.8253,
"eval_samples_per_second": 3.372,
"eval_steps_per_second": 0.421,
"step": 109
},
{
"epoch": 2.1867881548974943,
"grad_norm": 0.4655854403972626,
"learning_rate": 9.958763523679515e-06,
"loss": 0.1038,
"step": 120
},
{
"epoch": 2.55125284738041,
"grad_norm": 0.6029446125030518,
"learning_rate": 9.817090706862895e-06,
"loss": 0.0873,
"step": 140
},
{
"epoch": 2.9157175398633255,
"grad_norm": 0.4066179394721985,
"learning_rate": 9.577355814597031e-06,
"loss": 0.0862,
"step": 160
},
{
"epoch": 2.988610478359909,
"eval_loss": 0.11723620444536209,
"eval_runtime": 49.7707,
"eval_samples_per_second": 3.375,
"eval_steps_per_second": 0.422,
"step": 164
},
{
"epoch": 3.2801822323462413,
"grad_norm": 0.6009082198143005,
"learning_rate": 9.244439157950114e-06,
"loss": 0.0834,
"step": 180
},
{
"epoch": 3.644646924829157,
"grad_norm": 0.6393954157829285,
"learning_rate": 8.825117959999117e-06,
"loss": 0.0756,
"step": 200
},
{
"epoch": 3.990888382687927,
"eval_loss": 0.10594599694013596,
"eval_runtime": 49.8676,
"eval_samples_per_second": 3.369,
"eval_steps_per_second": 0.421,
"step": 219
},
{
"epoch": 4.009111617312073,
"grad_norm": 0.5410734415054321,
"learning_rate": 8.327928391111841e-06,
"loss": 0.0733,
"step": 220
},
{
"epoch": 4.373576309794989,
"grad_norm": 0.544377326965332,
"learning_rate": 7.762991797134513e-06,
"loss": 0.0684,
"step": 240
},
{
"epoch": 4.738041002277904,
"grad_norm": 0.5764002799987793,
"learning_rate": 7.1418086579779075e-06,
"loss": 0.0628,
"step": 260
},
{
"epoch": 4.993166287015946,
"eval_loss": 0.0995541512966156,
"eval_runtime": 49.7114,
"eval_samples_per_second": 3.38,
"eval_steps_per_second": 0.422,
"step": 274
},
{
"epoch": 5.10250569476082,
"grad_norm": 0.48410555720329285,
"learning_rate": 6.477024471011001e-06,
"loss": 0.0628,
"step": 280
},
{
"epoch": 5.466970387243736,
"grad_norm": 0.6120467185974121,
"learning_rate": 5.782172325201155e-06,
"loss": 0.0594,
"step": 300
},
{
"epoch": 5.831435079726651,
"grad_norm": 0.7353035807609558,
"learning_rate": 5.071397406448937e-06,
"loss": 0.0593,
"step": 320
},
{
"epoch": 5.995444191343964,
"eval_loss": 0.09746743738651276,
"eval_runtime": 49.7379,
"eval_samples_per_second": 3.378,
"eval_steps_per_second": 0.422,
"step": 329
},
{
"epoch": 6.195899772209567,
"grad_norm": 0.5597842931747437,
"learning_rate": 4.359169042394537e-06,
"loss": 0.0606,
"step": 340
},
{
"epoch": 6.560364464692483,
"grad_norm": 0.5548788905143738,
"learning_rate": 3.6599861486331074e-06,
"loss": 0.0548,
"step": 360
},
{
"epoch": 6.924829157175399,
"grad_norm": 0.6110637784004211,
"learning_rate": 2.9880820726046613e-06,
"loss": 0.0498,
"step": 380
},
{
"epoch": 6.997722095671982,
"eval_loss": 0.09508081525564194,
"eval_runtime": 49.5881,
"eval_samples_per_second": 3.388,
"eval_steps_per_second": 0.423,
"step": 384
},
{
"epoch": 7.289293849658314,
"grad_norm": 0.6793264150619507,
"learning_rate": 2.3571348436857906e-06,
"loss": 0.0485,
"step": 400
},
{
"epoch": 7.65375854214123,
"grad_norm": 0.499203085899353,
"learning_rate": 1.7799887279557238e-06,
"loss": 0.0474,
"step": 420
},
{
"epoch": 8.0,
"eval_loss": 0.09498950093984604,
"eval_runtime": 49.5797,
"eval_samples_per_second": 3.388,
"eval_steps_per_second": 0.424,
"step": 439
},
{
"epoch": 8.018223234624147,
"grad_norm": 0.86496502161026,
"learning_rate": 1.2683927559787657e-06,
"loss": 0.0523,
"step": 440
},
{
"epoch": 8.382687927107062,
"grad_norm": 0.7341931462287903,
"learning_rate": 8.327615464234129e-07,
"loss": 0.0423,
"step": 460
},
{
"epoch": 8.747152619589977,
"grad_norm": 0.5355525016784668,
"learning_rate": 4.819632944595415e-07,
"loss": 0.047,
"step": 480
},
{
"epoch": 8.984054669703873,
"eval_loss": 0.09474514424800873,
"eval_runtime": 49.5868,
"eval_samples_per_second": 3.388,
"eval_steps_per_second": 0.424,
"step": 493
},
{
"epoch": 9.111617312072893,
"grad_norm": 0.5788146257400513,
"learning_rate": 2.2313924087851657e-07,
"loss": 0.0548,
"step": 500
},
{
"epoch": 9.476082004555808,
"grad_norm": 0.4824052155017853,
"learning_rate": 6.15582970243117e-08,
"loss": 0.0451,
"step": 520
},
{
"epoch": 9.840546697038725,
"grad_norm": 0.9220362901687622,
"learning_rate": 5.09784952833492e-10,
"loss": 0.0453,
"step": 540
},
{
"epoch": 9.840546697038725,
"eval_loss": 0.09464961290359497,
"eval_runtime": 49.5747,
"eval_samples_per_second": 3.389,
"eval_steps_per_second": 0.424,
"step": 540
}
],
"logging_steps": 20,
"max_steps": 540,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 2.0191775689433088e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}