{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.820359281437126,
"eval_steps": 500,
"global_step": 410,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.23952095808383234,
"grad_norm": 2.16796875,
"learning_rate": 0.00019970658011837404,
"loss": 0.5442,
"step": 10
},
{
"epoch": 0.47904191616766467,
"grad_norm": 2.02734375,
"learning_rate": 0.00019882804237803488,
"loss": 0.5445,
"step": 20
},
{
"epoch": 0.718562874251497,
"grad_norm": 1.2587890625,
"learning_rate": 0.00019736954238777792,
"loss": 0.5499,
"step": 30
},
{
"epoch": 0.9580838323353293,
"grad_norm": 1.3486328125,
"learning_rate": 0.00019533963920549306,
"loss": 0.5429,
"step": 40
},
{
"epoch": 1.1976047904191618,
"grad_norm": 2.08984375,
"learning_rate": 0.0001927502451102095,
"loss": 0.5336,
"step": 50
},
{
"epoch": 1.437125748502994,
"grad_norm": 1.7734375,
"learning_rate": 0.00018961655569610557,
"loss": 0.5333,
"step": 60
},
{
"epoch": 1.6766467065868262,
"grad_norm": 2.63671875,
"learning_rate": 0.00018595696069872013,
"loss": 0.5401,
"step": 70
},
{
"epoch": 1.9161676646706587,
"grad_norm": 2.19921875,
"learning_rate": 0.00018179293607667178,
"loss": 0.5414,
"step": 80
},
{
"epoch": 2.155688622754491,
"grad_norm": 1.908203125,
"learning_rate": 0.0001771489179821943,
"loss": 0.5713,
"step": 90
},
{
"epoch": 2.3952095808383236,
"grad_norm": 4.09765625,
"learning_rate": 0.0001720521593600787,
"loss": 0.538,
"step": 100
},
{
"epoch": 2.6347305389221556,
"grad_norm": 2.0390625,
"learning_rate": 0.00016653257001655652,
"loss": 0.5199,
"step": 110
},
{
"epoch": 2.874251497005988,
"grad_norm": 1.8154296875,
"learning_rate": 0.0001606225410966638,
"loss": 0.5204,
"step": 120
},
{
"epoch": 3.1137724550898205,
"grad_norm": 1.9189453125,
"learning_rate": 0.00015435675500012212,
"loss": 0.5396,
"step": 130
},
{
"epoch": 3.3532934131736525,
"grad_norm": 1.8896484375,
"learning_rate": 0.0001477719818512263,
"loss": 0.5294,
"step": 140
},
{
"epoch": 3.592814371257485,
"grad_norm": 2.302734375,
"learning_rate": 0.00014090686371713402,
"loss": 0.5394,
"step": 150
},
{
"epoch": 3.8323353293413174,
"grad_norm": 2.16796875,
"learning_rate": 0.00013380168784085027,
"loss": 0.5279,
"step": 160
},
{
"epoch": 4.07185628742515,
"grad_norm": 2.11328125,
"learning_rate": 0.0001264981502196662,
"loss": 0.5182,
"step": 170
},
{
"epoch": 4.311377245508982,
"grad_norm": 1.9189453125,
"learning_rate": 0.00011903911091646684,
"loss": 0.5304,
"step": 180
},
{
"epoch": 4.550898203592815,
"grad_norm": 2.48046875,
"learning_rate": 0.00011146834253984006,
"loss": 0.5386,
"step": 190
},
{
"epoch": 4.790419161676647,
"grad_norm": 2.62890625,
"learning_rate": 0.00010383027336900355,
"loss": 0.5276,
"step": 200
},
{
"epoch": 5.029940119760479,
"grad_norm": 3.021484375,
"learning_rate": 9.616972663099647e-05,
"loss": 0.5229,
"step": 210
},
{
"epoch": 5.269461077844311,
"grad_norm": 3.259765625,
"learning_rate": 8.853165746015997e-05,
"loss": 0.5243,
"step": 220
},
{
"epoch": 5.508982035928144,
"grad_norm": 2.876953125,
"learning_rate": 8.096088908353315e-05,
"loss": 0.5206,
"step": 230
},
{
"epoch": 5.748502994011976,
"grad_norm": 3.349609375,
"learning_rate": 7.350184978033386e-05,
"loss": 0.5374,
"step": 240
},
{
"epoch": 5.9880239520958085,
"grad_norm": 3.791015625,
"learning_rate": 6.619831215914974e-05,
"loss": 0.5258,
"step": 250
},
{
"epoch": 6.227544910179641,
"grad_norm": 3.240234375,
"learning_rate": 5.909313628286601e-05,
"loss": 0.5166,
"step": 260
},
{
"epoch": 6.467065868263473,
"grad_norm": 3.248046875,
"learning_rate": 5.222801814877369e-05,
"loss": 0.5329,
"step": 270
},
{
"epoch": 6.706586826347305,
"grad_norm": 2.83984375,
"learning_rate": 4.56432449998779e-05,
"loss": 0.5117,
"step": 280
},
{
"epoch": 6.946107784431137,
"grad_norm": 4.03515625,
"learning_rate": 3.937745890333623e-05,
"loss": 0.5269,
"step": 290
},
{
"epoch": 7.18562874251497,
"grad_norm": 4.08203125,
"learning_rate": 3.346742998344348e-05,
"loss": 0.5299,
"step": 300
},
{
"epoch": 7.425149700598802,
"grad_norm": 3.03125,
"learning_rate": 2.794784063992131e-05,
"loss": 0.5217,
"step": 310
},
{
"epoch": 7.664670658682635,
"grad_norm": 3.00390625,
"learning_rate": 2.2851082017805703e-05,
"loss": 0.5241,
"step": 320
},
{
"epoch": 7.904191616766467,
"grad_norm": 3.27734375,
"learning_rate": 1.8207063923328237e-05,
"loss": 0.5066,
"step": 330
},
{
"epoch": 8.1437125748503,
"grad_norm": 2.927734375,
"learning_rate": 1.4043039301279903e-05,
"loss": 0.5111,
"step": 340
},
{
"epoch": 8.383233532934131,
"grad_norm": 3.3984375,
"learning_rate": 1.0383444303894452e-05,
"loss": 0.5137,
"step": 350
},
{
"epoch": 8.622754491017965,
"grad_norm": 3.4375,
"learning_rate": 7.249754889790539e-06,
"loss": 0.5279,
"step": 360
},
{
"epoch": 8.862275449101796,
"grad_norm": 3.19140625,
"learning_rate": 4.660360794506946e-06,
"loss": 0.5125,
"step": 370
},
{
"epoch": 9.10179640718563,
"grad_norm": 4.23046875,
"learning_rate": 2.6304576122221035e-06,
"loss": 0.5141,
"step": 380
},
{
"epoch": 9.341317365269461,
"grad_norm": 2.779296875,
"learning_rate": 1.1719576219651585e-06,
"loss": 0.5172,
"step": 390
},
{
"epoch": 9.580838323353294,
"grad_norm": 3.619140625,
"learning_rate": 2.934198816259559e-07,
"loss": 0.5389,
"step": 400
},
{
"epoch": 9.820359281437126,
"grad_norm": 4.30078125,
"learning_rate": 0.0,
"loss": 0.5024,
"step": 410
},
{
"epoch": 9.820359281437126,
"step": 410,
"total_flos": 2.67670788243456e+16,
"train_loss": 0.5285330202521348,
"train_runtime": 355.5253,
"train_samples_per_second": 4.697,
"train_steps_per_second": 1.153
}
],
"logging_steps": 10,
"max_steps": 410,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 2.67670788243456e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}