{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9985652797704447,
  "eval_steps": 500,
  "global_step": 261,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "grad_norm": 0.45229363441467285,
      "learning_rate": 9.990947518281311e-05,
      "loss": 1.2293,
      "step": 5
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.45895835757255554,
      "learning_rate": 9.963822852095345e-05,
      "loss": 1.0332,
      "step": 10
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.148818850517273,
      "learning_rate": 9.918724219660013e-05,
      "loss": 1.1688,
      "step": 15
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6193984150886536,
      "learning_rate": 9.855814922793582e-05,
      "loss": 0.8491,
      "step": 20
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5973692536354065,
      "learning_rate": 9.775322755599978e-05,
      "loss": 0.8356,
      "step": 25
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.471483439207077,
      "learning_rate": 9.677539179628005e-05,
      "loss": 0.782,
      "step": 30
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5395711660385132,
      "learning_rate": 9.562818268491216e-05,
      "loss": 0.7464,
      "step": 35
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.7068768739700317,
      "learning_rate": 9.431575425769938e-05,
      "loss": 0.6813,
      "step": 40
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.7348042130470276,
      "learning_rate": 9.284285880837946e-05,
      "loss": 0.6508,
      "step": 45
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.763695478439331,
      "learning_rate": 9.121482968060384e-05,
      "loss": 0.7296,
      "step": 50
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.7689581513404846,
      "learning_rate": 8.943756195593916e-05,
      "loss": 0.604,
      "step": 55
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1951208114624023,
      "learning_rate": 8.751749110782012e-05,
      "loss": 0.594,
      "step": 60
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9435452818870544,
      "learning_rate": 8.546156969874723e-05,
      "loss": 0.6778,
      "step": 65
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.8245531320571899,
      "learning_rate": 8.327724220510873e-05,
      "loss": 0.6173,
      "step": 70
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6063089966773987,
      "learning_rate": 8.097241806078615e-05,
      "loss": 0.7239,
      "step": 75
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.4883978068828583,
      "learning_rate": 7.855544301715203e-05,
      "loss": 0.5158,
      "step": 80
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6936870217323303,
      "learning_rate": 7.603506892316512e-05,
      "loss": 0.5011,
      "step": 85
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.553333282470703,
      "learning_rate": 7.342042203498951e-05,
      "loss": 0.6461,
      "step": 90
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9251458644866943,
      "learning_rate": 7.07209699698876e-05,
      "loss": 0.7273,
      "step": 95
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.699193000793457,
      "learning_rate": 6.79464874240473e-05,
      "loss": 0.5878,
      "step": 100
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.661618173122406,
      "learning_rate": 6.510702077847863e-05,
      "loss": 0.4787,
      "step": 105
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.4883117079734802,
      "learning_rate": 6.221285172114157e-05,
      "loss": 0.5766,
      "step": 110
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.715449810028076,
      "learning_rate": 5.927446001702899e-05,
      "loss": 0.5553,
      "step": 115
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6433199644088745,
      "learning_rate": 5.6302485561014475e-05,
      "loss": 0.7007,
      "step": 120
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7128715515136719,
      "learning_rate": 5.330768985087059e-05,
      "loss": 0.5773,
      "step": 125
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7177631258964539,
      "learning_rate": 5.030091701996428e-05,
      "loss": 0.5787,
      "step": 130
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6795344948768616,
      "learning_rate": 4.729305457072913e-05,
      "loss": 0.5013,
      "step": 135
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1840591430664062,
      "learning_rate": 4.429499395109877e-05,
      "loss": 0.6419,
      "step": 140
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.7961875200271606,
      "learning_rate": 4.131759111665349e-05,
      "loss": 0.4529,
      "step": 145
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.7156918048858643,
      "learning_rate": 3.8371627221284495e-05,
      "loss": 0.5831,
      "step": 150
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.8424770832061768,
      "learning_rate": 3.546776957871445e-05,
      "loss": 0.6044,
      "step": 155
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9914979338645935,
      "learning_rate": 3.261653303623263e-05,
      "loss": 0.5824,
      "step": 160
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0051041841506958,
      "learning_rate": 2.982824190050958e-05,
      "loss": 0.4595,
      "step": 165
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2977936267852783,
      "learning_rate": 2.711299255335833e-05,
      "loss": 0.544,
      "step": 170
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.5247394442558289,
      "learning_rate": 2.4480616892809594e-05,
      "loss": 0.5458,
      "step": 175
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9446833729743958,
      "learning_rate": 2.194064673188089e-05,
      "loss": 0.5625,
      "step": 180
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9111084938049316,
      "learning_rate": 1.9502279283951364e-05,
      "loss": 0.5163,
      "step": 185
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9301843643188477,
      "learning_rate": 1.7174343859719333e-05,
      "loss": 0.5594,
      "step": 190
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5420268774032593,
      "learning_rate": 1.4965269896332885e-05,
      "loss": 0.4737,
      "step": 195
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.866782546043396,
      "learning_rate": 1.2883056434459506e-05,
      "loss": 0.5539,
      "step": 200
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.928578495979309,
      "learning_rate": 1.0935243153818436e-05,
      "loss": 0.5426,
      "step": 205
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.5703736543655396,
      "learning_rate": 9.12888307205541e-06,
      "loss": 0.5781,
      "step": 210
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3220263719558716,
      "learning_rate": 7.470517005817474e-06,
      "loss": 0.6523,
      "step": 215
    },
    {
      "epoch": 0.84,
      "grad_norm": 4.24189567565918,
      "learning_rate": 5.966149886503614e-06,
      "loss": 0.5199,
      "step": 220
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.606140673160553,
      "learning_rate": 4.621229016452156e-06,
      "loss": 0.487,
      "step": 225
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7255881428718567,
      "learning_rate": 3.4406243442987764e-06,
      "loss": 0.5654,
      "step": 230
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7342286705970764,
      "learning_rate": 2.428610830928152e-06,
      "loss": 0.5211,
      "step": 235
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0628710985183716,
      "learning_rate": 1.5888529698718346e-06,
      "loss": 0.5965,
      "step": 240
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6802533268928528,
      "learning_rate": 9.243915182039431e-07,
      "loss": 0.542,
      "step": 245
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.282472610473633,
      "learning_rate": 4.376324859820924e-07,
      "loss": 0.5677,
      "step": 250
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.717949628829956,
      "learning_rate": 1.3033842410251075e-07,
      "loss": 0.4775,
      "step": 255
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.769280195236206,
      "learning_rate": 3.622042116169233e-09,
      "loss": 0.6862,
      "step": 260
    },
    {
      "epoch": 1.0,
      "step": 261,
      "total_flos": 3.809983450683802e+16,
      "train_loss": 0.6279083029063726,
      "train_runtime": 203702.1714,
      "train_samples_per_second": 0.01,
      "train_steps_per_second": 0.001
    }
  ],
  "logging_steps": 5,
  "max_steps": 261,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 3.809983450683802e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}