{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 313,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01597444089456869,
"grad_norm": 478.69457004238836,
"learning_rate": 2e-05,
"loss": 5.0794,
"step": 5
},
{
"epoch": 0.03194888178913738,
"grad_norm": 215.5472393836919,
"learning_rate": 2e-05,
"loss": 1.7552,
"step": 10
},
{
"epoch": 0.04792332268370607,
"grad_norm": 88.51180202926707,
"learning_rate": 2e-05,
"loss": 0.861,
"step": 15
},
{
"epoch": 0.06389776357827476,
"grad_norm": 15.867724265639525,
"learning_rate": 2e-05,
"loss": 0.7908,
"step": 20
},
{
"epoch": 0.07987220447284345,
"grad_norm": 9.962226402862825,
"learning_rate": 2e-05,
"loss": 0.5627,
"step": 25
},
{
"epoch": 0.09584664536741214,
"grad_norm": 11.323650461972006,
"learning_rate": 2e-05,
"loss": 0.4492,
"step": 30
},
{
"epoch": 0.11182108626198083,
"grad_norm": 5.618908250561753,
"learning_rate": 2e-05,
"loss": 0.3863,
"step": 35
},
{
"epoch": 0.12779552715654952,
"grad_norm": 8.639980902230302,
"learning_rate": 2e-05,
"loss": 0.3724,
"step": 40
},
{
"epoch": 0.14376996805111822,
"grad_norm": 6.065581794373812,
"learning_rate": 2e-05,
"loss": 0.3305,
"step": 45
},
{
"epoch": 0.1597444089456869,
"grad_norm": 6.487222993623944,
"learning_rate": 2e-05,
"loss": 0.3454,
"step": 50
},
{
"epoch": 0.1757188498402556,
"grad_norm": 8.157982493659246,
"learning_rate": 2e-05,
"loss": 0.31,
"step": 55
},
{
"epoch": 0.19169329073482427,
"grad_norm": 4.433439880366275,
"learning_rate": 2e-05,
"loss": 0.3233,
"step": 60
},
{
"epoch": 0.20766773162939298,
"grad_norm": 8.704032511156715,
"learning_rate": 2e-05,
"loss": 0.3254,
"step": 65
},
{
"epoch": 0.22364217252396165,
"grad_norm": 3.2354358181768,
"learning_rate": 2e-05,
"loss": 0.3028,
"step": 70
},
{
"epoch": 0.23961661341853036,
"grad_norm": 3.927058406370219,
"learning_rate": 2e-05,
"loss": 0.2545,
"step": 75
},
{
"epoch": 0.25559105431309903,
"grad_norm": 4.383347359544785,
"learning_rate": 2e-05,
"loss": 0.2766,
"step": 80
},
{
"epoch": 0.2715654952076677,
"grad_norm": 4.755401718885403,
"learning_rate": 2e-05,
"loss": 0.2756,
"step": 85
},
{
"epoch": 0.28753993610223644,
"grad_norm": 7.018973526139115,
"learning_rate": 2e-05,
"loss": 0.2579,
"step": 90
},
{
"epoch": 0.3035143769968051,
"grad_norm": 6.272026448721462,
"learning_rate": 2e-05,
"loss": 0.2971,
"step": 95
},
{
"epoch": 0.3194888178913738,
"grad_norm": 4.8079684113378365,
"learning_rate": 2e-05,
"loss": 0.3307,
"step": 100
},
{
"epoch": 0.3354632587859425,
"grad_norm": 4.028493080280556,
"learning_rate": 2e-05,
"loss": 0.2727,
"step": 105
},
{
"epoch": 0.3514376996805112,
"grad_norm": 5.388707606364108,
"learning_rate": 2e-05,
"loss": 0.2822,
"step": 110
},
{
"epoch": 0.36741214057507987,
"grad_norm": 3.730845411810028,
"learning_rate": 2e-05,
"loss": 0.2816,
"step": 115
},
{
"epoch": 0.38338658146964855,
"grad_norm": 5.819780875953061,
"learning_rate": 2e-05,
"loss": 0.2438,
"step": 120
},
{
"epoch": 0.3993610223642173,
"grad_norm": 5.818771077307558,
"learning_rate": 2e-05,
"loss": 0.2764,
"step": 125
},
{
"epoch": 0.41533546325878595,
"grad_norm": 5.674449251632924,
"learning_rate": 2e-05,
"loss": 0.2679,
"step": 130
},
{
"epoch": 0.43130990415335463,
"grad_norm": 3.5139138000890564,
"learning_rate": 2e-05,
"loss": 0.266,
"step": 135
},
{
"epoch": 0.4472843450479233,
"grad_norm": 3.6050594093343644,
"learning_rate": 2e-05,
"loss": 0.2558,
"step": 140
},
{
"epoch": 0.46325878594249204,
"grad_norm": 3.7736226262761248,
"learning_rate": 2e-05,
"loss": 0.2747,
"step": 145
},
{
"epoch": 0.4792332268370607,
"grad_norm": 3.3294463018044382,
"learning_rate": 2e-05,
"loss": 0.2124,
"step": 150
},
{
"epoch": 0.4952076677316294,
"grad_norm": 3.978340934287849,
"learning_rate": 2e-05,
"loss": 0.2626,
"step": 155
},
{
"epoch": 0.5111821086261981,
"grad_norm": 3.7733916384693997,
"learning_rate": 2e-05,
"loss": 0.3012,
"step": 160
},
{
"epoch": 0.5271565495207667,
"grad_norm": 2.475405136211538,
"learning_rate": 2e-05,
"loss": 0.2506,
"step": 165
},
{
"epoch": 0.5431309904153354,
"grad_norm": 2.623200763225571,
"learning_rate": 2e-05,
"loss": 0.2127,
"step": 170
},
{
"epoch": 0.5591054313099042,
"grad_norm": 3.1075207472955797,
"learning_rate": 2e-05,
"loss": 0.2441,
"step": 175
},
{
"epoch": 0.5750798722044729,
"grad_norm": 2.446477613149001,
"learning_rate": 2e-05,
"loss": 0.2124,
"step": 180
},
{
"epoch": 0.5910543130990416,
"grad_norm": 4.2022279283216495,
"learning_rate": 2e-05,
"loss": 0.24,
"step": 185
},
{
"epoch": 0.6070287539936102,
"grad_norm": 3.527771879306774,
"learning_rate": 2e-05,
"loss": 0.2458,
"step": 190
},
{
"epoch": 0.6230031948881789,
"grad_norm": 3.5313927317162133,
"learning_rate": 2e-05,
"loss": 0.2714,
"step": 195
},
{
"epoch": 0.6389776357827476,
"grad_norm": 3.6235305866137546,
"learning_rate": 2e-05,
"loss": 0.2653,
"step": 200
},
{
"epoch": 0.6549520766773163,
"grad_norm": 4.876371447504886,
"learning_rate": 2e-05,
"loss": 0.2373,
"step": 205
},
{
"epoch": 0.670926517571885,
"grad_norm": 3.5358993905726868,
"learning_rate": 2e-05,
"loss": 0.2205,
"step": 210
},
{
"epoch": 0.6869009584664537,
"grad_norm": 2.4600844043540127,
"learning_rate": 2e-05,
"loss": 0.205,
"step": 215
},
{
"epoch": 0.7028753993610224,
"grad_norm": 4.689947740869789,
"learning_rate": 2e-05,
"loss": 0.2497,
"step": 220
},
{
"epoch": 0.7188498402555911,
"grad_norm": 3.8186352734247073,
"learning_rate": 2e-05,
"loss": 0.2624,
"step": 225
},
{
"epoch": 0.7348242811501597,
"grad_norm": 4.186654907595584,
"learning_rate": 2e-05,
"loss": 0.2046,
"step": 230
},
{
"epoch": 0.7507987220447284,
"grad_norm": 4.618434453667313,
"learning_rate": 2e-05,
"loss": 0.2297,
"step": 235
},
{
"epoch": 0.7667731629392971,
"grad_norm": 1.6540359321412514,
"learning_rate": 2e-05,
"loss": 0.1976,
"step": 240
},
{
"epoch": 0.7827476038338658,
"grad_norm": 2.966359474906274,
"learning_rate": 2e-05,
"loss": 0.2267,
"step": 245
},
{
"epoch": 0.7987220447284346,
"grad_norm": 3.178498309301471,
"learning_rate": 2e-05,
"loss": 0.2015,
"step": 250
},
{
"epoch": 0.8146964856230032,
"grad_norm": 3.0943406181806066,
"learning_rate": 2e-05,
"loss": 0.2088,
"step": 255
},
{
"epoch": 0.8306709265175719,
"grad_norm": 2.601647495877313,
"learning_rate": 2e-05,
"loss": 0.1997,
"step": 260
},
{
"epoch": 0.8466453674121406,
"grad_norm": 2.74734218285866,
"learning_rate": 2e-05,
"loss": 0.2271,
"step": 265
},
{
"epoch": 0.8626198083067093,
"grad_norm": 4.600055126522387,
"learning_rate": 2e-05,
"loss": 0.2188,
"step": 270
},
{
"epoch": 0.8785942492012779,
"grad_norm": 2.854778230115055,
"learning_rate": 2e-05,
"loss": 0.2136,
"step": 275
},
{
"epoch": 0.8945686900958466,
"grad_norm": 5.6767551180163185,
"learning_rate": 2e-05,
"loss": 0.2362,
"step": 280
},
{
"epoch": 0.9105431309904153,
"grad_norm": 2.4685062213282705,
"learning_rate": 2e-05,
"loss": 0.2108,
"step": 285
},
{
"epoch": 0.9265175718849841,
"grad_norm": 4.1197310782397,
"learning_rate": 2e-05,
"loss": 0.2084,
"step": 290
},
{
"epoch": 0.9424920127795527,
"grad_norm": 3.4714190539955085,
"learning_rate": 2e-05,
"loss": 0.2327,
"step": 295
},
{
"epoch": 0.9584664536741214,
"grad_norm": 2.7324693594411613,
"learning_rate": 2e-05,
"loss": 0.2264,
"step": 300
},
{
"epoch": 0.9744408945686901,
"grad_norm": 3.421741611446172,
"learning_rate": 2e-05,
"loss": 0.1995,
"step": 305
},
{
"epoch": 0.9904153354632588,
"grad_norm": 2.9392575520935753,
"learning_rate": 2e-05,
"loss": 0.2168,
"step": 310
}
],
"logging_steps": 5,
"max_steps": 626,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 313,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4095989514240.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}