|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.997867803837953, |
|
"eval_steps": 500, |
|
"global_step": 234, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0042643923240938165, |
|
"grad_norm": 25.654364462455888, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"loss": 1.5118, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.021321961620469083, |
|
"grad_norm": 9.217679826848354, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"loss": 1.4588, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.042643923240938165, |
|
"grad_norm": 3.8364568135386343, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 1.1844, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06396588486140725, |
|
"grad_norm": 2.7685426084723606, |
|
"learning_rate": 6.25e-06, |
|
"loss": 1.0266, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08528784648187633, |
|
"grad_norm": 2.714167254334698, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.9764, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.10660980810234541, |
|
"grad_norm": 2.575043424767955, |
|
"learning_rate": 9.999440509051367e-06, |
|
"loss": 0.9474, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.1279317697228145, |
|
"grad_norm": 2.5473126644658635, |
|
"learning_rate": 9.979871469976197e-06, |
|
"loss": 0.9265, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.14925373134328357, |
|
"grad_norm": 2.7728522540985927, |
|
"learning_rate": 9.932452969617607e-06, |
|
"loss": 0.9103, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.17057569296375266, |
|
"grad_norm": 2.262656802643975, |
|
"learning_rate": 9.857450191464337e-06, |
|
"loss": 0.9089, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.19189765458422176, |
|
"grad_norm": 2.2929860595064353, |
|
"learning_rate": 9.755282581475769e-06, |
|
"loss": 0.8839, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.21321961620469082, |
|
"grad_norm": 2.9962187125117556, |
|
"learning_rate": 9.626521502369984e-06, |
|
"loss": 0.8779, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2345415778251599, |
|
"grad_norm": 2.4461853196937744, |
|
"learning_rate": 9.471887038331686e-06, |
|
"loss": 0.8655, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.255863539445629, |
|
"grad_norm": 2.548713200713329, |
|
"learning_rate": 9.292243968009332e-06, |
|
"loss": 0.8452, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2771855010660981, |
|
"grad_norm": 2.354080355646257, |
|
"learning_rate": 9.088596928322158e-06, |
|
"loss": 0.8453, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.29850746268656714, |
|
"grad_norm": 2.3350186621937494, |
|
"learning_rate": 8.862084796122998e-06, |
|
"loss": 0.8213, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.31982942430703626, |
|
"grad_norm": 2.352888208422696, |
|
"learning_rate": 8.613974319136959e-06, |
|
"loss": 0.8087, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.3411513859275053, |
|
"grad_norm": 2.626490865987853, |
|
"learning_rate": 8.345653031794292e-06, |
|
"loss": 0.8, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.3624733475479744, |
|
"grad_norm": 2.2564126156464934, |
|
"learning_rate": 8.058621495575032e-06, |
|
"loss": 0.7883, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.3837953091684435, |
|
"grad_norm": 2.536678489630529, |
|
"learning_rate": 7.754484907260513e-06, |
|
"loss": 0.7797, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4051172707889126, |
|
"grad_norm": 2.330261835490306, |
|
"learning_rate": 7.434944122021837e-06, |
|
"loss": 0.7704, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.42643923240938164, |
|
"grad_norm": 2.375473887900136, |
|
"learning_rate": 7.101786141547829e-06, |
|
"loss": 0.7491, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.44776119402985076, |
|
"grad_norm": 2.31845485895562, |
|
"learning_rate": 6.7568741204067145e-06, |
|
"loss": 0.7426, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.4690831556503198, |
|
"grad_norm": 2.2326175780721513, |
|
"learning_rate": 6.402136946530014e-06, |
|
"loss": 0.7366, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.4904051172707889, |
|
"grad_norm": 2.444799836226394, |
|
"learning_rate": 6.039558454088796e-06, |
|
"loss": 0.7294, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.511727078891258, |
|
"grad_norm": 2.42023799653421, |
|
"learning_rate": 5.671166329088278e-06, |
|
"loss": 0.7346, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5330490405117271, |
|
"grad_norm": 2.525769921790198, |
|
"learning_rate": 5.299020769725172e-06, |
|
"loss": 0.716, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.5543710021321961, |
|
"grad_norm": 2.210624855154462, |
|
"learning_rate": 4.9252029649236835e-06, |
|
"loss": 0.7087, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5756929637526652, |
|
"grad_norm": 2.260417777455262, |
|
"learning_rate": 4.551803455482833e-06, |
|
"loss": 0.6979, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 2.5410734519213847, |
|
"learning_rate": 4.180910442924312e-06, |
|
"loss": 0.6758, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6183368869936035, |
|
"grad_norm": 2.2197214614990983, |
|
"learning_rate": 3.8145981114225135e-06, |
|
"loss": 0.6832, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.6396588486140725, |
|
"grad_norm": 2.417478197312417, |
|
"learning_rate": 3.4549150281252635e-06, |
|
"loss": 0.6705, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6609808102345416, |
|
"grad_norm": 2.193206874567919, |
|
"learning_rate": 3.1038726867353587e-06, |
|
"loss": 0.6909, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.6823027718550106, |
|
"grad_norm": 2.3141978562259133, |
|
"learning_rate": 2.7634342584218364e-06, |
|
"loss": 0.678, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7036247334754797, |
|
"grad_norm": 2.20282691421215, |
|
"learning_rate": 2.43550361297047e-06, |
|
"loss": 0.6646, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.7249466950959488, |
|
"grad_norm": 2.3241432733966962, |
|
"learning_rate": 2.1219146715716332e-06, |
|
"loss": 0.6633, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.746268656716418, |
|
"grad_norm": 2.3658483418520464, |
|
"learning_rate": 1.8244211507891064e-06, |
|
"loss": 0.6516, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.767590618336887, |
|
"grad_norm": 2.259696417637488, |
|
"learning_rate": 1.544686755065677e-06, |
|
"loss": 0.6418, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7889125799573561, |
|
"grad_norm": 2.284368925546414, |
|
"learning_rate": 1.2842758726130283e-06, |
|
"loss": 0.6405, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.8102345415778252, |
|
"grad_norm": 2.2174015564488223, |
|
"learning_rate": 1.044644826718295e-06, |
|
"loss": 0.6359, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8315565031982942, |
|
"grad_norm": 2.3098966859462076, |
|
"learning_rate": 8.271337313934869e-07, |
|
"loss": 0.6232, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.8528784648187633, |
|
"grad_norm": 2.240425165408693, |
|
"learning_rate": 6.329589969143518e-07, |
|
"loss": 0.6263, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8742004264392325, |
|
"grad_norm": 2.203409177091297, |
|
"learning_rate": 4.632065271606756e-07, |
|
"loss": 0.6299, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.8955223880597015, |
|
"grad_norm": 2.1702011902470724, |
|
"learning_rate": 3.18825646801314e-07, |
|
"loss": 0.636, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9168443496801706, |
|
"grad_norm": 2.202446820245564, |
|
"learning_rate": 2.006237922855553e-07, |
|
"loss": 0.6182, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.9381663113006397, |
|
"grad_norm": 2.118840248626809, |
|
"learning_rate": 1.0926199633097156e-07, |
|
"loss": 0.609, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9594882729211087, |
|
"grad_norm": 2.168175873632397, |
|
"learning_rate": 4.52511911603265e-08, |
|
"loss": 0.6173, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.9808102345415778, |
|
"grad_norm": 2.2624619803066617, |
|
"learning_rate": 8.949351161324227e-09, |
|
"loss": 0.6207, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.997867803837953, |
|
"eval_loss": 0.7390011548995972, |
|
"eval_runtime": 106.273, |
|
"eval_samples_per_second": 3.67, |
|
"eval_steps_per_second": 0.922, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.997867803837953, |
|
"step": 234, |
|
"total_flos": 48942494515200.0, |
|
"train_loss": 0.7695284368645432, |
|
"train_runtime": 7306.9109, |
|
"train_samples_per_second": 1.026, |
|
"train_steps_per_second": 0.032 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 234, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 48942494515200.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|