{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.001194743130227,
  "eval_steps": 500,
  "global_step": 1675,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05973715651135006,
      "grad_norm": 2.8490114212036133,
      "learning_rate": 0.00016447368421052634,
      "loss": 11.6497,
      "step": 50
    },
    {
      "epoch": 0.11947431302270012,
      "grad_norm": 0.05463433265686035,
      "learning_rate": 0.0002475359342915811,
      "loss": 4.6396,
      "step": 100
    },
    {
      "epoch": 0.17921146953405018,
      "grad_norm": 0.03409096226096153,
      "learning_rate": 0.00024240246406570843,
      "loss": 4.4228,
      "step": 150
    },
    {
      "epoch": 0.23894862604540024,
      "grad_norm": 0.03413880988955498,
      "learning_rate": 0.00023726899383983574,
      "loss": 4.3988,
      "step": 200
    },
    {
      "epoch": 0.2986857825567503,
      "grad_norm": 0.033178623765707016,
      "learning_rate": 0.00023213552361396305,
      "loss": 4.3922,
      "step": 250
    },
    {
      "epoch": 0.35842293906810035,
      "grad_norm": 0.028784427791833878,
      "learning_rate": 0.00022700205338809036,
      "loss": 4.4053,
      "step": 300
    },
    {
      "epoch": 0.41816009557945044,
      "grad_norm": 0.036163728684186935,
      "learning_rate": 0.00022186858316221766,
      "loss": 4.3944,
      "step": 350
    },
    {
      "epoch": 0.4778972520908005,
      "grad_norm": 0.03532182425260544,
      "learning_rate": 0.00021673511293634497,
      "loss": 4.3938,
      "step": 400
    },
    {
      "epoch": 0.5376344086021505,
      "grad_norm": 0.03272629156708717,
      "learning_rate": 0.00021160164271047228,
      "loss": 4.3859,
      "step": 450
    },
    {
      "epoch": 0.5973715651135006,
      "grad_norm": 0.027959033846855164,
      "learning_rate": 0.0002064681724845996,
      "loss": 4.3881,
      "step": 500
    },
    {
      "epoch": 0.6571087216248507,
      "grad_norm": 0.024525364860892296,
      "learning_rate": 0.0002013347022587269,
      "loss": 4.3989,
      "step": 550
    },
    {
      "epoch": 0.7168458781362007,
      "grad_norm": 0.025551579892635345,
      "learning_rate": 0.00019620123203285423,
      "loss": 4.3802,
      "step": 600
    },
    {
      "epoch": 0.7765830346475507,
      "grad_norm": 0.03189048916101456,
      "learning_rate": 0.00019106776180698152,
      "loss": 4.4041,
      "step": 650
    },
    {
      "epoch": 0.8363201911589009,
      "grad_norm": 0.02770661748945713,
      "learning_rate": 0.00018593429158110883,
      "loss": 4.3955,
      "step": 700
    },
    {
      "epoch": 0.8960573476702509,
      "grad_norm": 0.03752126544713974,
      "learning_rate": 0.00018080082135523616,
      "loss": 4.3857,
      "step": 750
    },
    {
      "epoch": 0.955794504181601,
      "grad_norm": 0.0396958664059639,
      "learning_rate": 0.00017566735112936344,
      "loss": 4.3847,
      "step": 800
    },
    {
      "epoch": 1.015531660692951,
      "grad_norm": 0.03522910550236702,
      "learning_rate": 0.00017053388090349075,
      "loss": 4.3815,
      "step": 850
    },
    {
      "epoch": 1.075268817204301,
      "grad_norm": 0.033044200390577316,
      "learning_rate": 0.00016540041067761806,
      "loss": 4.3903,
      "step": 900
    },
    {
      "epoch": 1.135005973715651,
      "grad_norm": 0.03267841041088104,
      "learning_rate": 0.0001602669404517454,
      "loss": 4.3836,
      "step": 950
    },
    {
      "epoch": 1.194743130227001,
      "grad_norm": 0.04201454669237137,
      "learning_rate": 0.00015513347022587268,
      "loss": 4.3776,
      "step": 1000
    },
    {
      "epoch": 1.2544802867383513,
      "grad_norm": 0.047623638063669205,
      "learning_rate": 0.00015,
      "loss": 4.3734,
      "step": 1050
    },
    {
      "epoch": 1.3142174432497014,
      "grad_norm": 0.03200829401612282,
      "learning_rate": 0.00014486652977412732,
      "loss": 4.3837,
      "step": 1100
    },
    {
      "epoch": 1.3739545997610514,
      "grad_norm": 0.04358180612325668,
      "learning_rate": 0.00013973305954825463,
      "loss": 4.3815,
      "step": 1150
    },
    {
      "epoch": 1.4336917562724014,
      "grad_norm": 0.04975922778248787,
      "learning_rate": 0.0001345995893223819,
      "loss": 4.3746,
      "step": 1200
    },
    {
      "epoch": 1.4934289127837514,
      "grad_norm": 0.03673349320888519,
      "learning_rate": 0.00012946611909650925,
      "loss": 4.3755,
      "step": 1250
    },
    {
      "epoch": 1.5531660692951017,
      "grad_norm": 0.03130173310637474,
      "learning_rate": 0.00012433264887063656,
      "loss": 4.3902,
      "step": 1300
    },
    {
      "epoch": 1.6129032258064515,
      "grad_norm": 0.03993390500545502,
      "learning_rate": 0.00011919917864476385,
      "loss": 4.3852,
      "step": 1350
    },
    {
      "epoch": 1.6726403823178018,
      "grad_norm": 0.04937516897916794,
      "learning_rate": 0.00011406570841889118,
      "loss": 4.3782,
      "step": 1400
    },
    {
      "epoch": 1.7323775388291516,
      "grad_norm": 0.04578279331326485,
      "learning_rate": 0.00010893223819301848,
      "loss": 4.377,
      "step": 1450
    },
    {
      "epoch": 1.7921146953405018,
      "grad_norm": 0.048149123787879944,
      "learning_rate": 0.00010379876796714579,
      "loss": 4.3835,
      "step": 1500
    },
    {
      "epoch": 1.8518518518518519,
      "grad_norm": 0.0500078909099102,
      "learning_rate": 9.86652977412731e-05,
      "loss": 4.3806,
      "step": 1550
    },
    {
      "epoch": 1.911589008363202,
      "grad_norm": 0.040174700319767,
      "learning_rate": 9.353182751540041e-05,
      "loss": 4.3863,
      "step": 1600
    },
    {
      "epoch": 1.971326164874552,
      "grad_norm": 0.033409375697374344,
      "learning_rate": 8.839835728952772e-05,
      "loss": 4.3754,
      "step": 1650
    }
  ],
  "logging_steps": 50,
  "max_steps": 2511,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.0486522326430515e+17,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}