opsgenius-gptlarge_8bit / trainer_state.json

initial fine-tuning commit

6a23a66 verified 3 months ago

6.42 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 90.0,
	"eval_steps": 500,
	"global_step": 360,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 2.5,
	"grad_norm": 0.5093985795974731,
	"learning_rate": 0.0009980973490458728,
	"loss": 2.5467,
	"step": 10
	},
	{
	"epoch": 5.0,
	"grad_norm": 0.27049773931503296,
	"learning_rate": 0.000992403876506104,
	"loss": 1.618,
	"step": 20
	},
	{
	"epoch": 7.5,
	"grad_norm": 0.22319507598876953,
	"learning_rate": 0.0009829629131445341,
	"loss": 1.3933,
	"step": 30
	},
	{
	"epoch": 10.0,
	"grad_norm": 0.2564426362514496,
	"learning_rate": 0.0009698463103929542,
	"loss": 1.2537,
	"step": 40
	},
	{
	"epoch": 12.5,
	"grad_norm": 0.29759547114372253,
	"learning_rate": 0.0009531538935183251,
	"loss": 1.1368,
	"step": 50
	},
	{
	"epoch": 15.0,
	"grad_norm": 0.39310407638549805,
	"learning_rate": 0.0009330127018922195,
	"loss": 1.0137,
	"step": 60
	},
	{
	"epoch": 17.5,
	"grad_norm": 0.4282013475894928,
	"learning_rate": 0.0009095760221444959,
	"loss": 0.9095,
	"step": 70
	},
	{
	"epoch": 20.0,
	"grad_norm": 0.5433480143547058,
	"learning_rate": 0.000883022221559489,
	"loss": 0.8259,
	"step": 80
	},
	{
	"epoch": 22.5,
	"grad_norm": 0.5130556225776672,
	"learning_rate": 0.0008535533905932737,
	"loss": 0.7432,
	"step": 90
	},
	{
	"epoch": 25.0,
	"grad_norm": 0.5033867359161377,
	"learning_rate": 0.0008213938048432696,
	"loss": 0.6756,
	"step": 100
	},
	{
	"epoch": 27.5,
	"grad_norm": 0.5322891473770142,
	"learning_rate": 0.0007867882181755231,
	"loss": 0.6123,
	"step": 110
	},
	{
	"epoch": 30.0,
	"grad_norm": 0.5658751726150513,
	"learning_rate": 0.00075,
	"loss": 0.567,
	"step": 120
	},
	{
	"epoch": 32.5,
	"grad_norm": 0.5688820481300354,
	"learning_rate": 0.0007113091308703497,
	"loss": 0.5171,
	"step": 130
	},
	{
	"epoch": 35.0,
	"grad_norm": 0.48720425367355347,
	"learning_rate": 0.0006710100716628344,
	"loss": 0.4825,
	"step": 140
	},
	{
	"epoch": 37.5,
	"grad_norm": 0.5325213074684143,
	"learning_rate": 0.0006294095225512603,
	"loss": 0.4498,
	"step": 150
	},
	{
	"epoch": 40.0,
	"grad_norm": 0.527807891368866,
	"learning_rate": 0.0005868240888334653,
	"loss": 0.4184,
	"step": 160
	},
	{
	"epoch": 42.5,
	"grad_norm": 0.4882418215274811,
	"learning_rate": 0.0005435778713738292,
	"loss": 0.3946,
	"step": 170
	},
	{
	"epoch": 45.0,
	"grad_norm": 0.49641069769859314,
	"learning_rate": 0.0005,
	"loss": 0.3706,
	"step": 180
	},
	{
	"epoch": 47.5,
	"grad_norm": 0.5216576457023621,
	"learning_rate": 0.00045642212862617086,
	"loss": 0.3529,
	"step": 190
	},
	{
	"epoch": 50.0,
	"grad_norm": 0.5289739966392517,
	"learning_rate": 0.00041317591116653486,
	"loss": 0.3334,
	"step": 200
	},
	{
	"epoch": 52.5,
	"grad_norm": 0.49065396189689636,
	"learning_rate": 0.0003705904774487396,
	"loss": 0.3166,
	"step": 210
	},
	{
	"epoch": 55.0,
	"grad_norm": 0.4922383725643158,
	"learning_rate": 0.0003289899283371657,
	"loss": 0.3086,
	"step": 220
	},
	{
	"epoch": 57.5,
	"grad_norm": 0.42886731028556824,
	"learning_rate": 0.0002886908691296504,
	"loss": 0.2894,
	"step": 230
	},
	{
	"epoch": 60.0,
	"grad_norm": 0.42780551314353943,
	"learning_rate": 0.0002500000000000001,
	"loss": 0.286,
	"step": 240
	},
	{
	"epoch": 62.5,
	"grad_norm": 0.4010085463523865,
	"learning_rate": 0.00021321178182447708,
	"loss": 0.2775,
	"step": 250
	},
	{
	"epoch": 65.0,
	"grad_norm": 0.4402850866317749,
	"learning_rate": 0.0001786061951567303,
	"loss": 0.2714,
	"step": 260
	},
	{
	"epoch": 67.5,
	"grad_norm": 0.4067270755767822,
	"learning_rate": 0.00014644660940672628,
	"loss": 0.2664,
	"step": 270
	},
	{
	"epoch": 70.0,
	"grad_norm": 0.43572983145713806,
	"learning_rate": 0.00011697777844051105,
	"loss": 0.2582,
	"step": 280
	},
	{
	"epoch": 72.5,
	"grad_norm": 0.3881024420261383,
	"learning_rate": 9.042397785550405e-05,
	"loss": 0.2549,
	"step": 290
	},
	{
	"epoch": 75.0,
	"grad_norm": 0.3768531382083893,
	"learning_rate": 6.698729810778065e-05,
	"loss": 0.251,
	"step": 300
	},
	{
	"epoch": 77.5,
	"grad_norm": 0.3921433389186859,
	"learning_rate": 4.684610648167503e-05,
	"loss": 0.2509,
	"step": 310
	},
	{
	"epoch": 80.0,
	"grad_norm": 0.38551065325737,
	"learning_rate": 3.0153689607045842e-05,
	"loss": 0.2466,
	"step": 320
	},
	{
	"epoch": 82.5,
	"grad_norm": 0.3801429867744446,
	"learning_rate": 1.70370868554659e-05,
	"loss": 0.2462,
	"step": 330
	},
	{
	"epoch": 85.0,
	"grad_norm": 0.39348262548446655,
	"learning_rate": 7.59612349389599e-06,
	"loss": 0.2468,
	"step": 340
	},
	{
	"epoch": 87.5,
	"grad_norm": 0.3822017014026642,
	"learning_rate": 1.9026509541272275e-06,
	"loss": 0.2477,
	"step": 350
	},
	{
	"epoch": 90.0,
	"grad_norm": 0.3794040381908417,
	"learning_rate": 0.0,
	"loss": 0.2449,
	"step": 360
	}
	],
	"logging_steps": 10,
	"max_steps": 360,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 90,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 1.43272328822784e+16,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}