gpt2_2 / trainer_state.json

Upload folder using huggingface_hub

4d6ec3c verified 3 months ago

No virus

6.79 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 2.932811944543192,
	"eval_steps": 500,
	"global_step": 16500,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.08887308922858159,
	"grad_norm": 20.37746810913086,
	"learning_rate": 4.851878184619031e-05,
	"loss": 0.6617,
	"step": 500
	},
	{
	"epoch": 0.17774617845716317,
	"grad_norm": 8.832418441772461,
	"learning_rate": 4.703756369238061e-05,
	"loss": 0.5007,
	"step": 1000
	},
	{
	"epoch": 0.26661926768574473,
	"grad_norm": 112.71693420410156,
	"learning_rate": 4.555634553857092e-05,
	"loss": 0.4867,
	"step": 1500
	},
	{
	"epoch": 0.35549235691432635,
	"grad_norm": 0.7494603991508484,
	"learning_rate": 4.407512738476123e-05,
	"loss": 0.5113,
	"step": 2000
	},
	{
	"epoch": 0.4443654461429079,
	"grad_norm": 0.5181692838668823,
	"learning_rate": 4.259390923095153e-05,
	"loss": 0.557,
	"step": 2500
	},
	{
	"epoch": 0.5332385353714895,
	"grad_norm": 0.8127353191375732,
	"learning_rate": 4.1112691077141844e-05,
	"loss": 0.4746,
	"step": 3000
	},
	{
	"epoch": 0.6221116246000711,
	"grad_norm": 52.21469497680664,
	"learning_rate": 3.9631472923332156e-05,
	"loss": 0.4848,
	"step": 3500
	},
	{
	"epoch": 0.7109847138286527,
	"grad_norm": 3.922440767288208,
	"learning_rate": 3.815025476952246e-05,
	"loss": 0.4689,
	"step": 4000
	},
	{
	"epoch": 0.7998578030572343,
	"grad_norm": 39.936492919921875,
	"learning_rate": 3.6669036615712765e-05,
	"loss": 0.4724,
	"step": 4500
	},
	{
	"epoch": 0.8887308922858158,
	"grad_norm": 3.033518075942993,
	"learning_rate": 3.518781846190307e-05,
	"loss": 0.4384,
	"step": 5000
	},
	{
	"epoch": 0.9776039815143974,
	"grad_norm": 8.464579582214355,
	"learning_rate": 3.370660030809338e-05,
	"loss": 0.4534,
	"step": 5500
	},
	{
	"epoch": 1.066477070742979,
	"grad_norm": 0.24204222857952118,
	"learning_rate": 3.2225382154283686e-05,
	"loss": 0.3978,
	"step": 6000
	},
	{
	"epoch": 1.1553501599715605,
	"grad_norm": 0.057052597403526306,
	"learning_rate": 3.074416400047399e-05,
	"loss": 0.3852,
	"step": 6500
	},
	{
	"epoch": 1.2442232492001422,
	"grad_norm": 0.15880955755710602,
	"learning_rate": 2.92629458466643e-05,
	"loss": 0.3915,
	"step": 7000
	},
	{
	"epoch": 1.3330963384287238,
	"grad_norm": 0.09902948141098022,
	"learning_rate": 2.7781727692854603e-05,
	"loss": 0.3785,
	"step": 7500
	},
	{
	"epoch": 1.4219694276573054,
	"grad_norm": 0.15144290030002594,
	"learning_rate": 2.6300509539044908e-05,
	"loss": 0.3587,
	"step": 8000
	},
	{
	"epoch": 1.510842516885887,
	"grad_norm": 0.22604715824127197,
	"learning_rate": 2.481929138523522e-05,
	"loss": 0.3811,
	"step": 8500
	},
	{
	"epoch": 1.5997156061144686,
	"grad_norm": 14.215106964111328,
	"learning_rate": 2.3338073231425524e-05,
	"loss": 0.3591,
	"step": 9000
	},
	{
	"epoch": 1.68858869534305,
	"grad_norm": 0.2168210744857788,
	"learning_rate": 2.1856855077615832e-05,
	"loss": 0.37,
	"step": 9500
	},
	{
	"epoch": 1.7774617845716318,
	"grad_norm": 0.15380479395389557,
	"learning_rate": 2.0375636923806137e-05,
	"loss": 0.3492,
	"step": 10000
	},
	{
	"epoch": 1.8663348738002132,
	"grad_norm": 0.09666112810373306,
	"learning_rate": 1.889441876999645e-05,
	"loss": 0.3922,
	"step": 10500
	},
	{
	"epoch": 1.9552079630287948,
	"grad_norm": 0.7467890381813049,
	"learning_rate": 1.7413200616186753e-05,
	"loss": 0.3786,
	"step": 11000
	},
	{
	"epoch": 2.0440810522573765,
	"grad_norm": 1.9722317457199097,
	"learning_rate": 1.593198246237706e-05,
	"loss": 0.3258,
	"step": 11500
	},
	{
	"epoch": 2.132954141485958,
	"grad_norm": 0.07470700144767761,
	"learning_rate": 1.4450764308567366e-05,
	"loss": 0.2807,
	"step": 12000
	},
	{
	"epoch": 2.2218272307145397,
	"grad_norm": 0.11256339401006699,
	"learning_rate": 1.2969546154757672e-05,
	"loss": 0.2751,
	"step": 12500
	},
	{
	"epoch": 2.310700319943121,
	"grad_norm": 0.010098825208842754,
	"learning_rate": 1.148832800094798e-05,
	"loss": 0.2385,
	"step": 13000
	},
	{
	"epoch": 2.399573409171703,
	"grad_norm": 0.15252766013145447,
	"learning_rate": 1.0007109847138287e-05,
	"loss": 0.2379,
	"step": 13500
	},
	{
	"epoch": 2.4884464984002843,
	"grad_norm": 17.05082130432129,
	"learning_rate": 8.525891693328595e-06,
	"loss": 0.2706,
	"step": 14000
	},
	{
	"epoch": 2.5773195876288657,
	"grad_norm": 0.11130794882774353,
	"learning_rate": 7.0446735395189e-06,
	"loss": 0.2591,
	"step": 14500
	},
	{
	"epoch": 2.6661926768574475,
	"grad_norm": 74.68379974365234,
	"learning_rate": 5.5634553857092076e-06,
	"loss": 0.3314,
	"step": 15000
	},
	{
	"epoch": 2.7550657660860294,
	"grad_norm": 15.91612434387207,
	"learning_rate": 4.082237231899514e-06,
	"loss": 0.2514,
	"step": 15500
	},
	{
	"epoch": 2.8439388553146108,
	"grad_norm": 39.0718994140625,
	"learning_rate": 2.601019078089821e-06,
	"loss": 0.2358,
	"step": 16000
	},
	{
	"epoch": 2.932811944543192,
	"grad_norm": 0.09988761693239212,
	"learning_rate": 1.119800924280128e-06,
	"loss": 0.2359,
	"step": 16500
	}
	],
	"logging_steps": 500,
	"max_steps": 16878,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 4794112531464192.0,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}