{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.960960960960961,
  "eval_steps": 500,
  "global_step": 30,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03203203203203203,
      "grad_norm": 0.5500465035438538,
      "learning_rate": 0.0002,
      "loss": 1.9326,
      "step": 1
    },
    {
      "epoch": 0.06406406406406406,
      "grad_norm": 0.3067496418952942,
      "learning_rate": 0.0004,
      "loss": 2.0123,
      "step": 2
    },
    {
      "epoch": 0.0960960960960961,
      "grad_norm": 0.24163766205310822,
      "learning_rate": 0.0003988275914308719,
      "loss": 1.6857,
      "step": 3
    },
    {
      "epoch": 0.12812812812812813,
      "grad_norm": 0.4033714830875397,
      "learning_rate": 0.00039532411114201737,
      "loss": 1.568,
      "step": 4
    },
    {
      "epoch": 0.16016016016016016,
      "grad_norm": 0.40937018394470215,
      "learning_rate": 0.00038953063423656053,
      "loss": 1.2066,
      "step": 5
    },
    {
      "epoch": 0.1921921921921922,
      "grad_norm": 0.303056001663208,
      "learning_rate": 0.00038151508393419143,
      "loss": 1.2382,
      "step": 6
    },
    {
      "epoch": 0.22422422422422422,
      "grad_norm": 0.286432147026062,
      "learning_rate": 0.00037137143523351785,
      "loss": 0.9406,
      "step": 7
    },
    {
      "epoch": 0.25625625625625625,
      "grad_norm": 0.354612797498703,
      "learning_rate": 0.00035921861314112876,
      "loss": 1.032,
      "step": 8
    },
    {
      "epoch": 0.2882882882882883,
      "grad_norm": 0.3172045052051544,
      "learning_rate": 0.0003451990983846262,
      "loss": 1.1604,
      "step": 9
    },
    {
      "epoch": 0.3203203203203203,
      "grad_norm": 0.35591357946395874,
      "learning_rate": 0.00032947725695636553,
      "loss": 1.2686,
      "step": 10
    },
    {
      "epoch": 0.35235235235235235,
      "grad_norm": 0.45947980880737305,
      "learning_rate": 0.0003122374130724765,
      "loss": 1.4562,
      "step": 11
    },
    {
      "epoch": 0.3843843843843844,
      "grad_norm": 0.381909042596817,
      "learning_rate": 0.00029368168813995806,
      "loss": 1.3231,
      "step": 12
    },
    {
      "epoch": 0.4164164164164164,
      "grad_norm": 0.47724688053131104,
      "learning_rate": 0.0002740276310679829,
      "loss": 1.276,
      "step": 13
    },
    {
      "epoch": 0.44844844844844844,
      "grad_norm": 0.3836033344268799,
      "learning_rate": 0.00025350566770584423,
      "loss": 1.5523,
      "step": 14
    },
    {
      "epoch": 0.4804804804804805,
      "grad_norm": 0.3899593651294708,
      "learning_rate": 0.000232356399310553,
      "loss": 1.2293,
      "step": 15
    },
    {
      "epoch": 0.5125125125125125,
      "grad_norm": 0.3470306992530823,
      "learning_rate": 0.00021082778171708353,
      "loss": 1.2625,
      "step": 16
    },
    {
      "epoch": 0.5445445445445446,
      "grad_norm": 0.36498191952705383,
      "learning_rate": 0.00018917221828291652,
      "loss": 1.2286,
      "step": 17
    },
    {
      "epoch": 0.5765765765765766,
      "grad_norm": 0.3170139193534851,
      "learning_rate": 0.00016764360068944706,
      "loss": 1.2458,
      "step": 18
    },
    {
      "epoch": 0.6086086086086087,
      "grad_norm": 0.3011997938156128,
      "learning_rate": 0.00014649433229415587,
      "loss": 1.1672,
      "step": 19
    },
    {
      "epoch": 0.6406406406406406,
      "grad_norm": 0.29357728362083435,
      "learning_rate": 0.00012597236893201712,
      "loss": 1.1728,
      "step": 20
    },
    {
      "epoch": 0.6726726726726727,
      "grad_norm": 0.30459967255592346,
      "learning_rate": 0.000106318311860042,
      "loss": 1.1528,
      "step": 21
    },
    {
      "epoch": 0.7047047047047047,
      "grad_norm": 0.23712120950222015,
      "learning_rate": 8.776258692752355e-05,
      "loss": 0.7205,
      "step": 22
    },
    {
      "epoch": 0.7367367367367368,
      "grad_norm": 0.2815784215927124,
      "learning_rate": 7.052274304363449e-05,
      "loss": 0.7541,
      "step": 23
    },
    {
      "epoch": 0.7687687687687688,
      "grad_norm": 0.3061799705028534,
      "learning_rate": 5.4800901615373876e-05,
      "loss": 1.4001,
      "step": 24
    },
    {
      "epoch": 0.8008008008008008,
      "grad_norm": 0.32817238569259644,
      "learning_rate": 4.078138685887125e-05,
      "loss": 1.1528,
      "step": 25
    },
    {
      "epoch": 0.8328328328328328,
      "grad_norm": 0.37422603368759155,
      "learning_rate": 2.862856476648219e-05,
      "loss": 1.3155,
      "step": 26
    },
    {
      "epoch": 0.8648648648648649,
      "grad_norm": 0.3299436867237091,
      "learning_rate": 1.8484916065808623e-05,
      "loss": 1.1114,
      "step": 27
    },
    {
      "epoch": 0.8968968968968969,
      "grad_norm": 0.32912251353263855,
      "learning_rate": 1.0469365763439531e-05,
      "loss": 1.0327,
      "step": 28
    },
    {
      "epoch": 0.928928928928929,
      "grad_norm": 0.30801478028297424,
      "learning_rate": 4.6758888579826685e-06,
      "loss": 0.85,
      "step": 29
    },
    {
      "epoch": 0.960960960960961,
      "grad_norm": 0.3802436292171478,
      "learning_rate": 1.1724085691280805e-06,
      "loss": 1.2445,
      "step": 30
    }
  ],
  "logging_steps": 1,
  "max_steps": 31,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 30,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.01879310700544e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}