Training in progress, step 25, checkpoint

d8acb64 verified 16 days ago

5.71 kB

	{
	"best_metric": 1.6246213912963867,
	"best_model_checkpoint": "miner_id_24/checkpoint-25",
	"epoch": 0.1225114854517611,
	"eval_steps": 25,
	"global_step": 25,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.004900459418070444,
	"grad_norm": 23.471738815307617,
	"learning_rate": 5e-05,
	"loss": 6.7852,
	"step": 1
	},
	{
	"epoch": 0.004900459418070444,
	"eval_loss": 7.382022857666016,
	"eval_runtime": 18.5323,
	"eval_samples_per_second": 74.195,
	"eval_steps_per_second": 9.281,
	"step": 1
	},
	{
	"epoch": 0.009800918836140888,
	"grad_norm": 23.077556610107422,
	"learning_rate": 0.0001,
	"loss": 6.8171,
	"step": 2
	},
	{
	"epoch": 0.014701378254211332,
	"grad_norm": 17.30784034729004,
	"learning_rate": 9.989294616193017e-05,
	"loss": 5.2819,
	"step": 3
	},
	{
	"epoch": 0.019601837672281776,
	"grad_norm": 10.578380584716797,
	"learning_rate": 9.957224306869053e-05,
	"loss": 3.4992,
	"step": 4
	},
	{
	"epoch": 0.02450229709035222,
	"grad_norm": 8.487051010131836,
	"learning_rate": 9.903926402016153e-05,
	"loss": 2.9079,
	"step": 5
	},
	{
	"epoch": 0.029402756508422664,
	"grad_norm": 6.275716781616211,
	"learning_rate": 9.829629131445342e-05,
	"loss": 2.6387,
	"step": 6
	},
	{
	"epoch": 0.03430321592649311,
	"grad_norm": 4.362668037414551,
	"learning_rate": 9.73465064747553e-05,
	"loss": 2.48,
	"step": 7
	},
	{
	"epoch": 0.03920367534456355,
	"grad_norm": 3.9618260860443115,
	"learning_rate": 9.619397662556435e-05,
	"loss": 2.3593,
	"step": 8
	},
	{
	"epoch": 0.044104134762633994,
	"grad_norm": 3.2072575092315674,
	"learning_rate": 9.484363707663442e-05,
	"loss": 2.3954,
	"step": 9
	},
	{
	"epoch": 0.04900459418070444,
	"grad_norm": 3.726250171661377,
	"learning_rate": 9.330127018922194e-05,
	"loss": 2.1785,
	"step": 10
	},
	{
	"epoch": 0.053905053598774885,
	"grad_norm": 2.532313585281372,
	"learning_rate": 9.157348061512727e-05,
	"loss": 2.2436,
	"step": 11
	},
	{
	"epoch": 0.05880551301684533,
	"grad_norm": 2.7389440536499023,
	"learning_rate": 8.966766701456177e-05,
	"loss": 2.1768,
	"step": 12
	},
	{
	"epoch": 0.06370597243491577,
	"grad_norm": 2.995002508163452,
	"learning_rate": 8.759199037394887e-05,
	"loss": 1.9088,
	"step": 13
	},
	{
	"epoch": 0.06860643185298622,
	"grad_norm": 2.0172295570373535,
	"learning_rate": 8.535533905932738e-05,
	"loss": 1.4442,
	"step": 14
	},
	{
	"epoch": 0.07350689127105667,
	"grad_norm": 2.4108283519744873,
	"learning_rate": 8.296729075500344e-05,
	"loss": 1.5092,
	"step": 15
	},
	{
	"epoch": 0.0784073506891271,
	"grad_norm": 2.739496946334839,
	"learning_rate": 8.043807145043604e-05,
	"loss": 1.4547,
	"step": 16
	},
	{
	"epoch": 0.08330781010719755,
	"grad_norm": 3.4123828411102295,
	"learning_rate": 7.777851165098012e-05,
	"loss": 1.4325,
	"step": 17
	},
	{
	"epoch": 0.08820826952526799,
	"grad_norm": 2.2385709285736084,
	"learning_rate": 7.500000000000001e-05,
	"loss": 1.6656,
	"step": 18
	},
	{
	"epoch": 0.09310872894333844,
	"grad_norm": 2.2674455642700195,
	"learning_rate": 7.211443451095007e-05,
	"loss": 1.6064,
	"step": 19
	},
	{
	"epoch": 0.09800918836140889,
	"grad_norm": 2.166579484939575,
	"learning_rate": 6.91341716182545e-05,
	"loss": 1.707,
	"step": 20
	},
	{
	"epoch": 0.10290964777947932,
	"grad_norm": 2.1645350456237793,
	"learning_rate": 6.607197326515808e-05,
	"loss": 1.7495,
	"step": 21
	},
	{
	"epoch": 0.10781010719754977,
	"grad_norm": 2.4724934101104736,
	"learning_rate": 6.294095225512603e-05,
	"loss": 2.0361,
	"step": 22
	},
	{
	"epoch": 0.11271056661562022,
	"grad_norm": 1.933280110359192,
	"learning_rate": 5.9754516100806423e-05,
	"loss": 2.0074,
	"step": 23
	},
	{
	"epoch": 0.11761102603369065,
	"grad_norm": 2.0323431491851807,
	"learning_rate": 5.6526309611002594e-05,
	"loss": 2.0281,
	"step": 24
	},
	{
	"epoch": 0.1225114854517611,
	"grad_norm": 2.1172215938568115,
	"learning_rate": 5.327015646150716e-05,
	"loss": 2.1502,
	"step": 25
	},
	{
	"epoch": 0.1225114854517611,
	"eval_loss": 1.6246213912963867,
	"eval_runtime": 18.4789,
	"eval_samples_per_second": 74.409,
	"eval_steps_per_second": 9.308,
	"step": 25
	}
	],
	"logging_steps": 1,
	"max_steps": 50,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 25,
	"stateful_callbacks": {
	"EarlyStoppingCallback": {
	"args": {
	"early_stopping_patience": 1,
	"early_stopping_threshold": 0.0
	},
	"attributes": {
	"early_stopping_patience_counter": 0
	}
	},
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 7.957253052799386e+16,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}