{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.75609756097561,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.3902439024390244,
      "grad_norm": 0.9049399495124817,
      "learning_rate": 4.347826086956522e-05,
      "loss": 2.2562,
      "step": 20
    },
    {
      "epoch": 0.7804878048780488,
      "grad_norm": 0.8101117610931396,
      "learning_rate": 4.88544474393531e-05,
      "loss": 2.1366,
      "step": 40
    },
    {
      "epoch": 1.170731707317073,
      "grad_norm": 0.6929331421852112,
      "learning_rate": 4.750673854447439e-05,
      "loss": 1.9538,
      "step": 60
    },
    {
      "epoch": 1.5609756097560976,
      "grad_norm": 0.754576563835144,
      "learning_rate": 4.615902964959569e-05,
      "loss": 1.9039,
      "step": 80
    },
    {
      "epoch": 1.951219512195122,
      "grad_norm": 0.8291248679161072,
      "learning_rate": 4.4811320754716985e-05,
      "loss": 1.8062,
      "step": 100
    },
    {
      "epoch": 2.341463414634146,
      "grad_norm": 0.9486225247383118,
      "learning_rate": 4.3463611859838275e-05,
      "loss": 1.7765,
      "step": 120
    },
    {
      "epoch": 2.7317073170731705,
      "grad_norm": 1.1279809474945068,
      "learning_rate": 4.211590296495957e-05,
      "loss": 1.7288,
      "step": 140
    },
    {
      "epoch": 3.1219512195121952,
      "grad_norm": 1.1358485221862793,
      "learning_rate": 4.076819407008086e-05,
      "loss": 1.7136,
      "step": 160
    },
    {
      "epoch": 3.5121951219512195,
      "grad_norm": 1.2939114570617676,
      "learning_rate": 3.942048517520216e-05,
      "loss": 1.698,
      "step": 180
    },
    {
      "epoch": 3.902439024390244,
      "grad_norm": 1.3795692920684814,
      "learning_rate": 3.807277628032345e-05,
      "loss": 1.668,
      "step": 200
    },
    {
      "epoch": 4.2926829268292686,
      "grad_norm": 1.2294673919677734,
      "learning_rate": 3.672506738544474e-05,
      "loss": 1.6258,
      "step": 220
    },
    {
      "epoch": 4.682926829268292,
      "grad_norm": 1.4048880338668823,
      "learning_rate": 3.537735849056604e-05,
      "loss": 1.6133,
      "step": 240
    },
    {
      "epoch": 5.073170731707317,
      "grad_norm": 1.1704691648483276,
      "learning_rate": 3.4029649595687336e-05,
      "loss": 1.6349,
      "step": 260
    },
    {
      "epoch": 5.463414634146342,
      "grad_norm": 1.4525257349014282,
      "learning_rate": 3.2681940700808625e-05,
      "loss": 1.5788,
      "step": 280
    },
    {
      "epoch": 5.853658536585366,
      "grad_norm": 1.5394439697265625,
      "learning_rate": 3.133423180592992e-05,
      "loss": 1.5827,
      "step": 300
    },
    {
      "epoch": 6.2439024390243905,
      "grad_norm": 1.5792720317840576,
      "learning_rate": 2.998652291105121e-05,
      "loss": 1.5451,
      "step": 320
    },
    {
      "epoch": 6.634146341463414,
      "grad_norm": 1.6444499492645264,
      "learning_rate": 2.863881401617251e-05,
      "loss": 1.5757,
      "step": 340
    },
    {
      "epoch": 7.024390243902439,
      "grad_norm": 1.5749609470367432,
      "learning_rate": 2.7291105121293804e-05,
      "loss": 1.5414,
      "step": 360
    },
    {
      "epoch": 7.414634146341464,
      "grad_norm": 1.6040682792663574,
      "learning_rate": 2.5943396226415094e-05,
      "loss": 1.5311,
      "step": 380
    },
    {
      "epoch": 7.804878048780488,
      "grad_norm": 1.7397934198379517,
      "learning_rate": 2.459568733153639e-05,
      "loss": 1.534,
      "step": 400
    },
    {
      "epoch": 8.195121951219512,
      "grad_norm": 1.9339927434921265,
      "learning_rate": 2.3247978436657683e-05,
      "loss": 1.5277,
      "step": 420
    },
    {
      "epoch": 8.585365853658537,
      "grad_norm": 1.8686648607254028,
      "learning_rate": 2.1900269541778976e-05,
      "loss": 1.496,
      "step": 440
    },
    {
      "epoch": 8.975609756097562,
      "grad_norm": 1.8943285942077637,
      "learning_rate": 2.055256064690027e-05,
      "loss": 1.4955,
      "step": 460
    },
    {
      "epoch": 9.365853658536585,
      "grad_norm": 2.715195894241333,
      "learning_rate": 1.9204851752021562e-05,
      "loss": 1.4837,
      "step": 480
    },
    {
      "epoch": 9.75609756097561,
      "grad_norm": 2.464538097381592,
      "learning_rate": 1.785714285714286e-05,
      "loss": 1.5082,
      "step": 500
    }
  ],
  "logging_steps": 20,
  "max_steps": 765,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.137043399013171e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}