Upload folder using huggingface_hub

2a91060 verified 20 days ago

6.92 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 3.0,
	"eval_steps": 500,
	"global_step": 387,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.07751937984496124,
	"grad_norm": 2.9215729246190634,
	"learning_rate": 5e-06,
	"loss": 0.7501,
	"step": 10
	},
	{
	"epoch": 0.15503875968992248,
	"grad_norm": 1.7390484139778606,
	"learning_rate": 5e-06,
	"loss": 0.6417,
	"step": 20
	},
	{
	"epoch": 0.23255813953488372,
	"grad_norm": 2.6303600299328775,
	"learning_rate": 5e-06,
	"loss": 0.6086,
	"step": 30
	},
	{
	"epoch": 0.31007751937984496,
	"grad_norm": 1.7365672816191515,
	"learning_rate": 5e-06,
	"loss": 0.598,
	"step": 40
	},
	{
	"epoch": 0.3875968992248062,
	"grad_norm": 2.5822057210689393,
	"learning_rate": 5e-06,
	"loss": 0.5869,
	"step": 50
	},
	{
	"epoch": 0.46511627906976744,
	"grad_norm": 1.746996607003936,
	"learning_rate": 5e-06,
	"loss": 0.5776,
	"step": 60
	},
	{
	"epoch": 0.5426356589147286,
	"grad_norm": 1.8948006480184871,
	"learning_rate": 5e-06,
	"loss": 0.5742,
	"step": 70
	},
	{
	"epoch": 0.6201550387596899,
	"grad_norm": 2.4400520626498414,
	"learning_rate": 5e-06,
	"loss": 0.5726,
	"step": 80
	},
	{
	"epoch": 0.6976744186046512,
	"grad_norm": 2.067112415972005,
	"learning_rate": 5e-06,
	"loss": 0.5648,
	"step": 90
	},
	{
	"epoch": 0.7751937984496124,
	"grad_norm": 1.78188199676091,
	"learning_rate": 5e-06,
	"loss": 0.5661,
	"step": 100
	},
	{
	"epoch": 0.8527131782945736,
	"grad_norm": 1.8707814480818752,
	"learning_rate": 5e-06,
	"loss": 0.5637,
	"step": 110
	},
	{
	"epoch": 0.9302325581395349,
	"grad_norm": 1.1858683276722362,
	"learning_rate": 5e-06,
	"loss": 0.5603,
	"step": 120
	},
	{
	"epoch": 1.0077519379844961,
	"grad_norm": 2.4440942695562193,
	"learning_rate": 5e-06,
	"loss": 0.5535,
	"step": 130
	},
	{
	"epoch": 1.0852713178294573,
	"grad_norm": 3.010730904678519,
	"learning_rate": 5e-06,
	"loss": 0.5174,
	"step": 140
	},
	{
	"epoch": 1.1627906976744187,
	"grad_norm": 2.260676020857914,
	"learning_rate": 5e-06,
	"loss": 0.51,
	"step": 150
	},
	{
	"epoch": 1.2403100775193798,
	"grad_norm": 2.8965772384726995,
	"learning_rate": 5e-06,
	"loss": 0.5111,
	"step": 160
	},
	{
	"epoch": 1.3178294573643412,
	"grad_norm": 2.561493912479322,
	"learning_rate": 5e-06,
	"loss": 0.5109,
	"step": 170
	},
	{
	"epoch": 1.3953488372093024,
	"grad_norm": 2.381325960362653,
	"learning_rate": 5e-06,
	"loss": 0.5113,
	"step": 180
	},
	{
	"epoch": 1.4728682170542635,
	"grad_norm": 2.597163347347591,
	"learning_rate": 5e-06,
	"loss": 0.5098,
	"step": 190
	},
	{
	"epoch": 1.550387596899225,
	"grad_norm": 2.3386390647615833,
	"learning_rate": 5e-06,
	"loss": 0.5105,
	"step": 200
	},
	{
	"epoch": 1.627906976744186,
	"grad_norm": 2.423259280958861,
	"learning_rate": 5e-06,
	"loss": 0.5073,
	"step": 210
	},
	{
	"epoch": 1.7054263565891472,
	"grad_norm": 1.8305441285397837,
	"learning_rate": 5e-06,
	"loss": 0.5113,
	"step": 220
	},
	{
	"epoch": 1.7829457364341086,
	"grad_norm": 1.972210760081399,
	"learning_rate": 5e-06,
	"loss": 0.5104,
	"step": 230
	},
	{
	"epoch": 1.8604651162790697,
	"grad_norm": 2.1286521963156813,
	"learning_rate": 5e-06,
	"loss": 0.5119,
	"step": 240
	},
	{
	"epoch": 1.937984496124031,
	"grad_norm": 2.384913511133737,
	"learning_rate": 5e-06,
	"loss": 0.5111,
	"step": 250
	},
	{
	"epoch": 2.0155038759689923,
	"grad_norm": 2.287778661831066,
	"learning_rate": 5e-06,
	"loss": 0.5034,
	"step": 260
	},
	{
	"epoch": 2.0930232558139537,
	"grad_norm": 1.5497189232645128,
	"learning_rate": 5e-06,
	"loss": 0.4659,
	"step": 270
	},
	{
	"epoch": 2.1705426356589146,
	"grad_norm": 1.1700554228567934,
	"learning_rate": 5e-06,
	"loss": 0.4658,
	"step": 280
	},
	{
	"epoch": 2.248062015503876,
	"grad_norm": 1.3050664193052923,
	"learning_rate": 5e-06,
	"loss": 0.4645,
	"step": 290
	},
	{
	"epoch": 2.3255813953488373,
	"grad_norm": 1.4446705398465065,
	"learning_rate": 5e-06,
	"loss": 0.4667,
	"step": 300
	},
	{
	"epoch": 2.4031007751937983,
	"grad_norm": 1.4968160462249973,
	"learning_rate": 5e-06,
	"loss": 0.47,
	"step": 310
	},
	{
	"epoch": 2.4806201550387597,
	"grad_norm": 1.4716364495623662,
	"learning_rate": 5e-06,
	"loss": 0.4705,
	"step": 320
	},
	{
	"epoch": 2.558139534883721,
	"grad_norm": 1.172790704057467,
	"learning_rate": 5e-06,
	"loss": 0.4707,
	"step": 330
	},
	{
	"epoch": 2.6356589147286824,
	"grad_norm": 1.7971924928025738,
	"learning_rate": 5e-06,
	"loss": 0.4725,
	"step": 340
	},
	{
	"epoch": 2.7131782945736433,
	"grad_norm": 1.9994960190730922,
	"learning_rate": 5e-06,
	"loss": 0.4722,
	"step": 350
	},
	{
	"epoch": 2.7906976744186047,
	"grad_norm": 1.1272436920505606,
	"learning_rate": 5e-06,
	"loss": 0.4711,
	"step": 360
	},
	{
	"epoch": 2.8682170542635657,
	"grad_norm": 1.297666789751404,
	"learning_rate": 5e-06,
	"loss": 0.4743,
	"step": 370
	},
	{
	"epoch": 2.945736434108527,
	"grad_norm": 1.3226222258312366,
	"learning_rate": 5e-06,
	"loss": 0.4716,
	"step": 380
	},
	{
	"epoch": 3.0,
	"step": 387,
	"total_flos": 2589607581450240.0,
	"train_loss": 0.5259489261518769,
	"train_runtime": 12288.9627,
	"train_samples_per_second": 64.441,
	"train_steps_per_second": 0.031
	}
	],
	"logging_steps": 10,
	"max_steps": 387,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 2589607581450240.0,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}