mgh6
/

TCS_MLM_All

Inference Endpoints

Model card Files Files and versions Metrics Training metrics Community

TCS_MLM_All / last-checkpoint /trainer_state.json

mgh6's picture

Training in progress, step 21000, checkpoint

87f470f verified 5 months ago

4.42 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 5.058412621943876,
	"eval_steps": 500,
	"global_step": 21000,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.24087679152113695,
	"grad_norm": 0.5082331299781799,
	"learning_rate": 0.000991969806472336,
	"loss": 1.4852,
	"step": 1000
	},
	{
	"epoch": 0.4817535830422739,
	"grad_norm": 0.4949457347393036,
	"learning_rate": 0.0009839396129446719,
	"loss": 1.369,
	"step": 2000
	},
	{
	"epoch": 0.7226303745634108,
	"grad_norm": 0.5630651116371155,
	"learning_rate": 0.0009759094194170079,
	"loss": 1.3189,
	"step": 3000
	},
	{
	"epoch": 0.9635071660845478,
	"grad_norm": 0.5108479261398315,
	"learning_rate": 0.000967879225889344,
	"loss": 1.2817,
	"step": 4000
	},
	{
	"epoch": 1.2043839576056847,
	"grad_norm": 0.47163671255111694,
	"learning_rate": 0.00095984903236168,
	"loss": 1.2511,
	"step": 5000
	},
	{
	"epoch": 1.4452607491268217,
	"grad_norm": 0.531821072101593,
	"learning_rate": 0.0009518188388340159,
	"loss": 1.2309,
	"step": 6000
	},
	{
	"epoch": 1.6861375406479586,
	"grad_norm": 0.4891831874847412,
	"learning_rate": 0.0009437886453063518,
	"loss": 1.2088,
	"step": 7000
	},
	{
	"epoch": 1.9270143321690956,
	"grad_norm": 0.4778994619846344,
	"learning_rate": 0.0009357584517786879,
	"loss": 1.1936,
	"step": 8000
	},
	{
	"epoch": 2.1678911236902323,
	"grad_norm": 0.5436965823173523,
	"learning_rate": 0.0009277282582510239,
	"loss": 1.1792,
	"step": 9000
	},
	{
	"epoch": 2.4087679152113695,
	"grad_norm": 0.45789214968681335,
	"learning_rate": 0.0009196980647233599,
	"loss": 1.1609,
	"step": 10000
	},
	{
	"epoch": 2.649644706732506,
	"grad_norm": 0.49292871356010437,
	"learning_rate": 0.0009116678711956958,
	"loss": 1.1465,
	"step": 11000
	},
	{
	"epoch": 2.8905214982536434,
	"grad_norm": 0.4877796769142151,
	"learning_rate": 0.0009036376776680318,
	"loss": 1.1339,
	"step": 12000
	},
	{
	"epoch": 3.13139828977478,
	"grad_norm": 0.45624956488609314,
	"learning_rate": 0.0008956074841403679,
	"loss": 1.1235,
	"step": 13000
	},
	{
	"epoch": 3.3722750812959172,
	"grad_norm": 0.45705732703208923,
	"learning_rate": 0.0008875772906127038,
	"loss": 1.114,
	"step": 14000
	},
	{
	"epoch": 3.613151872817054,
	"grad_norm": 0.4534723460674286,
	"learning_rate": 0.0008795470970850398,
	"loss": 1.1022,
	"step": 15000
	},
	{
	"epoch": 3.854028664338191,
	"grad_norm": 0.5117238163948059,
	"learning_rate": 0.0008715169035573758,
	"loss": 1.0936,
	"step": 16000
	},
	{
	"epoch": 4.094905455859328,
	"grad_norm": 0.4832773804664612,
	"learning_rate": 0.0008634867100297117,
	"loss": 1.0895,
	"step": 17000
	},
	{
	"epoch": 4.335782247380465,
	"grad_norm": 0.5012445449829102,
	"learning_rate": 0.0008554565165020477,
	"loss": 1.0793,
	"step": 18000
	},
	{
	"epoch": 4.576659038901602,
	"grad_norm": 0.5131984353065491,
	"learning_rate": 0.0008474263229743838,
	"loss": 1.0737,
	"step": 19000
	},
	{
	"epoch": 4.817535830422739,
	"grad_norm": 0.5085521340370178,
	"learning_rate": 0.0008393961294467197,
	"loss": 1.068,
	"step": 20000
	},
	{
	"epoch": 5.058412621943876,
	"grad_norm": 0.4861578345298767,
	"learning_rate": 0.0008313659359190556,
	"loss": 1.0596,
	"step": 21000
	}
	],
	"logging_steps": 1000,
	"max_steps": 124530,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 30,
	"save_steps": 1000,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 2.1007632729269862e+17,
	"train_batch_size": 64,
	"trial_name": null,
	"trial_params": null
	}