film_chatbot_with_lora / last-checkpoint /trainer_state.json

Training in progress, epoch 14, checkpoint

0ecf281 verified 7 months ago

6.63 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 14.989939637826962,
	"eval_steps": 500,
	"global_step": 3720,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.4024144869215292,
	"grad_norm": 0.1414330005645752,
	"learning_rate": 0.0002,
	"loss": 0.8177,
	"step": 100
	},
	{
	"epoch": 0.8048289738430584,
	"grad_norm": 0.1557430922985077,
	"learning_rate": 0.0002,
	"loss": 0.7179,
	"step": 200
	},
	{
	"epoch": 1.2072434607645874,
	"grad_norm": 0.19957831501960754,
	"learning_rate": 0.0002,
	"loss": 0.676,
	"step": 300
	},
	{
	"epoch": 1.6096579476861166,
	"grad_norm": 0.23390892148017883,
	"learning_rate": 0.0002,
	"loss": 0.6436,
	"step": 400
	},
	{
	"epoch": 2.0120724346076457,
	"grad_norm": 0.22036917507648468,
	"learning_rate": 0.0002,
	"loss": 0.6293,
	"step": 500
	},
	{
	"epoch": 2.414486921529175,
	"grad_norm": 0.23049889504909515,
	"learning_rate": 0.0002,
	"loss": 0.5812,
	"step": 600
	},
	{
	"epoch": 2.816901408450704,
	"grad_norm": 0.2435583770275116,
	"learning_rate": 0.0002,
	"loss": 0.5789,
	"step": 700
	},
	{
	"epoch": 3.219315895372233,
	"grad_norm": 0.2674865126609802,
	"learning_rate": 0.0002,
	"loss": 0.5455,
	"step": 800
	},
	{
	"epoch": 3.6217303822937623,
	"grad_norm": 0.2517271339893341,
	"learning_rate": 0.0002,
	"loss": 0.5281,
	"step": 900
	},
	{
	"epoch": 4.0241448692152915,
	"grad_norm": 0.27737826108932495,
	"learning_rate": 0.0002,
	"loss": 0.5117,
	"step": 1000
	},
	{
	"epoch": 4.426559356136821,
	"grad_norm": 0.2797327935695648,
	"learning_rate": 0.0002,
	"loss": 0.4646,
	"step": 1100
	},
	{
	"epoch": 4.82897384305835,
	"grad_norm": 0.2902151644229889,
	"learning_rate": 0.0002,
	"loss": 0.4705,
	"step": 1200
	},
	{
	"epoch": 5.241448692152917,
	"grad_norm": 0.3148166239261627,
	"learning_rate": 0.0002,
	"loss": 0.4095,
	"step": 1300
	},
	{
	"epoch": 5.6438631790744465,
	"grad_norm": 0.34497711062431335,
	"learning_rate": 0.0002,
	"loss": 0.4081,
	"step": 1400
	},
	{
	"epoch": 6.046277665995976,
	"grad_norm": 0.3478667736053467,
	"learning_rate": 0.0002,
	"loss": 0.403,
	"step": 1500
	},
	{
	"epoch": 6.448692152917505,
	"grad_norm": 0.3573280870914459,
	"learning_rate": 0.0002,
	"loss": 0.3532,
	"step": 1600
	},
	{
	"epoch": 6.851106639839034,
	"grad_norm": 0.423519492149353,
	"learning_rate": 0.0002,
	"loss": 0.3552,
	"step": 1700
	},
	{
	"epoch": 7.253521126760563,
	"grad_norm": 0.39140382409095764,
	"learning_rate": 0.0002,
	"loss": 0.3265,
	"step": 1800
	},
	{
	"epoch": 7.655935613682092,
	"grad_norm": 0.42240509390830994,
	"learning_rate": 0.0002,
	"loss": 0.3078,
	"step": 1900
	},
	{
	"epoch": 8.058350100603622,
	"grad_norm": 0.41395705938339233,
	"learning_rate": 0.0002,
	"loss": 0.301,
	"step": 2000
	},
	{
	"epoch": 8.460764587525151,
	"grad_norm": 0.4344163239002228,
	"learning_rate": 0.0002,
	"loss": 0.2637,
	"step": 2100
	},
	{
	"epoch": 8.86317907444668,
	"grad_norm": 0.4379712641239166,
	"learning_rate": 0.0002,
	"loss": 0.2684,
	"step": 2200
	},
	{
	"epoch": 9.26559356136821,
	"grad_norm": 0.37764859199523926,
	"learning_rate": 0.0002,
	"loss": 0.2411,
	"step": 2300
	},
	{
	"epoch": 9.668008048289739,
	"grad_norm": 0.4754630923271179,
	"learning_rate": 0.0002,
	"loss": 0.2334,
	"step": 2400
	},
	{
	"epoch": 10.080482897384305,
	"grad_norm": 0.39889901876449585,
	"learning_rate": 0.0002,
	"loss": 0.1951,
	"step": 2500
	},
	{
	"epoch": 10.482897384305835,
	"grad_norm": 0.4438158869743347,
	"learning_rate": 0.0002,
	"loss": 0.2041,
	"step": 2600
	},
	{
	"epoch": 10.885311871227364,
	"grad_norm": 0.46953314542770386,
	"learning_rate": 0.0002,
	"loss": 0.2066,
	"step": 2700
	},
	{
	"epoch": 11.287726358148893,
	"grad_norm": 0.4034820795059204,
	"learning_rate": 0.0002,
	"loss": 0.1857,
	"step": 2800
	},
	{
	"epoch": 11.690140845070422,
	"grad_norm": 0.44169268012046814,
	"learning_rate": 0.0002,
	"loss": 0.1859,
	"step": 2900
	},
	{
	"epoch": 12.092555331991951,
	"grad_norm": 0.3979734480381012,
	"learning_rate": 0.0002,
	"loss": 0.1799,
	"step": 3000
	},
	{
	"epoch": 12.49496981891348,
	"grad_norm": 0.38474753499031067,
	"learning_rate": 0.0002,
	"loss": 0.165,
	"step": 3100
	},
	{
	"epoch": 12.89738430583501,
	"grad_norm": 0.40796715021133423,
	"learning_rate": 0.0002,
	"loss": 0.1706,
	"step": 3200
	},
	{
	"epoch": 13.299798792756539,
	"grad_norm": 0.42144036293029785,
	"learning_rate": 0.0002,
	"loss": 0.1563,
	"step": 3300
	},
	{
	"epoch": 13.702213279678068,
	"grad_norm": 0.40393030643463135,
	"learning_rate": 0.0002,
	"loss": 0.1555,
	"step": 3400
	},
	{
	"epoch": 14.104627766599597,
	"grad_norm": 0.3850514888763428,
	"learning_rate": 0.0002,
	"loss": 0.1537,
	"step": 3500
	},
	{
	"epoch": 14.507042253521126,
	"grad_norm": 0.3872847855091095,
	"learning_rate": 0.0002,
	"loss": 0.1427,
	"step": 3600
	},
	{
	"epoch": 14.909456740442655,
	"grad_norm": 0.3384864032268524,
	"learning_rate": 0.0002,
	"loss": 0.1471,
	"step": 3700
	}
	],
	"logging_steps": 100,
	"max_steps": 3720,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 15,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 5.606624189428531e+17,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}