LCK_LLM6 / trainer_state.json

Upload folder using huggingface_hub

80aa7d8 verified 5 months ago

9.23 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.07948599059415778,
	"eval_steps": 500,
	"global_step": 4800,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.001655958137378287,
	"grad_norm": 0.19597935676574707,
	"learning_rate": 0.00019966890756553004,
	"loss": 2.1972,
	"step": 100
	},
	{
	"epoch": 0.003311916274756574,
	"grad_norm": 0.25808241963386536,
	"learning_rate": 0.00019933771577352244,
	"loss": 1.9677,
	"step": 200
	},
	{
	"epoch": 0.0049678744121348616,
	"grad_norm": 0.23811133205890656,
	"learning_rate": 0.00019900652398151486,
	"loss": 1.9341,
	"step": 300
	},
	{
	"epoch": 0.006623832549513148,
	"grad_norm": 0.26714324951171875,
	"learning_rate": 0.00019867533218950728,
	"loss": 1.915,
	"step": 400
	},
	{
	"epoch": 0.008279790686891435,
	"grad_norm": 0.23645658791065216,
	"learning_rate": 0.0001983441403974997,
	"loss": 1.8916,
	"step": 500
	},
	{
	"epoch": 0.009935748824269723,
	"grad_norm": 0.2878512740135193,
	"learning_rate": 0.00019801294860549213,
	"loss": 1.9003,
	"step": 600
	},
	{
	"epoch": 0.01159170696164801,
	"grad_norm": 0.2687942087650299,
	"learning_rate": 0.00019768175681348456,
	"loss": 1.876,
	"step": 700
	},
	{
	"epoch": 0.013247665099026296,
	"grad_norm": 0.2722982168197632,
	"learning_rate": 0.00019735056502147698,
	"loss": 1.9004,
	"step": 800
	},
	{
	"epoch": 0.014903623236404583,
	"grad_norm": 0.25342944264411926,
	"learning_rate": 0.0001970193732294694,
	"loss": 1.8947,
	"step": 900
	},
	{
	"epoch": 0.01655958137378287,
	"grad_norm": 0.2900806963443756,
	"learning_rate": 0.0001966881814374618,
	"loss": 1.8795,
	"step": 1000
	},
	{
	"epoch": 0.018215539511161158,
	"grad_norm": 0.24855603277683258,
	"learning_rate": 0.00019635698964545422,
	"loss": 1.8657,
	"step": 1100
	},
	{
	"epoch": 0.019871497648539446,
	"grad_norm": 0.25272709131240845,
	"learning_rate": 0.00019602579785344665,
	"loss": 1.8687,
	"step": 1200
	},
	{
	"epoch": 0.02152745578591773,
	"grad_norm": 0.31408464908599854,
	"learning_rate": 0.00019569460606143904,
	"loss": 1.8332,
	"step": 1300
	},
	{
	"epoch": 0.02318341392329602,
	"grad_norm": 0.26880863308906555,
	"learning_rate": 0.00019536341426943147,
	"loss": 1.8603,
	"step": 1400
	},
	{
	"epoch": 0.024839372060674308,
	"grad_norm": 0.2371913194656372,
	"learning_rate": 0.0001950322224774239,
	"loss": 1.8273,
	"step": 1500
	},
	{
	"epoch": 0.026495330198052593,
	"grad_norm": 0.2510370910167694,
	"learning_rate": 0.00019470103068541632,
	"loss": 1.8524,
	"step": 1600
	},
	{
	"epoch": 0.02815128833543088,
	"grad_norm": 0.26143962144851685,
	"learning_rate": 0.00019436983889340874,
	"loss": 1.8543,
	"step": 1700
	},
	{
	"epoch": 0.029807246472809166,
	"grad_norm": 0.2438499480485916,
	"learning_rate": 0.00019403864710140116,
	"loss": 1.8411,
	"step": 1800
	},
	{
	"epoch": 0.03146320461018746,
	"grad_norm": 0.2666601836681366,
	"learning_rate": 0.0001937074553093936,
	"loss": 1.8548,
	"step": 1900
	},
	{
	"epoch": 0.03311916274756574,
	"grad_norm": 0.2752065062522888,
	"learning_rate": 0.000193376263517386,
	"loss": 1.8534,
	"step": 2000
	},
	{
	"epoch": 0.03477512088494403,
	"grad_norm": 0.24849963188171387,
	"learning_rate": 0.00019304507172537844,
	"loss": 1.8476,
	"step": 2100
	},
	{
	"epoch": 0.036431079022322316,
	"grad_norm": 0.2809307277202606,
	"learning_rate": 0.00019271387993337083,
	"loss": 1.8505,
	"step": 2200
	},
	{
	"epoch": 0.038087037159700604,
	"grad_norm": 0.23209506273269653,
	"learning_rate": 0.00019238268814136326,
	"loss": 1.864,
	"step": 2300
	},
	{
	"epoch": 0.03974299529707889,
	"grad_norm": 0.25108611583709717,
	"learning_rate": 0.00019205149634935568,
	"loss": 1.8411,
	"step": 2400
	},
	{
	"epoch": 0.041398953434457174,
	"grad_norm": 0.2639986276626587,
	"learning_rate": 0.0001917203045573481,
	"loss": 1.8456,
	"step": 2500
	},
	{
	"epoch": 0.04305491157183546,
	"grad_norm": 0.2927249073982239,
	"learning_rate": 0.00019138911276534053,
	"loss": 1.8361,
	"step": 2600
	},
	{
	"epoch": 0.04471086970921375,
	"grad_norm": 0.2660035192966461,
	"learning_rate": 0.00019105792097333295,
	"loss": 1.8352,
	"step": 2700
	},
	{
	"epoch": 0.04636682784659204,
	"grad_norm": 0.23683211207389832,
	"learning_rate": 0.00019072672918132538,
	"loss": 1.824,
	"step": 2800
	},
	{
	"epoch": 0.04802278598397033,
	"grad_norm": 0.7001804709434509,
	"learning_rate": 0.00019039553738931777,
	"loss": 1.82,
	"step": 2900
	},
	{
	"epoch": 0.049678744121348616,
	"grad_norm": 0.2500315010547638,
	"learning_rate": 0.0001900643455973102,
	"loss": 1.862,
	"step": 3000
	},
	{
	"epoch": 0.0513347022587269,
	"grad_norm": 0.2476750761270523,
	"learning_rate": 0.00018973315380530262,
	"loss": 1.8247,
	"step": 3100
	},
	{
	"epoch": 0.052990660396105185,
	"grad_norm": 0.23064357042312622,
	"learning_rate": 0.00018940196201329502,
	"loss": 1.8685,
	"step": 3200
	},
	{
	"epoch": 0.054646618533483474,
	"grad_norm": 0.2495209276676178,
	"learning_rate": 0.00018907077022128744,
	"loss": 1.8214,
	"step": 3300
	},
	{
	"epoch": 0.05630257667086176,
	"grad_norm": 0.25310614705085754,
	"learning_rate": 0.00018873957842927987,
	"loss": 1.84,
	"step": 3400
	},
	{
	"epoch": 0.05795853480824005,
	"grad_norm": 0.24329665303230286,
	"learning_rate": 0.0001884083866372723,
	"loss": 1.7982,
	"step": 3500
	},
	{
	"epoch": 0.05961449294561833,
	"grad_norm": 0.25951218605041504,
	"learning_rate": 0.00018807719484526471,
	"loss": 1.8078,
	"step": 3600
	},
	{
	"epoch": 0.06127045108299662,
	"grad_norm": 0.24307559430599213,
	"learning_rate": 0.00018774600305325714,
	"loss": 1.8181,
	"step": 3700
	},
	{
	"epoch": 0.06292640922037492,
	"grad_norm": 0.27577558159828186,
	"learning_rate": 0.00018741481126124956,
	"loss": 1.8158,
	"step": 3800
	},
	{
	"epoch": 0.06458236735775319,
	"grad_norm": 0.21584127843379974,
	"learning_rate": 0.00018708361946924199,
	"loss": 1.8362,
	"step": 3900
	},
	{
	"epoch": 0.06623832549513148,
	"grad_norm": 0.2558760344982147,
	"learning_rate": 0.0001867524276772344,
	"loss": 1.8425,
	"step": 4000
	},
	{
	"epoch": 0.06789428363250977,
	"grad_norm": 0.2206682711839676,
	"learning_rate": 0.0001864212358852268,
	"loss": 1.8155,
	"step": 4100
	},
	{
	"epoch": 0.06955024176988805,
	"grad_norm": 0.23946842551231384,
	"learning_rate": 0.00018609004409321923,
	"loss": 1.8349,
	"step": 4200
	},
	{
	"epoch": 0.07120619990726634,
	"grad_norm": 0.22356823086738586,
	"learning_rate": 0.00018575885230121165,
	"loss": 1.8013,
	"step": 4300
	},
	{
	"epoch": 0.07286215804464463,
	"grad_norm": 0.28327444195747375,
	"learning_rate": 0.00018542766050920408,
	"loss": 1.8193,
	"step": 4400
	},
	{
	"epoch": 0.07451811618202292,
	"grad_norm": 0.259748637676239,
	"learning_rate": 0.0001850964687171965,
	"loss": 1.8401,
	"step": 4500
	},
	{
	"epoch": 0.07617407431940121,
	"grad_norm": 0.24509303271770477,
	"learning_rate": 0.00018476527692518893,
	"loss": 1.8313,
	"step": 4600
	},
	{
	"epoch": 0.0778300324567795,
	"grad_norm": 0.2799519896507263,
	"learning_rate": 0.00018443408513318132,
	"loss": 1.8437,
	"step": 4700
	},
	{
	"epoch": 0.07948599059415778,
	"grad_norm": 0.25356101989746094,
	"learning_rate": 0.00018410289334117375,
	"loss": 1.7989,
	"step": 4800
	}
	],
	"logging_steps": 100,
	"max_steps": 60388,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 800,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 3.318952218329088e+18,
	"train_batch_size": 16,
	"trial_name": null,
	"trial_params": null
	}