MiniCPM-V-2_6_LoRA_Adapter / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 34.285714285714285,
"eval_steps": 10,
"global_step": 60,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 0,
"loss": 1.3796,
"step": 1
},
{
"epoch": 1.1428571428571428,
"grad_norm": 2.2001447677612305,
"learning_rate": 0.0,
"loss": 0.2105,
"step": 2
},
{
"epoch": 2.0,
"grad_norm": 2.2001447677612305,
"learning_rate": 0.0,
"loss": 0.962,
"step": 3
},
{
"epoch": 2.2857142857142856,
"grad_norm": 0.8828794360160828,
"learning_rate": 0.0002,
"loss": 0.2299,
"step": 4
},
{
"epoch": 3.0,
"grad_norm": 0.8828794360160828,
"learning_rate": 0.0002,
"loss": 0.8788,
"step": 5
},
{
"epoch": 3.4285714285714284,
"grad_norm": 0.7549806237220764,
"learning_rate": 0.0002,
"loss": 0.5161,
"step": 6
},
{
"epoch": 4.0,
"grad_norm": 0.7549806237220764,
"learning_rate": 0.0002,
"loss": 0.5018,
"step": 7
},
{
"epoch": 4.571428571428571,
"grad_norm": 0.5643095970153809,
"learning_rate": 0.0002,
"loss": 0.5318,
"step": 8
},
{
"epoch": 5.0,
"grad_norm": 0.5643095970153809,
"learning_rate": 0.0002,
"loss": 0.3574,
"step": 9
},
{
"epoch": 5.714285714285714,
"grad_norm": 0.6527697443962097,
"learning_rate": 0.0002,
"loss": 0.6326,
"step": 10
},
{
"epoch": 5.714285714285714,
"eval_loss": 0.7977780103683472,
"eval_runtime": 1.1094,
"eval_samples_per_second": 6.31,
"eval_steps_per_second": 6.31,
"step": 10
},
{
"epoch": 6.0,
"grad_norm": 0.6527697443962097,
"learning_rate": 0.0002,
"loss": 0.1602,
"step": 11
},
{
"epoch": 6.857142857142857,
"grad_norm": 0.6531093716621399,
"learning_rate": 0.0002,
"loss": 0.5492,
"step": 12
},
{
"epoch": 7.0,
"grad_norm": 0.6531093716621399,
"learning_rate": 0.0002,
"loss": 0.1361,
"step": 13
},
{
"epoch": 8.0,
"grad_norm": 0.5385488867759705,
"learning_rate": 0.0002,
"loss": 0.5893,
"step": 14
},
{
"epoch": 9.0,
"grad_norm": 0.5385488867759705,
"learning_rate": 0.0002,
"loss": 0.475,
"step": 15
},
{
"epoch": 9.142857142857142,
"grad_norm": 0.5094980001449585,
"learning_rate": 0.0002,
"loss": 0.0501,
"step": 16
},
{
"epoch": 10.0,
"grad_norm": 0.5094980001449585,
"learning_rate": 0.0002,
"loss": 0.3318,
"step": 17
},
{
"epoch": 10.285714285714286,
"grad_norm": 0.5739837884902954,
"learning_rate": 0.0002,
"loss": 0.154,
"step": 18
},
{
"epoch": 11.0,
"grad_norm": 0.5739837884902954,
"learning_rate": 0.0002,
"loss": 0.1339,
"step": 19
},
{
"epoch": 11.428571428571429,
"grad_norm": 0.5782333016395569,
"learning_rate": 0.0002,
"loss": 0.1008,
"step": 20
},
{
"epoch": 11.428571428571429,
"eval_loss": 0.1741267442703247,
"eval_runtime": 1.0921,
"eval_samples_per_second": 6.41,
"eval_steps_per_second": 6.41,
"step": 20
},
{
"epoch": 12.0,
"grad_norm": 0.5782333016395569,
"learning_rate": 0.0002,
"loss": 0.1043,
"step": 21
},
{
"epoch": 12.571428571428571,
"grad_norm": 0.558684229850769,
"learning_rate": 0.0002,
"loss": 0.1308,
"step": 22
},
{
"epoch": 13.0,
"grad_norm": 0.558684229850769,
"learning_rate": 0.0002,
"loss": 0.0082,
"step": 23
},
{
"epoch": 13.714285714285714,
"grad_norm": 0.3258131444454193,
"learning_rate": 0.0002,
"loss": 0.0663,
"step": 24
},
{
"epoch": 14.0,
"grad_norm": 0.3258131444454193,
"learning_rate": 0.0002,
"loss": 0.0114,
"step": 25
},
{
"epoch": 14.857142857142858,
"grad_norm": 0.3965975046157837,
"learning_rate": 0.0002,
"loss": 0.0503,
"step": 26
},
{
"epoch": 15.0,
"grad_norm": 0.3965975046157837,
"learning_rate": 0.0002,
"loss": 0.0026,
"step": 27
},
{
"epoch": 16.0,
"grad_norm": 0.6329053044319153,
"learning_rate": 0.0002,
"loss": 0.0271,
"step": 28
},
{
"epoch": 17.0,
"grad_norm": 0.6329053044319153,
"learning_rate": 0.0002,
"loss": 0.0145,
"step": 29
},
{
"epoch": 17.142857142857142,
"grad_norm": 0.1660483181476593,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 30
},
{
"epoch": 17.142857142857142,
"eval_loss": 0.008966931141912937,
"eval_runtime": 1.1273,
"eval_samples_per_second": 6.21,
"eval_steps_per_second": 6.21,
"step": 30
},
{
"epoch": 18.0,
"grad_norm": 0.1660483181476593,
"learning_rate": 0.0002,
"loss": 0.0076,
"step": 31
},
{
"epoch": 18.285714285714285,
"grad_norm": 0.16035296022891998,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 32
},
{
"epoch": 19.0,
"grad_norm": 0.16035296022891998,
"learning_rate": 0.0002,
"loss": 0.0027,
"step": 33
},
{
"epoch": 19.428571428571427,
"grad_norm": 0.22989660501480103,
"learning_rate": 0.0002,
"loss": 0.0022,
"step": 34
},
{
"epoch": 20.0,
"grad_norm": 0.22989660501480103,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 35
},
{
"epoch": 20.571428571428573,
"grad_norm": 0.025392575189471245,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 36
},
{
"epoch": 21.0,
"grad_norm": 0.025392575189471245,
"learning_rate": 0.0002,
"loss": 0.0037,
"step": 37
},
{
"epoch": 21.714285714285715,
"grad_norm": 0.7075601816177368,
"learning_rate": 0.0002,
"loss": 0.0039,
"step": 38
},
{
"epoch": 22.0,
"grad_norm": 0.7075601816177368,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 39
},
{
"epoch": 22.857142857142858,
"grad_norm": 0.057493291795253754,
"learning_rate": 0.0002,
"loss": 0.0009,
"step": 40
},
{
"epoch": 22.857142857142858,
"eval_loss": 0.001083305454812944,
"eval_runtime": 1.1213,
"eval_samples_per_second": 6.243,
"eval_steps_per_second": 6.243,
"step": 40
},
{
"epoch": 23.0,
"grad_norm": 0.057493291795253754,
"learning_rate": 0.0002,
"loss": 0.0001,
"step": 41
},
{
"epoch": 24.0,
"grad_norm": 0.043593935668468475,
"learning_rate": 0.0002,
"loss": 0.001,
"step": 42
},
{
"epoch": 25.0,
"grad_norm": 0.043593935668468475,
"learning_rate": 0.0002,
"loss": 0.0017,
"step": 43
},
{
"epoch": 25.142857142857142,
"grad_norm": 0.1038336306810379,
"learning_rate": 0.0002,
"loss": 0.0001,
"step": 44
},
{
"epoch": 26.0,
"grad_norm": 0.1038336306810379,
"learning_rate": 0.0002,
"loss": 0.0013,
"step": 45
},
{
"epoch": 26.285714285714285,
"grad_norm": 0.1458902508020401,
"learning_rate": 0.0002,
"loss": 0.0011,
"step": 46
},
{
"epoch": 27.0,
"grad_norm": 0.1458902508020401,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 47
},
{
"epoch": 27.428571428571427,
"grad_norm": 0.014756185002624989,
"learning_rate": 0.0002,
"loss": 0.0001,
"step": 48
},
{
"epoch": 28.0,
"grad_norm": 0.014756185002624989,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 49
},
{
"epoch": 28.571428571428573,
"grad_norm": 0.032949432730674744,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 50
},
{
"epoch": 28.571428571428573,
"eval_loss": 0.0005236685974523425,
"eval_runtime": 1.1017,
"eval_samples_per_second": 6.354,
"eval_steps_per_second": 6.354,
"step": 50
},
{
"epoch": 29.0,
"grad_norm": 0.032949432730674744,
"learning_rate": 0.0002,
"loss": 0.0001,
"step": 51
},
{
"epoch": 29.714285714285715,
"grad_norm": 0.015912260860204697,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 52
},
{
"epoch": 30.0,
"grad_norm": 0.015912260860204697,
"learning_rate": 0.0002,
"loss": 0.0001,
"step": 53
},
{
"epoch": 30.857142857142858,
"grad_norm": 0.012292955070734024,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 54
},
{
"epoch": 31.0,
"grad_norm": 0.012292955070734024,
"learning_rate": 0.0002,
"loss": 0.0,
"step": 55
},
{
"epoch": 32.0,
"grad_norm": 0.009588208049535751,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 56
},
{
"epoch": 33.0,
"grad_norm": 0.009588208049535751,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 57
},
{
"epoch": 33.142857142857146,
"grad_norm": 0.007272060494869947,
"learning_rate": 0.0002,
"loss": 0.0,
"step": 58
},
{
"epoch": 34.0,
"grad_norm": 0.007272060494869947,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 59
},
{
"epoch": 34.285714285714285,
"grad_norm": 0.006634233985096216,
"learning_rate": 0.0002,
"loss": 0.0,
"step": 60
},
{
"epoch": 34.285714285714285,
"eval_loss": 0.00021448711049742997,
"eval_runtime": 1.0979,
"eval_samples_per_second": 6.376,
"eval_steps_per_second": 6.376,
"step": 60
},
{
"epoch": 34.285714285714285,
"step": 60,
"total_flos": 4016214881861632.0,
"train_loss": 0.1554629008491247,
"train_runtime": 98.8483,
"train_samples_per_second": 4.856,
"train_steps_per_second": 0.607
}
],
"logging_steps": 1.0,
"max_steps": 60,
"num_input_tokens_seen": 0,
"num_train_epochs": 60,
"save_steps": 1000,
"total_flos": 4016214881861632.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
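
The file above is the standard trainer_state.json that the Hugging Face Transformers Trainer writes alongside a checkpoint: "log_history" holds one record per logged training step (keys "loss", "grad_norm", "learning_rate") plus evaluation records (key "eval_loss") every "eval_steps" steps, and a final summary record with "train_loss" and runtime figures. The snippet below is a minimal sketch of how one might inspect it; it assumes the file has been downloaded locally as trainer_state.json (a hypothetical path, adjust as needed) and uses only the Python standard library.

import json

# Hypothetical local path; point this at the downloaded adapter folder.
STATE_PATH = "trainer_state.json"

with open(STATE_PATH, "r", encoding="utf-8") as f:
    state = json.load(f)

# Training-step records carry a "loss" key; evaluation records carry "eval_loss".
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

print(f"{len(train_log)} training steps logged, {len(eval_log)} evaluations")
for e in eval_log:
    print(f"step {e['step']:>3}  epoch {e['epoch']:6.2f}  eval_loss {e['eval_loss']:.6f}")

# The last entry is the end-of-training summary written by the Trainer.
summary = state["log_history"][-1]
print("mean train loss:", summary.get("train_loss"))
print("train runtime (s):", summary.get("train_runtime"))

Running this against the state shown here would list the six evaluation points (eval_loss falling from roughly 0.80 at step 10 to about 2.1e-4 at step 60) and the aggregate train_loss of about 0.155 over the 60-step run.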