File size: 2,710 Bytes
336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b 7298db5 336e56b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
{
"best_metric": 7.529487609863281,
"best_model_checkpoint": "./results/checkpoint-916",
"epoch": 1.0,
"eval_steps": 500,
"global_step": 916,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1091703056768559,
"grad_norm": 25.480337142944336,
"learning_rate": 2.959061135371179e-05,
"loss": 56.923,
"step": 100
},
{
"epoch": 0.2183406113537118,
"grad_norm": 39.87223815917969,
"learning_rate": 2.918122270742358e-05,
"loss": 46.6475,
"step": 200
},
{
"epoch": 0.32751091703056767,
"grad_norm": 48.05048751831055,
"learning_rate": 2.877183406113537e-05,
"loss": 33.6867,
"step": 300
},
{
"epoch": 0.4366812227074236,
"grad_norm": 31.941883087158203,
"learning_rate": 2.8362445414847164e-05,
"loss": 21.1084,
"step": 400
},
{
"epoch": 0.5458515283842795,
"grad_norm": 55.025856018066406,
"learning_rate": 2.7953056768558954e-05,
"loss": 12.9495,
"step": 500
},
{
"epoch": 0.6550218340611353,
"grad_norm": 34.957523345947266,
"learning_rate": 2.7543668122270742e-05,
"loss": 10.0745,
"step": 600
},
{
"epoch": 0.7641921397379913,
"grad_norm": 24.020906448364258,
"learning_rate": 2.7134279475982533e-05,
"loss": 8.3541,
"step": 700
},
{
"epoch": 0.8733624454148472,
"grad_norm": 32.709571838378906,
"learning_rate": 2.6724890829694323e-05,
"loss": 7.5128,
"step": 800
},
{
"epoch": 0.982532751091703,
"grad_norm": 38.94672393798828,
"learning_rate": 2.6315502183406114e-05,
"loss": 7.2241,
"step": 900
},
{
"epoch": 1.0,
"eval_avg_mae": 7.529487609863281,
"eval_loss": 7.529487609863281,
"eval_mae_lex": 6.992014408111572,
"eval_mae_sem": 5.432034492492676,
"eval_mae_syn": 10.164413452148438,
"eval_runtime": 27.1764,
"eval_samples_per_second": 269.609,
"eval_steps_per_second": 8.426,
"step": 916
}
],
"logging_steps": 100,
"max_steps": 7328,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1927766233338624.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}
|