sam1120's picture
Training in progress, step 120
13916bd
raw
history blame
22.1 kB
{
"best_metric": 0.3130444884300232,
"best_model_checkpoint": "/robodata/smodak/Projects/nspl/scripts/terrainseg/training/models/dropoff-utcustom-train-SF-RGB-b0_6/checkpoint-120",
"epoch": 40.0,
"global_step": 120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.33,
"learning_rate": 3.888888888888889e-06,
"loss": 1.1755,
"step": 1
},
{
"epoch": 0.67,
"learning_rate": 7.777777777777777e-06,
"loss": 1.1749,
"step": 2
},
{
"epoch": 1.0,
"learning_rate": 1.1666666666666665e-05,
"loss": 1.183,
"step": 3
},
{
"epoch": 1.33,
"learning_rate": 1.5555555555555555e-05,
"loss": 1.1718,
"step": 4
},
{
"epoch": 1.67,
"learning_rate": 1.9444444444444442e-05,
"loss": 1.1706,
"step": 5
},
{
"epoch": 2.0,
"learning_rate": 2.333333333333333e-05,
"loss": 1.1696,
"step": 6
},
{
"epoch": 2.33,
"learning_rate": 2.722222222222222e-05,
"loss": 1.1558,
"step": 7
},
{
"epoch": 2.67,
"learning_rate": 3.111111111111111e-05,
"loss": 1.1489,
"step": 8
},
{
"epoch": 3.0,
"learning_rate": 3.5e-05,
"loss": 1.135,
"step": 9
},
{
"epoch": 3.33,
"learning_rate": 3.8888888888888884e-05,
"loss": 1.1234,
"step": 10
},
{
"epoch": 3.33,
"eval_accuracy_dropoff": 0.772015071105291,
"eval_accuracy_undropoff": 0.353825865102452,
"eval_accuracy_unlabeled": NaN,
"eval_iou_dropoff": 0.1801195043477445,
"eval_iou_undropoff": 0.3536296682807036,
"eval_iou_unlabeled": 0.0,
"eval_loss": 1.097259283065796,
"eval_mean_accuracy": 0.5629204681038715,
"eval_mean_iou": 0.17791639087614938,
"eval_overall_accuracy": 0.37234242757161456,
"eval_runtime": 1.873,
"eval_samples_per_second": 8.008,
"eval_steps_per_second": 0.534,
"step": 10
},
{
"epoch": 3.67,
"learning_rate": 4.277777777777778e-05,
"loss": 1.1168,
"step": 11
},
{
"epoch": 4.0,
"learning_rate": 4.666666666666666e-05,
"loss": 1.0895,
"step": 12
},
{
"epoch": 4.33,
"learning_rate": 5.055555555555555e-05,
"loss": 1.1027,
"step": 13
},
{
"epoch": 4.67,
"learning_rate": 5.444444444444444e-05,
"loss": 1.0795,
"step": 14
},
{
"epoch": 5.0,
"learning_rate": 5.833333333333333e-05,
"loss": 1.0598,
"step": 15
},
{
"epoch": 5.33,
"learning_rate": 6.222222222222222e-05,
"loss": 1.0449,
"step": 16
},
{
"epoch": 5.67,
"learning_rate": 6.611111111111111e-05,
"loss": 1.0268,
"step": 17
},
{
"epoch": 6.0,
"learning_rate": 7e-05,
"loss": 1.0843,
"step": 18
},
{
"epoch": 6.33,
"learning_rate": 6.97953216374269e-05,
"loss": 1.0029,
"step": 19
},
{
"epoch": 6.67,
"learning_rate": 6.95906432748538e-05,
"loss": 0.975,
"step": 20
},
{
"epoch": 6.67,
"eval_accuracy_dropoff": 0.8258609598639924,
"eval_accuracy_undropoff": 0.8102080545984994,
"eval_accuracy_unlabeled": NaN,
"eval_iou_dropoff": 0.24279987572018127,
"eval_iou_undropoff": 0.8068537963656117,
"eval_iou_unlabeled": 0.0,
"eval_loss": 1.0260422229766846,
"eval_mean_accuracy": 0.8180345072312459,
"eval_mean_iou": 0.34988455736193097,
"eval_overall_accuracy": 0.810901133219401,
"eval_runtime": 1.6566,
"eval_samples_per_second": 9.055,
"eval_steps_per_second": 0.604,
"step": 20
},
{
"epoch": 7.0,
"learning_rate": 6.938596491228069e-05,
"loss": 1.0358,
"step": 21
},
{
"epoch": 7.33,
"learning_rate": 6.91812865497076e-05,
"loss": 0.9361,
"step": 22
},
{
"epoch": 7.67,
"learning_rate": 6.89766081871345e-05,
"loss": 0.9311,
"step": 23
},
{
"epoch": 8.0,
"learning_rate": 6.877192982456139e-05,
"loss": 0.9149,
"step": 24
},
{
"epoch": 8.33,
"learning_rate": 6.85672514619883e-05,
"loss": 0.9056,
"step": 25
},
{
"epoch": 8.67,
"learning_rate": 6.83625730994152e-05,
"loss": 0.8637,
"step": 26
},
{
"epoch": 9.0,
"learning_rate": 6.81578947368421e-05,
"loss": 0.7991,
"step": 27
},
{
"epoch": 9.33,
"learning_rate": 6.7953216374269e-05,
"loss": 0.8143,
"step": 28
},
{
"epoch": 9.67,
"learning_rate": 6.774853801169589e-05,
"loss": 0.7999,
"step": 29
},
{
"epoch": 10.0,
"learning_rate": 6.75438596491228e-05,
"loss": 0.9464,
"step": 30
},
{
"epoch": 10.0,
"eval_accuracy_dropoff": 0.5212052289383601,
"eval_accuracy_undropoff": 0.9700488444545206,
"eval_accuracy_unlabeled": NaN,
"eval_iou_dropoff": 0.3384996437670423,
"eval_iou_undropoff": 0.9507046257182781,
"eval_iou_unlabeled": 0.0,
"eval_loss": 0.8130465149879456,
"eval_mean_accuracy": 0.7456270366964404,
"eval_mean_iou": 0.4297347564951068,
"eval_overall_accuracy": 0.9501749674479166,
"eval_runtime": 1.7097,
"eval_samples_per_second": 8.773,
"eval_steps_per_second": 0.585,
"step": 30
},
{
"epoch": 10.33,
"learning_rate": 6.733918128654971e-05,
"loss": 0.763,
"step": 31
},
{
"epoch": 10.67,
"learning_rate": 6.71345029239766e-05,
"loss": 0.7591,
"step": 32
},
{
"epoch": 11.0,
"learning_rate": 6.69298245614035e-05,
"loss": 0.6623,
"step": 33
},
{
"epoch": 11.33,
"learning_rate": 6.672514619883041e-05,
"loss": 0.7138,
"step": 34
},
{
"epoch": 11.67,
"learning_rate": 6.65204678362573e-05,
"loss": 0.6695,
"step": 35
},
{
"epoch": 12.0,
"learning_rate": 6.63157894736842e-05,
"loss": 0.6987,
"step": 36
},
{
"epoch": 12.33,
"learning_rate": 6.611111111111111e-05,
"loss": 0.6494,
"step": 37
},
{
"epoch": 12.67,
"learning_rate": 6.5906432748538e-05,
"loss": 0.6317,
"step": 38
},
{
"epoch": 13.0,
"learning_rate": 6.570175438596491e-05,
"loss": 0.834,
"step": 39
},
{
"epoch": 13.33,
"learning_rate": 6.54970760233918e-05,
"loss": 0.6167,
"step": 40
},
{
"epoch": 13.33,
"eval_accuracy_dropoff": 0.5047671560181037,
"eval_accuracy_undropoff": 0.9829004494881923,
"eval_accuracy_unlabeled": NaN,
"eval_iou_dropoff": 0.3743041743152479,
"eval_iou_undropoff": 0.961015278770039,
"eval_iou_unlabeled": 0.0,
"eval_loss": 0.6001297235488892,
"eval_mean_accuracy": 0.743833802753148,
"eval_mean_iou": 0.44510648436176226,
"eval_overall_accuracy": 0.9617296854654948,
"eval_runtime": 1.7423,
"eval_samples_per_second": 8.609,
"eval_steps_per_second": 0.574,
"step": 40
},
{
"epoch": 13.67,
"learning_rate": 6.52923976608187e-05,
"loss": 0.6201,
"step": 41
},
{
"epoch": 14.0,
"learning_rate": 6.508771929824561e-05,
"loss": 0.6009,
"step": 42
},
{
"epoch": 14.33,
"learning_rate": 6.488304093567252e-05,
"loss": 0.5716,
"step": 43
},
{
"epoch": 14.67,
"learning_rate": 6.467836257309941e-05,
"loss": 0.5779,
"step": 44
},
{
"epoch": 15.0,
"learning_rate": 6.447368421052631e-05,
"loss": 0.5397,
"step": 45
},
{
"epoch": 15.33,
"learning_rate": 6.426900584795322e-05,
"loss": 0.5569,
"step": 46
},
{
"epoch": 15.67,
"learning_rate": 6.406432748538011e-05,
"loss": 0.5152,
"step": 47
},
{
"epoch": 16.0,
"learning_rate": 6.3859649122807e-05,
"loss": 0.5266,
"step": 48
},
{
"epoch": 16.33,
"learning_rate": 6.365497076023391e-05,
"loss": 0.5389,
"step": 49
},
{
"epoch": 16.67,
"learning_rate": 6.345029239766081e-05,
"loss": 0.4818,
"step": 50
},
{
"epoch": 16.67,
"eval_accuracy_dropoff": 0.4572736462425621,
"eval_accuracy_undropoff": 0.9901589972677334,
"eval_accuracy_unlabeled": NaN,
"eval_iou_dropoff": 0.38151532722193204,
"eval_iou_undropoff": 0.9659037872797944,
"eval_iou_unlabeled": 0.0,
"eval_loss": 0.46292775869369507,
"eval_mean_accuracy": 0.7237163217551478,
"eval_mean_iou": 0.4491397048339088,
"eval_overall_accuracy": 0.9665639241536458,
"eval_runtime": 1.7358,
"eval_samples_per_second": 8.641,
"eval_steps_per_second": 0.576,
"step": 50
},
{
"epoch": 17.0,
"learning_rate": 6.324561403508772e-05,
"loss": 0.5289,
"step": 51
},
{
"epoch": 17.33,
"learning_rate": 6.304093567251461e-05,
"loss": 0.4991,
"step": 52
},
{
"epoch": 17.67,
"learning_rate": 6.283625730994151e-05,
"loss": 0.4643,
"step": 53
},
{
"epoch": 18.0,
"learning_rate": 6.263157894736842e-05,
"loss": 0.4359,
"step": 54
},
{
"epoch": 18.33,
"learning_rate": 6.242690058479532e-05,
"loss": 0.4679,
"step": 55
},
{
"epoch": 18.67,
"learning_rate": 6.222222222222222e-05,
"loss": 0.4619,
"step": 56
},
{
"epoch": 19.0,
"learning_rate": 6.201754385964911e-05,
"loss": 0.6641,
"step": 57
},
{
"epoch": 19.33,
"learning_rate": 6.181286549707602e-05,
"loss": 0.4316,
"step": 58
},
{
"epoch": 19.67,
"learning_rate": 6.160818713450292e-05,
"loss": 0.4475,
"step": 59
},
{
"epoch": 20.0,
"learning_rate": 6.140350877192981e-05,
"loss": 0.4733,
"step": 60
},
{
"epoch": 20.0,
"eval_accuracy_dropoff": 0.42558641762584143,
"eval_accuracy_undropoff": 0.987872440296196,
"eval_accuracy_unlabeled": NaN,
"eval_iou_dropoff": 0.33826825716385683,
"eval_iou_undropoff": 0.9622649725509712,
"eval_iou_unlabeled": 0.0,
"eval_loss": 0.43790048360824585,
"eval_mean_accuracy": 0.7067294289610188,
"eval_mean_iou": 0.4335110765716093,
"eval_overall_accuracy": 0.9629755655924479,
"eval_runtime": 1.7742,
"eval_samples_per_second": 8.455,
"eval_steps_per_second": 0.564,
"step": 60
},
{
"epoch": 20.33,
"learning_rate": 6.119883040935672e-05,
"loss": 0.423,
"step": 61
},
{
"epoch": 20.67,
"learning_rate": 6.0994152046783624e-05,
"loss": 0.4219,
"step": 62
},
{
"epoch": 21.0,
"learning_rate": 6.0789473684210525e-05,
"loss": 0.6326,
"step": 63
},
{
"epoch": 21.33,
"learning_rate": 6.058479532163742e-05,
"loss": 0.4299,
"step": 64
},
{
"epoch": 21.67,
"learning_rate": 6.038011695906432e-05,
"loss": 0.4147,
"step": 65
},
{
"epoch": 22.0,
"learning_rate": 6.0175438596491224e-05,
"loss": 0.4815,
"step": 66
},
{
"epoch": 22.33,
"learning_rate": 5.9970760233918126e-05,
"loss": 0.4477,
"step": 67
},
{
"epoch": 22.67,
"learning_rate": 5.976608187134502e-05,
"loss": 0.4031,
"step": 68
},
{
"epoch": 23.0,
"learning_rate": 5.956140350877192e-05,
"loss": 0.6048,
"step": 69
},
{
"epoch": 23.33,
"learning_rate": 5.9356725146198824e-05,
"loss": 0.3843,
"step": 70
},
{
"epoch": 23.33,
"eval_accuracy_dropoff": 0.3821076573161486,
"eval_accuracy_undropoff": 0.992239862567096,
"eval_accuracy_unlabeled": NaN,
"eval_iou_dropoff": 0.328346864743479,
"eval_iou_undropoff": 0.9646259654307497,
"eval_iou_unlabeled": 0.0,
"eval_loss": 0.4072829484939575,
"eval_mean_accuracy": 0.6871737599416223,
"eval_mean_iou": 0.4309909433914096,
"eval_overall_accuracy": 0.9652244567871093,
"eval_runtime": 1.8122,
"eval_samples_per_second": 8.277,
"eval_steps_per_second": 0.552,
"step": 70
},
{
"epoch": 23.67,
"learning_rate": 5.9152046783625726e-05,
"loss": 0.3967,
"step": 71
},
{
"epoch": 24.0,
"learning_rate": 5.894736842105262e-05,
"loss": 0.4051,
"step": 72
},
{
"epoch": 24.33,
"learning_rate": 5.874269005847952e-05,
"loss": 0.3836,
"step": 73
},
{
"epoch": 24.67,
"learning_rate": 5.853801169590643e-05,
"loss": 0.3896,
"step": 74
},
{
"epoch": 25.0,
"learning_rate": 5.833333333333333e-05,
"loss": 0.5942,
"step": 75
},
{
"epoch": 25.33,
"learning_rate": 5.812865497076023e-05,
"loss": 0.3688,
"step": 76
},
{
"epoch": 25.67,
"learning_rate": 5.792397660818713e-05,
"loss": 0.3721,
"step": 77
},
{
"epoch": 26.0,
"learning_rate": 5.771929824561403e-05,
"loss": 0.5951,
"step": 78
},
{
"epoch": 26.33,
"learning_rate": 5.751461988304093e-05,
"loss": 0.3973,
"step": 79
},
{
"epoch": 26.67,
"learning_rate": 5.730994152046783e-05,
"loss": 0.3579,
"step": 80
},
{
"epoch": 26.67,
"eval_accuracy_dropoff": 0.40901049923036276,
"eval_accuracy_undropoff": 0.9908359437282933,
"eval_accuracy_unlabeled": NaN,
"eval_iou_dropoff": 0.34181486548107615,
"eval_iou_undropoff": 0.9644297203691943,
"eval_iou_unlabeled": 0.0,
"eval_loss": 0.3731442093849182,
"eval_mean_accuracy": 0.6999232214793281,
"eval_mean_iou": 0.43541486195009016,
"eval_overall_accuracy": 0.9650739034016927,
"eval_runtime": 1.7608,
"eval_samples_per_second": 8.519,
"eval_steps_per_second": 0.568,
"step": 80
},
{
"epoch": 27.0,
"learning_rate": 5.710526315789473e-05,
"loss": 0.416,
"step": 81
},
{
"epoch": 27.33,
"learning_rate": 5.690058479532163e-05,
"loss": 0.3931,
"step": 82
},
{
"epoch": 27.67,
"learning_rate": 5.669590643274853e-05,
"loss": 0.3753,
"step": 83
},
{
"epoch": 28.0,
"learning_rate": 5.649122807017543e-05,
"loss": 0.3469,
"step": 84
},
{
"epoch": 28.33,
"learning_rate": 5.628654970760233e-05,
"loss": 0.3548,
"step": 85
},
{
"epoch": 28.67,
"learning_rate": 5.608187134502924e-05,
"loss": 0.3347,
"step": 86
},
{
"epoch": 29.0,
"learning_rate": 5.587719298245614e-05,
"loss": 0.5499,
"step": 87
},
{
"epoch": 29.33,
"learning_rate": 5.5672514619883035e-05,
"loss": 0.3353,
"step": 88
},
{
"epoch": 29.67,
"learning_rate": 5.546783625730994e-05,
"loss": 0.3393,
"step": 89
},
{
"epoch": 30.0,
"learning_rate": 5.526315789473684e-05,
"loss": 0.3212,
"step": 90
},
{
"epoch": 30.0,
"eval_accuracy_dropoff": 0.43656236359041517,
"eval_accuracy_undropoff": 0.9891638008202122,
"eval_accuracy_unlabeled": NaN,
"eval_iou_dropoff": 0.3538083423714455,
"eval_iou_undropoff": 0.9639998537401673,
"eval_iou_unlabeled": NaN,
"eval_loss": 0.3654923737049103,
"eval_mean_accuracy": 0.7128630822053137,
"eval_mean_iou": 0.6589040980558064,
"eval_overall_accuracy": 0.9646957397460938,
"eval_runtime": 1.7507,
"eval_samples_per_second": 8.568,
"eval_steps_per_second": 0.571,
"step": 90
},
{
"epoch": 30.33,
"learning_rate": 5.505847953216374e-05,
"loss": 0.3282,
"step": 91
},
{
"epoch": 30.67,
"learning_rate": 5.4853801169590635e-05,
"loss": 0.341,
"step": 92
},
{
"epoch": 31.0,
"learning_rate": 5.464912280701754e-05,
"loss": 0.5022,
"step": 93
},
{
"epoch": 31.33,
"learning_rate": 5.444444444444444e-05,
"loss": 0.3242,
"step": 94
},
{
"epoch": 31.67,
"learning_rate": 5.423976608187134e-05,
"loss": 0.3237,
"step": 95
},
{
"epoch": 32.0,
"learning_rate": 5.4035087719298236e-05,
"loss": 0.3377,
"step": 96
},
{
"epoch": 32.33,
"learning_rate": 5.383040935672514e-05,
"loss": 0.3189,
"step": 97
},
{
"epoch": 32.67,
"learning_rate": 5.3625730994152046e-05,
"loss": 0.31,
"step": 98
},
{
"epoch": 33.0,
"learning_rate": 5.342105263157895e-05,
"loss": 0.4947,
"step": 99
},
{
"epoch": 33.33,
"learning_rate": 5.321637426900584e-05,
"loss": 0.3088,
"step": 100
},
{
"epoch": 33.33,
"eval_accuracy_dropoff": 0.34505019872722675,
"eval_accuracy_undropoff": 0.9927792377540279,
"eval_accuracy_unlabeled": NaN,
"eval_iou_dropoff": 0.2985231857844209,
"eval_iou_undropoff": 0.9635421132392789,
"eval_iou_unlabeled": NaN,
"eval_loss": 0.3305789530277252,
"eval_mean_accuracy": 0.6689147182406273,
"eval_mean_iou": 0.6310326495118499,
"eval_overall_accuracy": 0.96409912109375,
"eval_runtime": 1.6777,
"eval_samples_per_second": 8.941,
"eval_steps_per_second": 0.596,
"step": 100
},
{
"epoch": 33.67,
"learning_rate": 5.3011695906432744e-05,
"loss": 0.3057,
"step": 101
},
{
"epoch": 34.0,
"learning_rate": 5.2807017543859646e-05,
"loss": 0.4575,
"step": 102
},
{
"epoch": 34.33,
"learning_rate": 5.260233918128655e-05,
"loss": 0.3011,
"step": 103
},
{
"epoch": 34.67,
"learning_rate": 5.239766081871344e-05,
"loss": 0.3063,
"step": 104
},
{
"epoch": 35.0,
"learning_rate": 5.2192982456140345e-05,
"loss": 0.2768,
"step": 105
},
{
"epoch": 35.33,
"learning_rate": 5.1988304093567246e-05,
"loss": 0.3065,
"step": 106
},
{
"epoch": 35.67,
"learning_rate": 5.178362573099415e-05,
"loss": 0.2841,
"step": 107
},
{
"epoch": 36.0,
"learning_rate": 5.157894736842104e-05,
"loss": 0.4367,
"step": 108
},
{
"epoch": 36.33,
"learning_rate": 5.1374269005847945e-05,
"loss": 0.2944,
"step": 109
},
{
"epoch": 36.67,
"learning_rate": 5.1169590643274853e-05,
"loss": 0.2825,
"step": 110
},
{
"epoch": 36.67,
"eval_accuracy_dropoff": 0.42931398901831047,
"eval_accuracy_undropoff": 0.9912175243982787,
"eval_accuracy_unlabeled": NaN,
"eval_iou_dropoff": 0.3608996055293487,
"eval_iou_undropoff": 0.9656852921893507,
"eval_iou_unlabeled": NaN,
"eval_loss": 0.3253430724143982,
"eval_mean_accuracy": 0.7102657567082946,
"eval_mean_iou": 0.6632924488593497,
"eval_overall_accuracy": 0.9663375854492188,
"eval_runtime": 1.7624,
"eval_samples_per_second": 8.511,
"eval_steps_per_second": 0.567,
"step": 110
},
{
"epoch": 37.0,
"learning_rate": 5.0964912280701755e-05,
"loss": 0.292,
"step": 111
},
{
"epoch": 37.33,
"learning_rate": 5.076023391812865e-05,
"loss": 0.2868,
"step": 112
},
{
"epoch": 37.67,
"learning_rate": 5.055555555555555e-05,
"loss": 0.2897,
"step": 113
},
{
"epoch": 38.0,
"learning_rate": 5.0350877192982454e-05,
"loss": 0.4326,
"step": 114
},
{
"epoch": 38.33,
"learning_rate": 5.0146198830409355e-05,
"loss": 0.2754,
"step": 115
},
{
"epoch": 38.67,
"learning_rate": 4.994152046783625e-05,
"loss": 0.3155,
"step": 116
},
{
"epoch": 39.0,
"learning_rate": 4.973684210526315e-05,
"loss": 0.2995,
"step": 117
},
{
"epoch": 39.33,
"learning_rate": 4.9532163742690054e-05,
"loss": 0.2727,
"step": 118
},
{
"epoch": 39.67,
"learning_rate": 4.9327485380116956e-05,
"loss": 0.2645,
"step": 119
},
{
"epoch": 40.0,
"learning_rate": 4.912280701754385e-05,
"loss": 0.3029,
"step": 120
},
{
"epoch": 40.0,
"eval_accuracy_dropoff": 0.42643646472304547,
"eval_accuracy_undropoff": 0.9894596988014003,
"eval_accuracy_unlabeled": NaN,
"eval_iou_dropoff": 0.34740009077339873,
"eval_iou_undropoff": 0.9638475611431463,
"eval_iou_unlabeled": NaN,
"eval_loss": 0.3130444884300232,
"eval_mean_accuracy": 0.7079480817622229,
"eval_mean_iou": 0.6556238259582725,
"eval_overall_accuracy": 0.9645301818847656,
"eval_runtime": 1.7686,
"eval_samples_per_second": 8.481,
"eval_steps_per_second": 0.565,
"step": 120
}
],
"max_steps": 360,
"num_train_epochs": 120,
"total_flos": 2.38396687515648e+16,
"trial_name": null,
"trial_params": null
}