|
{ |
|
"best_metric": 0.15952083840732592, |
|
"best_model_checkpoint": "esm2_t30_150M_qlora_ptm_sites_2023-10-19_22-31-34/checkpoint-48919", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 48919, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0003701416918864124, |
|
"loss": 0.2635, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00037009604913071327, |
|
"loss": 0.0714, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00037001988446013227, |
|
"loss": 0.056, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00036991381967446495, |
|
"loss": 0.0526, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00036977680630794575, |
|
"loss": 0.0411, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00036960932369126316, |
|
"loss": 0.0415, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00036941139945362056, |
|
"loss": 0.0338, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00036918306624610206, |
|
"loss": 0.029, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0003689243617362863, |
|
"loss": 0.0333, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0003686353286020322, |
|
"loss": 0.0308, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0003683160145244388, |
|
"loss": 0.0314, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00036796647217997937, |
|
"loss": 0.0243, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00036758675923181147, |
|
"loss": 0.027, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00036717693832026435, |
|
"loss": 0.0283, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0003667370770525053, |
|
"loss": 0.0235, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0003662672479913868, |
|
"loss": 0.0223, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0003657701014579829, |
|
"loss": 0.0208, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00036524072308681836, |
|
"loss": 0.0251, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00036468162377219915, |
|
"loss": 0.0198, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00036409289574739075, |
|
"loss": 0.0231, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0003634746361334354, |
|
"loss": 0.0212, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0003628269469231305, |
|
"loss": 0.0203, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000362149934964203, |
|
"loss": 0.021, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0003614437119416829, |
|
"loss": 0.0241, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00036070839435947883, |
|
"loss": 0.0205, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0003599441035211589, |
|
"loss": 0.0197, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00035915096550993934, |
|
"loss": 0.0201, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000358329111167885, |
|
"loss": 0.0214, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00035747867607432445, |
|
"loss": 0.0194, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0003565998005234839, |
|
"loss": 0.0167, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0003556926295013433, |
|
"loss": 0.0234, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00035475731266171827, |
|
"loss": 0.0171, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00035379400430157195, |
|
"loss": 0.0181, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00035280286333556107, |
|
"loss": 0.0194, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00035178405326982047, |
|
"loss": 0.0244, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0003507377421749893, |
|
"loss": 0.0188, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00034966410265848546, |
|
"loss": 0.0186, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00034856331183603055, |
|
"loss": 0.0164, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0003474355513024315, |
|
"loss": 0.0184, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0003462810071016236, |
|
"loss": 0.0154, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00034509986969597905, |
|
"loss": 0.0203, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0003438984369518822, |
|
"loss": 0.0214, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00034266483253267676, |
|
"loss": 0.0166, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0003414052314602316, |
|
"loss": 0.017, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0003401198415278977, |
|
"loss": 0.0182, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0003388088747833521, |
|
"loss": 0.017, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0003374725474936167, |
|
"loss": 0.0183, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0003361110801093815, |
|
"loss": 0.0168, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0003347246972286373, |
|
"loss": 0.0172, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.000333313627559624, |
|
"loss": 0.0197, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0003318781038831018, |
|
"loss": 0.0139, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0003304183630139492, |
|
"loss": 0.016, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0003289346457620967, |
|
"loss": 0.0205, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00032742719689280103, |
|
"loss": 0.0179, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00032589626508626625, |
|
"loss": 0.0143, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00032434210289662013, |
|
"loss": 0.0175, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0003227649667102507, |
|
"loss": 0.0156, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0003211651167035108, |
|
"loss": 0.0206, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0003195428167997974, |
|
"loss": 0.0176, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00031789833462601304, |
|
"loss": 0.0151, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0003162319414684155, |
|
"loss": 0.0159, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00031455240573465427, |
|
"loss": 0.0168, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0003128431249707856, |
|
"loss": 0.0148, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0003111127671688482, |
|
"loss": 0.0236, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00030937042477134576, |
|
"loss": 0.0146, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00030759887447136833, |
|
"loss": 0.0157, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0003058071122646787, |
|
"loss": 0.0216, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00030399543373396665, |
|
"loss": 0.0171, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0003021641377474699, |
|
"loss": 0.0174, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0003003135264096706, |
|
"loss": 0.0174, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002984439050114576, |
|
"loss": 0.0155, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00029655558197976386, |
|
"loss": 0.0181, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002946488688266855, |
|
"loss": 0.0165, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00029272408009809267, |
|
"loss": 0.0186, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00029078153332173963, |
|
"loss": 0.0159, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00028882154895488294, |
|
"loss": 0.0148, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00028684445033141644, |
|
"loss": 0.0159, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00028485056360853155, |
|
"loss": 0.015, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00028284021771291196, |
|
"loss": 0.0146, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002808137442864714, |
|
"loss": 0.0156, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002787817276943337, |
|
"loss": 0.0154, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0002767240811582267, |
|
"loss": 0.0146, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0002746513160555949, |
|
"loss": 0.0138, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0002725637743254982, |
|
"loss": 0.0159, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00027046180034466093, |
|
"loss": 0.0149, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00026834574087066106, |
|
"loss": 0.0131, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002662159449847264, |
|
"loss": 0.0143, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00026407276403414754, |
|
"loss": 0.0209, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002619165515743168, |
|
"loss": 0.0136, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002597476633104033, |
|
"loss": 0.0148, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002575664570386728, |
|
"loss": 0.0169, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0002553732925874632, |
|
"loss": 0.0116, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00025316853175782447, |
|
"loss": 0.0164, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00025096364556859434, |
|
"loss": 0.0154, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00024873683840049996, |
|
"loss": 0.0141, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002464995296538175, |
|
"loss": 0.0144, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00024425208841197493, |
|
"loss": 0.0122, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002419948854299333, |
|
"loss": 0.0129, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00023972829307302465, |
|
"loss": 0.0133, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00023745268525552336, |
|
"loss": 0.013, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00023516843737896262, |
|
"loss": 0.0162, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00023287592627020543, |
|
"loss": 0.0156, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00023057553011928013, |
|
"loss": 0.0137, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002282676284169916, |
|
"loss": 0.0146, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00022595260189231734, |
|
"loss": 0.0143, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00022363083244959957, |
|
"loss": 0.0173, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00022131435893762175, |
|
"loss": 0.0141, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00021899196729468126, |
|
"loss": 0.0152, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00021665232533016668, |
|
"loss": 0.0132, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00021430747469180223, |
|
"loss": 0.0138, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00021195780220394222, |
|
"loss": 0.0144, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00020960369548639, |
|
"loss": 0.017, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00020724554289045297, |
|
"loss": 0.0131, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00020488373343487728, |
|
"loss": 0.0151, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002025186567416726, |
|
"loss": 0.0134, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00020015070297183673, |
|
"loss": 0.015, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.000197780262760992, |
|
"loss": 0.0153, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00019540772715494277, |
|
"loss": 0.0151, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00019303348754516576, |
|
"loss": 0.0153, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00019065793560424322, |
|
"loss": 0.0125, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00018828146322124917, |
|
"loss": 0.0126, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00018590446243710106, |
|
"loss": 0.0146, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001835273253798852, |
|
"loss": 0.0123, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00018115044420016874, |
|
"loss": 0.0173, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00017877421100630705, |
|
"loss": 0.0151, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00017639901779975916, |
|
"loss": 0.0174, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00017402525641041958, |
|
"loss": 0.0122, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00017166517293872442, |
|
"loss": 0.0193, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00016929543761778923, |
|
"loss": 0.0182, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00016692830597455233, |
|
"loss": 0.0136, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001645641685090116, |
|
"loss": 0.0133, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.000162203415227223, |
|
"loss": 0.0107, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00015984643557696138, |
|
"loss": 0.0128, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00015749361838347518, |
|
"loss": 0.0134, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.000155145351785342, |
|
"loss": 0.0122, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.000152802023170439, |
|
"loss": 0.0145, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001504640191120359, |
|
"loss": 0.0145, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00014813172530502337, |
|
"loss": 0.0163, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00014580552650228558, |
|
"loss": 0.0106, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00014348580645122863, |
|
"loss": 0.0132, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00014117294783047438, |
|
"loss": 0.0117, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00013886733218673142, |
|
"loss": 0.0129, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001365693398718515, |
|
"loss": 0.0134, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00013427934998008455, |
|
"loss": 0.0127, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00013199774028553978, |
|
"loss": 0.0112, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0001297248871798657, |
|
"loss": 0.0137, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00012746116561015725, |
|
"loss": 0.0167, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0001252069490171022, |
|
"loss": 0.0135, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00012296260927337492, |
|
"loss": 0.0103, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00012072851662228994, |
|
"loss": 0.0121, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011850503961672364, |
|
"loss": 0.0134, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011630357960773998, |
|
"loss": 0.0126, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.000114102374844354, |
|
"loss": 0.0126, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011191287882511448, |
|
"loss": 0.0131, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001097354527458991, |
|
"loss": 0.0135, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010757045581143638, |
|
"loss": 0.0162, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010541824517604906, |
|
"loss": 0.012, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010327917588473477, |
|
"loss": 0.0134, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010115360081459545, |
|
"loss": 0.0144, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.904187061662376e-05, |
|
"loss": 0.0195, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.69443336578574e-05, |
|
"loss": 0.0157, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.486133596390939e-05, |
|
"loss": 0.0131, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.279322116188555e-05, |
|
"loss": 0.0146, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.074033042369668e-05, |
|
"loss": 0.0118, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 8.870300240977657e-05, |
|
"loss": 0.0153, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 8.668157321321404e-05, |
|
"loss": 0.0107, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 8.467637630430823e-05, |
|
"loss": 0.0145, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 8.268774247555691e-05, |
|
"loss": 0.0124, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 8.07159997870866e-05, |
|
"loss": 0.0117, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.876147351253296e-05, |
|
"loss": 0.0153, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.682448608538163e-05, |
|
"loss": 0.0118, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.49149077432921e-05, |
|
"loss": 0.0133, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.301386202782465e-05, |
|
"loss": 0.0129, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.114066962139441e-05, |
|
"loss": 0.0105, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 6.927681374580455e-05, |
|
"loss": 0.0132, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 6.74320613790631e-05, |
|
"loss": 0.0148, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 6.560671684551868e-05, |
|
"loss": 0.0135, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 6.380108126785653e-05, |
|
"loss": 0.0147, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 6.20154525174229e-05, |
|
"loss": 0.0139, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.025012516508621e-05, |
|
"loss": 0.0125, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.850539043264194e-05, |
|
"loss": 0.0105, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.6781536144770985e-05, |
|
"loss": 0.0162, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.5078846681557415e-05, |
|
"loss": 0.0111, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.339760293157545e-05, |
|
"loss": 0.0111, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.173808224555153e-05, |
|
"loss": 0.0108, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.010055839061078e-05, |
|
"loss": 0.0124, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.849332195813418e-05, |
|
"loss": 0.013, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.690048518324095e-05, |
|
"loss": 0.0128, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.5338236363359446e-05, |
|
"loss": 0.0156, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.3791132442674044e-05, |
|
"loss": 0.0116, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.226733634221389e-05, |
|
"loss": 0.0131, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.0767099438946455e-05, |
|
"loss": 0.0115, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.929066922333545e-05, |
|
"loss": 0.013, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.78382892585125e-05, |
|
"loss": 0.0126, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6410199140097565e-05, |
|
"loss": 0.0115, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.500663445667306e-05, |
|
"loss": 0.0137, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.3627826750919713e-05, |
|
"loss": 0.012, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.2274003481419156e-05, |
|
"loss": 0.0139, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.094538798513103e-05, |
|
"loss": 0.0124, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.964219944054926e-05, |
|
"loss": 0.0159, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.8364652831545036e-05, |
|
"loss": 0.0136, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.7112958911901083e-05, |
|
"loss": 0.0159, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.588732417054455e-05, |
|
"loss": 0.011, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.4693882010091674e-05, |
|
"loss": 0.0152, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.3520835081962702e-05, |
|
"loss": 0.0161, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.2374439916125642e-05, |
|
"loss": 0.0117, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.1254885630626012e-05, |
|
"loss": 0.0137, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.016235691563421e-05, |
|
"loss": 0.0105, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.909703400297725e-05, |
|
"loss": 0.0125, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.8059092636406624e-05, |
|
"loss": 0.0124, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.704870404260628e-05, |
|
"loss": 0.0119, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.6066034902945553e-05, |
|
"loss": 0.0096, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.5111247325982417e-05, |
|
"loss": 0.0114, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4184498820720606e-05, |
|
"loss": 0.0129, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.3285942270625904e-05, |
|
"loss": 0.0143, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.2415725908405126e-05, |
|
"loss": 0.0135, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1573993291552778e-05, |
|
"loss": 0.0111, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.0760883278668474e-05, |
|
"loss": 0.012, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.976530006549986e-06, |
|
"loss": 0.0152, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.221062868064821e-06, |
|
"loss": 0.0124, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 8.494606490804811e-06, |
|
"loss": 0.0163, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 7.800694690729383e-06, |
|
"loss": 0.012, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 7.132468049208598e-06, |
|
"loss": 0.0112, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.493596719362824e-06, |
|
"loss": 0.0106, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5.884186094254458e-06, |
|
"loss": 0.0132, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5.304336706884555e-06, |
|
"loss": 0.0137, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.754144213608023e-06, |
|
"loss": 0.0101, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.233699378353457e-06, |
|
"loss": 0.009, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.743088057650132e-06, |
|
"loss": 0.014, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.2823911864643436e-06, |
|
"loss": 0.0129, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.8516847648477812e-06, |
|
"loss": 0.0132, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.4510398454000396e-06, |
|
"loss": 0.0121, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.0805225215471503e-06, |
|
"loss": 0.0115, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.740193916638432e-06, |
|
"loss": 0.0121, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.430110173862991e-06, |
|
"loss": 0.0145, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1503224469880126e-06, |
|
"loss": 0.013, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.008768919199934e-07, |
|
"loss": 0.0127, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.818146590905163e-07, |
|
"loss": 0.0138, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.931718866678032e-07, |
|
"loss": 0.0113, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.356948644792021e-07, |
|
"loss": 0.0169, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.0782690935592252e-07, |
|
"loss": 0.0109, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.1045660725531198e-07, |
|
"loss": 0.0119, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.360002112121604e-08, |
|
"loss": 0.0102, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 7.2681801234168665e-09, |
|
"loss": 0.0114, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9751466987432912, |
|
"eval_auc": 0.9180107216187284, |
|
"eval_f1": 0.15952083840732592, |
|
"eval_loss": 0.2542668879032135, |
|
"eval_mcc": 0.27048987280764264, |
|
"eval_precision": 0.0879081257328702, |
|
"eval_recall": 0.8605598317425242, |
|
"eval_runtime": 27823.2385, |
|
"eval_samples_per_second": 16.313, |
|
"eval_steps_per_second": 0.453, |
|
"step": 48919 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 48919, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 1.56321913484268e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|