diff --git a/cross_cell_type_generization/L1/Neutrophils/fold3/checkpoint-1309/trainer_state.json b/cross_cell_type_generization/L1/Neutrophils/fold3/checkpoint-1309/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fa1a7de0df2cceb78089ea7622926d355f162f38 --- /dev/null +++ b/cross_cell_type_generization/L1/Neutrophils/fold3/checkpoint-1309/trainer_state.json @@ -0,0 +1,398 @@ +{ + "best_metric": 0.4226238429546356, + "best_model_checkpoint": "/vsphhome/fengguoqing/Geneformer/models/data_diversity/L1/Neutrophils/fold3/checkpoint-935", + "epoch": 7.0, + "global_step": 1309, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.13, + "learning_rate": 2.5e-06, + "loss": 0.6975, + "step": 25 + }, + { + "epoch": 0.27, + "learning_rate": 5e-06, + "loss": 0.6966, + "step": 50 + }, + { + "epoch": 0.4, + "learning_rate": 7.5e-06, + "loss": 0.6943, + "step": 75 + }, + { + "epoch": 0.53, + "learning_rate": 1e-05, + "loss": 0.6909, + "step": 100 + }, + { + "epoch": 0.67, + "learning_rate": 1.25e-05, + "loss": 0.693, + "step": 125 + }, + { + "epoch": 0.8, + "learning_rate": 1.5e-05, + "loss": 0.6925, + "step": 150 + }, + { + "epoch": 0.94, + "learning_rate": 1.75e-05, + "loss": 0.6932, + "step": 175 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.5154766817994222, + "eval_loss": 0.691068708896637, + "eval_macro_f1": 0.4829185650936648, + "eval_runtime": 12.1349, + "eval_samples_per_second": 399.345, + "eval_steps_per_second": 3.873, + "step": 187 + }, + { + "epoch": 1.07, + "learning_rate": 2e-05, + "loss": 0.689, + "step": 200 + }, + { + "epoch": 1.2, + "learning_rate": 2.25e-05, + "loss": 0.6908, + "step": 225 + }, + { + "epoch": 1.34, + "learning_rate": 2.5e-05, + "loss": 0.6886, + "step": 250 + }, + { + "epoch": 1.47, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.6884, + "step": 275 + }, + { + "epoch": 1.6, + "learning_rate": 3e-05, + "loss": 0.6875, + "step": 300 + }, + { + "epoch": 1.74, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.6887, + "step": 325 + }, + { + "epoch": 1.87, + "learning_rate": 3.5e-05, + "loss": 0.6864, + "step": 350 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.5435410647957079, + "eval_loss": 0.6844078302383423, + "eval_macro_f1": 0.43449410435016383, + "eval_runtime": 11.9965, + "eval_samples_per_second": 403.952, + "eval_steps_per_second": 3.918, + "step": 374 + }, + { + "epoch": 2.01, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.6836, + "step": 375 + }, + { + "epoch": 2.14, + "learning_rate": 4e-05, + "loss": 0.6815, + "step": 400 + }, + { + "epoch": 2.27, + "learning_rate": 4.25e-05, + "loss": 0.6778, + "step": 425 + }, + { + "epoch": 2.41, + "learning_rate": 4.5e-05, + "loss": 0.6637, + "step": 450 + }, + { + "epoch": 2.54, + "learning_rate": 4.75e-05, + "loss": 0.6206, + "step": 475 + }, + { + "epoch": 2.67, + "learning_rate": 5e-05, + "loss": 0.565, + "step": 500 + }, + { + "epoch": 2.81, + "learning_rate": 4.9858757062146896e-05, + "loss": 0.5312, + "step": 525 + }, + { + "epoch": 2.94, + "learning_rate": 4.971751412429379e-05, + "loss": 0.4995, + "step": 550 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.7602146099876187, + "eval_loss": 0.4883873462677002, + "eval_macro_f1": 0.7547402082097463, + "eval_runtime": 11.6796, + "eval_samples_per_second": 414.91, + "eval_steps_per_second": 4.024, + "step": 561 + }, + { + "epoch": 3.07, + "learning_rate": 4.957627118644068e-05, + "loss": 0.4676, + "step": 575 + }, + { + "epoch": 3.21, + "learning_rate": 4.9435028248587575e-05, + "loss": 0.4293, + "step": 600 + }, + { + "epoch": 3.34, + "learning_rate": 4.929378531073446e-05, + "loss": 0.4152, + "step": 625 + }, + { + "epoch": 3.48, + "learning_rate": 4.915254237288136e-05, + "loss": 0.4157, + "step": 650 + }, + { + "epoch": 3.61, + "learning_rate": 4.9011299435028255e-05, + "loss": 0.4014, + "step": 675 + }, + { + "epoch": 3.74, + "learning_rate": 4.887005649717514e-05, + "loss": 0.4173, + "step": 700 + }, + { + "epoch": 3.88, + "learning_rate": 4.8728813559322034e-05, + "loss": 0.4106, + "step": 725 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7963268675196038, + "eval_loss": 0.428460955619812, + "eval_macro_f1": 0.7931237082942566, + "eval_runtime": 12.1845, + "eval_samples_per_second": 397.719, + "eval_steps_per_second": 3.857, + "step": 748 + }, + { + "epoch": 4.01, + "learning_rate": 4.8587570621468934e-05, + "loss": 0.3815, + "step": 750 + }, + { + "epoch": 4.14, + "learning_rate": 4.844632768361582e-05, + "loss": 0.3347, + "step": 775 + }, + { + "epoch": 4.28, + "learning_rate": 4.8305084745762714e-05, + "loss": 0.3282, + "step": 800 + }, + { + "epoch": 4.41, + "learning_rate": 4.816384180790961e-05, + "loss": 0.3406, + "step": 825 + }, + { + "epoch": 4.55, + "learning_rate": 4.80225988700565e-05, + "loss": 0.3185, + "step": 850 + }, + { + "epoch": 4.68, + "learning_rate": 4.788135593220339e-05, + "loss": 0.3311, + "step": 875 + }, + { + "epoch": 4.81, + "learning_rate": 4.7740112994350286e-05, + "loss": 0.3294, + "step": 900 + }, + { + "epoch": 4.95, + "learning_rate": 4.759887005649718e-05, + "loss": 0.3394, + "step": 925 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.797358646306232, + "eval_loss": 0.4226238429546356, + "eval_macro_f1": 0.79729180116589, + "eval_runtime": 11.9309, + "eval_samples_per_second": 406.172, + "eval_steps_per_second": 3.939, + "step": 935 + }, + { + "epoch": 5.08, + "learning_rate": 4.745762711864407e-05, + "loss": 0.2855, + "step": 950 + }, + { + "epoch": 5.21, + "learning_rate": 4.7316384180790966e-05, + "loss": 0.2783, + "step": 975 + }, + { + "epoch": 5.35, + "learning_rate": 4.717514124293785e-05, + "loss": 0.2821, + "step": 1000 + }, + { + "epoch": 5.48, + "learning_rate": 4.703389830508475e-05, + "loss": 0.2609, + "step": 1025 + }, + { + "epoch": 5.61, + "learning_rate": 4.689265536723164e-05, + "loss": 0.2822, + "step": 1050 + }, + { + "epoch": 5.75, + "learning_rate": 4.675141242937853e-05, + "loss": 0.273, + "step": 1075 + }, + { + "epoch": 5.88, + "learning_rate": 4.6610169491525425e-05, + "loss": 0.2868, + "step": 1100 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8052001650846059, + "eval_loss": 0.4286108911037445, + "eval_macro_f1": 0.8033503768417375, + "eval_runtime": 12.1437, + "eval_samples_per_second": 399.055, + "eval_steps_per_second": 3.87, + "step": 1122 + }, + { + "epoch": 6.02, + "learning_rate": 4.646892655367232e-05, + "loss": 0.2971, + "step": 1125 + }, + { + "epoch": 6.15, + "learning_rate": 4.632768361581921e-05, + "loss": 0.2251, + "step": 1150 + }, + { + "epoch": 6.28, + "learning_rate": 4.6186440677966104e-05, + "loss": 0.2308, + "step": 1175 + }, + { + "epoch": 6.42, + "learning_rate": 4.6045197740113e-05, + "loss": 0.2524, + "step": 1200 + }, + { + "epoch": 6.55, + "learning_rate": 4.590395480225989e-05, + "loss": 0.2353, + "step": 1225 + }, + { + "epoch": 6.68, + "learning_rate": 4.5762711864406784e-05, + "loss": 0.2587, + "step": 1250 + }, + { + "epoch": 6.82, + "learning_rate": 4.562146892655367e-05, + "loss": 0.2358, + "step": 1275 + }, + { + "epoch": 6.95, + "learning_rate": 4.548022598870056e-05, + "loss": 0.231, + "step": 1300 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.8037556747833264, + "eval_loss": 0.4610762596130371, + "eval_macro_f1": 0.8037225017799383, + "eval_runtime": 11.9283, + "eval_samples_per_second": 406.26, + "eval_steps_per_second": 3.94, + "step": 1309 + } + ], + "max_steps": 9350, + "num_train_epochs": 50, + "total_flos": 990317516267520.0, + "trial_name": null, + "trial_params": null +} diff --git a/scaling_performance/2000/.DS_Store b/scaling_performance/2000/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..c7f904beb86df78a95edcae78cb161d4ea2d02bf Binary files /dev/null and b/scaling_performance/2000/.DS_Store differ diff --git a/scaling_performance/2000/L2/.DS_Store b/scaling_performance/2000/L2/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..713fd9972e55d4a20ebcdb7f71e8dd6c0f9f5131 Binary files /dev/null and b/scaling_performance/2000/L2/.DS_Store differ diff --git a/scaling_performance/2000/L2/fold0/all_results.json b/scaling_performance/2000/L2/fold0/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2a890ed04d9dae187ad7ce17693576e9cf71ab65 --- /dev/null +++ b/scaling_performance/2000/L2/fold0/all_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.575, + "test_loss": 0.6815145611763, + "test_macro_f1": 0.36507936507936506, + "test_runtime": 0.9708, + "test_samples_per_second": 412.051, + "test_steps_per_second": 4.121 +} \ No newline at end of file diff --git a/scaling_performance/2000/L2/fold0/checkpoint-208/config.json b/scaling_performance/2000/L2/fold0/checkpoint-208/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e65edc258393bd4895cab39ec2c8922a43c44c01 --- /dev/null +++ b/scaling_performance/2000/L2/fold0/checkpoint-208/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L2/fold0/checkpoint-208/trainer_state.json b/scaling_performance/2000/L2/fold0/checkpoint-208/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..dc25ad2423cb0f979625c6d639b4dcd2e8eea4ee --- /dev/null +++ b/scaling_performance/2000/L2/fold0/checkpoint-208/trainer_state.json @@ -0,0 +1,770 @@ +{ + "best_metric": 0.6815145611763, + "best_model_checkpoint": "./models/240626_geneformer_CellClassifier_PM25_Layers2_L2048_B26_LR5e-05_LSlinear_WU600_E20_Oadamw_F0_fold0/checkpoint-176", + "epoch": 13.0, + "global_step": 208, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.12, + "learning_rate": 1.6666666666666668e-07, + "loss": 0.6985, + "step": 2 + }, + { + "epoch": 0.25, + "learning_rate": 3.3333333333333335e-07, + "loss": 0.6829, + "step": 4 + }, + { + "epoch": 0.38, + "learning_rate": 5.000000000000001e-07, + "loss": 0.6781, + "step": 6 + }, + { + "epoch": 0.5, + "learning_rate": 6.666666666666667e-07, + "loss": 0.7016, + "step": 8 + }, + { + "epoch": 0.62, + "learning_rate": 8.333333333333333e-07, + "loss": 0.6837, + "step": 10 + }, + { + "epoch": 0.75, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.6822, + "step": 12 + }, + { + "epoch": 0.88, + "learning_rate": 1.1666666666666668e-06, + "loss": 0.6957, + "step": 14 + }, + { + "epoch": 1.0, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.6711, + "step": 16 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.5675, + "eval_loss": 0.6868653297424316, + "eval_macro_f1": 0.4007568475653582, + "eval_runtime": 0.9079, + "eval_samples_per_second": 440.586, + "eval_steps_per_second": 4.406, + "step": 16 + }, + { + "epoch": 1.12, + "learning_rate": 1.5e-06, + "loss": 0.687, + "step": 18 + }, + { + "epoch": 1.25, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.6824, + "step": 20 + }, + { + "epoch": 1.38, + "learning_rate": 1.8333333333333335e-06, + "loss": 0.6806, + "step": 22 + }, + { + "epoch": 1.5, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.6971, + "step": 24 + }, + { + "epoch": 1.62, + "learning_rate": 2.166666666666667e-06, + "loss": 0.6899, + "step": 26 + }, + { + "epoch": 1.75, + "learning_rate": 2.3333333333333336e-06, + "loss": 0.6869, + "step": 28 + }, + { + "epoch": 1.88, + "learning_rate": 2.5e-06, + "loss": 0.6945, + "step": 30 + }, + { + "epoch": 2.0, + "learning_rate": 2.666666666666667e-06, + "loss": 0.6883, + "step": 32 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.57, + "eval_loss": 0.6863564848899841, + "eval_macro_f1": 0.40203031567236824, + "eval_runtime": 0.8835, + "eval_samples_per_second": 452.754, + "eval_steps_per_second": 4.528, + "step": 32 + }, + { + "epoch": 2.12, + "learning_rate": 2.8333333333333335e-06, + "loss": 0.706, + "step": 34 + }, + { + "epoch": 2.25, + "learning_rate": 3e-06, + "loss": 0.6818, + "step": 36 + }, + { + "epoch": 2.38, + "learning_rate": 3.166666666666667e-06, + "loss": 0.6838, + "step": 38 + }, + { + "epoch": 2.5, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.6986, + "step": 40 + }, + { + "epoch": 2.62, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.6781, + "step": 42 + }, + { + "epoch": 2.75, + "learning_rate": 3.666666666666667e-06, + "loss": 0.6807, + "step": 44 + }, + { + "epoch": 2.88, + "learning_rate": 3.833333333333334e-06, + "loss": 0.6958, + "step": 46 + }, + { + "epoch": 3.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6728, + "step": 48 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.5725, + "eval_loss": 0.6860520839691162, + "eval_macro_f1": 0.3988134475939354, + "eval_runtime": 0.9358, + "eval_samples_per_second": 427.462, + "eval_steps_per_second": 4.275, + "step": 48 + }, + { + "epoch": 3.12, + "learning_rate": 4.166666666666667e-06, + "loss": 0.6949, + "step": 50 + }, + { + "epoch": 3.25, + "learning_rate": 4.333333333333334e-06, + "loss": 0.6832, + "step": 52 + }, + { + "epoch": 3.38, + "learning_rate": 4.5e-06, + "loss": 0.679, + "step": 54 + }, + { + "epoch": 3.5, + "learning_rate": 4.666666666666667e-06, + "loss": 0.7016, + "step": 56 + }, + { + "epoch": 3.62, + "learning_rate": 4.833333333333333e-06, + "loss": 0.6823, + "step": 58 + }, + { + "epoch": 3.75, + "learning_rate": 5e-06, + "loss": 0.6809, + "step": 60 + }, + { + "epoch": 3.88, + "learning_rate": 5.166666666666667e-06, + "loss": 0.6962, + "step": 62 + }, + { + "epoch": 4.0, + "learning_rate": 5.333333333333334e-06, + "loss": 0.6735, + "step": 64 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.5725, + "eval_loss": 0.6853413581848145, + "eval_macro_f1": 0.38949829969921185, + "eval_runtime": 0.9811, + "eval_samples_per_second": 407.707, + "eval_steps_per_second": 4.077, + "step": 64 + }, + { + "epoch": 4.12, + "learning_rate": 5.500000000000001e-06, + "loss": 0.7103, + "step": 66 + }, + { + "epoch": 4.25, + "learning_rate": 5.666666666666667e-06, + "loss": 0.6672, + "step": 68 + }, + { + "epoch": 4.38, + "learning_rate": 5.833333333333334e-06, + "loss": 0.6879, + "step": 70 + }, + { + "epoch": 4.5, + "learning_rate": 6e-06, + "loss": 0.6956, + "step": 72 + }, + { + "epoch": 4.62, + "learning_rate": 6.166666666666667e-06, + "loss": 0.6704, + "step": 74 + }, + { + "epoch": 4.75, + "learning_rate": 6.333333333333334e-06, + "loss": 0.6779, + "step": 76 + }, + { + "epoch": 4.88, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.6982, + "step": 78 + }, + { + "epoch": 5.0, + "learning_rate": 6.666666666666667e-06, + "loss": 0.6842, + "step": 80 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.57, + "eval_loss": 0.6844683885574341, + "eval_macro_f1": 0.3630573248407643, + "eval_runtime": 1.1196, + "eval_samples_per_second": 357.275, + "eval_steps_per_second": 3.573, + "step": 80 + }, + { + "epoch": 5.12, + "learning_rate": 6.833333333333333e-06, + "loss": 0.6918, + "step": 82 + }, + { + "epoch": 5.25, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6828, + "step": 84 + }, + { + "epoch": 5.38, + "learning_rate": 7.166666666666667e-06, + "loss": 0.6874, + "step": 86 + }, + { + "epoch": 5.5, + "learning_rate": 7.333333333333334e-06, + "loss": 0.6971, + "step": 88 + }, + { + "epoch": 5.62, + "learning_rate": 7.5e-06, + "loss": 0.6732, + "step": 90 + }, + { + "epoch": 5.75, + "learning_rate": 7.666666666666667e-06, + "loss": 0.6778, + "step": 92 + }, + { + "epoch": 5.88, + "learning_rate": 7.833333333333333e-06, + "loss": 0.6941, + "step": 94 + }, + { + "epoch": 6.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6805, + "step": 96 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.57, + "eval_loss": 0.6841095089912415, + "eval_macro_f1": 0.3630573248407643, + "eval_runtime": 1.0842, + "eval_samples_per_second": 368.94, + "eval_steps_per_second": 3.689, + "step": 96 + }, + { + "epoch": 6.12, + "learning_rate": 8.166666666666668e-06, + "loss": 0.6922, + "step": 98 + }, + { + "epoch": 6.25, + "learning_rate": 8.333333333333334e-06, + "loss": 0.6873, + "step": 100 + }, + { + "epoch": 6.38, + "learning_rate": 8.500000000000002e-06, + "loss": 0.6759, + "step": 102 + }, + { + "epoch": 6.5, + "learning_rate": 8.666666666666668e-06, + "loss": 0.6972, + "step": 104 + }, + { + "epoch": 6.62, + "learning_rate": 8.833333333333334e-06, + "loss": 0.6703, + "step": 106 + }, + { + "epoch": 6.75, + "learning_rate": 9e-06, + "loss": 0.6669, + "step": 108 + }, + { + "epoch": 6.88, + "learning_rate": 9.166666666666666e-06, + "loss": 0.7025, + "step": 110 + }, + { + "epoch": 7.0, + "learning_rate": 9.333333333333334e-06, + "loss": 0.6864, + "step": 112 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.5725, + "eval_loss": 0.683830738067627, + "eval_macro_f1": 0.3640699523052464, + "eval_runtime": 0.9335, + "eval_samples_per_second": 428.484, + "eval_steps_per_second": 4.285, + "step": 112 + }, + { + "epoch": 7.12, + "learning_rate": 9.5e-06, + "loss": 0.7027, + "step": 114 + }, + { + "epoch": 7.25, + "learning_rate": 9.666666666666667e-06, + "loss": 0.6771, + "step": 116 + }, + { + "epoch": 7.38, + "learning_rate": 9.833333333333333e-06, + "loss": 0.6702, + "step": 118 + }, + { + "epoch": 7.5, + "learning_rate": 1e-05, + "loss": 0.7039, + "step": 120 + }, + { + "epoch": 7.62, + "learning_rate": 1.0166666666666667e-05, + "loss": 0.6806, + "step": 122 + }, + { + "epoch": 7.75, + "learning_rate": 1.0333333333333333e-05, + "loss": 0.675, + "step": 124 + }, + { + "epoch": 7.88, + "learning_rate": 1.05e-05, + "loss": 0.6876, + "step": 126 + }, + { + "epoch": 8.0, + "learning_rate": 1.0666666666666667e-05, + "loss": 0.6704, + "step": 128 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.5725, + "eval_loss": 0.683628261089325, + "eval_macro_f1": 0.3640699523052464, + "eval_runtime": 0.9805, + "eval_samples_per_second": 407.957, + "eval_steps_per_second": 4.08, + "step": 128 + }, + { + "epoch": 8.12, + "learning_rate": 1.0833333333333334e-05, + "loss": 0.683, + "step": 130 + }, + { + "epoch": 8.25, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.6854, + "step": 132 + }, + { + "epoch": 8.38, + "learning_rate": 1.1166666666666668e-05, + "loss": 0.6849, + "step": 134 + }, + { + "epoch": 8.5, + "learning_rate": 1.1333333333333334e-05, + "loss": 0.7049, + "step": 136 + }, + { + "epoch": 8.62, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.6788, + "step": 138 + }, + { + "epoch": 8.75, + "learning_rate": 1.1666666666666668e-05, + "loss": 0.6739, + "step": 140 + }, + { + "epoch": 8.88, + "learning_rate": 1.1833333333333334e-05, + "loss": 0.6886, + "step": 142 + }, + { + "epoch": 9.0, + "learning_rate": 1.2e-05, + "loss": 0.656, + "step": 144 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.575, + "eval_loss": 0.682921290397644, + "eval_macro_f1": 0.36507936507936506, + "eval_runtime": 1.0912, + "eval_samples_per_second": 366.574, + "eval_steps_per_second": 3.666, + "step": 144 + }, + { + "epoch": 9.12, + "learning_rate": 1.2166666666666668e-05, + "loss": 0.6808, + "step": 146 + }, + { + "epoch": 9.25, + "learning_rate": 1.2333333333333334e-05, + "loss": 0.6705, + "step": 148 + }, + { + "epoch": 9.38, + "learning_rate": 1.25e-05, + "loss": 0.6771, + "step": 150 + }, + { + "epoch": 9.5, + "learning_rate": 1.2666666666666668e-05, + "loss": 0.7135, + "step": 152 + }, + { + "epoch": 9.62, + "learning_rate": 1.2833333333333333e-05, + "loss": 0.6682, + "step": 154 + }, + { + "epoch": 9.75, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.6772, + "step": 156 + }, + { + "epoch": 9.88, + "learning_rate": 1.3166666666666665e-05, + "loss": 0.7109, + "step": 158 + }, + { + "epoch": 10.0, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.6511, + "step": 160 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.575, + "eval_loss": 0.6817663311958313, + "eval_macro_f1": 0.36507936507936506, + "eval_runtime": 0.9437, + "eval_samples_per_second": 423.851, + "eval_steps_per_second": 4.239, + "step": 160 + }, + { + "epoch": 10.12, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.7026, + "step": 162 + }, + { + "epoch": 10.25, + "learning_rate": 1.3666666666666666e-05, + "loss": 0.6795, + "step": 164 + }, + { + "epoch": 10.38, + "learning_rate": 1.3833333333333334e-05, + "loss": 0.674, + "step": 166 + }, + { + "epoch": 10.5, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.6749, + "step": 168 + }, + { + "epoch": 10.62, + "learning_rate": 1.4166666666666668e-05, + "loss": 0.6828, + "step": 170 + }, + { + "epoch": 10.75, + "learning_rate": 1.4333333333333334e-05, + "loss": 0.6673, + "step": 172 + }, + { + "epoch": 10.88, + "learning_rate": 1.45e-05, + "loss": 0.7079, + "step": 174 + }, + { + "epoch": 11.0, + "learning_rate": 1.4666666666666668e-05, + "loss": 0.6569, + "step": 176 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.575, + "eval_loss": 0.6815145611763, + "eval_macro_f1": 0.36507936507936506, + "eval_runtime": 0.9326, + "eval_samples_per_second": 428.928, + "eval_steps_per_second": 4.289, + "step": 176 + }, + { + "epoch": 11.12, + "learning_rate": 1.4833333333333336e-05, + "loss": 0.6956, + "step": 178 + }, + { + "epoch": 11.25, + "learning_rate": 1.5e-05, + "loss": 0.6764, + "step": 180 + }, + { + "epoch": 11.38, + "learning_rate": 1.5166666666666668e-05, + "loss": 0.6698, + "step": 182 + }, + { + "epoch": 11.5, + "learning_rate": 1.5333333333333334e-05, + "loss": 0.6934, + "step": 184 + }, + { + "epoch": 11.62, + "learning_rate": 1.55e-05, + "loss": 0.6608, + "step": 186 + }, + { + "epoch": 11.75, + "learning_rate": 1.5666666666666667e-05, + "loss": 0.6743, + "step": 188 + }, + { + "epoch": 11.88, + "learning_rate": 1.5833333333333333e-05, + "loss": 0.6935, + "step": 190 + }, + { + "epoch": 12.0, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.6719, + "step": 192 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.575, + "eval_loss": 0.6815577745437622, + "eval_macro_f1": 0.36507936507936506, + "eval_runtime": 0.9976, + "eval_samples_per_second": 400.946, + "eval_steps_per_second": 4.009, + "step": 192 + }, + { + "epoch": 12.12, + "learning_rate": 1.6166666666666665e-05, + "loss": 0.6993, + "step": 194 + }, + { + "epoch": 12.25, + "learning_rate": 1.6333333333333335e-05, + "loss": 0.6802, + "step": 196 + }, + { + "epoch": 12.38, + "learning_rate": 1.65e-05, + "loss": 0.6714, + "step": 198 + }, + { + "epoch": 12.5, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.6889, + "step": 200 + }, + { + "epoch": 12.62, + "learning_rate": 1.6833333333333334e-05, + "loss": 0.6715, + "step": 202 + }, + { + "epoch": 12.75, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.6763, + "step": 204 + }, + { + "epoch": 12.88, + "learning_rate": 1.7166666666666666e-05, + "loss": 0.6868, + "step": 206 + }, + { + "epoch": 13.0, + "learning_rate": 1.7333333333333336e-05, + "loss": 0.6516, + "step": 208 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.585, + "eval_loss": 0.6822723150253296, + "eval_macro_f1": 0.40501792114695345, + "eval_runtime": 0.8812, + "eval_samples_per_second": 453.944, + "eval_steps_per_second": 4.539, + "step": 208 + } + ], + "max_steps": 320, + "num_train_epochs": 20, + "total_flos": 286523483750400.0, + "trial_name": null, + "trial_params": null +} diff --git a/scaling_performance/2000/L2/fold0/config.json b/scaling_performance/2000/L2/fold0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e65edc258393bd4895cab39ec2c8922a43c44c01 --- /dev/null +++ b/scaling_performance/2000/L2/fold0/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L2/fold0/eval_results.json b/scaling_performance/2000/L2/fold0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2a890ed04d9dae187ad7ce17693576e9cf71ab65 --- /dev/null +++ b/scaling_performance/2000/L2/fold0/eval_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.575, + "test_loss": 0.6815145611763, + "test_macro_f1": 0.36507936507936506, + "test_runtime": 0.9708, + "test_samples_per_second": 412.051, + "test_steps_per_second": 4.121 +} \ No newline at end of file diff --git a/scaling_performance/2000/L2/fold1/all_results.json b/scaling_performance/2000/L2/fold1/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..185e75d1d39dfbb7d9a7377f6b61fbd62c89ce4b --- /dev/null +++ b/scaling_performance/2000/L2/fold1/all_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.565, + "test_loss": 0.6680055260658264, + "test_macro_f1": 0.4771006130544537, + "test_runtime": 0.9664, + "test_samples_per_second": 413.918, + "test_steps_per_second": 4.139 +} \ No newline at end of file diff --git a/scaling_performance/2000/L2/fold1/checkpoint-320/config.json b/scaling_performance/2000/L2/fold1/checkpoint-320/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e65edc258393bd4895cab39ec2c8922a43c44c01 --- /dev/null +++ b/scaling_performance/2000/L2/fold1/checkpoint-320/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L2/fold1/checkpoint-320/trainer_state.json b/scaling_performance/2000/L2/fold1/checkpoint-320/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3cce36f9d100f7583e483daf52a3e9e11e881fdb --- /dev/null +++ b/scaling_performance/2000/L2/fold1/checkpoint-320/trainer_state.json @@ -0,0 +1,1176 @@ +{ + "best_metric": 0.6680055260658264, + "best_model_checkpoint": "./models/240626_geneformer_CellClassifier_PM25_Layers2_L2048_B26_LR5e-05_LSlinear_WU600_E20_Oadamw_F0_fold1/checkpoint-320", + "epoch": 20.0, + "global_step": 320, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.12, + "learning_rate": 1.6666666666666668e-07, + "loss": 0.6891, + "step": 2 + }, + { + "epoch": 0.25, + "learning_rate": 3.3333333333333335e-07, + "loss": 0.6971, + "step": 4 + }, + { + "epoch": 0.38, + "learning_rate": 5.000000000000001e-07, + "loss": 0.6939, + "step": 6 + }, + { + "epoch": 0.5, + "learning_rate": 6.666666666666667e-07, + "loss": 0.6935, + "step": 8 + }, + { + "epoch": 0.62, + "learning_rate": 8.333333333333333e-07, + "loss": 0.6909, + "step": 10 + }, + { + "epoch": 0.75, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.6984, + "step": 12 + }, + { + "epoch": 0.88, + "learning_rate": 1.1666666666666668e-06, + "loss": 0.6935, + "step": 14 + }, + { + "epoch": 1.0, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.6935, + "step": 16 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.5175, + "eval_loss": 0.6928809285163879, + "eval_macro_f1": 0.5169898204979009, + "eval_runtime": 0.8872, + "eval_samples_per_second": 450.834, + "eval_steps_per_second": 4.508, + "step": 16 + }, + { + "epoch": 1.12, + "learning_rate": 1.5e-06, + "loss": 0.6901, + "step": 18 + }, + { + "epoch": 1.25, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.6877, + "step": 20 + }, + { + "epoch": 1.38, + "learning_rate": 1.8333333333333335e-06, + "loss": 0.6905, + "step": 22 + }, + { + "epoch": 1.5, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.6956, + "step": 24 + }, + { + "epoch": 1.62, + "learning_rate": 2.166666666666667e-06, + "loss": 0.6957, + "step": 26 + }, + { + "epoch": 1.75, + "learning_rate": 2.3333333333333336e-06, + "loss": 0.6918, + "step": 28 + }, + { + "epoch": 1.88, + "learning_rate": 2.5e-06, + "loss": 0.6924, + "step": 30 + }, + { + "epoch": 2.0, + "learning_rate": 2.666666666666667e-06, + "loss": 0.6942, + "step": 32 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.5175, + "eval_loss": 0.691374659538269, + "eval_macro_f1": 0.5107452262803329, + "eval_runtime": 1.0093, + "eval_samples_per_second": 396.31, + "eval_steps_per_second": 3.963, + "step": 32 + }, + { + "epoch": 2.12, + "learning_rate": 2.8333333333333335e-06, + "loss": 0.6911, + "step": 34 + }, + { + "epoch": 2.25, + "learning_rate": 3e-06, + "loss": 0.6922, + "step": 36 + }, + { + "epoch": 2.38, + "learning_rate": 3.166666666666667e-06, + "loss": 0.6917, + "step": 38 + }, + { + "epoch": 2.5, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.6915, + "step": 40 + }, + { + "epoch": 2.62, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.684, + "step": 42 + }, + { + "epoch": 2.75, + "learning_rate": 3.666666666666667e-06, + "loss": 0.6924, + "step": 44 + }, + { + "epoch": 2.88, + "learning_rate": 3.833333333333334e-06, + "loss": 0.6935, + "step": 46 + }, + { + "epoch": 3.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6851, + "step": 48 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.54, + "eval_loss": 0.6890630125999451, + "eval_macro_f1": 0.4837840870833801, + "eval_runtime": 0.929, + "eval_samples_per_second": 430.549, + "eval_steps_per_second": 4.305, + "step": 48 + }, + { + "epoch": 3.12, + "learning_rate": 4.166666666666667e-06, + "loss": 0.6922, + "step": 50 + }, + { + "epoch": 3.25, + "learning_rate": 4.333333333333334e-06, + "loss": 0.6847, + "step": 52 + }, + { + "epoch": 3.38, + "learning_rate": 4.5e-06, + "loss": 0.6855, + "step": 54 + }, + { + "epoch": 3.5, + "learning_rate": 4.666666666666667e-06, + "loss": 0.6965, + "step": 56 + }, + { + "epoch": 3.62, + "learning_rate": 4.833333333333333e-06, + "loss": 0.6935, + "step": 58 + }, + { + "epoch": 3.75, + "learning_rate": 5e-06, + "loss": 0.684, + "step": 60 + }, + { + "epoch": 3.88, + "learning_rate": 5.166666666666667e-06, + "loss": 0.6892, + "step": 62 + }, + { + "epoch": 4.0, + "learning_rate": 5.333333333333334e-06, + "loss": 0.6844, + "step": 64 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.575, + "eval_loss": 0.6868659853935242, + "eval_macro_f1": 0.4407158836689038, + "eval_runtime": 0.9167, + "eval_samples_per_second": 436.364, + "eval_steps_per_second": 4.364, + "step": 64 + }, + { + "epoch": 4.12, + "learning_rate": 5.500000000000001e-06, + "loss": 0.695, + "step": 66 + }, + { + "epoch": 4.25, + "learning_rate": 5.666666666666667e-06, + "loss": 0.6833, + "step": 68 + }, + { + "epoch": 4.38, + "learning_rate": 5.833333333333334e-06, + "loss": 0.678, + "step": 70 + }, + { + "epoch": 4.5, + "learning_rate": 6e-06, + "loss": 0.6914, + "step": 72 + }, + { + "epoch": 4.62, + "learning_rate": 6.166666666666667e-06, + "loss": 0.6837, + "step": 74 + }, + { + "epoch": 4.75, + "learning_rate": 6.333333333333334e-06, + "loss": 0.6733, + "step": 76 + }, + { + "epoch": 4.88, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.6994, + "step": 78 + }, + { + "epoch": 5.0, + "learning_rate": 6.666666666666667e-06, + "loss": 0.6786, + "step": 80 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5625, + "eval_loss": 0.6852134466171265, + "eval_macro_f1": 0.3752175581717081, + "eval_runtime": 0.9926, + "eval_samples_per_second": 403.001, + "eval_steps_per_second": 4.03, + "step": 80 + }, + { + "epoch": 5.12, + "learning_rate": 6.833333333333333e-06, + "loss": 0.7009, + "step": 82 + }, + { + "epoch": 5.25, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6761, + "step": 84 + }, + { + "epoch": 5.38, + "learning_rate": 7.166666666666667e-06, + "loss": 0.68, + "step": 86 + }, + { + "epoch": 5.5, + "learning_rate": 7.333333333333334e-06, + "loss": 0.6956, + "step": 88 + }, + { + "epoch": 5.62, + "learning_rate": 7.5e-06, + "loss": 0.6853, + "step": 90 + }, + { + "epoch": 5.75, + "learning_rate": 7.666666666666667e-06, + "loss": 0.6761, + "step": 92 + }, + { + "epoch": 5.88, + "learning_rate": 7.833333333333333e-06, + "loss": 0.6912, + "step": 94 + }, + { + "epoch": 6.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6608, + "step": 96 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.5625, + "eval_loss": 0.6843925714492798, + "eval_macro_f1": 0.3702714130209879, + "eval_runtime": 0.9143, + "eval_samples_per_second": 437.474, + "eval_steps_per_second": 4.375, + "step": 96 + }, + { + "epoch": 6.12, + "learning_rate": 8.166666666666668e-06, + "loss": 0.689, + "step": 98 + }, + { + "epoch": 6.25, + "learning_rate": 8.333333333333334e-06, + "loss": 0.6762, + "step": 100 + }, + { + "epoch": 6.38, + "learning_rate": 8.500000000000002e-06, + "loss": 0.6624, + "step": 102 + }, + { + "epoch": 6.5, + "learning_rate": 8.666666666666668e-06, + "loss": 0.7094, + "step": 104 + }, + { + "epoch": 6.62, + "learning_rate": 8.833333333333334e-06, + "loss": 0.6789, + "step": 106 + }, + { + "epoch": 6.75, + "learning_rate": 9e-06, + "loss": 0.6724, + "step": 108 + }, + { + "epoch": 6.88, + "learning_rate": 9.166666666666666e-06, + "loss": 0.6967, + "step": 110 + }, + { + "epoch": 7.0, + "learning_rate": 9.333333333333334e-06, + "loss": 0.679, + "step": 112 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.5625, + "eval_loss": 0.683676540851593, + "eval_macro_f1": 0.36, + "eval_runtime": 0.9489, + "eval_samples_per_second": 421.522, + "eval_steps_per_second": 4.215, + "step": 112 + }, + { + "epoch": 7.12, + "learning_rate": 9.5e-06, + "loss": 0.6991, + "step": 114 + }, + { + "epoch": 7.25, + "learning_rate": 9.666666666666667e-06, + "loss": 0.6841, + "step": 116 + }, + { + "epoch": 7.38, + "learning_rate": 9.833333333333333e-06, + "loss": 0.6881, + "step": 118 + }, + { + "epoch": 7.5, + "learning_rate": 1e-05, + "loss": 0.6963, + "step": 120 + }, + { + "epoch": 7.62, + "learning_rate": 1.0166666666666667e-05, + "loss": 0.6736, + "step": 122 + }, + { + "epoch": 7.75, + "learning_rate": 1.0333333333333333e-05, + "loss": 0.6722, + "step": 124 + }, + { + "epoch": 7.88, + "learning_rate": 1.05e-05, + "loss": 0.6922, + "step": 126 + }, + { + "epoch": 8.0, + "learning_rate": 1.0666666666666667e-05, + "loss": 0.6501, + "step": 128 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.56, + "eval_loss": 0.6837745904922485, + "eval_macro_f1": 0.36413887784963334, + "eval_runtime": 0.978, + "eval_samples_per_second": 408.981, + "eval_steps_per_second": 4.09, + "step": 128 + }, + { + "epoch": 8.12, + "learning_rate": 1.0833333333333334e-05, + "loss": 0.689, + "step": 130 + }, + { + "epoch": 8.25, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.6726, + "step": 132 + }, + { + "epoch": 8.38, + "learning_rate": 1.1166666666666668e-05, + "loss": 0.6784, + "step": 134 + }, + { + "epoch": 8.5, + "learning_rate": 1.1333333333333334e-05, + "loss": 0.6942, + "step": 136 + }, + { + "epoch": 8.62, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.6816, + "step": 138 + }, + { + "epoch": 8.75, + "learning_rate": 1.1666666666666668e-05, + "loss": 0.6716, + "step": 140 + }, + { + "epoch": 8.88, + "learning_rate": 1.1833333333333334e-05, + "loss": 0.6876, + "step": 142 + }, + { + "epoch": 9.0, + "learning_rate": 1.2e-05, + "loss": 0.6843, + "step": 144 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.5625, + "eval_loss": 0.6833199858665466, + "eval_macro_f1": 0.36, + "eval_runtime": 1.072, + "eval_samples_per_second": 373.151, + "eval_steps_per_second": 3.732, + "step": 144 + }, + { + "epoch": 9.12, + "learning_rate": 1.2166666666666668e-05, + "loss": 0.7016, + "step": 146 + }, + { + "epoch": 9.25, + "learning_rate": 1.2333333333333334e-05, + "loss": 0.6703, + "step": 148 + }, + { + "epoch": 9.38, + "learning_rate": 1.25e-05, + "loss": 0.6783, + "step": 150 + }, + { + "epoch": 9.5, + "learning_rate": 1.2666666666666668e-05, + "loss": 0.6892, + "step": 152 + }, + { + "epoch": 9.62, + "learning_rate": 1.2833333333333333e-05, + "loss": 0.6673, + "step": 154 + }, + { + "epoch": 9.75, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.6707, + "step": 156 + }, + { + "epoch": 9.88, + "learning_rate": 1.3166666666666665e-05, + "loss": 0.6946, + "step": 158 + }, + { + "epoch": 10.0, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.6606, + "step": 160 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.5625, + "eval_loss": 0.6831080913543701, + "eval_macro_f1": 0.36, + "eval_runtime": 0.9603, + "eval_samples_per_second": 416.552, + "eval_steps_per_second": 4.166, + "step": 160 + }, + { + "epoch": 10.12, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.6884, + "step": 162 + }, + { + "epoch": 10.25, + "learning_rate": 1.3666666666666666e-05, + "loss": 0.6678, + "step": 164 + }, + { + "epoch": 10.38, + "learning_rate": 1.3833333333333334e-05, + "loss": 0.6673, + "step": 166 + }, + { + "epoch": 10.5, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.697, + "step": 168 + }, + { + "epoch": 10.62, + "learning_rate": 1.4166666666666668e-05, + "loss": 0.6654, + "step": 170 + }, + { + "epoch": 10.75, + "learning_rate": 1.4333333333333334e-05, + "loss": 0.6878, + "step": 172 + }, + { + "epoch": 10.88, + "learning_rate": 1.45e-05, + "loss": 0.6999, + "step": 174 + }, + { + "epoch": 11.0, + "learning_rate": 1.4666666666666668e-05, + "loss": 0.6517, + "step": 176 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.5625, + "eval_loss": 0.6825976371765137, + "eval_macro_f1": 0.36, + "eval_runtime": 0.9579, + "eval_samples_per_second": 417.581, + "eval_steps_per_second": 4.176, + "step": 176 + }, + { + "epoch": 11.12, + "learning_rate": 1.4833333333333336e-05, + "loss": 0.7032, + "step": 178 + }, + { + "epoch": 11.25, + "learning_rate": 1.5e-05, + "loss": 0.6567, + "step": 180 + }, + { + "epoch": 11.38, + "learning_rate": 1.5166666666666668e-05, + "loss": 0.6711, + "step": 182 + }, + { + "epoch": 11.5, + "learning_rate": 1.5333333333333334e-05, + "loss": 0.691, + "step": 184 + }, + { + "epoch": 11.62, + "learning_rate": 1.55e-05, + "loss": 0.659, + "step": 186 + }, + { + "epoch": 11.75, + "learning_rate": 1.5666666666666667e-05, + "loss": 0.6765, + "step": 188 + }, + { + "epoch": 11.88, + "learning_rate": 1.5833333333333333e-05, + "loss": 0.7001, + "step": 190 + }, + { + "epoch": 12.0, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.6475, + "step": 192 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.5625, + "eval_loss": 0.6822869777679443, + "eval_macro_f1": 0.36, + "eval_runtime": 0.9321, + "eval_samples_per_second": 429.124, + "eval_steps_per_second": 4.291, + "step": 192 + }, + { + "epoch": 12.12, + "learning_rate": 1.6166666666666665e-05, + "loss": 0.6934, + "step": 194 + }, + { + "epoch": 12.25, + "learning_rate": 1.6333333333333335e-05, + "loss": 0.6718, + "step": 196 + }, + { + "epoch": 12.38, + "learning_rate": 1.65e-05, + "loss": 0.6756, + "step": 198 + }, + { + "epoch": 12.5, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.6962, + "step": 200 + }, + { + "epoch": 12.62, + "learning_rate": 1.6833333333333334e-05, + "loss": 0.6585, + "step": 202 + }, + { + "epoch": 12.75, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.6658, + "step": 204 + }, + { + "epoch": 12.88, + "learning_rate": 1.7166666666666666e-05, + "loss": 0.6825, + "step": 206 + }, + { + "epoch": 13.0, + "learning_rate": 1.7333333333333336e-05, + "loss": 0.6503, + "step": 208 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.5625, + "eval_loss": 0.6819019913673401, + "eval_macro_f1": 0.36, + "eval_runtime": 0.974, + "eval_samples_per_second": 410.695, + "eval_steps_per_second": 4.107, + "step": 208 + }, + { + "epoch": 13.12, + "learning_rate": 1.75e-05, + "loss": 0.6959, + "step": 210 + }, + { + "epoch": 13.25, + "learning_rate": 1.7666666666666668e-05, + "loss": 0.6671, + "step": 212 + }, + { + "epoch": 13.38, + "learning_rate": 1.7833333333333334e-05, + "loss": 0.6638, + "step": 214 + }, + { + "epoch": 13.5, + "learning_rate": 1.8e-05, + "loss": 0.6956, + "step": 216 + }, + { + "epoch": 13.62, + "learning_rate": 1.8166666666666667e-05, + "loss": 0.6559, + "step": 218 + }, + { + "epoch": 13.75, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.6626, + "step": 220 + }, + { + "epoch": 13.88, + "learning_rate": 1.85e-05, + "loss": 0.6882, + "step": 222 + }, + { + "epoch": 14.0, + "learning_rate": 1.866666666666667e-05, + "loss": 0.6718, + "step": 224 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.5625, + "eval_loss": 0.681367814540863, + "eval_macro_f1": 0.36, + "eval_runtime": 0.8835, + "eval_samples_per_second": 452.731, + "eval_steps_per_second": 4.527, + "step": 224 + }, + { + "epoch": 14.12, + "learning_rate": 1.8833333333333335e-05, + "loss": 0.6819, + "step": 226 + }, + { + "epoch": 14.25, + "learning_rate": 1.9e-05, + "loss": 0.6612, + "step": 228 + }, + { + "epoch": 14.38, + "learning_rate": 1.9166666666666667e-05, + "loss": 0.6404, + "step": 230 + }, + { + "epoch": 14.5, + "learning_rate": 1.9333333333333333e-05, + "loss": 0.6926, + "step": 232 + }, + { + "epoch": 14.62, + "learning_rate": 1.9500000000000003e-05, + "loss": 0.6661, + "step": 234 + }, + { + "epoch": 14.75, + "learning_rate": 1.9666666666666666e-05, + "loss": 0.6755, + "step": 236 + }, + { + "epoch": 14.88, + "learning_rate": 1.9833333333333335e-05, + "loss": 0.6928, + "step": 238 + }, + { + "epoch": 15.0, + "learning_rate": 2e-05, + "loss": 0.6628, + "step": 240 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.5575, + "eval_loss": 0.6806111931800842, + "eval_macro_f1": 0.3579454253611557, + "eval_runtime": 0.931, + "eval_samples_per_second": 429.659, + "eval_steps_per_second": 4.297, + "step": 240 + }, + { + "epoch": 15.12, + "learning_rate": 2.0166666666666668e-05, + "loss": 0.6946, + "step": 242 + }, + { + "epoch": 15.25, + "learning_rate": 2.0333333333333334e-05, + "loss": 0.6502, + "step": 244 + }, + { + "epoch": 15.38, + "learning_rate": 2.05e-05, + "loss": 0.6611, + "step": 246 + }, + { + "epoch": 15.5, + "learning_rate": 2.0666666666666666e-05, + "loss": 0.6792, + "step": 248 + }, + { + "epoch": 15.62, + "learning_rate": 2.0833333333333336e-05, + "loss": 0.6507, + "step": 250 + }, + { + "epoch": 15.75, + "learning_rate": 2.1e-05, + "loss": 0.6608, + "step": 252 + }, + { + "epoch": 15.88, + "learning_rate": 2.116666666666667e-05, + "loss": 0.6996, + "step": 254 + }, + { + "epoch": 16.0, + "learning_rate": 2.1333333333333335e-05, + "loss": 0.6546, + "step": 256 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.5625, + "eval_loss": 0.6797133684158325, + "eval_macro_f1": 0.3752175581717081, + "eval_runtime": 0.889, + "eval_samples_per_second": 449.963, + "eval_steps_per_second": 4.5, + "step": 256 + }, + { + "epoch": 16.12, + "learning_rate": 2.15e-05, + "loss": 0.6848, + "step": 258 + }, + { + "epoch": 16.25, + "learning_rate": 2.1666666666666667e-05, + "loss": 0.6618, + "step": 260 + }, + { + "epoch": 16.38, + "learning_rate": 2.1833333333333333e-05, + "loss": 0.6626, + "step": 262 + }, + { + "epoch": 16.5, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.6863, + "step": 264 + }, + { + "epoch": 16.62, + "learning_rate": 2.216666666666667e-05, + "loss": 0.6545, + "step": 266 + }, + { + "epoch": 16.75, + "learning_rate": 2.2333333333333335e-05, + "loss": 0.6562, + "step": 268 + }, + { + "epoch": 16.88, + "learning_rate": 2.25e-05, + "loss": 0.6782, + "step": 270 + }, + { + "epoch": 17.0, + "learning_rate": 2.2666666666666668e-05, + "loss": 0.6436, + "step": 272 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.5675, + "eval_loss": 0.6785295009613037, + "eval_macro_f1": 0.3917820259283674, + "eval_runtime": 0.9187, + "eval_samples_per_second": 435.418, + "eval_steps_per_second": 4.354, + "step": 272 + }, + { + "epoch": 17.12, + "learning_rate": 2.2833333333333334e-05, + "loss": 0.6759, + "step": 274 + }, + { + "epoch": 17.25, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.6623, + "step": 276 + }, + { + "epoch": 17.38, + "learning_rate": 2.3166666666666666e-05, + "loss": 0.667, + "step": 278 + }, + { + "epoch": 17.5, + "learning_rate": 2.3333333333333336e-05, + "loss": 0.676, + "step": 280 + }, + { + "epoch": 17.62, + "learning_rate": 2.35e-05, + "loss": 0.634, + "step": 282 + }, + { + "epoch": 17.75, + "learning_rate": 2.3666666666666668e-05, + "loss": 0.6521, + "step": 284 + }, + { + "epoch": 17.88, + "learning_rate": 2.3833333333333334e-05, + "loss": 0.672, + "step": 286 + }, + { + "epoch": 18.0, + "learning_rate": 2.4e-05, + "loss": 0.655, + "step": 288 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.5775, + "eval_loss": 0.6765074133872986, + "eval_macro_f1": 0.4492916554651286, + "eval_runtime": 0.8818, + "eval_samples_per_second": 453.604, + "eval_steps_per_second": 4.536, + "step": 288 + }, + { + "epoch": 18.12, + "learning_rate": 2.4166666666666667e-05, + "loss": 0.6554, + "step": 290 + }, + { + "epoch": 18.25, + "learning_rate": 2.4333333333333336e-05, + "loss": 0.6581, + "step": 292 + }, + { + "epoch": 18.38, + "learning_rate": 2.45e-05, + "loss": 0.6406, + "step": 294 + }, + { + "epoch": 18.5, + "learning_rate": 2.466666666666667e-05, + "loss": 0.674, + "step": 296 + }, + { + "epoch": 18.62, + "learning_rate": 2.4833333333333335e-05, + "loss": 0.646, + "step": 298 + }, + { + "epoch": 18.75, + "learning_rate": 2.5e-05, + "loss": 0.6509, + "step": 300 + }, + { + "epoch": 18.88, + "learning_rate": 2.5166666666666667e-05, + "loss": 0.6736, + "step": 302 + }, + { + "epoch": 19.0, + "learning_rate": 2.5333333333333337e-05, + "loss": 0.6412, + "step": 304 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.585, + "eval_loss": 0.6732388138771057, + "eval_macro_f1": 0.47361745306950787, + "eval_runtime": 0.8851, + "eval_samples_per_second": 451.932, + "eval_steps_per_second": 4.519, + "step": 304 + }, + { + "epoch": 19.12, + "learning_rate": 2.5500000000000003e-05, + "loss": 0.6649, + "step": 306 + }, + { + "epoch": 19.25, + "learning_rate": 2.5666666666666666e-05, + "loss": 0.6332, + "step": 308 + }, + { + "epoch": 19.38, + "learning_rate": 2.5833333333333336e-05, + "loss": 0.6379, + "step": 310 + }, + { + "epoch": 19.5, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.6874, + "step": 312 + }, + { + "epoch": 19.62, + "learning_rate": 2.6166666666666668e-05, + "loss": 0.6256, + "step": 314 + }, + { + "epoch": 19.75, + "learning_rate": 2.633333333333333e-05, + "loss": 0.6299, + "step": 316 + }, + { + "epoch": 19.88, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.651, + "step": 318 + }, + { + "epoch": 20.0, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.6525, + "step": 320 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.565, + "eval_loss": 0.6680055260658264, + "eval_macro_f1": 0.4771006130544537, + "eval_runtime": 0.9151, + "eval_samples_per_second": 437.089, + "eval_steps_per_second": 4.371, + "step": 320 + } + ], + "max_steps": 320, + "num_train_epochs": 20, + "total_flos": 440805359616000.0, + "trial_name": null, + "trial_params": null +} diff --git a/scaling_performance/2000/L2/fold1/config.json b/scaling_performance/2000/L2/fold1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e65edc258393bd4895cab39ec2c8922a43c44c01 --- /dev/null +++ b/scaling_performance/2000/L2/fold1/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L2/fold1/eval_results.json b/scaling_performance/2000/L2/fold1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..185e75d1d39dfbb7d9a7377f6b61fbd62c89ce4b --- /dev/null +++ b/scaling_performance/2000/L2/fold1/eval_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.565, + "test_loss": 0.6680055260658264, + "test_macro_f1": 0.4771006130544537, + "test_runtime": 0.9664, + "test_samples_per_second": 413.918, + "test_steps_per_second": 4.139 +} \ No newline at end of file diff --git a/scaling_performance/2000/L2/fold2/all_results.json b/scaling_performance/2000/L2/fold2/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e2d5156fdf67cc352e71705b92244542685232f3 --- /dev/null +++ b/scaling_performance/2000/L2/fold2/all_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.5725, + "test_loss": 0.6825410723686218, + "test_macro_f1": 0.3640699523052464, + "test_runtime": 0.9, + "test_samples_per_second": 444.435, + "test_steps_per_second": 4.444 +} \ No newline at end of file diff --git a/scaling_performance/2000/L2/fold2/checkpoint-176/config.json b/scaling_performance/2000/L2/fold2/checkpoint-176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e65edc258393bd4895cab39ec2c8922a43c44c01 --- /dev/null +++ b/scaling_performance/2000/L2/fold2/checkpoint-176/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L2/fold2/checkpoint-176/trainer_state.json b/scaling_performance/2000/L2/fold2/checkpoint-176/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..089ddbe63d7b704e7793d9e2a1cb043882760476 --- /dev/null +++ b/scaling_performance/2000/L2/fold2/checkpoint-176/trainer_state.json @@ -0,0 +1,654 @@ +{ + "best_metric": 0.6825410723686218, + "best_model_checkpoint": "./models/240626_geneformer_CellClassifier_PM25_Layers2_L2048_B26_LR5e-05_LSlinear_WU600_E20_Oadamw_F0_fold2/checkpoint-144", + "epoch": 11.0, + "global_step": 176, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.12, + "learning_rate": 1.6666666666666668e-07, + "loss": 0.6951, + "step": 2 + }, + { + "epoch": 0.25, + "learning_rate": 3.3333333333333335e-07, + "loss": 0.6952, + "step": 4 + }, + { + "epoch": 0.38, + "learning_rate": 5.000000000000001e-07, + "loss": 0.6951, + "step": 6 + }, + { + "epoch": 0.5, + "learning_rate": 6.666666666666667e-07, + "loss": 0.6935, + "step": 8 + }, + { + "epoch": 0.62, + "learning_rate": 8.333333333333333e-07, + "loss": 0.6991, + "step": 10 + }, + { + "epoch": 0.75, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.7022, + "step": 12 + }, + { + "epoch": 0.88, + "learning_rate": 1.1666666666666668e-06, + "loss": 0.6951, + "step": 14 + }, + { + "epoch": 1.0, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.6907, + "step": 16 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.51, + "eval_loss": 0.6931056380271912, + "eval_macro_f1": 0.5034329001038738, + "eval_runtime": 0.9276, + "eval_samples_per_second": 431.232, + "eval_steps_per_second": 4.312, + "step": 16 + }, + { + "epoch": 1.12, + "learning_rate": 1.5e-06, + "loss": 0.6917, + "step": 18 + }, + { + "epoch": 1.25, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.6957, + "step": 20 + }, + { + "epoch": 1.38, + "learning_rate": 1.8333333333333335e-06, + "loss": 0.6985, + "step": 22 + }, + { + "epoch": 1.5, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.6933, + "step": 24 + }, + { + "epoch": 1.62, + "learning_rate": 2.166666666666667e-06, + "loss": 0.6958, + "step": 26 + }, + { + "epoch": 1.75, + "learning_rate": 2.3333333333333336e-06, + "loss": 0.6945, + "step": 28 + }, + { + "epoch": 1.88, + "learning_rate": 2.5e-06, + "loss": 0.697, + "step": 30 + }, + { + "epoch": 2.0, + "learning_rate": 2.666666666666667e-06, + "loss": 0.6915, + "step": 32 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.52, + "eval_loss": 0.6914763450622559, + "eval_macro_f1": 0.5156285476424733, + "eval_runtime": 0.9117, + "eval_samples_per_second": 438.75, + "eval_steps_per_second": 4.387, + "step": 32 + }, + { + "epoch": 2.12, + "learning_rate": 2.8333333333333335e-06, + "loss": 0.695, + "step": 34 + }, + { + "epoch": 2.25, + "learning_rate": 3e-06, + "loss": 0.6966, + "step": 36 + }, + { + "epoch": 2.38, + "learning_rate": 3.166666666666667e-06, + "loss": 0.693, + "step": 38 + }, + { + "epoch": 2.5, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.6895, + "step": 40 + }, + { + "epoch": 2.62, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.695, + "step": 42 + }, + { + "epoch": 2.75, + "learning_rate": 3.666666666666667e-06, + "loss": 0.6897, + "step": 44 + }, + { + "epoch": 2.88, + "learning_rate": 3.833333333333334e-06, + "loss": 0.6951, + "step": 46 + }, + { + "epoch": 3.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6909, + "step": 48 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.5225, + "eval_loss": 0.6889094710350037, + "eval_macro_f1": 0.48419851605803443, + "eval_runtime": 0.9272, + "eval_samples_per_second": 431.396, + "eval_steps_per_second": 4.314, + "step": 48 + }, + { + "epoch": 3.12, + "learning_rate": 4.166666666666667e-06, + "loss": 0.6979, + "step": 50 + }, + { + "epoch": 3.25, + "learning_rate": 4.333333333333334e-06, + "loss": 0.6887, + "step": 52 + }, + { + "epoch": 3.38, + "learning_rate": 4.5e-06, + "loss": 0.6828, + "step": 54 + }, + { + "epoch": 3.5, + "learning_rate": 4.666666666666667e-06, + "loss": 0.6968, + "step": 56 + }, + { + "epoch": 3.62, + "learning_rate": 4.833333333333333e-06, + "loss": 0.6933, + "step": 58 + }, + { + "epoch": 3.75, + "learning_rate": 5e-06, + "loss": 0.6863, + "step": 60 + }, + { + "epoch": 3.88, + "learning_rate": 5.166666666666667e-06, + "loss": 0.6978, + "step": 62 + }, + { + "epoch": 4.0, + "learning_rate": 5.333333333333334e-06, + "loss": 0.6794, + "step": 64 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.555, + "eval_loss": 0.6868128776550293, + "eval_macro_f1": 0.4569031273836766, + "eval_runtime": 0.9184, + "eval_samples_per_second": 435.532, + "eval_steps_per_second": 4.355, + "step": 64 + }, + { + "epoch": 4.12, + "learning_rate": 5.500000000000001e-06, + "loss": 0.6924, + "step": 66 + }, + { + "epoch": 4.25, + "learning_rate": 5.666666666666667e-06, + "loss": 0.682, + "step": 68 + }, + { + "epoch": 4.38, + "learning_rate": 5.833333333333334e-06, + "loss": 0.6801, + "step": 70 + }, + { + "epoch": 4.5, + "learning_rate": 6e-06, + "loss": 0.6992, + "step": 72 + }, + { + "epoch": 4.62, + "learning_rate": 6.166666666666667e-06, + "loss": 0.6884, + "step": 74 + }, + { + "epoch": 4.75, + "learning_rate": 6.333333333333334e-06, + "loss": 0.6889, + "step": 76 + }, + { + "epoch": 4.88, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.6991, + "step": 78 + }, + { + "epoch": 5.0, + "learning_rate": 6.666666666666667e-06, + "loss": 0.6817, + "step": 80 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.565, + "eval_loss": 0.6849876642227173, + "eval_macro_f1": 0.4037828947368421, + "eval_runtime": 0.9207, + "eval_samples_per_second": 434.464, + "eval_steps_per_second": 4.345, + "step": 80 + }, + { + "epoch": 5.12, + "learning_rate": 6.833333333333333e-06, + "loss": 0.6966, + "step": 82 + }, + { + "epoch": 5.25, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6822, + "step": 84 + }, + { + "epoch": 5.38, + "learning_rate": 7.166666666666667e-06, + "loss": 0.6888, + "step": 86 + }, + { + "epoch": 5.5, + "learning_rate": 7.333333333333334e-06, + "loss": 0.6925, + "step": 88 + }, + { + "epoch": 5.62, + "learning_rate": 7.5e-06, + "loss": 0.6816, + "step": 90 + }, + { + "epoch": 5.75, + "learning_rate": 7.666666666666667e-06, + "loss": 0.6834, + "step": 92 + }, + { + "epoch": 5.88, + "learning_rate": 7.833333333333333e-06, + "loss": 0.6894, + "step": 94 + }, + { + "epoch": 6.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.68, + "step": 96 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.5775, + "eval_loss": 0.6844278573989868, + "eval_macro_f1": 0.4012983677409641, + "eval_runtime": 0.9679, + "eval_samples_per_second": 413.286, + "eval_steps_per_second": 4.133, + "step": 96 + }, + { + "epoch": 6.12, + "learning_rate": 8.166666666666668e-06, + "loss": 0.6914, + "step": 98 + }, + { + "epoch": 6.25, + "learning_rate": 8.333333333333334e-06, + "loss": 0.6837, + "step": 100 + }, + { + "epoch": 6.38, + "learning_rate": 8.500000000000002e-06, + "loss": 0.6772, + "step": 102 + }, + { + "epoch": 6.5, + "learning_rate": 8.666666666666668e-06, + "loss": 0.6985, + "step": 104 + }, + { + "epoch": 6.62, + "learning_rate": 8.833333333333334e-06, + "loss": 0.6801, + "step": 106 + }, + { + "epoch": 6.75, + "learning_rate": 9e-06, + "loss": 0.6817, + "step": 108 + }, + { + "epoch": 6.88, + "learning_rate": 9.166666666666666e-06, + "loss": 0.6974, + "step": 110 + }, + { + "epoch": 7.0, + "learning_rate": 9.333333333333334e-06, + "loss": 0.6693, + "step": 112 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.575, + "eval_loss": 0.6832872033119202, + "eval_macro_f1": 0.37571150526972935, + "eval_runtime": 1.0002, + "eval_samples_per_second": 399.934, + "eval_steps_per_second": 3.999, + "step": 112 + }, + { + "epoch": 7.12, + "learning_rate": 9.5e-06, + "loss": 0.6856, + "step": 114 + }, + { + "epoch": 7.25, + "learning_rate": 9.666666666666667e-06, + "loss": 0.6872, + "step": 116 + }, + { + "epoch": 7.38, + "learning_rate": 9.833333333333333e-06, + "loss": 0.6762, + "step": 118 + }, + { + "epoch": 7.5, + "learning_rate": 1e-05, + "loss": 0.707, + "step": 120 + }, + { + "epoch": 7.62, + "learning_rate": 1.0166666666666667e-05, + "loss": 0.6748, + "step": 122 + }, + { + "epoch": 7.75, + "learning_rate": 1.0333333333333333e-05, + "loss": 0.6752, + "step": 124 + }, + { + "epoch": 7.88, + "learning_rate": 1.05e-05, + "loss": 0.6962, + "step": 126 + }, + { + "epoch": 8.0, + "learning_rate": 1.0666666666666667e-05, + "loss": 0.6656, + "step": 128 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.5725, + "eval_loss": 0.6828912496566772, + "eval_macro_f1": 0.3640699523052464, + "eval_runtime": 1.0195, + "eval_samples_per_second": 392.347, + "eval_steps_per_second": 3.923, + "step": 128 + }, + { + "epoch": 8.12, + "learning_rate": 1.0833333333333334e-05, + "loss": 0.6853, + "step": 130 + }, + { + "epoch": 8.25, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.6768, + "step": 132 + }, + { + "epoch": 8.38, + "learning_rate": 1.1166666666666668e-05, + "loss": 0.6818, + "step": 134 + }, + { + "epoch": 8.5, + "learning_rate": 1.1333333333333334e-05, + "loss": 0.7109, + "step": 136 + }, + { + "epoch": 8.62, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.6753, + "step": 138 + }, + { + "epoch": 8.75, + "learning_rate": 1.1666666666666668e-05, + "loss": 0.665, + "step": 140 + }, + { + "epoch": 8.88, + "learning_rate": 1.1833333333333334e-05, + "loss": 0.703, + "step": 142 + }, + { + "epoch": 9.0, + "learning_rate": 1.2e-05, + "loss": 0.6611, + "step": 144 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.5725, + "eval_loss": 0.6825410723686218, + "eval_macro_f1": 0.3640699523052464, + "eval_runtime": 0.8832, + "eval_samples_per_second": 452.887, + "eval_steps_per_second": 4.529, + "step": 144 + }, + { + "epoch": 9.12, + "learning_rate": 1.2166666666666668e-05, + "loss": 0.699, + "step": 146 + }, + { + "epoch": 9.25, + "learning_rate": 1.2333333333333334e-05, + "loss": 0.6714, + "step": 148 + }, + { + "epoch": 9.38, + "learning_rate": 1.25e-05, + "loss": 0.6725, + "step": 150 + }, + { + "epoch": 9.5, + "learning_rate": 1.2666666666666668e-05, + "loss": 0.7002, + "step": 152 + }, + { + "epoch": 9.62, + "learning_rate": 1.2833333333333333e-05, + "loss": 0.6654, + "step": 154 + }, + { + "epoch": 9.75, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.6717, + "step": 156 + }, + { + "epoch": 9.88, + "learning_rate": 1.3166666666666665e-05, + "loss": 0.7024, + "step": 158 + }, + { + "epoch": 10.0, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.6777, + "step": 160 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.5725, + "eval_loss": 0.6827515363693237, + "eval_macro_f1": 0.3640699523052464, + "eval_runtime": 0.9287, + "eval_samples_per_second": 430.704, + "eval_steps_per_second": 4.307, + "step": 160 + }, + { + "epoch": 10.12, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.6908, + "step": 162 + }, + { + "epoch": 10.25, + "learning_rate": 1.3666666666666666e-05, + "loss": 0.6851, + "step": 164 + }, + { + "epoch": 10.38, + "learning_rate": 1.3833333333333334e-05, + "loss": 0.6769, + "step": 166 + }, + { + "epoch": 10.5, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.6925, + "step": 168 + }, + { + "epoch": 10.62, + "learning_rate": 1.4166666666666668e-05, + "loss": 0.669, + "step": 170 + }, + { + "epoch": 10.75, + "learning_rate": 1.4333333333333334e-05, + "loss": 0.6761, + "step": 172 + }, + { + "epoch": 10.88, + "learning_rate": 1.45e-05, + "loss": 0.6964, + "step": 174 + }, + { + "epoch": 11.0, + "learning_rate": 1.4666666666666668e-05, + "loss": 0.6529, + "step": 176 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.5725, + "eval_loss": 0.6835572719573975, + "eval_macro_f1": 0.3640699523052464, + "eval_runtime": 0.9118, + "eval_samples_per_second": 438.682, + "eval_steps_per_second": 4.387, + "step": 176 + } + ], + "max_steps": 320, + "num_train_epochs": 20, + "total_flos": 242442947788800.0, + "trial_name": null, + "trial_params": null +} diff --git a/scaling_performance/2000/L2/fold2/config.json b/scaling_performance/2000/L2/fold2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e65edc258393bd4895cab39ec2c8922a43c44c01 --- /dev/null +++ b/scaling_performance/2000/L2/fold2/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L2/fold2/eval_results.json b/scaling_performance/2000/L2/fold2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e2d5156fdf67cc352e71705b92244542685232f3 --- /dev/null +++ b/scaling_performance/2000/L2/fold2/eval_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.5725, + "test_loss": 0.6825410723686218, + "test_macro_f1": 0.3640699523052464, + "test_runtime": 0.9, + "test_samples_per_second": 444.435, + "test_steps_per_second": 4.444 +} \ No newline at end of file diff --git a/scaling_performance/2000/L2/fold3/all_results.json b/scaling_performance/2000/L2/fold3/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d9bf5fe0efeb36d318924dc904bccebb0b5d321f --- /dev/null +++ b/scaling_performance/2000/L2/fold3/all_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.5425, + "test_loss": 0.6928573846817017, + "test_macro_f1": 0.36142928178241485, + "test_runtime": 0.9417, + "test_samples_per_second": 424.768, + "test_steps_per_second": 4.248 +} \ No newline at end of file diff --git a/scaling_performance/2000/L2/fold3/checkpoint-128/config.json b/scaling_performance/2000/L2/fold3/checkpoint-128/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e65edc258393bd4895cab39ec2c8922a43c44c01 --- /dev/null +++ b/scaling_performance/2000/L2/fold3/checkpoint-128/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L2/fold3/checkpoint-128/trainer_state.json b/scaling_performance/2000/L2/fold3/checkpoint-128/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e1fc20683376358fe628db79a7214050c4d20dea --- /dev/null +++ b/scaling_performance/2000/L2/fold3/checkpoint-128/trainer_state.json @@ -0,0 +1,480 @@ +{ + "best_metric": 0.6928573846817017, + "best_model_checkpoint": "./models/240626_geneformer_CellClassifier_PM25_Layers2_L2048_B26_LR5e-05_LSlinear_WU600_E20_Oadamw_F0_fold3/checkpoint-96", + "epoch": 8.0, + "global_step": 128, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.12, + "learning_rate": 1.6666666666666668e-07, + "loss": 0.6926, + "step": 2 + }, + { + "epoch": 0.25, + "learning_rate": 3.3333333333333335e-07, + "loss": 0.7166, + "step": 4 + }, + { + "epoch": 0.38, + "learning_rate": 5.000000000000001e-07, + "loss": 0.718, + "step": 6 + }, + { + "epoch": 0.5, + "learning_rate": 6.666666666666667e-07, + "loss": 0.6976, + "step": 8 + }, + { + "epoch": 0.62, + "learning_rate": 8.333333333333333e-07, + "loss": 0.723, + "step": 10 + }, + { + "epoch": 0.75, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.7089, + "step": 12 + }, + { + "epoch": 0.88, + "learning_rate": 1.1666666666666668e-06, + "loss": 0.7068, + "step": 14 + }, + { + "epoch": 1.0, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.6981, + "step": 16 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.4475, + "eval_loss": 0.7057616710662842, + "eval_macro_f1": 0.32394710880321814, + "eval_runtime": 0.9148, + "eval_samples_per_second": 437.24, + "eval_steps_per_second": 4.372, + "step": 16 + }, + { + "epoch": 1.12, + "learning_rate": 1.5e-06, + "loss": 0.6998, + "step": 18 + }, + { + "epoch": 1.25, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.7138, + "step": 20 + }, + { + "epoch": 1.38, + "learning_rate": 1.8333333333333335e-06, + "loss": 0.714, + "step": 22 + }, + { + "epoch": 1.5, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.699, + "step": 24 + }, + { + "epoch": 1.62, + "learning_rate": 2.166666666666667e-06, + "loss": 0.7097, + "step": 26 + }, + { + "epoch": 1.75, + "learning_rate": 2.3333333333333336e-06, + "loss": 0.7077, + "step": 28 + }, + { + "epoch": 1.88, + "learning_rate": 2.5e-06, + "loss": 0.6976, + "step": 30 + }, + { + "epoch": 2.0, + "learning_rate": 2.666666666666667e-06, + "loss": 0.719, + "step": 32 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.45, + "eval_loss": 0.7029770612716675, + "eval_macro_f1": 0.34523809523809523, + "eval_runtime": 0.8837, + "eval_samples_per_second": 452.645, + "eval_steps_per_second": 4.526, + "step": 32 + }, + { + "epoch": 2.12, + "learning_rate": 2.8333333333333335e-06, + "loss": 0.6957, + "step": 34 + }, + { + "epoch": 2.25, + "learning_rate": 3e-06, + "loss": 0.7042, + "step": 36 + }, + { + "epoch": 2.38, + "learning_rate": 3.166666666666667e-06, + "loss": 0.7073, + "step": 38 + }, + { + "epoch": 2.5, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.6925, + "step": 40 + }, + { + "epoch": 2.62, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.7099, + "step": 42 + }, + { + "epoch": 2.75, + "learning_rate": 3.666666666666667e-06, + "loss": 0.6996, + "step": 44 + }, + { + "epoch": 2.88, + "learning_rate": 3.833333333333334e-06, + "loss": 0.6964, + "step": 46 + }, + { + "epoch": 3.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7045, + "step": 48 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.44, + "eval_loss": 0.6990571618080139, + "eval_macro_f1": 0.3783131192584164, + "eval_runtime": 0.9721, + "eval_samples_per_second": 411.464, + "eval_steps_per_second": 4.115, + "step": 48 + }, + { + "epoch": 3.12, + "learning_rate": 4.166666666666667e-06, + "loss": 0.6963, + "step": 50 + }, + { + "epoch": 3.25, + "learning_rate": 4.333333333333334e-06, + "loss": 0.6986, + "step": 52 + }, + { + "epoch": 3.38, + "learning_rate": 4.5e-06, + "loss": 0.6979, + "step": 54 + }, + { + "epoch": 3.5, + "learning_rate": 4.666666666666667e-06, + "loss": 0.6959, + "step": 56 + }, + { + "epoch": 3.62, + "learning_rate": 4.833333333333333e-06, + "loss": 0.6955, + "step": 58 + }, + { + "epoch": 3.75, + "learning_rate": 5e-06, + "loss": 0.6924, + "step": 60 + }, + { + "epoch": 3.88, + "learning_rate": 5.166666666666667e-06, + "loss": 0.6915, + "step": 62 + }, + { + "epoch": 4.0, + "learning_rate": 5.333333333333334e-06, + "loss": 0.6959, + "step": 64 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.5, + "eval_loss": 0.6955833435058594, + "eval_macro_f1": 0.4666666666666667, + "eval_runtime": 0.9061, + "eval_samples_per_second": 441.46, + "eval_steps_per_second": 4.415, + "step": 64 + }, + { + "epoch": 4.12, + "learning_rate": 5.500000000000001e-06, + "loss": 0.6924, + "step": 66 + }, + { + "epoch": 4.25, + "learning_rate": 5.666666666666667e-06, + "loss": 0.6883, + "step": 68 + }, + { + "epoch": 4.38, + "learning_rate": 5.833333333333334e-06, + "loss": 0.6917, + "step": 70 + }, + { + "epoch": 4.5, + "learning_rate": 6e-06, + "loss": 0.694, + "step": 72 + }, + { + "epoch": 4.62, + "learning_rate": 6.166666666666667e-06, + "loss": 0.686, + "step": 74 + }, + { + "epoch": 4.75, + "learning_rate": 6.333333333333334e-06, + "loss": 0.6822, + "step": 76 + }, + { + "epoch": 4.88, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.6976, + "step": 78 + }, + { + "epoch": 5.0, + "learning_rate": 6.666666666666667e-06, + "loss": 0.6852, + "step": 80 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5375, + "eval_loss": 0.6935336589813232, + "eval_macro_f1": 0.397153587343484, + "eval_runtime": 1.0976, + "eval_samples_per_second": 364.418, + "eval_steps_per_second": 3.644, + "step": 80 + }, + { + "epoch": 5.12, + "learning_rate": 6.833333333333333e-06, + "loss": 0.6904, + "step": 82 + }, + { + "epoch": 5.25, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6867, + "step": 84 + }, + { + "epoch": 5.38, + "learning_rate": 7.166666666666667e-06, + "loss": 0.6842, + "step": 86 + }, + { + "epoch": 5.5, + "learning_rate": 7.333333333333334e-06, + "loss": 0.6996, + "step": 88 + }, + { + "epoch": 5.62, + "learning_rate": 7.5e-06, + "loss": 0.6824, + "step": 90 + }, + { + "epoch": 5.75, + "learning_rate": 7.666666666666667e-06, + "loss": 0.6809, + "step": 92 + }, + { + "epoch": 5.88, + "learning_rate": 7.833333333333333e-06, + "loss": 0.6865, + "step": 94 + }, + { + "epoch": 6.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6746, + "step": 96 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.5425, + "eval_loss": 0.6928573846817017, + "eval_macro_f1": 0.36142928178241485, + "eval_runtime": 0.9106, + "eval_samples_per_second": 439.253, + "eval_steps_per_second": 4.393, + "step": 96 + }, + { + "epoch": 6.12, + "learning_rate": 8.166666666666668e-06, + "loss": 0.6961, + "step": 98 + }, + { + "epoch": 6.25, + "learning_rate": 8.333333333333334e-06, + "loss": 0.6679, + "step": 100 + }, + { + "epoch": 6.38, + "learning_rate": 8.500000000000002e-06, + "loss": 0.683, + "step": 102 + }, + { + "epoch": 6.5, + "learning_rate": 8.666666666666668e-06, + "loss": 0.7001, + "step": 104 + }, + { + "epoch": 6.62, + "learning_rate": 8.833333333333334e-06, + "loss": 0.6785, + "step": 106 + }, + { + "epoch": 6.75, + "learning_rate": 9e-06, + "loss": 0.6704, + "step": 108 + }, + { + "epoch": 6.88, + "learning_rate": 9.166666666666666e-06, + "loss": 0.6951, + "step": 110 + }, + { + "epoch": 7.0, + "learning_rate": 9.333333333333334e-06, + "loss": 0.681, + "step": 112 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.54, + "eval_loss": 0.6930812001228333, + "eval_macro_f1": 0.35064935064935066, + "eval_runtime": 0.8868, + "eval_samples_per_second": 451.047, + "eval_steps_per_second": 4.51, + "step": 112 + }, + { + "epoch": 7.12, + "learning_rate": 9.5e-06, + "loss": 0.699, + "step": 114 + }, + { + "epoch": 7.25, + "learning_rate": 9.666666666666667e-06, + "loss": 0.6715, + "step": 116 + }, + { + "epoch": 7.38, + "learning_rate": 9.833333333333333e-06, + "loss": 0.6644, + "step": 118 + }, + { + "epoch": 7.5, + "learning_rate": 1e-05, + "loss": 0.7048, + "step": 120 + }, + { + "epoch": 7.62, + "learning_rate": 1.0166666666666667e-05, + "loss": 0.6792, + "step": 122 + }, + { + "epoch": 7.75, + "learning_rate": 1.0333333333333333e-05, + "loss": 0.6736, + "step": 124 + }, + { + "epoch": 7.88, + "learning_rate": 1.05e-05, + "loss": 0.6959, + "step": 126 + }, + { + "epoch": 8.0, + "learning_rate": 1.0666666666666667e-05, + "loss": 0.6655, + "step": 128 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.54, + "eval_loss": 0.6934822797775269, + "eval_macro_f1": 0.35064935064935066, + "eval_runtime": 0.9074, + "eval_samples_per_second": 440.807, + "eval_steps_per_second": 4.408, + "step": 128 + } + ], + "max_steps": 320, + "num_train_epochs": 20, + "total_flos": 176322143846400.0, + "trial_name": null, + "trial_params": null +} diff --git a/scaling_performance/2000/L2/fold3/config.json b/scaling_performance/2000/L2/fold3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e65edc258393bd4895cab39ec2c8922a43c44c01 --- /dev/null +++ b/scaling_performance/2000/L2/fold3/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L2/fold3/eval_results.json b/scaling_performance/2000/L2/fold3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d9bf5fe0efeb36d318924dc904bccebb0b5d321f --- /dev/null +++ b/scaling_performance/2000/L2/fold3/eval_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.5425, + "test_loss": 0.6928573846817017, + "test_macro_f1": 0.36142928178241485, + "test_runtime": 0.9417, + "test_samples_per_second": 424.768, + "test_steps_per_second": 4.248 +} \ No newline at end of file diff --git a/scaling_performance/2000/L2/fold4/all_results.json b/scaling_performance/2000/L2/fold4/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fb18a0b01420505b67575bf8567ad8f273785b5b --- /dev/null +++ b/scaling_performance/2000/L2/fold4/all_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.5775, + "test_loss": 0.6842468976974487, + "test_macro_f1": 0.38194285714285714, + "test_runtime": 1.0481, + "test_samples_per_second": 381.652, + "test_steps_per_second": 3.817 +} \ No newline at end of file diff --git a/scaling_performance/2000/L2/fold4/checkpoint-160/config.json b/scaling_performance/2000/L2/fold4/checkpoint-160/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e65edc258393bd4895cab39ec2c8922a43c44c01 --- /dev/null +++ b/scaling_performance/2000/L2/fold4/checkpoint-160/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L2/fold4/checkpoint-160/trainer_state.json b/scaling_performance/2000/L2/fold4/checkpoint-160/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..25e64ee5e38b427172bb036f84d88844c368a381 --- /dev/null +++ b/scaling_performance/2000/L2/fold4/checkpoint-160/trainer_state.json @@ -0,0 +1,596 @@ +{ + "best_metric": 0.6842468976974487, + "best_model_checkpoint": "./models/240626_geneformer_CellClassifier_PM25_Layers2_L2048_B26_LR5e-05_LSlinear_WU600_E20_Oadamw_F0_fold4/checkpoint-128", + "epoch": 10.0, + "global_step": 160, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.12, + "learning_rate": 1.6666666666666668e-07, + "loss": 0.6937, + "step": 2 + }, + { + "epoch": 0.25, + "learning_rate": 3.3333333333333335e-07, + "loss": 0.7018, + "step": 4 + }, + { + "epoch": 0.38, + "learning_rate": 5.000000000000001e-07, + "loss": 0.7031, + "step": 6 + }, + { + "epoch": 0.5, + "learning_rate": 6.666666666666667e-07, + "loss": 0.6949, + "step": 8 + }, + { + "epoch": 0.62, + "learning_rate": 8.333333333333333e-07, + "loss": 0.694, + "step": 10 + }, + { + "epoch": 0.75, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.7025, + "step": 12 + }, + { + "epoch": 0.88, + "learning_rate": 1.1666666666666668e-06, + "loss": 0.6975, + "step": 14 + }, + { + "epoch": 1.0, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.7029, + "step": 16 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.4575, + "eval_loss": 0.6974406242370605, + "eval_macro_f1": 0.4533144386710754, + "eval_runtime": 0.9243, + "eval_samples_per_second": 432.744, + "eval_steps_per_second": 4.327, + "step": 16 + }, + { + "epoch": 1.12, + "learning_rate": 1.5e-06, + "loss": 0.6934, + "step": 18 + }, + { + "epoch": 1.25, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.7034, + "step": 20 + }, + { + "epoch": 1.38, + "learning_rate": 1.8333333333333335e-06, + "loss": 0.6945, + "step": 22 + }, + { + "epoch": 1.5, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.6894, + "step": 24 + }, + { + "epoch": 1.62, + "learning_rate": 2.166666666666667e-06, + "loss": 0.7029, + "step": 26 + }, + { + "epoch": 1.75, + "learning_rate": 2.3333333333333336e-06, + "loss": 0.7002, + "step": 28 + }, + { + "epoch": 1.88, + "learning_rate": 2.5e-06, + "loss": 0.6943, + "step": 30 + }, + { + "epoch": 2.0, + "learning_rate": 2.666666666666667e-06, + "loss": 0.6983, + "step": 32 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.485, + "eval_loss": 0.695452094078064, + "eval_macro_f1": 0.4841746794871795, + "eval_runtime": 0.9144, + "eval_samples_per_second": 437.423, + "eval_steps_per_second": 4.374, + "step": 32 + }, + { + "epoch": 2.12, + "learning_rate": 2.8333333333333335e-06, + "loss": 0.6959, + "step": 34 + }, + { + "epoch": 2.25, + "learning_rate": 3e-06, + "loss": 0.6983, + "step": 36 + }, + { + "epoch": 2.38, + "learning_rate": 3.166666666666667e-06, + "loss": 0.6905, + "step": 38 + }, + { + "epoch": 2.5, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.6929, + "step": 40 + }, + { + "epoch": 2.62, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.694, + "step": 42 + }, + { + "epoch": 2.75, + "learning_rate": 3.666666666666667e-06, + "loss": 0.6945, + "step": 44 + }, + { + "epoch": 2.88, + "learning_rate": 3.833333333333334e-06, + "loss": 0.6967, + "step": 46 + }, + { + "epoch": 3.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6968, + "step": 48 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.495, + "eval_loss": 0.6920776963233948, + "eval_macro_f1": 0.47608673098869175, + "eval_runtime": 0.9068, + "eval_samples_per_second": 441.113, + "eval_steps_per_second": 4.411, + "step": 48 + }, + { + "epoch": 3.12, + "learning_rate": 4.166666666666667e-06, + "loss": 0.6908, + "step": 50 + }, + { + "epoch": 3.25, + "learning_rate": 4.333333333333334e-06, + "loss": 0.6944, + "step": 52 + }, + { + "epoch": 3.38, + "learning_rate": 4.5e-06, + "loss": 0.6914, + "step": 54 + }, + { + "epoch": 3.5, + "learning_rate": 4.666666666666667e-06, + "loss": 0.6911, + "step": 56 + }, + { + "epoch": 3.62, + "learning_rate": 4.833333333333333e-06, + "loss": 0.6893, + "step": 58 + }, + { + "epoch": 3.75, + "learning_rate": 5e-06, + "loss": 0.6949, + "step": 60 + }, + { + "epoch": 3.88, + "learning_rate": 5.166666666666667e-06, + "loss": 0.6949, + "step": 62 + }, + { + "epoch": 4.0, + "learning_rate": 5.333333333333334e-06, + "loss": 0.6868, + "step": 64 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.5325, + "eval_loss": 0.6893474459648132, + "eval_macro_f1": 0.46399524188289587, + "eval_runtime": 0.9384, + "eval_samples_per_second": 426.254, + "eval_steps_per_second": 4.263, + "step": 64 + }, + { + "epoch": 4.12, + "learning_rate": 5.500000000000001e-06, + "loss": 0.6984, + "step": 66 + }, + { + "epoch": 4.25, + "learning_rate": 5.666666666666667e-06, + "loss": 0.6822, + "step": 68 + }, + { + "epoch": 4.38, + "learning_rate": 5.833333333333334e-06, + "loss": 0.6864, + "step": 70 + }, + { + "epoch": 4.5, + "learning_rate": 6e-06, + "loss": 0.7002, + "step": 72 + }, + { + "epoch": 4.62, + "learning_rate": 6.166666666666667e-06, + "loss": 0.6897, + "step": 74 + }, + { + "epoch": 4.75, + "learning_rate": 6.333333333333334e-06, + "loss": 0.6903, + "step": 76 + }, + { + "epoch": 4.88, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.6941, + "step": 78 + }, + { + "epoch": 5.0, + "learning_rate": 6.666666666666667e-06, + "loss": 0.6779, + "step": 80 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.575, + "eval_loss": 0.6872670650482178, + "eval_macro_f1": 0.41748903508771934, + "eval_runtime": 0.928, + "eval_samples_per_second": 431.013, + "eval_steps_per_second": 4.31, + "step": 80 + }, + { + "epoch": 5.12, + "learning_rate": 6.833333333333333e-06, + "loss": 0.7031, + "step": 82 + }, + { + "epoch": 5.25, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6892, + "step": 84 + }, + { + "epoch": 5.38, + "learning_rate": 7.166666666666667e-06, + "loss": 0.6823, + "step": 86 + }, + { + "epoch": 5.5, + "learning_rate": 7.333333333333334e-06, + "loss": 0.6936, + "step": 88 + }, + { + "epoch": 5.62, + "learning_rate": 7.5e-06, + "loss": 0.6907, + "step": 90 + }, + { + "epoch": 5.75, + "learning_rate": 7.666666666666667e-06, + "loss": 0.681, + "step": 92 + }, + { + "epoch": 5.88, + "learning_rate": 7.833333333333333e-06, + "loss": 0.6965, + "step": 94 + }, + { + "epoch": 6.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6675, + "step": 96 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.58, + "eval_loss": 0.6864795088768005, + "eval_macro_f1": 0.4159365874009178, + "eval_runtime": 0.9734, + "eval_samples_per_second": 410.925, + "eval_steps_per_second": 4.109, + "step": 96 + }, + { + "epoch": 6.12, + "learning_rate": 8.166666666666668e-06, + "loss": 0.6914, + "step": 98 + }, + { + "epoch": 6.25, + "learning_rate": 8.333333333333334e-06, + "loss": 0.6865, + "step": 100 + }, + { + "epoch": 6.38, + "learning_rate": 8.500000000000002e-06, + "loss": 0.6801, + "step": 102 + }, + { + "epoch": 6.5, + "learning_rate": 8.666666666666668e-06, + "loss": 0.699, + "step": 104 + }, + { + "epoch": 6.62, + "learning_rate": 8.833333333333334e-06, + "loss": 0.6816, + "step": 106 + }, + { + "epoch": 6.75, + "learning_rate": 9e-06, + "loss": 0.6803, + "step": 108 + }, + { + "epoch": 6.88, + "learning_rate": 9.166666666666666e-06, + "loss": 0.6934, + "step": 110 + }, + { + "epoch": 7.0, + "learning_rate": 9.333333333333334e-06, + "loss": 0.6765, + "step": 112 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.5775, + "eval_loss": 0.6850780248641968, + "eval_macro_f1": 0.38696484116404134, + "eval_runtime": 0.9277, + "eval_samples_per_second": 431.159, + "eval_steps_per_second": 4.312, + "step": 112 + }, + { + "epoch": 7.12, + "learning_rate": 9.5e-06, + "loss": 0.6959, + "step": 114 + }, + { + "epoch": 7.25, + "learning_rate": 9.666666666666667e-06, + "loss": 0.6815, + "step": 116 + }, + { + "epoch": 7.38, + "learning_rate": 9.833333333333333e-06, + "loss": 0.6763, + "step": 118 + }, + { + "epoch": 7.5, + "learning_rate": 1e-05, + "loss": 0.698, + "step": 120 + }, + { + "epoch": 7.62, + "learning_rate": 1.0166666666666667e-05, + "loss": 0.6784, + "step": 122 + }, + { + "epoch": 7.75, + "learning_rate": 1.0333333333333333e-05, + "loss": 0.6672, + "step": 124 + }, + { + "epoch": 7.88, + "learning_rate": 1.05e-05, + "loss": 0.7066, + "step": 126 + }, + { + "epoch": 8.0, + "learning_rate": 1.0666666666666667e-05, + "loss": 0.6902, + "step": 128 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.5775, + "eval_loss": 0.6842468976974487, + "eval_macro_f1": 0.38194285714285714, + "eval_runtime": 0.884, + "eval_samples_per_second": 452.506, + "eval_steps_per_second": 4.525, + "step": 128 + }, + { + "epoch": 8.12, + "learning_rate": 1.0833333333333334e-05, + "loss": 0.6862, + "step": 130 + }, + { + "epoch": 8.25, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.6784, + "step": 132 + }, + { + "epoch": 8.38, + "learning_rate": 1.1166666666666668e-05, + "loss": 0.6774, + "step": 134 + }, + { + "epoch": 8.5, + "learning_rate": 1.1333333333333334e-05, + "loss": 0.7004, + "step": 136 + }, + { + "epoch": 8.62, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.6755, + "step": 138 + }, + { + "epoch": 8.75, + "learning_rate": 1.1666666666666668e-05, + "loss": 0.68, + "step": 140 + }, + { + "epoch": 8.88, + "learning_rate": 1.1833333333333334e-05, + "loss": 0.7023, + "step": 142 + }, + { + "epoch": 9.0, + "learning_rate": 1.2e-05, + "loss": 0.6802, + "step": 144 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.5775, + "eval_loss": 0.6846153140068054, + "eval_macro_f1": 0.38696484116404134, + "eval_runtime": 0.9145, + "eval_samples_per_second": 437.397, + "eval_steps_per_second": 4.374, + "step": 144 + }, + { + "epoch": 9.12, + "learning_rate": 1.2166666666666668e-05, + "loss": 0.6962, + "step": 146 + }, + { + "epoch": 9.25, + "learning_rate": 1.2333333333333334e-05, + "loss": 0.6717, + "step": 148 + }, + { + "epoch": 9.38, + "learning_rate": 1.25e-05, + "loss": 0.6783, + "step": 150 + }, + { + "epoch": 9.5, + "learning_rate": 1.2666666666666668e-05, + "loss": 0.7028, + "step": 152 + }, + { + "epoch": 9.62, + "learning_rate": 1.2833333333333333e-05, + "loss": 0.683, + "step": 154 + }, + { + "epoch": 9.75, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.6835, + "step": 156 + }, + { + "epoch": 9.88, + "learning_rate": 1.3166666666666665e-05, + "loss": 0.687, + "step": 158 + }, + { + "epoch": 10.0, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.6693, + "step": 160 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.5875, + "eval_loss": 0.6855114698410034, + "eval_macro_f1": 0.4199077125906394, + "eval_runtime": 1.013, + "eval_samples_per_second": 394.875, + "eval_steps_per_second": 3.949, + "step": 160 + } + ], + "max_steps": 320, + "num_train_epochs": 20, + "total_flos": 220402679808000.0, + "trial_name": null, + "trial_params": null +} diff --git a/scaling_performance/2000/L2/fold4/config.json b/scaling_performance/2000/L2/fold4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e65edc258393bd4895cab39ec2c8922a43c44c01 --- /dev/null +++ b/scaling_performance/2000/L2/fold4/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L2/fold4/eval_results.json b/scaling_performance/2000/L2/fold4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fb18a0b01420505b67575bf8567ad8f273785b5b --- /dev/null +++ b/scaling_performance/2000/L2/fold4/eval_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.5775, + "test_loss": 0.6842468976974487, + "test_macro_f1": 0.38194285714285714, + "test_runtime": 1.0481, + "test_samples_per_second": 381.652, + "test_steps_per_second": 3.817 +} \ No newline at end of file diff --git a/scaling_performance/2000/L4/.DS_Store b/scaling_performance/2000/L4/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..713fd9972e55d4a20ebcdb7f71e8dd6c0f9f5131 Binary files /dev/null and b/scaling_performance/2000/L4/.DS_Store differ diff --git a/scaling_performance/2000/L4/fold0/all_results.json b/scaling_performance/2000/L4/fold0/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4ef171236c0d5af1a96d6048054a4b6f097f3117 --- /dev/null +++ b/scaling_performance/2000/L4/fold0/all_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.575, + "test_loss": 0.6820950508117676, + "test_macro_f1": 0.36507936507936506, + "test_runtime": 1.1206, + "test_samples_per_second": 356.963, + "test_steps_per_second": 3.57 +} \ No newline at end of file diff --git a/scaling_performance/2000/L4/fold0/checkpoint-208/config.json b/scaling_performance/2000/L4/fold0/checkpoint-208/config.json new file mode 100644 index 0000000000000000000000000000000000000000..024d326aee4a2165ea7a3d641fbbf7d796d64eef --- /dev/null +++ b/scaling_performance/2000/L4/fold0/checkpoint-208/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 4, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L4/fold0/checkpoint-208/trainer_state.json b/scaling_performance/2000/L4/fold0/checkpoint-208/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cbd07cbac6d047dea55cd848daa7e518ed5895ae --- /dev/null +++ b/scaling_performance/2000/L4/fold0/checkpoint-208/trainer_state.json @@ -0,0 +1,770 @@ +{ + "best_metric": 0.6820950508117676, + "best_model_checkpoint": "./models/240626_geneformer_CellClassifier_PM25_Layers4_L2048_B26_LR5e-05_LSlinear_WU600_E20_Oadamw_F0_fold0/checkpoint-176", + "epoch": 13.0, + "global_step": 208, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.12, + "learning_rate": 1.6666666666666668e-07, + "loss": 0.7054, + "step": 2 + }, + { + "epoch": 0.25, + "learning_rate": 3.3333333333333335e-07, + "loss": 0.6834, + "step": 4 + }, + { + "epoch": 0.38, + "learning_rate": 5.000000000000001e-07, + "loss": 0.6727, + "step": 6 + }, + { + "epoch": 0.5, + "learning_rate": 6.666666666666667e-07, + "loss": 0.6992, + "step": 8 + }, + { + "epoch": 0.62, + "learning_rate": 8.333333333333333e-07, + "loss": 0.686, + "step": 10 + }, + { + "epoch": 0.75, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.68, + "step": 12 + }, + { + "epoch": 0.88, + "learning_rate": 1.1666666666666668e-06, + "loss": 0.6949, + "step": 14 + }, + { + "epoch": 1.0, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.6664, + "step": 16 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.575, + "eval_loss": 0.6863978505134583, + "eval_macro_f1": 0.3704636350170345, + "eval_runtime": 1.1223, + "eval_samples_per_second": 356.401, + "eval_steps_per_second": 3.564, + "step": 16 + }, + { + "epoch": 1.12, + "learning_rate": 1.5e-06, + "loss": 0.6883, + "step": 18 + }, + { + "epoch": 1.25, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.6829, + "step": 20 + }, + { + "epoch": 1.38, + "learning_rate": 1.8333333333333335e-06, + "loss": 0.6752, + "step": 22 + }, + { + "epoch": 1.5, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.7028, + "step": 24 + }, + { + "epoch": 1.62, + "learning_rate": 2.166666666666667e-06, + "loss": 0.6865, + "step": 26 + }, + { + "epoch": 1.75, + "learning_rate": 2.3333333333333336e-06, + "loss": 0.6806, + "step": 28 + }, + { + "epoch": 1.88, + "learning_rate": 2.5e-06, + "loss": 0.6967, + "step": 30 + }, + { + "epoch": 2.0, + "learning_rate": 2.666666666666667e-06, + "loss": 0.6913, + "step": 32 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.5775, + "eval_loss": 0.6860420107841492, + "eval_macro_f1": 0.37150773064085757, + "eval_runtime": 1.1219, + "eval_samples_per_second": 356.528, + "eval_steps_per_second": 3.565, + "step": 32 + }, + { + "epoch": 2.12, + "learning_rate": 2.8333333333333335e-06, + "loss": 0.7063, + "step": 34 + }, + { + "epoch": 2.25, + "learning_rate": 3e-06, + "loss": 0.6795, + "step": 36 + }, + { + "epoch": 2.38, + "learning_rate": 3.166666666666667e-06, + "loss": 0.6816, + "step": 38 + }, + { + "epoch": 2.5, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.698, + "step": 40 + }, + { + "epoch": 2.62, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.677, + "step": 42 + }, + { + "epoch": 2.75, + "learning_rate": 3.666666666666667e-06, + "loss": 0.6769, + "step": 44 + }, + { + "epoch": 2.88, + "learning_rate": 3.833333333333334e-06, + "loss": 0.6981, + "step": 46 + }, + { + "epoch": 3.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6727, + "step": 48 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.5775, + "eval_loss": 0.6859269738197327, + "eval_macro_f1": 0.37150773064085757, + "eval_runtime": 1.199, + "eval_samples_per_second": 333.604, + "eval_steps_per_second": 3.336, + "step": 48 + }, + { + "epoch": 3.12, + "learning_rate": 4.166666666666667e-06, + "loss": 0.7026, + "step": 50 + }, + { + "epoch": 3.25, + "learning_rate": 4.333333333333334e-06, + "loss": 0.6807, + "step": 52 + }, + { + "epoch": 3.38, + "learning_rate": 4.5e-06, + "loss": 0.6795, + "step": 54 + }, + { + "epoch": 3.5, + "learning_rate": 4.666666666666667e-06, + "loss": 0.7013, + "step": 56 + }, + { + "epoch": 3.62, + "learning_rate": 4.833333333333333e-06, + "loss": 0.6788, + "step": 58 + }, + { + "epoch": 3.75, + "learning_rate": 5e-06, + "loss": 0.6777, + "step": 60 + }, + { + "epoch": 3.88, + "learning_rate": 5.166666666666667e-06, + "loss": 0.6967, + "step": 62 + }, + { + "epoch": 4.0, + "learning_rate": 5.333333333333334e-06, + "loss": 0.6749, + "step": 64 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.575, + "eval_loss": 0.6854825615882874, + "eval_macro_f1": 0.36507936507936506, + "eval_runtime": 1.1334, + "eval_samples_per_second": 352.933, + "eval_steps_per_second": 3.529, + "step": 64 + }, + { + "epoch": 4.12, + "learning_rate": 5.500000000000001e-06, + "loss": 0.7027, + "step": 66 + }, + { + "epoch": 4.25, + "learning_rate": 5.666666666666667e-06, + "loss": 0.6667, + "step": 68 + }, + { + "epoch": 4.38, + "learning_rate": 5.833333333333334e-06, + "loss": 0.6906, + "step": 70 + }, + { + "epoch": 4.5, + "learning_rate": 6e-06, + "loss": 0.7023, + "step": 72 + }, + { + "epoch": 4.62, + "learning_rate": 6.166666666666667e-06, + "loss": 0.6743, + "step": 74 + }, + { + "epoch": 4.75, + "learning_rate": 6.333333333333334e-06, + "loss": 0.6708, + "step": 76 + }, + { + "epoch": 4.88, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.6992, + "step": 78 + }, + { + "epoch": 5.0, + "learning_rate": 6.666666666666667e-06, + "loss": 0.685, + "step": 80 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.575, + "eval_loss": 0.6849209666252136, + "eval_macro_f1": 0.36507936507936506, + "eval_runtime": 1.0714, + "eval_samples_per_second": 373.352, + "eval_steps_per_second": 3.734, + "step": 80 + }, + { + "epoch": 5.12, + "learning_rate": 6.833333333333333e-06, + "loss": 0.6917, + "step": 82 + }, + { + "epoch": 5.25, + "learning_rate": 7.000000000000001e-06, + "loss": 0.684, + "step": 84 + }, + { + "epoch": 5.38, + "learning_rate": 7.166666666666667e-06, + "loss": 0.6846, + "step": 86 + }, + { + "epoch": 5.5, + "learning_rate": 7.333333333333334e-06, + "loss": 0.6967, + "step": 88 + }, + { + "epoch": 5.62, + "learning_rate": 7.5e-06, + "loss": 0.673, + "step": 90 + }, + { + "epoch": 5.75, + "learning_rate": 7.666666666666667e-06, + "loss": 0.6808, + "step": 92 + }, + { + "epoch": 5.88, + "learning_rate": 7.833333333333333e-06, + "loss": 0.6873, + "step": 94 + }, + { + "epoch": 6.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6798, + "step": 96 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.575, + "eval_loss": 0.6847447156906128, + "eval_macro_f1": 0.36507936507936506, + "eval_runtime": 1.2096, + "eval_samples_per_second": 330.697, + "eval_steps_per_second": 3.307, + "step": 96 + }, + { + "epoch": 6.12, + "learning_rate": 8.166666666666668e-06, + "loss": 0.6922, + "step": 98 + }, + { + "epoch": 6.25, + "learning_rate": 8.333333333333334e-06, + "loss": 0.6879, + "step": 100 + }, + { + "epoch": 6.38, + "learning_rate": 8.500000000000002e-06, + "loss": 0.6686, + "step": 102 + }, + { + "epoch": 6.5, + "learning_rate": 8.666666666666668e-06, + "loss": 0.7015, + "step": 104 + }, + { + "epoch": 6.62, + "learning_rate": 8.833333333333334e-06, + "loss": 0.6721, + "step": 106 + }, + { + "epoch": 6.75, + "learning_rate": 9e-06, + "loss": 0.6711, + "step": 108 + }, + { + "epoch": 6.88, + "learning_rate": 9.166666666666666e-06, + "loss": 0.7008, + "step": 110 + }, + { + "epoch": 7.0, + "learning_rate": 9.333333333333334e-06, + "loss": 0.6833, + "step": 112 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.575, + "eval_loss": 0.6845291256904602, + "eval_macro_f1": 0.36507936507936506, + "eval_runtime": 1.0732, + "eval_samples_per_second": 372.73, + "eval_steps_per_second": 3.727, + "step": 112 + }, + { + "epoch": 7.12, + "learning_rate": 9.5e-06, + "loss": 0.7052, + "step": 114 + }, + { + "epoch": 7.25, + "learning_rate": 9.666666666666667e-06, + "loss": 0.6767, + "step": 116 + }, + { + "epoch": 7.38, + "learning_rate": 9.833333333333333e-06, + "loss": 0.6733, + "step": 118 + }, + { + "epoch": 7.5, + "learning_rate": 1e-05, + "loss": 0.7014, + "step": 120 + }, + { + "epoch": 7.62, + "learning_rate": 1.0166666666666667e-05, + "loss": 0.6805, + "step": 122 + }, + { + "epoch": 7.75, + "learning_rate": 1.0333333333333333e-05, + "loss": 0.6752, + "step": 124 + }, + { + "epoch": 7.88, + "learning_rate": 1.05e-05, + "loss": 0.6834, + "step": 126 + }, + { + "epoch": 8.0, + "learning_rate": 1.0666666666666667e-05, + "loss": 0.6714, + "step": 128 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.5775, + "eval_loss": 0.6844103932380676, + "eval_macro_f1": 0.37150773064085757, + "eval_runtime": 1.1163, + "eval_samples_per_second": 358.34, + "eval_steps_per_second": 3.583, + "step": 128 + }, + { + "epoch": 8.12, + "learning_rate": 1.0833333333333334e-05, + "loss": 0.682, + "step": 130 + }, + { + "epoch": 8.25, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.6834, + "step": 132 + }, + { + "epoch": 8.38, + "learning_rate": 1.1166666666666668e-05, + "loss": 0.6834, + "step": 134 + }, + { + "epoch": 8.5, + "learning_rate": 1.1333333333333334e-05, + "loss": 0.7047, + "step": 136 + }, + { + "epoch": 8.62, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.6771, + "step": 138 + }, + { + "epoch": 8.75, + "learning_rate": 1.1666666666666668e-05, + "loss": 0.6801, + "step": 140 + }, + { + "epoch": 8.88, + "learning_rate": 1.1833333333333334e-05, + "loss": 0.6895, + "step": 142 + }, + { + "epoch": 9.0, + "learning_rate": 1.2e-05, + "loss": 0.65, + "step": 144 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.575, + "eval_loss": 0.6835783123970032, + "eval_macro_f1": 0.36507936507936506, + "eval_runtime": 1.1415, + "eval_samples_per_second": 350.408, + "eval_steps_per_second": 3.504, + "step": 144 + }, + { + "epoch": 9.12, + "learning_rate": 1.2166666666666668e-05, + "loss": 0.6848, + "step": 146 + }, + { + "epoch": 9.25, + "learning_rate": 1.2333333333333334e-05, + "loss": 0.6753, + "step": 148 + }, + { + "epoch": 9.38, + "learning_rate": 1.25e-05, + "loss": 0.6681, + "step": 150 + }, + { + "epoch": 9.5, + "learning_rate": 1.2666666666666668e-05, + "loss": 0.7129, + "step": 152 + }, + { + "epoch": 9.62, + "learning_rate": 1.2833333333333333e-05, + "loss": 0.6681, + "step": 154 + }, + { + "epoch": 9.75, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.6715, + "step": 156 + }, + { + "epoch": 9.88, + "learning_rate": 1.3166666666666665e-05, + "loss": 0.7159, + "step": 158 + }, + { + "epoch": 10.0, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.6494, + "step": 160 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.575, + "eval_loss": 0.6823403835296631, + "eval_macro_f1": 0.36507936507936506, + "eval_runtime": 1.2482, + "eval_samples_per_second": 320.459, + "eval_steps_per_second": 3.205, + "step": 160 + }, + { + "epoch": 10.12, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.6997, + "step": 162 + }, + { + "epoch": 10.25, + "learning_rate": 1.3666666666666666e-05, + "loss": 0.6782, + "step": 164 + }, + { + "epoch": 10.38, + "learning_rate": 1.3833333333333334e-05, + "loss": 0.6738, + "step": 166 + }, + { + "epoch": 10.5, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.6772, + "step": 168 + }, + { + "epoch": 10.62, + "learning_rate": 1.4166666666666668e-05, + "loss": 0.6782, + "step": 170 + }, + { + "epoch": 10.75, + "learning_rate": 1.4333333333333334e-05, + "loss": 0.6663, + "step": 172 + }, + { + "epoch": 10.88, + "learning_rate": 1.45e-05, + "loss": 0.7095, + "step": 174 + }, + { + "epoch": 11.0, + "learning_rate": 1.4666666666666668e-05, + "loss": 0.6549, + "step": 176 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.575, + "eval_loss": 0.6820950508117676, + "eval_macro_f1": 0.36507936507936506, + "eval_runtime": 1.1022, + "eval_samples_per_second": 362.906, + "eval_steps_per_second": 3.629, + "step": 176 + }, + { + "epoch": 11.12, + "learning_rate": 1.4833333333333336e-05, + "loss": 0.6936, + "step": 178 + }, + { + "epoch": 11.25, + "learning_rate": 1.5e-05, + "loss": 0.6768, + "step": 180 + }, + { + "epoch": 11.38, + "learning_rate": 1.5166666666666668e-05, + "loss": 0.6692, + "step": 182 + }, + { + "epoch": 11.5, + "learning_rate": 1.5333333333333334e-05, + "loss": 0.6876, + "step": 184 + }, + { + "epoch": 11.62, + "learning_rate": 1.55e-05, + "loss": 0.6642, + "step": 186 + }, + { + "epoch": 11.75, + "learning_rate": 1.5666666666666667e-05, + "loss": 0.6752, + "step": 188 + }, + { + "epoch": 11.88, + "learning_rate": 1.5833333333333333e-05, + "loss": 0.6933, + "step": 190 + }, + { + "epoch": 12.0, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.6803, + "step": 192 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.5775, + "eval_loss": 0.682160496711731, + "eval_macro_f1": 0.37150773064085757, + "eval_runtime": 1.0867, + "eval_samples_per_second": 368.078, + "eval_steps_per_second": 3.681, + "step": 192 + }, + { + "epoch": 12.12, + "learning_rate": 1.6166666666666665e-05, + "loss": 0.7001, + "step": 194 + }, + { + "epoch": 12.25, + "learning_rate": 1.6333333333333335e-05, + "loss": 0.6741, + "step": 196 + }, + { + "epoch": 12.38, + "learning_rate": 1.65e-05, + "loss": 0.6719, + "step": 198 + }, + { + "epoch": 12.5, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.6893, + "step": 200 + }, + { + "epoch": 12.62, + "learning_rate": 1.6833333333333334e-05, + "loss": 0.6698, + "step": 202 + }, + { + "epoch": 12.75, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.6743, + "step": 204 + }, + { + "epoch": 12.88, + "learning_rate": 1.7166666666666666e-05, + "loss": 0.6883, + "step": 206 + }, + { + "epoch": 13.0, + "learning_rate": 1.7333333333333336e-05, + "loss": 0.6503, + "step": 208 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.58, + "eval_loss": 0.6827612519264221, + "eval_macro_f1": 0.3830560757959678, + "eval_runtime": 1.1101, + "eval_samples_per_second": 360.318, + "eval_steps_per_second": 3.603, + "step": 208 + } + ], + "max_steps": 320, + "num_train_epochs": 20, + "total_flos": 555968928153600.0, + "trial_name": null, + "trial_params": null +} diff --git a/scaling_performance/2000/L4/fold0/config.json b/scaling_performance/2000/L4/fold0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..024d326aee4a2165ea7a3d641fbbf7d796d64eef --- /dev/null +++ b/scaling_performance/2000/L4/fold0/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 4, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L4/fold0/eval_results.json b/scaling_performance/2000/L4/fold0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4ef171236c0d5af1a96d6048054a4b6f097f3117 --- /dev/null +++ b/scaling_performance/2000/L4/fold0/eval_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.575, + "test_loss": 0.6820950508117676, + "test_macro_f1": 0.36507936507936506, + "test_runtime": 1.1206, + "test_samples_per_second": 356.963, + "test_steps_per_second": 3.57 +} \ No newline at end of file diff --git a/scaling_performance/2000/L4/fold1/all_results.json b/scaling_performance/2000/L4/fold1/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0eda9c5aa6e8bc718e840274668ce59922899235 --- /dev/null +++ b/scaling_performance/2000/L4/fold1/all_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.6025, + "test_loss": 0.6501033902168274, + "test_macro_f1": 0.5765617613965472, + "test_runtime": 1.0691, + "test_samples_per_second": 374.158, + "test_steps_per_second": 3.742 +} \ No newline at end of file diff --git a/scaling_performance/2000/L4/fold1/checkpoint-320/config.json b/scaling_performance/2000/L4/fold1/checkpoint-320/config.json new file mode 100644 index 0000000000000000000000000000000000000000..024d326aee4a2165ea7a3d641fbbf7d796d64eef --- /dev/null +++ b/scaling_performance/2000/L4/fold1/checkpoint-320/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 4, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L4/fold1/checkpoint-320/trainer_state.json b/scaling_performance/2000/L4/fold1/checkpoint-320/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5511bfd2ab8485231bed84dbf5579b44e3eca76e --- /dev/null +++ b/scaling_performance/2000/L4/fold1/checkpoint-320/trainer_state.json @@ -0,0 +1,1176 @@ +{ + "best_metric": 0.6501033902168274, + "best_model_checkpoint": "./models/240626_geneformer_CellClassifier_PM25_Layers4_L2048_B26_LR5e-05_LSlinear_WU600_E20_Oadamw_F0_fold1/checkpoint-320", + "epoch": 20.0, + "global_step": 320, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.12, + "learning_rate": 1.6666666666666668e-07, + "loss": 0.6968, + "step": 2 + }, + { + "epoch": 0.25, + "learning_rate": 3.3333333333333335e-07, + "loss": 0.6823, + "step": 4 + }, + { + "epoch": 0.38, + "learning_rate": 5.000000000000001e-07, + "loss": 0.6887, + "step": 6 + }, + { + "epoch": 0.5, + "learning_rate": 6.666666666666667e-07, + "loss": 0.6899, + "step": 8 + }, + { + "epoch": 0.62, + "learning_rate": 8.333333333333333e-07, + "loss": 0.6783, + "step": 10 + }, + { + "epoch": 0.75, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.6856, + "step": 12 + }, + { + "epoch": 0.88, + "learning_rate": 1.1666666666666668e-06, + "loss": 0.6903, + "step": 14 + }, + { + "epoch": 1.0, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.6805, + "step": 16 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.5925, + "eval_loss": 0.6838886737823486, + "eval_macro_f1": 0.4353951800760311, + "eval_runtime": 1.1995, + "eval_samples_per_second": 333.478, + "eval_steps_per_second": 3.335, + "step": 16 + }, + { + "epoch": 1.12, + "learning_rate": 1.5e-06, + "loss": 0.696, + "step": 18 + }, + { + "epoch": 1.25, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.6879, + "step": 20 + }, + { + "epoch": 1.38, + "learning_rate": 1.8333333333333335e-06, + "loss": 0.6825, + "step": 22 + }, + { + "epoch": 1.5, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.6948, + "step": 24 + }, + { + "epoch": 1.62, + "learning_rate": 2.166666666666667e-06, + "loss": 0.6753, + "step": 26 + }, + { + "epoch": 1.75, + "learning_rate": 2.3333333333333336e-06, + "loss": 0.6828, + "step": 28 + }, + { + "epoch": 1.88, + "learning_rate": 2.5e-06, + "loss": 0.6947, + "step": 30 + }, + { + "epoch": 2.0, + "learning_rate": 2.666666666666667e-06, + "loss": 0.6785, + "step": 32 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.5925, + "eval_loss": 0.6835730075836182, + "eval_macro_f1": 0.4312184313143914, + "eval_runtime": 1.1121, + "eval_samples_per_second": 359.692, + "eval_steps_per_second": 3.597, + "step": 32 + }, + { + "epoch": 2.12, + "learning_rate": 2.8333333333333335e-06, + "loss": 0.6944, + "step": 34 + }, + { + "epoch": 2.25, + "learning_rate": 3e-06, + "loss": 0.6794, + "step": 36 + }, + { + "epoch": 2.38, + "learning_rate": 3.166666666666667e-06, + "loss": 0.6868, + "step": 38 + }, + { + "epoch": 2.5, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.6993, + "step": 40 + }, + { + "epoch": 2.62, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.6775, + "step": 42 + }, + { + "epoch": 2.75, + "learning_rate": 3.666666666666667e-06, + "loss": 0.6831, + "step": 44 + }, + { + "epoch": 2.88, + "learning_rate": 3.833333333333334e-06, + "loss": 0.6893, + "step": 46 + }, + { + "epoch": 3.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6733, + "step": 48 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.5725, + "eval_loss": 0.6828555464744568, + "eval_macro_f1": 0.37970998721331994, + "eval_runtime": 1.0735, + "eval_samples_per_second": 372.607, + "eval_steps_per_second": 3.726, + "step": 48 + }, + { + "epoch": 3.12, + "learning_rate": 4.166666666666667e-06, + "loss": 0.6887, + "step": 50 + }, + { + "epoch": 3.25, + "learning_rate": 4.333333333333334e-06, + "loss": 0.6732, + "step": 52 + }, + { + "epoch": 3.38, + "learning_rate": 4.5e-06, + "loss": 0.6738, + "step": 54 + }, + { + "epoch": 3.5, + "learning_rate": 4.666666666666667e-06, + "loss": 0.7063, + "step": 56 + }, + { + "epoch": 3.62, + "learning_rate": 4.833333333333333e-06, + "loss": 0.6928, + "step": 58 + }, + { + "epoch": 3.75, + "learning_rate": 5e-06, + "loss": 0.6805, + "step": 60 + }, + { + "epoch": 3.88, + "learning_rate": 5.166666666666667e-06, + "loss": 0.6933, + "step": 62 + }, + { + "epoch": 4.0, + "learning_rate": 5.333333333333334e-06, + "loss": 0.6643, + "step": 64 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.5675, + "eval_loss": 0.6821349859237671, + "eval_macro_f1": 0.3673142857142857, + "eval_runtime": 1.2162, + "eval_samples_per_second": 328.882, + "eval_steps_per_second": 3.289, + "step": 64 + }, + { + "epoch": 4.12, + "learning_rate": 5.500000000000001e-06, + "loss": 0.6995, + "step": 66 + }, + { + "epoch": 4.25, + "learning_rate": 5.666666666666667e-06, + "loss": 0.6752, + "step": 68 + }, + { + "epoch": 4.38, + "learning_rate": 5.833333333333334e-06, + "loss": 0.6691, + "step": 70 + }, + { + "epoch": 4.5, + "learning_rate": 6e-06, + "loss": 0.6907, + "step": 72 + }, + { + "epoch": 4.62, + "learning_rate": 6.166666666666667e-06, + "loss": 0.6876, + "step": 74 + }, + { + "epoch": 4.75, + "learning_rate": 6.333333333333334e-06, + "loss": 0.668, + "step": 76 + }, + { + "epoch": 4.88, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.7008, + "step": 78 + }, + { + "epoch": 5.0, + "learning_rate": 6.666666666666667e-06, + "loss": 0.6813, + "step": 80 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5675, + "eval_loss": 0.6816044449806213, + "eval_macro_f1": 0.3673142857142857, + "eval_runtime": 1.1264, + "eval_samples_per_second": 355.105, + "eval_steps_per_second": 3.551, + "step": 80 + }, + { + "epoch": 5.12, + "learning_rate": 6.833333333333333e-06, + "loss": 0.7075, + "step": 82 + }, + { + "epoch": 5.25, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6692, + "step": 84 + }, + { + "epoch": 5.38, + "learning_rate": 7.166666666666667e-06, + "loss": 0.6761, + "step": 86 + }, + { + "epoch": 5.5, + "learning_rate": 7.333333333333334e-06, + "loss": 0.7005, + "step": 88 + }, + { + "epoch": 5.62, + "learning_rate": 7.5e-06, + "loss": 0.6846, + "step": 90 + }, + { + "epoch": 5.75, + "learning_rate": 7.666666666666667e-06, + "loss": 0.6708, + "step": 92 + }, + { + "epoch": 5.88, + "learning_rate": 7.833333333333333e-06, + "loss": 0.6929, + "step": 94 + }, + { + "epoch": 6.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6608, + "step": 96 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.5675, + "eval_loss": 0.6814809441566467, + "eval_macro_f1": 0.3673142857142857, + "eval_runtime": 1.0689, + "eval_samples_per_second": 374.209, + "eval_steps_per_second": 3.742, + "step": 96 + }, + { + "epoch": 6.12, + "learning_rate": 8.166666666666668e-06, + "loss": 0.6881, + "step": 98 + }, + { + "epoch": 6.25, + "learning_rate": 8.333333333333334e-06, + "loss": 0.6718, + "step": 100 + }, + { + "epoch": 6.38, + "learning_rate": 8.500000000000002e-06, + "loss": 0.6604, + "step": 102 + }, + { + "epoch": 6.5, + "learning_rate": 8.666666666666668e-06, + "loss": 0.7045, + "step": 104 + }, + { + "epoch": 6.62, + "learning_rate": 8.833333333333334e-06, + "loss": 0.6827, + "step": 106 + }, + { + "epoch": 6.75, + "learning_rate": 9e-06, + "loss": 0.6657, + "step": 108 + }, + { + "epoch": 6.88, + "learning_rate": 9.166666666666666e-06, + "loss": 0.705, + "step": 110 + }, + { + "epoch": 7.0, + "learning_rate": 9.333333333333334e-06, + "loss": 0.6781, + "step": 112 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.565, + "eval_loss": 0.681130051612854, + "eval_macro_f1": 0.3610223642172524, + "eval_runtime": 1.125, + "eval_samples_per_second": 355.558, + "eval_steps_per_second": 3.556, + "step": 112 + }, + { + "epoch": 7.12, + "learning_rate": 9.5e-06, + "loss": 0.6963, + "step": 114 + }, + { + "epoch": 7.25, + "learning_rate": 9.666666666666667e-06, + "loss": 0.6823, + "step": 116 + }, + { + "epoch": 7.38, + "learning_rate": 9.833333333333333e-06, + "loss": 0.6817, + "step": 118 + }, + { + "epoch": 7.5, + "learning_rate": 1e-05, + "loss": 0.7042, + "step": 120 + }, + { + "epoch": 7.62, + "learning_rate": 1.0166666666666667e-05, + "loss": 0.6723, + "step": 122 + }, + { + "epoch": 7.75, + "learning_rate": 1.0333333333333333e-05, + "loss": 0.6753, + "step": 124 + }, + { + "epoch": 7.88, + "learning_rate": 1.05e-05, + "loss": 0.6875, + "step": 126 + }, + { + "epoch": 8.0, + "learning_rate": 1.0666666666666667e-05, + "loss": 0.6456, + "step": 128 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.5675, + "eval_loss": 0.681651771068573, + "eval_macro_f1": 0.3673142857142857, + "eval_runtime": 1.0931, + "eval_samples_per_second": 365.947, + "eval_steps_per_second": 3.659, + "step": 128 + }, + { + "epoch": 8.12, + "learning_rate": 1.0833333333333334e-05, + "loss": 0.6914, + "step": 130 + }, + { + "epoch": 8.25, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.673, + "step": 132 + }, + { + "epoch": 8.38, + "learning_rate": 1.1166666666666668e-05, + "loss": 0.675, + "step": 134 + }, + { + "epoch": 8.5, + "learning_rate": 1.1333333333333334e-05, + "loss": 0.6956, + "step": 136 + }, + { + "epoch": 8.62, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.6771, + "step": 138 + }, + { + "epoch": 8.75, + "learning_rate": 1.1666666666666668e-05, + "loss": 0.6728, + "step": 140 + }, + { + "epoch": 8.88, + "learning_rate": 1.1833333333333334e-05, + "loss": 0.6846, + "step": 142 + }, + { + "epoch": 9.0, + "learning_rate": 1.2e-05, + "loss": 0.6766, + "step": 144 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.5675, + "eval_loss": 0.6811038851737976, + "eval_macro_f1": 0.3673142857142857, + "eval_runtime": 1.1076, + "eval_samples_per_second": 361.146, + "eval_steps_per_second": 3.611, + "step": 144 + }, + { + "epoch": 9.12, + "learning_rate": 1.2166666666666668e-05, + "loss": 0.6936, + "step": 146 + }, + { + "epoch": 9.25, + "learning_rate": 1.2333333333333334e-05, + "loss": 0.6697, + "step": 148 + }, + { + "epoch": 9.38, + "learning_rate": 1.25e-05, + "loss": 0.6804, + "step": 150 + }, + { + "epoch": 9.5, + "learning_rate": 1.2666666666666668e-05, + "loss": 0.6915, + "step": 152 + }, + { + "epoch": 9.62, + "learning_rate": 1.2833333333333333e-05, + "loss": 0.6697, + "step": 154 + }, + { + "epoch": 9.75, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.6739, + "step": 156 + }, + { + "epoch": 9.88, + "learning_rate": 1.3166666666666665e-05, + "loss": 0.6938, + "step": 158 + }, + { + "epoch": 10.0, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.6678, + "step": 160 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.5675, + "eval_loss": 0.6808483600616455, + "eval_macro_f1": 0.3673142857142857, + "eval_runtime": 1.0842, + "eval_samples_per_second": 368.92, + "eval_steps_per_second": 3.689, + "step": 160 + }, + { + "epoch": 10.12, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.6873, + "step": 162 + }, + { + "epoch": 10.25, + "learning_rate": 1.3666666666666666e-05, + "loss": 0.6714, + "step": 164 + }, + { + "epoch": 10.38, + "learning_rate": 1.3833333333333334e-05, + "loss": 0.6646, + "step": 166 + }, + { + "epoch": 10.5, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.6975, + "step": 168 + }, + { + "epoch": 10.62, + "learning_rate": 1.4166666666666668e-05, + "loss": 0.6645, + "step": 170 + }, + { + "epoch": 10.75, + "learning_rate": 1.4333333333333334e-05, + "loss": 0.6868, + "step": 172 + }, + { + "epoch": 10.88, + "learning_rate": 1.45e-05, + "loss": 0.6971, + "step": 174 + }, + { + "epoch": 11.0, + "learning_rate": 1.4666666666666668e-05, + "loss": 0.6485, + "step": 176 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.5675, + "eval_loss": 0.6800907850265503, + "eval_macro_f1": 0.3673142857142857, + "eval_runtime": 1.0751, + "eval_samples_per_second": 372.049, + "eval_steps_per_second": 3.72, + "step": 176 + }, + { + "epoch": 11.12, + "learning_rate": 1.4833333333333336e-05, + "loss": 0.7018, + "step": 178 + }, + { + "epoch": 11.25, + "learning_rate": 1.5e-05, + "loss": 0.6526, + "step": 180 + }, + { + "epoch": 11.38, + "learning_rate": 1.5166666666666668e-05, + "loss": 0.6648, + "step": 182 + }, + { + "epoch": 11.5, + "learning_rate": 1.5333333333333334e-05, + "loss": 0.6932, + "step": 184 + }, + { + "epoch": 11.62, + "learning_rate": 1.55e-05, + "loss": 0.6572, + "step": 186 + }, + { + "epoch": 11.75, + "learning_rate": 1.5666666666666667e-05, + "loss": 0.6796, + "step": 188 + }, + { + "epoch": 11.88, + "learning_rate": 1.5833333333333333e-05, + "loss": 0.7018, + "step": 190 + }, + { + "epoch": 12.0, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.6509, + "step": 192 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.5675, + "eval_loss": 0.6796495318412781, + "eval_macro_f1": 0.3673142857142857, + "eval_runtime": 1.1087, + "eval_samples_per_second": 360.786, + "eval_steps_per_second": 3.608, + "step": 192 + }, + { + "epoch": 12.12, + "learning_rate": 1.6166666666666665e-05, + "loss": 0.6939, + "step": 194 + }, + { + "epoch": 12.25, + "learning_rate": 1.6333333333333335e-05, + "loss": 0.6679, + "step": 196 + }, + { + "epoch": 12.38, + "learning_rate": 1.65e-05, + "loss": 0.6673, + "step": 198 + }, + { + "epoch": 12.5, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.7003, + "step": 200 + }, + { + "epoch": 12.62, + "learning_rate": 1.6833333333333334e-05, + "loss": 0.6588, + "step": 202 + }, + { + "epoch": 12.75, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.6595, + "step": 204 + }, + { + "epoch": 12.88, + "learning_rate": 1.7166666666666666e-05, + "loss": 0.6808, + "step": 206 + }, + { + "epoch": 13.0, + "learning_rate": 1.7333333333333336e-05, + "loss": 0.6625, + "step": 208 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.57, + "eval_loss": 0.6790809631347656, + "eval_macro_f1": 0.3735431235431235, + "eval_runtime": 1.07, + "eval_samples_per_second": 373.831, + "eval_steps_per_second": 3.738, + "step": 208 + }, + { + "epoch": 13.12, + "learning_rate": 1.75e-05, + "loss": 0.6897, + "step": 210 + }, + { + "epoch": 13.25, + "learning_rate": 1.7666666666666668e-05, + "loss": 0.6605, + "step": 212 + }, + { + "epoch": 13.38, + "learning_rate": 1.7833333333333334e-05, + "loss": 0.6618, + "step": 214 + }, + { + "epoch": 13.5, + "learning_rate": 1.8e-05, + "loss": 0.6938, + "step": 216 + }, + { + "epoch": 13.62, + "learning_rate": 1.8166666666666667e-05, + "loss": 0.6514, + "step": 218 + }, + { + "epoch": 13.75, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.6627, + "step": 220 + }, + { + "epoch": 13.88, + "learning_rate": 1.85e-05, + "loss": 0.6814, + "step": 222 + }, + { + "epoch": 14.0, + "learning_rate": 1.866666666666667e-05, + "loss": 0.6725, + "step": 224 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.57, + "eval_loss": 0.6779997944831848, + "eval_macro_f1": 0.3735431235431235, + "eval_runtime": 1.09, + "eval_samples_per_second": 366.986, + "eval_steps_per_second": 3.67, + "step": 224 + }, + { + "epoch": 14.12, + "learning_rate": 1.8833333333333335e-05, + "loss": 0.6773, + "step": 226 + }, + { + "epoch": 14.25, + "learning_rate": 1.9e-05, + "loss": 0.6551, + "step": 228 + }, + { + "epoch": 14.38, + "learning_rate": 1.9166666666666667e-05, + "loss": 0.645, + "step": 230 + }, + { + "epoch": 14.5, + "learning_rate": 1.9333333333333333e-05, + "loss": 0.6858, + "step": 232 + }, + { + "epoch": 14.62, + "learning_rate": 1.9500000000000003e-05, + "loss": 0.6615, + "step": 234 + }, + { + "epoch": 14.75, + "learning_rate": 1.9666666666666666e-05, + "loss": 0.6648, + "step": 236 + }, + { + "epoch": 14.88, + "learning_rate": 1.9833333333333335e-05, + "loss": 0.6945, + "step": 238 + }, + { + "epoch": 15.0, + "learning_rate": 2e-05, + "loss": 0.664, + "step": 240 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.5725, + "eval_loss": 0.676581621170044, + "eval_macro_f1": 0.37970998721331994, + "eval_runtime": 1.1251, + "eval_samples_per_second": 355.515, + "eval_steps_per_second": 3.555, + "step": 240 + }, + { + "epoch": 15.12, + "learning_rate": 2.0166666666666668e-05, + "loss": 0.6893, + "step": 242 + }, + { + "epoch": 15.25, + "learning_rate": 2.0333333333333334e-05, + "loss": 0.6482, + "step": 244 + }, + { + "epoch": 15.38, + "learning_rate": 2.05e-05, + "loss": 0.6561, + "step": 246 + }, + { + "epoch": 15.5, + "learning_rate": 2.0666666666666666e-05, + "loss": 0.676, + "step": 248 + }, + { + "epoch": 15.62, + "learning_rate": 2.0833333333333336e-05, + "loss": 0.6469, + "step": 250 + }, + { + "epoch": 15.75, + "learning_rate": 2.1e-05, + "loss": 0.6502, + "step": 252 + }, + { + "epoch": 15.88, + "learning_rate": 2.116666666666667e-05, + "loss": 0.69, + "step": 254 + }, + { + "epoch": 16.0, + "learning_rate": 2.1333333333333335e-05, + "loss": 0.6424, + "step": 256 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.5625, + "eval_loss": 0.67472243309021, + "eval_macro_f1": 0.38004268848916406, + "eval_runtime": 1.087, + "eval_samples_per_second": 367.976, + "eval_steps_per_second": 3.68, + "step": 256 + }, + { + "epoch": 16.12, + "learning_rate": 2.15e-05, + "loss": 0.674, + "step": 258 + }, + { + "epoch": 16.25, + "learning_rate": 2.1666666666666667e-05, + "loss": 0.6524, + "step": 260 + }, + { + "epoch": 16.38, + "learning_rate": 2.1833333333333333e-05, + "loss": 0.6601, + "step": 262 + }, + { + "epoch": 16.5, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.6738, + "step": 264 + }, + { + "epoch": 16.62, + "learning_rate": 2.216666666666667e-05, + "loss": 0.6426, + "step": 266 + }, + { + "epoch": 16.75, + "learning_rate": 2.2333333333333335e-05, + "loss": 0.6442, + "step": 268 + }, + { + "epoch": 16.88, + "learning_rate": 2.25e-05, + "loss": 0.6747, + "step": 270 + }, + { + "epoch": 17.0, + "learning_rate": 2.2666666666666668e-05, + "loss": 0.6318, + "step": 272 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.5925, + "eval_loss": 0.6718035936355591, + "eval_macro_f1": 0.49046178854164224, + "eval_runtime": 1.0788, + "eval_samples_per_second": 370.789, + "eval_steps_per_second": 3.708, + "step": 272 + }, + { + "epoch": 17.12, + "learning_rate": 2.2833333333333334e-05, + "loss": 0.6695, + "step": 274 + }, + { + "epoch": 17.25, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.6515, + "step": 276 + }, + { + "epoch": 17.38, + "learning_rate": 2.3166666666666666e-05, + "loss": 0.6501, + "step": 278 + }, + { + "epoch": 17.5, + "learning_rate": 2.3333333333333336e-05, + "loss": 0.6595, + "step": 280 + }, + { + "epoch": 17.62, + "learning_rate": 2.35e-05, + "loss": 0.6255, + "step": 282 + }, + { + "epoch": 17.75, + "learning_rate": 2.3666666666666668e-05, + "loss": 0.6335, + "step": 284 + }, + { + "epoch": 17.88, + "learning_rate": 2.3833333333333334e-05, + "loss": 0.6627, + "step": 286 + }, + { + "epoch": 18.0, + "learning_rate": 2.4e-05, + "loss": 0.629, + "step": 288 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.6025, + "eval_loss": 0.6667739152908325, + "eval_macro_f1": 0.516125350923242, + "eval_runtime": 1.1647, + "eval_samples_per_second": 343.422, + "eval_steps_per_second": 3.434, + "step": 288 + }, + { + "epoch": 18.12, + "learning_rate": 2.4166666666666667e-05, + "loss": 0.6506, + "step": 290 + }, + { + "epoch": 18.25, + "learning_rate": 2.4333333333333336e-05, + "loss": 0.631, + "step": 292 + }, + { + "epoch": 18.38, + "learning_rate": 2.45e-05, + "loss": 0.6132, + "step": 294 + }, + { + "epoch": 18.5, + "learning_rate": 2.466666666666667e-05, + "loss": 0.6558, + "step": 296 + }, + { + "epoch": 18.62, + "learning_rate": 2.4833333333333335e-05, + "loss": 0.6165, + "step": 298 + }, + { + "epoch": 18.75, + "learning_rate": 2.5e-05, + "loss": 0.6268, + "step": 300 + }, + { + "epoch": 18.88, + "learning_rate": 2.5166666666666667e-05, + "loss": 0.6561, + "step": 302 + }, + { + "epoch": 19.0, + "learning_rate": 2.5333333333333337e-05, + "loss": 0.6098, + "step": 304 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.61, + "eval_loss": 0.6590699553489685, + "eval_macro_f1": 0.568536342515765, + "eval_runtime": 1.1087, + "eval_samples_per_second": 360.779, + "eval_steps_per_second": 3.608, + "step": 304 + }, + { + "epoch": 19.12, + "learning_rate": 2.5500000000000003e-05, + "loss": 0.6329, + "step": 306 + }, + { + "epoch": 19.25, + "learning_rate": 2.5666666666666666e-05, + "loss": 0.5953, + "step": 308 + }, + { + "epoch": 19.38, + "learning_rate": 2.5833333333333336e-05, + "loss": 0.5907, + "step": 310 + }, + { + "epoch": 19.5, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.6452, + "step": 312 + }, + { + "epoch": 19.62, + "learning_rate": 2.6166666666666668e-05, + "loss": 0.5927, + "step": 314 + }, + { + "epoch": 19.75, + "learning_rate": 2.633333333333333e-05, + "loss": 0.6043, + "step": 316 + }, + { + "epoch": 19.88, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.6168, + "step": 318 + }, + { + "epoch": 20.0, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.6312, + "step": 320 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.6025, + "eval_loss": 0.6501033902168274, + "eval_macro_f1": 0.5765617613965472, + "eval_runtime": 1.0695, + "eval_samples_per_second": 374.002, + "eval_steps_per_second": 3.74, + "step": 320 + } + ], + "max_steps": 320, + "num_train_epochs": 20, + "total_flos": 855336812544000.0, + "trial_name": null, + "trial_params": null +} diff --git a/scaling_performance/2000/L4/fold1/config.json b/scaling_performance/2000/L4/fold1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..024d326aee4a2165ea7a3d641fbbf7d796d64eef --- /dev/null +++ b/scaling_performance/2000/L4/fold1/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 4, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L4/fold1/eval_results.json b/scaling_performance/2000/L4/fold1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0eda9c5aa6e8bc718e840274668ce59922899235 --- /dev/null +++ b/scaling_performance/2000/L4/fold1/eval_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.6025, + "test_loss": 0.6501033902168274, + "test_macro_f1": 0.5765617613965472, + "test_runtime": 1.0691, + "test_samples_per_second": 374.158, + "test_steps_per_second": 3.742 +} \ No newline at end of file diff --git a/scaling_performance/2000/L4/fold2/all_results.json b/scaling_performance/2000/L4/fold2/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..00bf1561a1f96311fac63490ee55483e105d36b2 --- /dev/null +++ b/scaling_performance/2000/L4/fold2/all_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.5725, + "test_loss": 0.6870080828666687, + "test_macro_f1": 0.3746285714285715, + "test_runtime": 1.1028, + "test_samples_per_second": 362.72, + "test_steps_per_second": 3.627 +} \ No newline at end of file diff --git a/scaling_performance/2000/L4/fold2/checkpoint-176/config.json b/scaling_performance/2000/L4/fold2/checkpoint-176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..024d326aee4a2165ea7a3d641fbbf7d796d64eef --- /dev/null +++ b/scaling_performance/2000/L4/fold2/checkpoint-176/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 4, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L4/fold2/checkpoint-176/trainer_state.json b/scaling_performance/2000/L4/fold2/checkpoint-176/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..edef4afccee5d37045bb4508349019afa44f5830 --- /dev/null +++ b/scaling_performance/2000/L4/fold2/checkpoint-176/trainer_state.json @@ -0,0 +1,654 @@ +{ + "best_metric": 0.6870080828666687, + "best_model_checkpoint": "./models/240626_geneformer_CellClassifier_PM25_Layers4_L2048_B26_LR5e-05_LSlinear_WU600_E20_Oadamw_F0_fold2/checkpoint-144", + "epoch": 11.0, + "global_step": 176, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.12, + "learning_rate": 1.6666666666666668e-07, + "loss": 0.6894, + "step": 2 + }, + { + "epoch": 0.25, + "learning_rate": 3.3333333333333335e-07, + "loss": 0.6904, + "step": 4 + }, + { + "epoch": 0.38, + "learning_rate": 5.000000000000001e-07, + "loss": 0.6886, + "step": 6 + }, + { + "epoch": 0.5, + "learning_rate": 6.666666666666667e-07, + "loss": 0.6976, + "step": 8 + }, + { + "epoch": 0.62, + "learning_rate": 8.333333333333333e-07, + "loss": 0.6826, + "step": 10 + }, + { + "epoch": 0.75, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.6875, + "step": 12 + }, + { + "epoch": 0.88, + "learning_rate": 1.1666666666666668e-06, + "loss": 0.6929, + "step": 14 + }, + { + "epoch": 1.0, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.6785, + "step": 16 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.5325, + "eval_loss": 0.6903218030929565, + "eval_macro_f1": 0.41212992871682425, + "eval_runtime": 1.1311, + "eval_samples_per_second": 353.644, + "eval_steps_per_second": 3.536, + "step": 16 + }, + { + "epoch": 1.12, + "learning_rate": 1.5e-06, + "loss": 0.7031, + "step": 18 + }, + { + "epoch": 1.25, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.6875, + "step": 20 + }, + { + "epoch": 1.38, + "learning_rate": 1.8333333333333335e-06, + "loss": 0.6885, + "step": 22 + }, + { + "epoch": 1.5, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.6939, + "step": 24 + }, + { + "epoch": 1.62, + "learning_rate": 2.166666666666667e-06, + "loss": 0.6791, + "step": 26 + }, + { + "epoch": 1.75, + "learning_rate": 2.3333333333333336e-06, + "loss": 0.6777, + "step": 28 + }, + { + "epoch": 1.88, + "learning_rate": 2.5e-06, + "loss": 0.6922, + "step": 30 + }, + { + "epoch": 2.0, + "learning_rate": 2.666666666666667e-06, + "loss": 0.6888, + "step": 32 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.5475, + "eval_loss": 0.6896157264709473, + "eval_macro_f1": 0.42094360598571556, + "eval_runtime": 1.1158, + "eval_samples_per_second": 358.473, + "eval_steps_per_second": 3.585, + "step": 32 + }, + { + "epoch": 2.12, + "learning_rate": 2.8333333333333335e-06, + "loss": 0.6953, + "step": 34 + }, + { + "epoch": 2.25, + "learning_rate": 3e-06, + "loss": 0.6884, + "step": 36 + }, + { + "epoch": 2.38, + "learning_rate": 3.166666666666667e-06, + "loss": 0.681, + "step": 38 + }, + { + "epoch": 2.5, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.6901, + "step": 40 + }, + { + "epoch": 2.62, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.6818, + "step": 42 + }, + { + "epoch": 2.75, + "learning_rate": 3.666666666666667e-06, + "loss": 0.6891, + "step": 44 + }, + { + "epoch": 2.88, + "learning_rate": 3.833333333333334e-06, + "loss": 0.6946, + "step": 46 + }, + { + "epoch": 3.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6736, + "step": 48 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.565, + "eval_loss": 0.6884350776672363, + "eval_macro_f1": 0.4037828947368421, + "eval_runtime": 1.2491, + "eval_samples_per_second": 320.242, + "eval_steps_per_second": 3.202, + "step": 48 + }, + { + "epoch": 3.12, + "learning_rate": 4.166666666666667e-06, + "loss": 0.6931, + "step": 50 + }, + { + "epoch": 3.25, + "learning_rate": 4.333333333333334e-06, + "loss": 0.6844, + "step": 52 + }, + { + "epoch": 3.38, + "learning_rate": 4.5e-06, + "loss": 0.6748, + "step": 54 + }, + { + "epoch": 3.5, + "learning_rate": 4.666666666666667e-06, + "loss": 0.6965, + "step": 56 + }, + { + "epoch": 3.62, + "learning_rate": 4.833333333333333e-06, + "loss": 0.6859, + "step": 58 + }, + { + "epoch": 3.75, + "learning_rate": 5e-06, + "loss": 0.6835, + "step": 60 + }, + { + "epoch": 3.88, + "learning_rate": 5.166666666666667e-06, + "loss": 0.6947, + "step": 62 + }, + { + "epoch": 4.0, + "learning_rate": 5.333333333333334e-06, + "loss": 0.6711, + "step": 64 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.5675, + "eval_loss": 0.6877972483634949, + "eval_macro_f1": 0.3871279149064307, + "eval_runtime": 1.1515, + "eval_samples_per_second": 347.367, + "eval_steps_per_second": 3.474, + "step": 64 + }, + { + "epoch": 4.12, + "learning_rate": 5.500000000000001e-06, + "loss": 0.6906, + "step": 66 + }, + { + "epoch": 4.25, + "learning_rate": 5.666666666666667e-06, + "loss": 0.6733, + "step": 68 + }, + { + "epoch": 4.38, + "learning_rate": 5.833333333333334e-06, + "loss": 0.6821, + "step": 70 + }, + { + "epoch": 4.5, + "learning_rate": 6e-06, + "loss": 0.7026, + "step": 72 + }, + { + "epoch": 4.62, + "learning_rate": 6.166666666666667e-06, + "loss": 0.6734, + "step": 74 + }, + { + "epoch": 4.75, + "learning_rate": 6.333333333333334e-06, + "loss": 0.6782, + "step": 76 + }, + { + "epoch": 4.88, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.6937, + "step": 78 + }, + { + "epoch": 5.0, + "learning_rate": 6.666666666666667e-06, + "loss": 0.6811, + "step": 80 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.57, + "eval_loss": 0.6872588992118835, + "eval_macro_f1": 0.38351254480286734, + "eval_runtime": 1.2012, + "eval_samples_per_second": 333.012, + "eval_steps_per_second": 3.33, + "step": 80 + }, + { + "epoch": 5.12, + "learning_rate": 6.833333333333333e-06, + "loss": 0.7024, + "step": 82 + }, + { + "epoch": 5.25, + "learning_rate": 7.000000000000001e-06, + "loss": 0.677, + "step": 84 + }, + { + "epoch": 5.38, + "learning_rate": 7.166666666666667e-06, + "loss": 0.6837, + "step": 86 + }, + { + "epoch": 5.5, + "learning_rate": 7.333333333333334e-06, + "loss": 0.6908, + "step": 88 + }, + { + "epoch": 5.62, + "learning_rate": 7.5e-06, + "loss": 0.6805, + "step": 90 + }, + { + "epoch": 5.75, + "learning_rate": 7.666666666666667e-06, + "loss": 0.6789, + "step": 92 + }, + { + "epoch": 5.88, + "learning_rate": 7.833333333333333e-06, + "loss": 0.6878, + "step": 94 + }, + { + "epoch": 6.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6707, + "step": 96 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.5675, + "eval_loss": 0.6876999735832214, + "eval_macro_f1": 0.39632385654840313, + "eval_runtime": 1.0921, + "eval_samples_per_second": 366.259, + "eval_steps_per_second": 3.663, + "step": 96 + }, + { + "epoch": 6.12, + "learning_rate": 8.166666666666668e-06, + "loss": 0.6923, + "step": 98 + }, + { + "epoch": 6.25, + "learning_rate": 8.333333333333334e-06, + "loss": 0.6788, + "step": 100 + }, + { + "epoch": 6.38, + "learning_rate": 8.500000000000002e-06, + "loss": 0.6735, + "step": 102 + }, + { + "epoch": 6.5, + "learning_rate": 8.666666666666668e-06, + "loss": 0.699, + "step": 104 + }, + { + "epoch": 6.62, + "learning_rate": 8.833333333333334e-06, + "loss": 0.6727, + "step": 106 + }, + { + "epoch": 6.75, + "learning_rate": 9e-06, + "loss": 0.6805, + "step": 108 + }, + { + "epoch": 6.88, + "learning_rate": 9.166666666666666e-06, + "loss": 0.6942, + "step": 110 + }, + { + "epoch": 7.0, + "learning_rate": 9.333333333333334e-06, + "loss": 0.6672, + "step": 112 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.5725, + "eval_loss": 0.6871394515037537, + "eval_macro_f1": 0.37970998721331994, + "eval_runtime": 1.0755, + "eval_samples_per_second": 371.911, + "eval_steps_per_second": 3.719, + "step": 112 + }, + { + "epoch": 7.12, + "learning_rate": 9.5e-06, + "loss": 0.6833, + "step": 114 + }, + { + "epoch": 7.25, + "learning_rate": 9.666666666666667e-06, + "loss": 0.684, + "step": 116 + }, + { + "epoch": 7.38, + "learning_rate": 9.833333333333333e-06, + "loss": 0.673, + "step": 118 + }, + { + "epoch": 7.5, + "learning_rate": 1e-05, + "loss": 0.7003, + "step": 120 + }, + { + "epoch": 7.62, + "learning_rate": 1.0166666666666667e-05, + "loss": 0.6652, + "step": 122 + }, + { + "epoch": 7.75, + "learning_rate": 1.0333333333333333e-05, + "loss": 0.6791, + "step": 124 + }, + { + "epoch": 7.88, + "learning_rate": 1.05e-05, + "loss": 0.6976, + "step": 126 + }, + { + "epoch": 8.0, + "learning_rate": 1.0666666666666667e-05, + "loss": 0.6704, + "step": 128 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.5725, + "eval_loss": 0.687021791934967, + "eval_macro_f1": 0.3746285714285715, + "eval_runtime": 1.1916, + "eval_samples_per_second": 335.684, + "eval_steps_per_second": 3.357, + "step": 128 + }, + { + "epoch": 8.12, + "learning_rate": 1.0833333333333334e-05, + "loss": 0.6797, + "step": 130 + }, + { + "epoch": 8.25, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.6764, + "step": 132 + }, + { + "epoch": 8.38, + "learning_rate": 1.1166666666666668e-05, + "loss": 0.6775, + "step": 134 + }, + { + "epoch": 8.5, + "learning_rate": 1.1333333333333334e-05, + "loss": 0.7075, + "step": 136 + }, + { + "epoch": 8.62, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.6697, + "step": 138 + }, + { + "epoch": 8.75, + "learning_rate": 1.1666666666666668e-05, + "loss": 0.6655, + "step": 140 + }, + { + "epoch": 8.88, + "learning_rate": 1.1833333333333334e-05, + "loss": 0.7055, + "step": 142 + }, + { + "epoch": 9.0, + "learning_rate": 1.2e-05, + "loss": 0.6551, + "step": 144 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.5725, + "eval_loss": 0.6870080828666687, + "eval_macro_f1": 0.3746285714285715, + "eval_runtime": 1.0662, + "eval_samples_per_second": 375.163, + "eval_steps_per_second": 3.752, + "step": 144 + }, + { + "epoch": 9.12, + "learning_rate": 1.2166666666666668e-05, + "loss": 0.6922, + "step": 146 + }, + { + "epoch": 9.25, + "learning_rate": 1.2333333333333334e-05, + "loss": 0.6673, + "step": 148 + }, + { + "epoch": 9.38, + "learning_rate": 1.25e-05, + "loss": 0.6716, + "step": 150 + }, + { + "epoch": 9.5, + "learning_rate": 1.2666666666666668e-05, + "loss": 0.6974, + "step": 152 + }, + { + "epoch": 9.62, + "learning_rate": 1.2833333333333333e-05, + "loss": 0.6653, + "step": 154 + }, + { + "epoch": 9.75, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.6635, + "step": 156 + }, + { + "epoch": 9.88, + "learning_rate": 1.3166666666666665e-05, + "loss": 0.6943, + "step": 158 + }, + { + "epoch": 10.0, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.6778, + "step": 160 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.565, + "eval_loss": 0.6875166296958923, + "eval_macro_f1": 0.3994823123382226, + "eval_runtime": 1.1086, + "eval_samples_per_second": 360.799, + "eval_steps_per_second": 3.608, + "step": 160 + }, + { + "epoch": 10.12, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.6836, + "step": 162 + }, + { + "epoch": 10.25, + "learning_rate": 1.3666666666666666e-05, + "loss": 0.6788, + "step": 164 + }, + { + "epoch": 10.38, + "learning_rate": 1.3833333333333334e-05, + "loss": 0.6747, + "step": 166 + }, + { + "epoch": 10.5, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.6941, + "step": 168 + }, + { + "epoch": 10.62, + "learning_rate": 1.4166666666666668e-05, + "loss": 0.6735, + "step": 170 + }, + { + "epoch": 10.75, + "learning_rate": 1.4333333333333334e-05, + "loss": 0.6701, + "step": 172 + }, + { + "epoch": 10.88, + "learning_rate": 1.45e-05, + "loss": 0.6859, + "step": 174 + }, + { + "epoch": 11.0, + "learning_rate": 1.4666666666666668e-05, + "loss": 0.6503, + "step": 176 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.5625, + "eval_loss": 0.6887872219085693, + "eval_macro_f1": 0.39820665583438647, + "eval_runtime": 1.0813, + "eval_samples_per_second": 369.917, + "eval_steps_per_second": 3.699, + "step": 176 + } + ], + "max_steps": 320, + "num_train_epochs": 20, + "total_flos": 470435246899200.0, + "trial_name": null, + "trial_params": null +} diff --git a/scaling_performance/2000/L4/fold2/config.json b/scaling_performance/2000/L4/fold2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..024d326aee4a2165ea7a3d641fbbf7d796d64eef --- /dev/null +++ b/scaling_performance/2000/L4/fold2/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 4, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L4/fold2/eval_results.json b/scaling_performance/2000/L4/fold2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..00bf1561a1f96311fac63490ee55483e105d36b2 --- /dev/null +++ b/scaling_performance/2000/L4/fold2/eval_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.5725, + "test_loss": 0.6870080828666687, + "test_macro_f1": 0.3746285714285715, + "test_runtime": 1.1028, + "test_samples_per_second": 362.72, + "test_steps_per_second": 3.627 +} \ No newline at end of file diff --git a/scaling_performance/2000/L4/fold3/all_results.json b/scaling_performance/2000/L4/fold3/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2d9db78306aa5147706673856a44c1c27cc9c1bd --- /dev/null +++ b/scaling_performance/2000/L4/fold3/all_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.5425, + "test_loss": 0.6905966401100159, + "test_macro_f1": 0.35170178282009723, + "test_runtime": 1.0988, + "test_samples_per_second": 364.045, + "test_steps_per_second": 3.64 +} \ No newline at end of file diff --git a/scaling_performance/2000/L4/fold3/checkpoint-112/config.json b/scaling_performance/2000/L4/fold3/checkpoint-112/config.json new file mode 100644 index 0000000000000000000000000000000000000000..024d326aee4a2165ea7a3d641fbbf7d796d64eef --- /dev/null +++ b/scaling_performance/2000/L4/fold3/checkpoint-112/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 4, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L4/fold3/checkpoint-112/trainer_state.json b/scaling_performance/2000/L4/fold3/checkpoint-112/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f41b4c12e93917b8444917124ac84c7c94982e73 --- /dev/null +++ b/scaling_performance/2000/L4/fold3/checkpoint-112/trainer_state.json @@ -0,0 +1,422 @@ +{ + "best_metric": 0.6905966401100159, + "best_model_checkpoint": "./models/240626_geneformer_CellClassifier_PM25_Layers4_L2048_B26_LR5e-05_LSlinear_WU600_E20_Oadamw_F0_fold3/checkpoint-80", + "epoch": 7.0, + "global_step": 112, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.12, + "learning_rate": 1.6666666666666668e-07, + "loss": 0.6967, + "step": 2 + }, + { + "epoch": 0.25, + "learning_rate": 3.3333333333333335e-07, + "loss": 0.7142, + "step": 4 + }, + { + "epoch": 0.38, + "learning_rate": 5.000000000000001e-07, + "loss": 0.7082, + "step": 6 + }, + { + "epoch": 0.5, + "learning_rate": 6.666666666666667e-07, + "loss": 0.6925, + "step": 8 + }, + { + "epoch": 0.62, + "learning_rate": 8.333333333333333e-07, + "loss": 0.7141, + "step": 10 + }, + { + "epoch": 0.75, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.7065, + "step": 12 + }, + { + "epoch": 0.88, + "learning_rate": 1.1666666666666668e-06, + "loss": 0.6991, + "step": 14 + }, + { + "epoch": 1.0, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.6982, + "step": 16 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.465, + "eval_loss": 0.7003238797187805, + "eval_macro_f1": 0.3600287089924938, + "eval_runtime": 1.158, + "eval_samples_per_second": 345.415, + "eval_steps_per_second": 3.454, + "step": 16 + }, + { + "epoch": 1.12, + "learning_rate": 1.5e-06, + "loss": 0.6935, + "step": 18 + }, + { + "epoch": 1.25, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.709, + "step": 20 + }, + { + "epoch": 1.38, + "learning_rate": 1.8333333333333335e-06, + "loss": 0.7093, + "step": 22 + }, + { + "epoch": 1.5, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.6941, + "step": 24 + }, + { + "epoch": 1.62, + "learning_rate": 2.166666666666667e-06, + "loss": 0.7045, + "step": 26 + }, + { + "epoch": 1.75, + "learning_rate": 2.3333333333333336e-06, + "loss": 0.6984, + "step": 28 + }, + { + "epoch": 1.88, + "learning_rate": 2.5e-06, + "loss": 0.6953, + "step": 30 + }, + { + "epoch": 2.0, + "learning_rate": 2.666666666666667e-06, + "loss": 0.7116, + "step": 32 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.48, + "eval_loss": 0.6973231434822083, + "eval_macro_f1": 0.4407249065634159, + "eval_runtime": 1.3824, + "eval_samples_per_second": 289.36, + "eval_steps_per_second": 2.894, + "step": 32 + }, + { + "epoch": 2.12, + "learning_rate": 2.8333333333333335e-06, + "loss": 0.69, + "step": 34 + }, + { + "epoch": 2.25, + "learning_rate": 3e-06, + "loss": 0.7007, + "step": 36 + }, + { + "epoch": 2.38, + "learning_rate": 3.166666666666667e-06, + "loss": 0.6977, + "step": 38 + }, + { + "epoch": 2.5, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.6954, + "step": 40 + }, + { + "epoch": 2.62, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.6978, + "step": 42 + }, + { + "epoch": 2.75, + "learning_rate": 3.666666666666667e-06, + "loss": 0.695, + "step": 44 + }, + { + "epoch": 2.88, + "learning_rate": 3.833333333333334e-06, + "loss": 0.695, + "step": 46 + }, + { + "epoch": 3.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6939, + "step": 48 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.5, + "eval_loss": 0.6936335563659668, + "eval_macro_f1": 0.49269480519480524, + "eval_runtime": 1.1744, + "eval_samples_per_second": 340.601, + "eval_steps_per_second": 3.406, + "step": 48 + }, + { + "epoch": 3.12, + "learning_rate": 4.166666666666667e-06, + "loss": 0.6958, + "step": 50 + }, + { + "epoch": 3.25, + "learning_rate": 4.333333333333334e-06, + "loss": 0.6893, + "step": 52 + }, + { + "epoch": 3.38, + "learning_rate": 4.5e-06, + "loss": 0.6905, + "step": 54 + }, + { + "epoch": 3.5, + "learning_rate": 4.666666666666667e-06, + "loss": 0.695, + "step": 56 + }, + { + "epoch": 3.62, + "learning_rate": 4.833333333333333e-06, + "loss": 0.6853, + "step": 58 + }, + { + "epoch": 3.75, + "learning_rate": 5e-06, + "loss": 0.6878, + "step": 60 + }, + { + "epoch": 3.88, + "learning_rate": 5.166666666666667e-06, + "loss": 0.6906, + "step": 62 + }, + { + "epoch": 4.0, + "learning_rate": 5.333333333333334e-06, + "loss": 0.6851, + "step": 64 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.5175, + "eval_loss": 0.6912351846694946, + "eval_macro_f1": 0.37108455328266166, + "eval_runtime": 1.115, + "eval_samples_per_second": 358.733, + "eval_steps_per_second": 3.587, + "step": 64 + }, + { + "epoch": 4.12, + "learning_rate": 5.500000000000001e-06, + "loss": 0.689, + "step": 66 + }, + { + "epoch": 4.25, + "learning_rate": 5.666666666666667e-06, + "loss": 0.6775, + "step": 68 + }, + { + "epoch": 4.38, + "learning_rate": 5.833333333333334e-06, + "loss": 0.6853, + "step": 70 + }, + { + "epoch": 4.5, + "learning_rate": 6e-06, + "loss": 0.7045, + "step": 72 + }, + { + "epoch": 4.62, + "learning_rate": 6.166666666666667e-06, + "loss": 0.6794, + "step": 74 + }, + { + "epoch": 4.75, + "learning_rate": 6.333333333333334e-06, + "loss": 0.675, + "step": 76 + }, + { + "epoch": 4.88, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.6976, + "step": 78 + }, + { + "epoch": 5.0, + "learning_rate": 6.666666666666667e-06, + "loss": 0.6789, + "step": 80 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5425, + "eval_loss": 0.6905966401100159, + "eval_macro_f1": 0.35170178282009723, + "eval_runtime": 1.1543, + "eval_samples_per_second": 346.521, + "eval_steps_per_second": 3.465, + "step": 80 + }, + { + "epoch": 5.12, + "learning_rate": 6.833333333333333e-06, + "loss": 0.6893, + "step": 82 + }, + { + "epoch": 5.25, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6804, + "step": 84 + }, + { + "epoch": 5.38, + "learning_rate": 7.166666666666667e-06, + "loss": 0.6794, + "step": 86 + }, + { + "epoch": 5.5, + "learning_rate": 7.333333333333334e-06, + "loss": 0.6969, + "step": 88 + }, + { + "epoch": 5.62, + "learning_rate": 7.5e-06, + "loss": 0.675, + "step": 90 + }, + { + "epoch": 5.75, + "learning_rate": 7.666666666666667e-06, + "loss": 0.6785, + "step": 92 + }, + { + "epoch": 5.88, + "learning_rate": 7.833333333333333e-06, + "loss": 0.6877, + "step": 94 + }, + { + "epoch": 6.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6668, + "step": 96 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.5425, + "eval_loss": 0.6907917261123657, + "eval_macro_f1": 0.35170178282009723, + "eval_runtime": 1.0917, + "eval_samples_per_second": 366.384, + "eval_steps_per_second": 3.664, + "step": 96 + }, + { + "epoch": 6.12, + "learning_rate": 8.166666666666668e-06, + "loss": 0.7032, + "step": 98 + }, + { + "epoch": 6.25, + "learning_rate": 8.333333333333334e-06, + "loss": 0.6576, + "step": 100 + }, + { + "epoch": 6.38, + "learning_rate": 8.500000000000002e-06, + "loss": 0.6808, + "step": 102 + }, + { + "epoch": 6.5, + "learning_rate": 8.666666666666668e-06, + "loss": 0.6971, + "step": 104 + }, + { + "epoch": 6.62, + "learning_rate": 8.833333333333334e-06, + "loss": 0.6781, + "step": 106 + }, + { + "epoch": 6.75, + "learning_rate": 9e-06, + "loss": 0.667, + "step": 108 + }, + { + "epoch": 6.88, + "learning_rate": 9.166666666666666e-06, + "loss": 0.6914, + "step": 110 + }, + { + "epoch": 7.0, + "learning_rate": 9.333333333333334e-06, + "loss": 0.6841, + "step": 112 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.5425, + "eval_loss": 0.6911839246749878, + "eval_macro_f1": 0.35170178282009723, + "eval_runtime": 1.1306, + "eval_samples_per_second": 353.8, + "eval_steps_per_second": 3.538, + "step": 112 + } + ], + "max_steps": 320, + "num_train_epochs": 20, + "total_flos": 299367884390400.0, + "trial_name": null, + "trial_params": null +} diff --git a/scaling_performance/2000/L4/fold3/config.json b/scaling_performance/2000/L4/fold3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..024d326aee4a2165ea7a3d641fbbf7d796d64eef --- /dev/null +++ b/scaling_performance/2000/L4/fold3/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 4, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L4/fold3/eval_results.json b/scaling_performance/2000/L4/fold3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2d9db78306aa5147706673856a44c1c27cc9c1bd --- /dev/null +++ b/scaling_performance/2000/L4/fold3/eval_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.5425, + "test_loss": 0.6905966401100159, + "test_macro_f1": 0.35170178282009723, + "test_runtime": 1.0988, + "test_samples_per_second": 364.045, + "test_steps_per_second": 3.64 +} \ No newline at end of file diff --git a/scaling_performance/2000/L4/fold4/all_results.json b/scaling_performance/2000/L4/fold4/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b5ef072fce8123f84f569d44b26b5e0e57adf2aa --- /dev/null +++ b/scaling_performance/2000/L4/fold4/all_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.6975, + "test_loss": 0.5687375068664551, + "test_macro_f1": 0.6942881145029971, + "test_runtime": 1.6145, + "test_samples_per_second": 247.752, + "test_steps_per_second": 2.478 +} \ No newline at end of file diff --git a/scaling_performance/2000/L4/fold4/checkpoint-160/config.json b/scaling_performance/2000/L4/fold4/checkpoint-160/config.json new file mode 100644 index 0000000000000000000000000000000000000000..024d326aee4a2165ea7a3d641fbbf7d796d64eef --- /dev/null +++ b/scaling_performance/2000/L4/fold4/checkpoint-160/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 4, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L4/fold4/checkpoint-160/trainer_state.json b/scaling_performance/2000/L4/fold4/checkpoint-160/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..64806dac51fea833414d7d8b753a11f303d69433 --- /dev/null +++ b/scaling_performance/2000/L4/fold4/checkpoint-160/trainer_state.json @@ -0,0 +1,596 @@ +{ + "best_metric": 0.6825920343399048, + "best_model_checkpoint": "./models/240626_geneformer_CellClassifier_PM25_Layers4_L2048_B26_LR5e-05_LSlinear_WU600_E20_Oadamw_F0_fold4/checkpoint-128", + "epoch": 10.0, + "global_step": 160, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.12, + "learning_rate": 1.6666666666666668e-07, + "loss": 0.6905, + "step": 2 + }, + { + "epoch": 0.25, + "learning_rate": 3.3333333333333335e-07, + "loss": 0.7029, + "step": 4 + }, + { + "epoch": 0.38, + "learning_rate": 5.000000000000001e-07, + "loss": 0.6978, + "step": 6 + }, + { + "epoch": 0.5, + "learning_rate": 6.666666666666667e-07, + "loss": 0.6968, + "step": 8 + }, + { + "epoch": 0.62, + "learning_rate": 8.333333333333333e-07, + "loss": 0.6954, + "step": 10 + }, + { + "epoch": 0.75, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.7, + "step": 12 + }, + { + "epoch": 0.88, + "learning_rate": 1.1666666666666668e-06, + "loss": 0.697, + "step": 14 + }, + { + "epoch": 1.0, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.6996, + "step": 16 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.5, + "eval_loss": 0.6958171129226685, + "eval_macro_f1": 0.493298877657014, + "eval_runtime": 1.1509, + "eval_samples_per_second": 347.552, + "eval_steps_per_second": 3.476, + "step": 16 + }, + { + "epoch": 1.12, + "learning_rate": 1.5e-06, + "loss": 0.69, + "step": 18 + }, + { + "epoch": 1.25, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.7017, + "step": 20 + }, + { + "epoch": 1.38, + "learning_rate": 1.8333333333333335e-06, + "loss": 0.6939, + "step": 22 + }, + { + "epoch": 1.5, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.6928, + "step": 24 + }, + { + "epoch": 1.62, + "learning_rate": 2.166666666666667e-06, + "loss": 0.699, + "step": 26 + }, + { + "epoch": 1.75, + "learning_rate": 2.3333333333333336e-06, + "loss": 0.6927, + "step": 28 + }, + { + "epoch": 1.88, + "learning_rate": 2.5e-06, + "loss": 0.6986, + "step": 30 + }, + { + "epoch": 2.0, + "learning_rate": 2.666666666666667e-06, + "loss": 0.6999, + "step": 32 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.51, + "eval_loss": 0.6933969259262085, + "eval_macro_f1": 0.5090057366166487, + "eval_runtime": 1.199, + "eval_samples_per_second": 333.601, + "eval_steps_per_second": 3.336, + "step": 32 + }, + { + "epoch": 2.12, + "learning_rate": 2.8333333333333335e-06, + "loss": 0.6941, + "step": 34 + }, + { + "epoch": 2.25, + "learning_rate": 3e-06, + "loss": 0.6918, + "step": 36 + }, + { + "epoch": 2.38, + "learning_rate": 3.166666666666667e-06, + "loss": 0.6899, + "step": 38 + }, + { + "epoch": 2.5, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.6906, + "step": 40 + }, + { + "epoch": 2.62, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.6914, + "step": 42 + }, + { + "epoch": 2.75, + "learning_rate": 3.666666666666667e-06, + "loss": 0.6919, + "step": 44 + }, + { + "epoch": 2.88, + "learning_rate": 3.833333333333334e-06, + "loss": 0.7009, + "step": 46 + }, + { + "epoch": 3.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6881, + "step": 48 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.545, + "eval_loss": 0.6895008087158203, + "eval_macro_f1": 0.4814814814814815, + "eval_runtime": 1.1808, + "eval_samples_per_second": 338.766, + "eval_steps_per_second": 3.388, + "step": 48 + }, + { + "epoch": 3.12, + "learning_rate": 4.166666666666667e-06, + "loss": 0.6938, + "step": 50 + }, + { + "epoch": 3.25, + "learning_rate": 4.333333333333334e-06, + "loss": 0.6876, + "step": 52 + }, + { + "epoch": 3.38, + "learning_rate": 4.5e-06, + "loss": 0.6855, + "step": 54 + }, + { + "epoch": 3.5, + "learning_rate": 4.666666666666667e-06, + "loss": 0.6973, + "step": 56 + }, + { + "epoch": 3.62, + "learning_rate": 4.833333333333333e-06, + "loss": 0.69, + "step": 58 + }, + { + "epoch": 3.75, + "learning_rate": 5e-06, + "loss": 0.6902, + "step": 60 + }, + { + "epoch": 3.88, + "learning_rate": 5.166666666666667e-06, + "loss": 0.6913, + "step": 62 + }, + { + "epoch": 4.0, + "learning_rate": 5.333333333333334e-06, + "loss": 0.6799, + "step": 64 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.5625, + "eval_loss": 0.6866650581359863, + "eval_macro_f1": 0.41072976909025094, + "eval_runtime": 1.1114, + "eval_samples_per_second": 359.916, + "eval_steps_per_second": 3.599, + "step": 64 + }, + { + "epoch": 4.12, + "learning_rate": 5.500000000000001e-06, + "loss": 0.6976, + "step": 66 + }, + { + "epoch": 4.25, + "learning_rate": 5.666666666666667e-06, + "loss": 0.6823, + "step": 68 + }, + { + "epoch": 4.38, + "learning_rate": 5.833333333333334e-06, + "loss": 0.6863, + "step": 70 + }, + { + "epoch": 4.5, + "learning_rate": 6e-06, + "loss": 0.6955, + "step": 72 + }, + { + "epoch": 4.62, + "learning_rate": 6.166666666666667e-06, + "loss": 0.6842, + "step": 74 + }, + { + "epoch": 4.75, + "learning_rate": 6.333333333333334e-06, + "loss": 0.6873, + "step": 76 + }, + { + "epoch": 4.88, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.6987, + "step": 78 + }, + { + "epoch": 5.0, + "learning_rate": 6.666666666666667e-06, + "loss": 0.6779, + "step": 80 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5675, + "eval_loss": 0.6847481727600098, + "eval_macro_f1": 0.3673142857142857, + "eval_runtime": 1.2206, + "eval_samples_per_second": 327.716, + "eval_steps_per_second": 3.277, + "step": 80 + }, + { + "epoch": 5.12, + "learning_rate": 6.833333333333333e-06, + "loss": 0.695, + "step": 82 + }, + { + "epoch": 5.25, + "learning_rate": 7.000000000000001e-06, + "loss": 0.689, + "step": 84 + }, + { + "epoch": 5.38, + "learning_rate": 7.166666666666667e-06, + "loss": 0.6814, + "step": 86 + }, + { + "epoch": 5.5, + "learning_rate": 7.333333333333334e-06, + "loss": 0.693, + "step": 88 + }, + { + "epoch": 5.62, + "learning_rate": 7.5e-06, + "loss": 0.6876, + "step": 90 + }, + { + "epoch": 5.75, + "learning_rate": 7.666666666666667e-06, + "loss": 0.6822, + "step": 92 + }, + { + "epoch": 5.88, + "learning_rate": 7.833333333333333e-06, + "loss": 0.6933, + "step": 94 + }, + { + "epoch": 6.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6579, + "step": 96 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.57, + "eval_loss": 0.6842648386955261, + "eval_macro_f1": 0.3630573248407643, + "eval_runtime": 1.1578, + "eval_samples_per_second": 345.493, + "eval_steps_per_second": 3.455, + "step": 96 + }, + { + "epoch": 6.12, + "learning_rate": 8.166666666666668e-06, + "loss": 0.6891, + "step": 98 + }, + { + "epoch": 6.25, + "learning_rate": 8.333333333333334e-06, + "loss": 0.6925, + "step": 100 + }, + { + "epoch": 6.38, + "learning_rate": 8.500000000000002e-06, + "loss": 0.6739, + "step": 102 + }, + { + "epoch": 6.5, + "learning_rate": 8.666666666666668e-06, + "loss": 0.6974, + "step": 104 + }, + { + "epoch": 6.62, + "learning_rate": 8.833333333333334e-06, + "loss": 0.6804, + "step": 106 + }, + { + "epoch": 6.75, + "learning_rate": 9e-06, + "loss": 0.6762, + "step": 108 + }, + { + "epoch": 6.88, + "learning_rate": 9.166666666666666e-06, + "loss": 0.7002, + "step": 110 + }, + { + "epoch": 7.0, + "learning_rate": 9.333333333333334e-06, + "loss": 0.669, + "step": 112 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.5725, + "eval_loss": 0.683117687702179, + "eval_macro_f1": 0.3640699523052464, + "eval_runtime": 1.117, + "eval_samples_per_second": 358.09, + "eval_steps_per_second": 3.581, + "step": 112 + }, + { + "epoch": 7.12, + "learning_rate": 9.5e-06, + "loss": 0.6975, + "step": 114 + }, + { + "epoch": 7.25, + "learning_rate": 9.666666666666667e-06, + "loss": 0.6847, + "step": 116 + }, + { + "epoch": 7.38, + "learning_rate": 9.833333333333333e-06, + "loss": 0.6715, + "step": 118 + }, + { + "epoch": 7.5, + "learning_rate": 1e-05, + "loss": 0.7029, + "step": 120 + }, + { + "epoch": 7.62, + "learning_rate": 1.0166666666666667e-05, + "loss": 0.6696, + "step": 122 + }, + { + "epoch": 7.75, + "learning_rate": 1.0333333333333333e-05, + "loss": 0.6602, + "step": 124 + }, + { + "epoch": 7.88, + "learning_rate": 1.05e-05, + "loss": 0.7126, + "step": 126 + }, + { + "epoch": 8.0, + "learning_rate": 1.0666666666666667e-05, + "loss": 0.6814, + "step": 128 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.5775, + "eval_loss": 0.6825920343399048, + "eval_macro_f1": 0.36608557844690964, + "eval_runtime": 1.1048, + "eval_samples_per_second": 362.052, + "eval_steps_per_second": 3.621, + "step": 128 + }, + { + "epoch": 8.12, + "learning_rate": 1.0833333333333334e-05, + "loss": 0.6947, + "step": 130 + }, + { + "epoch": 8.25, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.6809, + "step": 132 + }, + { + "epoch": 8.38, + "learning_rate": 1.1166666666666668e-05, + "loss": 0.6765, + "step": 134 + }, + { + "epoch": 8.5, + "learning_rate": 1.1333333333333334e-05, + "loss": 0.6964, + "step": 136 + }, + { + "epoch": 8.62, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.6771, + "step": 138 + }, + { + "epoch": 8.75, + "learning_rate": 1.1666666666666668e-05, + "loss": 0.667, + "step": 140 + }, + { + "epoch": 8.88, + "learning_rate": 1.1833333333333334e-05, + "loss": 0.7035, + "step": 142 + }, + { + "epoch": 9.0, + "learning_rate": 1.2e-05, + "loss": 0.6773, + "step": 144 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.5725, + "eval_loss": 0.6831978559494019, + "eval_macro_f1": 0.3640699523052464, + "eval_runtime": 1.1552, + "eval_samples_per_second": 346.251, + "eval_steps_per_second": 3.463, + "step": 144 + }, + { + "epoch": 9.12, + "learning_rate": 1.2166666666666668e-05, + "loss": 0.6942, + "step": 146 + }, + { + "epoch": 9.25, + "learning_rate": 1.2333333333333334e-05, + "loss": 0.6726, + "step": 148 + }, + { + "epoch": 9.38, + "learning_rate": 1.25e-05, + "loss": 0.6732, + "step": 150 + }, + { + "epoch": 9.5, + "learning_rate": 1.2666666666666668e-05, + "loss": 0.7068, + "step": 152 + }, + { + "epoch": 9.62, + "learning_rate": 1.2833333333333333e-05, + "loss": 0.6755, + "step": 154 + }, + { + "epoch": 9.75, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.6788, + "step": 156 + }, + { + "epoch": 9.88, + "learning_rate": 1.3166666666666665e-05, + "loss": 0.691, + "step": 158 + }, + { + "epoch": 10.0, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.669, + "step": 160 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.565, + "eval_loss": 0.6841627359390259, + "eval_macro_f1": 0.3950771798080934, + "eval_runtime": 1.123, + "eval_samples_per_second": 356.196, + "eval_steps_per_second": 3.562, + "step": 160 + } + ], + "max_steps": 320, + "num_train_epochs": 20, + "total_flos": 427668406272000.0, + "trial_name": null, + "trial_params": null +} diff --git a/scaling_performance/2000/L4/fold4/config.json b/scaling_performance/2000/L4/fold4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..024d326aee4a2165ea7a3d641fbbf7d796d64eef --- /dev/null +++ b/scaling_performance/2000/L4/fold4/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 4, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/2000/L4/fold4/eval_results.json b/scaling_performance/2000/L4/fold4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b5ef072fce8123f84f569d44b26b5e0e57adf2aa --- /dev/null +++ b/scaling_performance/2000/L4/fold4/eval_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.6975, + "test_loss": 0.5687375068664551, + "test_macro_f1": 0.6942881145029971, + "test_runtime": 1.6145, + "test_samples_per_second": 247.752, + "test_steps_per_second": 2.478 +} \ No newline at end of file diff --git a/scaling_performance/2000/fine-tuned/.DS_Store b/scaling_performance/2000/fine-tuned/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..713fd9972e55d4a20ebcdb7f71e8dd6c0f9f5131 Binary files /dev/null and b/scaling_performance/2000/fine-tuned/.DS_Store differ diff --git a/scaling_performance/2000/fine-tuned/fold2/all_results.json b/scaling_performance/2000/fine-tuned/fold2/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..79b2c6739ee0e90468c45f50811100c53c6c418f --- /dev/null +++ b/scaling_performance/2000/fine-tuned/fold2/all_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.6525, + "test_loss": 0.6257244348526001, + "test_macro_f1": 0.6517141801188933, + "test_runtime": 1.3086, + "test_samples_per_second": 305.663, + "test_steps_per_second": 3.057 +} \ No newline at end of file diff --git a/scaling_performance/2000/fine-tuned/fold2/config.json b/scaling_performance/2000/fine-tuned/fold2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b62d8f806865910412b7988d64e1ee1f6579b72b --- /dev/null +++ b/scaling_performance/2000/fine-tuned/fold2/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "/vsphhome/fengguoqing/Geneformer", + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "gradient_checkpointing": false, + "hidden_act": "relu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 6, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 25426 +} diff --git a/scaling_performance/2000/fine-tuned/fold2/eval_results.json b/scaling_performance/2000/fine-tuned/fold2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..79b2c6739ee0e90468c45f50811100c53c6c418f --- /dev/null +++ b/scaling_performance/2000/fine-tuned/fold2/eval_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.6525, + "test_loss": 0.6257244348526001, + "test_macro_f1": 0.6517141801188933, + "test_runtime": 1.3086, + "test_samples_per_second": 305.663, + "test_steps_per_second": 3.057 +} \ No newline at end of file diff --git a/scaling_performance/8000/L6/fold0/all_results.json b/scaling_performance/8000/L6/fold0/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..29b7cc76111698653d7bd2babc7ecc4ff7175daf --- /dev/null +++ b/scaling_performance/8000/L6/fold0/all_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.749375, + "test_loss": 0.5108256340026855, + "test_macro_f1": 0.7489347531667834, + "test_runtime": 6.0862, + "test_samples_per_second": 262.89, + "test_steps_per_second": 2.629 +} \ No newline at end of file diff --git a/scaling_performance/8000/L6/fold0/checkpoint-682/config.json b/scaling_performance/8000/L6/fold0/checkpoint-682/config.json new file mode 100644 index 0000000000000000000000000000000000000000..315d942e83135c8839ef6c5f099d4ed8632593c8 --- /dev/null +++ b/scaling_performance/8000/L6/fold0/checkpoint-682/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 6, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/8000/L6/fold0/checkpoint-682/trainer_state.json b/scaling_performance/8000/L6/fold0/checkpoint-682/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6ceb3e1b13dfeb1beb53d64d4e1a7a4887b66fe1 --- /dev/null +++ b/scaling_performance/8000/L6/fold0/checkpoint-682/trainer_state.json @@ -0,0 +1,636 @@ +{ + "best_metric": 0.5108256340026855, + "best_model_checkpoint": "/vsphhome/fengguoqing/Geneformer/models/5folds_allmodels/8000samples/L6/fold0/checkpoint-558", + "epoch": 11.0, + "global_step": 682, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.13, + "learning_rate": 8.000000000000001e-07, + "loss": 0.6996, + "step": 8 + }, + { + "epoch": 0.26, + "learning_rate": 1.6000000000000001e-06, + "loss": 0.6963, + "step": 16 + }, + { + "epoch": 0.39, + "learning_rate": 2.4000000000000003e-06, + "loss": 0.6924, + "step": 24 + }, + { + "epoch": 0.52, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.6943, + "step": 32 + }, + { + "epoch": 0.65, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6948, + "step": 40 + }, + { + "epoch": 0.77, + "learning_rate": 4.800000000000001e-06, + "loss": 0.6932, + "step": 48 + }, + { + "epoch": 0.9, + "learning_rate": 5.600000000000001e-06, + "loss": 0.6956, + "step": 56 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.505, + "eval_loss": 0.6936476230621338, + "eval_macro_f1": 0.41951549332937554, + "eval_runtime": 5.583, + "eval_samples_per_second": 286.582, + "eval_steps_per_second": 2.866, + "step": 62 + }, + { + "epoch": 1.03, + "learning_rate": 6.4000000000000006e-06, + "loss": 0.6968, + "step": 64 + }, + { + "epoch": 1.16, + "learning_rate": 7.2e-06, + "loss": 0.6922, + "step": 72 + }, + { + "epoch": 1.29, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6948, + "step": 80 + }, + { + "epoch": 1.42, + "learning_rate": 8.8e-06, + "loss": 0.6932, + "step": 88 + }, + { + "epoch": 1.55, + "learning_rate": 9.600000000000001e-06, + "loss": 0.6923, + "step": 96 + }, + { + "epoch": 1.68, + "learning_rate": 1.04e-05, + "loss": 0.6924, + "step": 104 + }, + { + "epoch": 1.81, + "learning_rate": 1.1200000000000001e-05, + "loss": 0.692, + "step": 112 + }, + { + "epoch": 1.94, + "learning_rate": 1.2e-05, + "loss": 0.6925, + "step": 120 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.518125, + "eval_loss": 0.692840576171875, + "eval_macro_f1": 0.3723922812797784, + "eval_runtime": 5.9603, + "eval_samples_per_second": 268.441, + "eval_steps_per_second": 2.684, + "step": 124 + }, + { + "epoch": 2.06, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.6876, + "step": 128 + }, + { + "epoch": 2.19, + "learning_rate": 1.3600000000000002e-05, + "loss": 0.6915, + "step": 136 + }, + { + "epoch": 2.32, + "learning_rate": 1.44e-05, + "loss": 0.688, + "step": 144 + }, + { + "epoch": 2.45, + "learning_rate": 1.52e-05, + "loss": 0.682, + "step": 152 + }, + { + "epoch": 2.58, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.6967, + "step": 160 + }, + { + "epoch": 2.71, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.6892, + "step": 168 + }, + { + "epoch": 2.84, + "learning_rate": 1.76e-05, + "loss": 0.6898, + "step": 176 + }, + { + "epoch": 2.97, + "learning_rate": 1.84e-05, + "loss": 0.6893, + "step": 184 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.53125, + "eval_loss": 0.6909370422363281, + "eval_macro_f1": 0.5251053078979736, + "eval_runtime": 5.7016, + "eval_samples_per_second": 280.623, + "eval_steps_per_second": 2.806, + "step": 186 + }, + { + "epoch": 3.1, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.6884, + "step": 192 + }, + { + "epoch": 3.23, + "learning_rate": 2e-05, + "loss": 0.6851, + "step": 200 + }, + { + "epoch": 3.35, + "learning_rate": 2.08e-05, + "loss": 0.6863, + "step": 208 + }, + { + "epoch": 3.48, + "learning_rate": 2.16e-05, + "loss": 0.686, + "step": 216 + }, + { + "epoch": 3.61, + "learning_rate": 2.2400000000000002e-05, + "loss": 0.6877, + "step": 224 + }, + { + "epoch": 3.74, + "learning_rate": 2.32e-05, + "loss": 0.6847, + "step": 232 + }, + { + "epoch": 3.87, + "learning_rate": 2.4e-05, + "loss": 0.6835, + "step": 240 + }, + { + "epoch": 4.0, + "learning_rate": 2.48e-05, + "loss": 0.6807, + "step": 248 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.519375, + "eval_loss": 0.6910722255706787, + "eval_macro_f1": 0.39346107261361385, + "eval_runtime": 5.7298, + "eval_samples_per_second": 279.242, + "eval_steps_per_second": 2.792, + "step": 248 + }, + { + "epoch": 4.13, + "learning_rate": 2.5600000000000002e-05, + "loss": 0.6783, + "step": 256 + }, + { + "epoch": 4.26, + "learning_rate": 2.64e-05, + "loss": 0.6769, + "step": 264 + }, + { + "epoch": 4.39, + "learning_rate": 2.7200000000000004e-05, + "loss": 0.6796, + "step": 272 + }, + { + "epoch": 4.52, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6722, + "step": 280 + }, + { + "epoch": 4.65, + "learning_rate": 2.88e-05, + "loss": 0.6819, + "step": 288 + }, + { + "epoch": 4.77, + "learning_rate": 2.96e-05, + "loss": 0.6695, + "step": 296 + }, + { + "epoch": 4.9, + "learning_rate": 3.04e-05, + "loss": 0.6721, + "step": 304 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5525, + "eval_loss": 0.6876064538955688, + "eval_macro_f1": 0.5086458767600137, + "eval_runtime": 5.7972, + "eval_samples_per_second": 275.998, + "eval_steps_per_second": 2.76, + "step": 310 + }, + { + "epoch": 5.03, + "learning_rate": 3.12e-05, + "loss": 0.6688, + "step": 312 + }, + { + "epoch": 5.16, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6543, + "step": 320 + }, + { + "epoch": 5.29, + "learning_rate": 3.2800000000000004e-05, + "loss": 0.6597, + "step": 328 + }, + { + "epoch": 5.42, + "learning_rate": 3.3600000000000004e-05, + "loss": 0.6414, + "step": 336 + }, + { + "epoch": 5.55, + "learning_rate": 3.4399999999999996e-05, + "loss": 0.6513, + "step": 344 + }, + { + "epoch": 5.68, + "learning_rate": 3.52e-05, + "loss": 0.6365, + "step": 352 + }, + { + "epoch": 5.81, + "learning_rate": 3.6e-05, + "loss": 0.6249, + "step": 360 + }, + { + "epoch": 5.94, + "learning_rate": 3.68e-05, + "loss": 0.6383, + "step": 368 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.601875, + "eval_loss": 0.6667760610580444, + "eval_macro_f1": 0.5957466134061243, + "eval_runtime": 5.5913, + "eval_samples_per_second": 286.158, + "eval_steps_per_second": 2.862, + "step": 372 + }, + { + "epoch": 6.06, + "learning_rate": 3.76e-05, + "loss": 0.6234, + "step": 376 + }, + { + "epoch": 6.19, + "learning_rate": 3.8400000000000005e-05, + "loss": 0.591, + "step": 384 + }, + { + "epoch": 6.32, + "learning_rate": 3.9200000000000004e-05, + "loss": 0.5782, + "step": 392 + }, + { + "epoch": 6.45, + "learning_rate": 4e-05, + "loss": 0.589, + "step": 400 + }, + { + "epoch": 6.58, + "learning_rate": 4.08e-05, + "loss": 0.5659, + "step": 408 + }, + { + "epoch": 6.71, + "learning_rate": 4.16e-05, + "loss": 0.546, + "step": 416 + }, + { + "epoch": 6.84, + "learning_rate": 4.24e-05, + "loss": 0.5229, + "step": 424 + }, + { + "epoch": 6.97, + "learning_rate": 4.32e-05, + "loss": 0.5267, + "step": 432 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.67, + "eval_loss": 0.6224550008773804, + "eval_macro_f1": 0.6536137662286543, + "eval_runtime": 5.64, + "eval_samples_per_second": 283.69, + "eval_steps_per_second": 2.837, + "step": 434 + }, + { + "epoch": 7.1, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.5204, + "step": 440 + }, + { + "epoch": 7.23, + "learning_rate": 4.4800000000000005e-05, + "loss": 0.4801, + "step": 448 + }, + { + "epoch": 7.35, + "learning_rate": 4.5600000000000004e-05, + "loss": 0.494, + "step": 456 + }, + { + "epoch": 7.48, + "learning_rate": 4.64e-05, + "loss": 0.4519, + "step": 464 + }, + { + "epoch": 7.61, + "learning_rate": 4.72e-05, + "loss": 0.414, + "step": 472 + }, + { + "epoch": 7.74, + "learning_rate": 4.8e-05, + "loss": 0.486, + "step": 480 + }, + { + "epoch": 7.87, + "learning_rate": 4.88e-05, + "loss": 0.4709, + "step": 488 + }, + { + "epoch": 8.0, + "learning_rate": 4.96e-05, + "loss": 0.4366, + "step": 496 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.73625, + "eval_loss": 0.5209737420082092, + "eval_macro_f1": 0.7358273237179487, + "eval_runtime": 5.7071, + "eval_samples_per_second": 280.351, + "eval_steps_per_second": 2.804, + "step": 496 + }, + { + "epoch": 8.13, + "learning_rate": 4.992307692307693e-05, + "loss": 0.4005, + "step": 504 + }, + { + "epoch": 8.26, + "learning_rate": 4.9769230769230775e-05, + "loss": 0.4148, + "step": 512 + }, + { + "epoch": 8.39, + "learning_rate": 4.961538461538462e-05, + "loss": 0.4191, + "step": 520 + }, + { + "epoch": 8.52, + "learning_rate": 4.9461538461538466e-05, + "loss": 0.3694, + "step": 528 + }, + { + "epoch": 8.65, + "learning_rate": 4.930769230769231e-05, + "loss": 0.3948, + "step": 536 + }, + { + "epoch": 8.77, + "learning_rate": 4.9153846153846157e-05, + "loss": 0.3646, + "step": 544 + }, + { + "epoch": 8.9, + "learning_rate": 4.9e-05, + "loss": 0.344, + "step": 552 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.749375, + "eval_loss": 0.5108256340026855, + "eval_macro_f1": 0.7489347531667834, + "eval_runtime": 6.1542, + "eval_samples_per_second": 259.986, + "eval_steps_per_second": 2.6, + "step": 558 + }, + { + "epoch": 9.03, + "learning_rate": 4.884615384615385e-05, + "loss": 0.3726, + "step": 560 + }, + { + "epoch": 9.16, + "learning_rate": 4.8692307692307696e-05, + "loss": 0.3328, + "step": 568 + }, + { + "epoch": 9.29, + "learning_rate": 4.853846153846154e-05, + "loss": 0.4589, + "step": 576 + }, + { + "epoch": 9.42, + "learning_rate": 4.8384615384615386e-05, + "loss": 0.3619, + "step": 584 + }, + { + "epoch": 9.55, + "learning_rate": 4.8230769230769235e-05, + "loss": 0.3868, + "step": 592 + }, + { + "epoch": 9.68, + "learning_rate": 4.8076923076923084e-05, + "loss": 0.3756, + "step": 600 + }, + { + "epoch": 9.81, + "learning_rate": 4.7923076923076926e-05, + "loss": 0.5009, + "step": 608 + }, + { + "epoch": 9.94, + "learning_rate": 4.7769230769230774e-05, + "loss": 0.4744, + "step": 616 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.691875, + "eval_loss": 0.6369074583053589, + "eval_macro_f1": 0.680725200649721, + "eval_runtime": 5.5706, + "eval_samples_per_second": 287.222, + "eval_steps_per_second": 2.872, + "step": 620 + }, + { + "epoch": 10.06, + "learning_rate": 4.7615384615384616e-05, + "loss": 0.4718, + "step": 624 + }, + { + "epoch": 10.19, + "learning_rate": 4.7461538461538465e-05, + "loss": 0.4543, + "step": 632 + }, + { + "epoch": 10.32, + "learning_rate": 4.730769230769231e-05, + "loss": 0.3738, + "step": 640 + }, + { + "epoch": 10.45, + "learning_rate": 4.7153846153846155e-05, + "loss": 0.2972, + "step": 648 + }, + { + "epoch": 10.58, + "learning_rate": 4.7e-05, + "loss": 0.3194, + "step": 656 + }, + { + "epoch": 10.71, + "learning_rate": 4.684615384615385e-05, + "loss": 0.3237, + "step": 664 + }, + { + "epoch": 10.84, + "learning_rate": 4.6692307692307695e-05, + "loss": 0.375, + "step": 672 + }, + { + "epoch": 10.97, + "learning_rate": 4.653846153846154e-05, + "loss": 0.3084, + "step": 680 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.751875, + "eval_loss": 0.5174940228462219, + "eval_macro_f1": 0.7512990046087208, + "eval_runtime": 5.6228, + "eval_samples_per_second": 284.554, + "eval_steps_per_second": 2.846, + "step": 682 + } + ], + "max_steps": 3100, + "num_train_epochs": 50, + "total_flos": 2793710184038400.0, + "trial_name": null, + "trial_params": null +} diff --git a/scaling_performance/8000/L6/fold0/config.json b/scaling_performance/8000/L6/fold0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..315d942e83135c8839ef6c5f099d4ed8632593c8 --- /dev/null +++ b/scaling_performance/8000/L6/fold0/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 6, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/8000/L6/fold0/eval_results.json b/scaling_performance/8000/L6/fold0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..29b7cc76111698653d7bd2babc7ecc4ff7175daf --- /dev/null +++ b/scaling_performance/8000/L6/fold0/eval_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.749375, + "test_loss": 0.5108256340026855, + "test_macro_f1": 0.7489347531667834, + "test_runtime": 6.0862, + "test_samples_per_second": 262.89, + "test_steps_per_second": 2.629 +} \ No newline at end of file diff --git a/scaling_performance/8000/L6/fold1/all_results.json b/scaling_performance/8000/L6/fold1/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..083bb25f58554201319a949b58629d2e876a9137 --- /dev/null +++ b/scaling_performance/8000/L6/fold1/all_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.7875, + "test_loss": 0.4764532744884491, + "test_macro_f1": 0.7853535353535352, + "test_runtime": 8.7208, + "test_samples_per_second": 183.47, + "test_steps_per_second": 1.835 +} \ No newline at end of file diff --git a/scaling_performance/8000/L6/fold1/checkpoint-806/config.json b/scaling_performance/8000/L6/fold1/checkpoint-806/config.json new file mode 100644 index 0000000000000000000000000000000000000000..315d942e83135c8839ef6c5f099d4ed8632593c8 --- /dev/null +++ b/scaling_performance/8000/L6/fold1/checkpoint-806/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 6, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/8000/L6/fold1/checkpoint-806/trainer_state.json b/scaling_performance/8000/L6/fold1/checkpoint-806/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ae71bc244c5db861272ba75fbb7b42f2c123549f --- /dev/null +++ b/scaling_performance/8000/L6/fold1/checkpoint-806/trainer_state.json @@ -0,0 +1,746 @@ +{ + "best_metric": 0.4764532744884491, + "best_model_checkpoint": "/vsphhome/fengguoqing/Geneformer/models/5folds_allmodels/8000samples/L6/fold1/checkpoint-682", + "epoch": 13.0, + "global_step": 806, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.13, + "learning_rate": 8.000000000000001e-07, + "loss": 0.6943, + "step": 8 + }, + { + "epoch": 0.26, + "learning_rate": 1.6000000000000001e-06, + "loss": 0.6946, + "step": 16 + }, + { + "epoch": 0.39, + "learning_rate": 2.4000000000000003e-06, + "loss": 0.6918, + "step": 24 + }, + { + "epoch": 0.52, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.692, + "step": 32 + }, + { + "epoch": 0.65, + "learning_rate": 4.000000000000001e-06, + "loss": 0.692, + "step": 40 + }, + { + "epoch": 0.77, + "learning_rate": 4.800000000000001e-06, + "loss": 0.6908, + "step": 48 + }, + { + "epoch": 0.9, + "learning_rate": 5.600000000000001e-06, + "loss": 0.689, + "step": 56 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.52625, + "eval_loss": 0.6924512386322021, + "eval_macro_f1": 0.35971077172590893, + "eval_runtime": 8.7777, + "eval_samples_per_second": 182.28, + "eval_steps_per_second": 1.823, + "step": 62 + }, + { + "epoch": 1.03, + "learning_rate": 6.4000000000000006e-06, + "loss": 0.688, + "step": 64 + }, + { + "epoch": 1.16, + "learning_rate": 7.2e-06, + "loss": 0.6924, + "step": 72 + }, + { + "epoch": 1.29, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6884, + "step": 80 + }, + { + "epoch": 1.42, + "learning_rate": 8.8e-06, + "loss": 0.6893, + "step": 88 + }, + { + "epoch": 1.55, + "learning_rate": 9.600000000000001e-06, + "loss": 0.697, + "step": 96 + }, + { + "epoch": 1.68, + "learning_rate": 1.04e-05, + "loss": 0.6921, + "step": 104 + }, + { + "epoch": 1.81, + "learning_rate": 1.1200000000000001e-05, + "loss": 0.6902, + "step": 112 + }, + { + "epoch": 1.94, + "learning_rate": 1.2e-05, + "loss": 0.6913, + "step": 120 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.5125, + "eval_loss": 0.6914636492729187, + "eval_macro_f1": 0.4902885261006582, + "eval_runtime": 8.4927, + "eval_samples_per_second": 188.398, + "eval_steps_per_second": 1.884, + "step": 124 + }, + { + "epoch": 2.06, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.6893, + "step": 128 + }, + { + "epoch": 2.19, + "learning_rate": 1.3600000000000002e-05, + "loss": 0.692, + "step": 136 + }, + { + "epoch": 2.32, + "learning_rate": 1.44e-05, + "loss": 0.6862, + "step": 144 + }, + { + "epoch": 2.45, + "learning_rate": 1.52e-05, + "loss": 0.6878, + "step": 152 + }, + { + "epoch": 2.58, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.6858, + "step": 160 + }, + { + "epoch": 2.71, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.6901, + "step": 168 + }, + { + "epoch": 2.84, + "learning_rate": 1.76e-05, + "loss": 0.6917, + "step": 176 + }, + { + "epoch": 2.97, + "learning_rate": 1.84e-05, + "loss": 0.688, + "step": 184 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.53625, + "eval_loss": 0.690558910369873, + "eval_macro_f1": 0.536231884057971, + "eval_runtime": 8.4686, + "eval_samples_per_second": 188.933, + "eval_steps_per_second": 1.889, + "step": 186 + }, + { + "epoch": 3.1, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.6899, + "step": 192 + }, + { + "epoch": 3.23, + "learning_rate": 2e-05, + "loss": 0.6849, + "step": 200 + }, + { + "epoch": 3.35, + "learning_rate": 2.08e-05, + "loss": 0.6887, + "step": 208 + }, + { + "epoch": 3.48, + "learning_rate": 2.16e-05, + "loss": 0.6837, + "step": 216 + }, + { + "epoch": 3.61, + "learning_rate": 2.2400000000000002e-05, + "loss": 0.6849, + "step": 224 + }, + { + "epoch": 3.74, + "learning_rate": 2.32e-05, + "loss": 0.6874, + "step": 232 + }, + { + "epoch": 3.87, + "learning_rate": 2.4e-05, + "loss": 0.6836, + "step": 240 + }, + { + "epoch": 4.0, + "learning_rate": 2.48e-05, + "loss": 0.6787, + "step": 248 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.541875, + "eval_loss": 0.6866192817687988, + "eval_macro_f1": 0.5252973064426886, + "eval_runtime": 8.3837, + "eval_samples_per_second": 190.846, + "eval_steps_per_second": 1.908, + "step": 248 + }, + { + "epoch": 4.13, + "learning_rate": 2.5600000000000002e-05, + "loss": 0.6802, + "step": 256 + }, + { + "epoch": 4.26, + "learning_rate": 2.64e-05, + "loss": 0.6777, + "step": 264 + }, + { + "epoch": 4.39, + "learning_rate": 2.7200000000000004e-05, + "loss": 0.6844, + "step": 272 + }, + { + "epoch": 4.52, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6689, + "step": 280 + }, + { + "epoch": 4.65, + "learning_rate": 2.88e-05, + "loss": 0.6804, + "step": 288 + }, + { + "epoch": 4.77, + "learning_rate": 2.96e-05, + "loss": 0.6715, + "step": 296 + }, + { + "epoch": 4.9, + "learning_rate": 3.04e-05, + "loss": 0.6648, + "step": 304 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.54375, + "eval_loss": 0.6859390139579773, + "eval_macro_f1": 0.5297573419050625, + "eval_runtime": 8.3537, + "eval_samples_per_second": 191.532, + "eval_steps_per_second": 1.915, + "step": 310 + }, + { + "epoch": 5.03, + "learning_rate": 3.12e-05, + "loss": 0.6724, + "step": 312 + }, + { + "epoch": 5.16, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6522, + "step": 320 + }, + { + "epoch": 5.29, + "learning_rate": 3.2800000000000004e-05, + "loss": 0.65, + "step": 328 + }, + { + "epoch": 5.42, + "learning_rate": 3.3600000000000004e-05, + "loss": 0.6495, + "step": 336 + }, + { + "epoch": 5.55, + "learning_rate": 3.4399999999999996e-05, + "loss": 0.642, + "step": 344 + }, + { + "epoch": 5.68, + "learning_rate": 3.52e-05, + "loss": 0.6428, + "step": 352 + }, + { + "epoch": 5.81, + "learning_rate": 3.6e-05, + "loss": 0.6436, + "step": 360 + }, + { + "epoch": 5.94, + "learning_rate": 3.68e-05, + "loss": 0.6361, + "step": 368 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.58375, + "eval_loss": 0.6975635290145874, + "eval_macro_f1": 0.5177267987486966, + "eval_runtime": 8.3485, + "eval_samples_per_second": 191.652, + "eval_steps_per_second": 1.917, + "step": 372 + }, + { + "epoch": 6.06, + "learning_rate": 3.76e-05, + "loss": 0.6478, + "step": 376 + }, + { + "epoch": 6.19, + "learning_rate": 3.8400000000000005e-05, + "loss": 0.607, + "step": 384 + }, + { + "epoch": 6.32, + "learning_rate": 3.9200000000000004e-05, + "loss": 0.609, + "step": 392 + }, + { + "epoch": 6.45, + "learning_rate": 4e-05, + "loss": 0.6034, + "step": 400 + }, + { + "epoch": 6.58, + "learning_rate": 4.08e-05, + "loss": 0.5976, + "step": 408 + }, + { + "epoch": 6.71, + "learning_rate": 4.16e-05, + "loss": 0.5644, + "step": 416 + }, + { + "epoch": 6.84, + "learning_rate": 4.24e-05, + "loss": 0.5453, + "step": 424 + }, + { + "epoch": 6.97, + "learning_rate": 4.32e-05, + "loss": 0.5347, + "step": 432 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.680625, + "eval_loss": 0.600900411605835, + "eval_macro_f1": 0.6711921977318858, + "eval_runtime": 8.3265, + "eval_samples_per_second": 192.158, + "eval_steps_per_second": 1.922, + "step": 434 + }, + { + "epoch": 7.1, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.4829, + "step": 440 + }, + { + "epoch": 7.23, + "learning_rate": 4.4800000000000005e-05, + "loss": 0.4778, + "step": 448 + }, + { + "epoch": 7.35, + "learning_rate": 4.5600000000000004e-05, + "loss": 0.5609, + "step": 456 + }, + { + "epoch": 7.48, + "learning_rate": 4.64e-05, + "loss": 0.5043, + "step": 464 + }, + { + "epoch": 7.61, + "learning_rate": 4.72e-05, + "loss": 0.4756, + "step": 472 + }, + { + "epoch": 7.74, + "learning_rate": 4.8e-05, + "loss": 0.4833, + "step": 480 + }, + { + "epoch": 7.87, + "learning_rate": 4.88e-05, + "loss": 0.4815, + "step": 488 + }, + { + "epoch": 8.0, + "learning_rate": 4.96e-05, + "loss": 0.4853, + "step": 496 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.736875, + "eval_loss": 0.5097616910934448, + "eval_macro_f1": 0.7356929486598462, + "eval_runtime": 8.1592, + "eval_samples_per_second": 196.098, + "eval_steps_per_second": 1.961, + "step": 496 + }, + { + "epoch": 8.13, + "learning_rate": 4.992307692307693e-05, + "loss": 0.4148, + "step": 504 + }, + { + "epoch": 8.26, + "learning_rate": 4.9769230769230775e-05, + "loss": 0.4216, + "step": 512 + }, + { + "epoch": 8.39, + "learning_rate": 4.961538461538462e-05, + "loss": 0.3825, + "step": 520 + }, + { + "epoch": 8.52, + "learning_rate": 4.9461538461538466e-05, + "loss": 0.4196, + "step": 528 + }, + { + "epoch": 8.65, + "learning_rate": 4.930769230769231e-05, + "loss": 0.4347, + "step": 536 + }, + { + "epoch": 8.77, + "learning_rate": 4.9153846153846157e-05, + "loss": 0.4395, + "step": 544 + }, + { + "epoch": 8.9, + "learning_rate": 4.9e-05, + "loss": 0.4236, + "step": 552 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.763125, + "eval_loss": 0.48266342282295227, + "eval_macro_f1": 0.7628239902908027, + "eval_runtime": 8.5359, + "eval_samples_per_second": 187.443, + "eval_steps_per_second": 1.874, + "step": 558 + }, + { + "epoch": 9.03, + "learning_rate": 4.884615384615385e-05, + "loss": 0.3554, + "step": 560 + }, + { + "epoch": 9.16, + "learning_rate": 4.8692307692307696e-05, + "loss": 0.335, + "step": 568 + }, + { + "epoch": 9.29, + "learning_rate": 4.853846153846154e-05, + "loss": 0.4026, + "step": 576 + }, + { + "epoch": 9.42, + "learning_rate": 4.8384615384615386e-05, + "loss": 0.3768, + "step": 584 + }, + { + "epoch": 9.55, + "learning_rate": 4.8230769230769235e-05, + "loss": 0.3631, + "step": 592 + }, + { + "epoch": 9.68, + "learning_rate": 4.8076923076923084e-05, + "loss": 0.3641, + "step": 600 + }, + { + "epoch": 9.81, + "learning_rate": 4.7923076923076926e-05, + "loss": 0.286, + "step": 608 + }, + { + "epoch": 9.94, + "learning_rate": 4.7769230769230774e-05, + "loss": 0.3274, + "step": 616 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.773125, + "eval_loss": 0.5151547193527222, + "eval_macro_f1": 0.7633613445378151, + "eval_runtime": 8.1235, + "eval_samples_per_second": 196.96, + "eval_steps_per_second": 1.97, + "step": 620 + }, + { + "epoch": 10.06, + "learning_rate": 4.7615384615384616e-05, + "loss": 0.325, + "step": 624 + }, + { + "epoch": 10.19, + "learning_rate": 4.7461538461538465e-05, + "loss": 0.2915, + "step": 632 + }, + { + "epoch": 10.32, + "learning_rate": 4.730769230769231e-05, + "loss": 0.278, + "step": 640 + }, + { + "epoch": 10.45, + "learning_rate": 4.7153846153846155e-05, + "loss": 0.2784, + "step": 648 + }, + { + "epoch": 10.58, + "learning_rate": 4.7e-05, + "loss": 0.2792, + "step": 656 + }, + { + "epoch": 10.71, + "learning_rate": 4.684615384615385e-05, + "loss": 0.276, + "step": 664 + }, + { + "epoch": 10.84, + "learning_rate": 4.6692307692307695e-05, + "loss": 0.3678, + "step": 672 + }, + { + "epoch": 10.97, + "learning_rate": 4.653846153846154e-05, + "loss": 0.2699, + "step": 680 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.7875, + "eval_loss": 0.4764532744884491, + "eval_macro_f1": 0.7853535353535352, + "eval_runtime": 8.3095, + "eval_samples_per_second": 192.55, + "eval_steps_per_second": 1.925, + "step": 682 + }, + { + "epoch": 11.1, + "learning_rate": 4.6384615384615385e-05, + "loss": 0.2474, + "step": 688 + }, + { + "epoch": 11.23, + "learning_rate": 4.6230769230769234e-05, + "loss": 0.2564, + "step": 696 + }, + { + "epoch": 11.35, + "learning_rate": 4.6076923076923076e-05, + "loss": 0.2491, + "step": 704 + }, + { + "epoch": 11.48, + "learning_rate": 4.5923076923076924e-05, + "loss": 0.2472, + "step": 712 + }, + { + "epoch": 11.61, + "learning_rate": 4.576923076923077e-05, + "loss": 0.2252, + "step": 720 + }, + { + "epoch": 11.74, + "learning_rate": 4.5615384615384615e-05, + "loss": 0.2357, + "step": 728 + }, + { + "epoch": 11.87, + "learning_rate": 4.5461538461538464e-05, + "loss": 0.3328, + "step": 736 + }, + { + "epoch": 12.0, + "learning_rate": 4.530769230769231e-05, + "loss": 0.3101, + "step": 744 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.796875, + "eval_loss": 0.4923836886882782, + "eval_macro_f1": 0.7945832390193878, + "eval_runtime": 8.5278, + "eval_samples_per_second": 187.621, + "eval_steps_per_second": 1.876, + "step": 744 + }, + { + "epoch": 12.13, + "learning_rate": 4.515384615384616e-05, + "loss": 0.2096, + "step": 752 + }, + { + "epoch": 12.26, + "learning_rate": 4.5e-05, + "loss": 0.2537, + "step": 760 + }, + { + "epoch": 12.39, + "learning_rate": 4.484615384615385e-05, + "loss": 0.2281, + "step": 768 + }, + { + "epoch": 12.52, + "learning_rate": 4.4692307692307693e-05, + "loss": 0.2218, + "step": 776 + }, + { + "epoch": 12.65, + "learning_rate": 4.453846153846154e-05, + "loss": 0.2124, + "step": 784 + }, + { + "epoch": 12.77, + "learning_rate": 4.4384615384615384e-05, + "loss": 0.2128, + "step": 792 + }, + { + "epoch": 12.9, + "learning_rate": 4.423076923076923e-05, + "loss": 0.2601, + "step": 800 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.79125, + "eval_loss": 0.48863792419433594, + "eval_macro_f1": 0.7889228746934958, + "eval_runtime": 8.5033, + "eval_samples_per_second": 188.161, + "eval_steps_per_second": 1.882, + "step": 806 + } + ], + "max_steps": 3100, + "num_train_epochs": 50, + "total_flos": 3301657490227200.0, + "trial_name": null, + "trial_params": null +} diff --git a/scaling_performance/8000/L6/fold1/config.json b/scaling_performance/8000/L6/fold1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..315d942e83135c8839ef6c5f099d4ed8632593c8 --- /dev/null +++ b/scaling_performance/8000/L6/fold1/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 6, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/8000/L6/fold1/eval_results.json b/scaling_performance/8000/L6/fold1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..083bb25f58554201319a949b58629d2e876a9137 --- /dev/null +++ b/scaling_performance/8000/L6/fold1/eval_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.7875, + "test_loss": 0.4764532744884491, + "test_macro_f1": 0.7853535353535352, + "test_runtime": 8.7208, + "test_samples_per_second": 183.47, + "test_steps_per_second": 1.835 +} \ No newline at end of file diff --git a/scaling_performance/8000/L6/fold3/all_results.json b/scaling_performance/8000/L6/fold3/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..605f92499785074ebb0606292374219f00cbe1c0 --- /dev/null +++ b/scaling_performance/8000/L6/fold3/all_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.759375, + "test_loss": 0.4913812279701233, + "test_macro_f1": 0.7589523191252161, + "test_runtime": 5.8537, + "test_samples_per_second": 273.33, + "test_steps_per_second": 2.733 +} \ No newline at end of file diff --git a/scaling_performance/8000/L6/fold3/checkpoint-682/config.json b/scaling_performance/8000/L6/fold3/checkpoint-682/config.json new file mode 100644 index 0000000000000000000000000000000000000000..315d942e83135c8839ef6c5f099d4ed8632593c8 --- /dev/null +++ b/scaling_performance/8000/L6/fold3/checkpoint-682/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 6, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/8000/L6/fold3/checkpoint-682/trainer_state.json b/scaling_performance/8000/L6/fold3/checkpoint-682/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d7e9adbe492820003ac15c9e6c9b33d6e9c567a6 --- /dev/null +++ b/scaling_performance/8000/L6/fold3/checkpoint-682/trainer_state.json @@ -0,0 +1,636 @@ +{ + "best_metric": 0.4913812279701233, + "best_model_checkpoint": "/vsphhome/fengguoqing/Geneformer/models/5folds_allmodels/8000samples/L6/fold3/checkpoint-558", + "epoch": 11.0, + "global_step": 682, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.13, + "learning_rate": 8.000000000000001e-07, + "loss": 0.6934, + "step": 8 + }, + { + "epoch": 0.26, + "learning_rate": 1.6000000000000001e-06, + "loss": 0.6975, + "step": 16 + }, + { + "epoch": 0.39, + "learning_rate": 2.4000000000000003e-06, + "loss": 0.6942, + "step": 24 + }, + { + "epoch": 0.52, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.6976, + "step": 32 + }, + { + "epoch": 0.65, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6939, + "step": 40 + }, + { + "epoch": 0.77, + "learning_rate": 4.800000000000001e-06, + "loss": 0.6918, + "step": 48 + }, + { + "epoch": 0.9, + "learning_rate": 5.600000000000001e-06, + "loss": 0.6929, + "step": 56 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.53375, + "eval_loss": 0.6917985677719116, + "eval_macro_f1": 0.40064795900946226, + "eval_runtime": 5.8817, + "eval_samples_per_second": 272.028, + "eval_steps_per_second": 2.72, + "step": 62 + }, + { + "epoch": 1.03, + "learning_rate": 6.4000000000000006e-06, + "loss": 0.6922, + "step": 64 + }, + { + "epoch": 1.16, + "learning_rate": 7.2e-06, + "loss": 0.6918, + "step": 72 + }, + { + "epoch": 1.29, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6925, + "step": 80 + }, + { + "epoch": 1.42, + "learning_rate": 8.8e-06, + "loss": 0.6954, + "step": 88 + }, + { + "epoch": 1.55, + "learning_rate": 9.600000000000001e-06, + "loss": 0.6904, + "step": 96 + }, + { + "epoch": 1.68, + "learning_rate": 1.04e-05, + "loss": 0.6933, + "step": 104 + }, + { + "epoch": 1.81, + "learning_rate": 1.1200000000000001e-05, + "loss": 0.6908, + "step": 112 + }, + { + "epoch": 1.94, + "learning_rate": 1.2e-05, + "loss": 0.6947, + "step": 120 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.53375, + "eval_loss": 0.6904119253158569, + "eval_macro_f1": 0.4145334612569872, + "eval_runtime": 5.6816, + "eval_samples_per_second": 281.611, + "eval_steps_per_second": 2.816, + "step": 124 + }, + { + "epoch": 2.06, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.6865, + "step": 128 + }, + { + "epoch": 2.19, + "learning_rate": 1.3600000000000002e-05, + "loss": 0.6885, + "step": 136 + }, + { + "epoch": 2.32, + "learning_rate": 1.44e-05, + "loss": 0.6889, + "step": 144 + }, + { + "epoch": 2.45, + "learning_rate": 1.52e-05, + "loss": 0.6877, + "step": 152 + }, + { + "epoch": 2.58, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.6914, + "step": 160 + }, + { + "epoch": 2.71, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.6906, + "step": 168 + }, + { + "epoch": 2.84, + "learning_rate": 1.76e-05, + "loss": 0.6917, + "step": 176 + }, + { + "epoch": 2.97, + "learning_rate": 1.84e-05, + "loss": 0.6912, + "step": 184 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.538125, + "eval_loss": 0.6887416243553162, + "eval_macro_f1": 0.406720853230145, + "eval_runtime": 6.1245, + "eval_samples_per_second": 261.247, + "eval_steps_per_second": 2.612, + "step": 186 + }, + { + "epoch": 3.1, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.6913, + "step": 192 + }, + { + "epoch": 3.23, + "learning_rate": 2e-05, + "loss": 0.6848, + "step": 200 + }, + { + "epoch": 3.35, + "learning_rate": 2.08e-05, + "loss": 0.686, + "step": 208 + }, + { + "epoch": 3.48, + "learning_rate": 2.16e-05, + "loss": 0.6877, + "step": 216 + }, + { + "epoch": 3.61, + "learning_rate": 2.2400000000000002e-05, + "loss": 0.6825, + "step": 224 + }, + { + "epoch": 3.74, + "learning_rate": 2.32e-05, + "loss": 0.6838, + "step": 232 + }, + { + "epoch": 3.87, + "learning_rate": 2.4e-05, + "loss": 0.6859, + "step": 240 + }, + { + "epoch": 4.0, + "learning_rate": 2.48e-05, + "loss": 0.679, + "step": 248 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.5575, + "eval_loss": 0.6862605214118958, + "eval_macro_f1": 0.5511708070842744, + "eval_runtime": 5.7628, + "eval_samples_per_second": 277.645, + "eval_steps_per_second": 2.776, + "step": 248 + }, + { + "epoch": 4.13, + "learning_rate": 2.5600000000000002e-05, + "loss": 0.6796, + "step": 256 + }, + { + "epoch": 4.26, + "learning_rate": 2.64e-05, + "loss": 0.6859, + "step": 264 + }, + { + "epoch": 4.39, + "learning_rate": 2.7200000000000004e-05, + "loss": 0.6743, + "step": 272 + }, + { + "epoch": 4.52, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6746, + "step": 280 + }, + { + "epoch": 4.65, + "learning_rate": 2.88e-05, + "loss": 0.6728, + "step": 288 + }, + { + "epoch": 4.77, + "learning_rate": 2.96e-05, + "loss": 0.6844, + "step": 296 + }, + { + "epoch": 4.9, + "learning_rate": 3.04e-05, + "loss": 0.6679, + "step": 304 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.559375, + "eval_loss": 0.6813169717788696, + "eval_macro_f1": 0.4886614762179988, + "eval_runtime": 5.7359, + "eval_samples_per_second": 278.944, + "eval_steps_per_second": 2.789, + "step": 310 + }, + { + "epoch": 5.03, + "learning_rate": 3.12e-05, + "loss": 0.6704, + "step": 312 + }, + { + "epoch": 5.16, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6663, + "step": 320 + }, + { + "epoch": 5.29, + "learning_rate": 3.2800000000000004e-05, + "loss": 0.6541, + "step": 328 + }, + { + "epoch": 5.42, + "learning_rate": 3.3600000000000004e-05, + "loss": 0.6605, + "step": 336 + }, + { + "epoch": 5.55, + "learning_rate": 3.4399999999999996e-05, + "loss": 0.6446, + "step": 344 + }, + { + "epoch": 5.68, + "learning_rate": 3.52e-05, + "loss": 0.6494, + "step": 352 + }, + { + "epoch": 5.81, + "learning_rate": 3.6e-05, + "loss": 0.6399, + "step": 360 + }, + { + "epoch": 5.94, + "learning_rate": 3.68e-05, + "loss": 0.6282, + "step": 368 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.600625, + "eval_loss": 0.6557914018630981, + "eval_macro_f1": 0.5727036898189883, + "eval_runtime": 5.6944, + "eval_samples_per_second": 280.979, + "eval_steps_per_second": 2.81, + "step": 372 + }, + { + "epoch": 6.06, + "learning_rate": 3.76e-05, + "loss": 0.6262, + "step": 376 + }, + { + "epoch": 6.19, + "learning_rate": 3.8400000000000005e-05, + "loss": 0.6088, + "step": 384 + }, + { + "epoch": 6.32, + "learning_rate": 3.9200000000000004e-05, + "loss": 0.6064, + "step": 392 + }, + { + "epoch": 6.45, + "learning_rate": 4e-05, + "loss": 0.5791, + "step": 400 + }, + { + "epoch": 6.58, + "learning_rate": 4.08e-05, + "loss": 0.5639, + "step": 408 + }, + { + "epoch": 6.71, + "learning_rate": 4.16e-05, + "loss": 0.5972, + "step": 416 + }, + { + "epoch": 6.84, + "learning_rate": 4.24e-05, + "loss": 0.5458, + "step": 424 + }, + { + "epoch": 6.97, + "learning_rate": 4.32e-05, + "loss": 0.5355, + "step": 432 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.703125, + "eval_loss": 0.5857735872268677, + "eval_macro_f1": 0.7031109673543163, + "eval_runtime": 5.8389, + "eval_samples_per_second": 274.024, + "eval_steps_per_second": 2.74, + "step": 434 + }, + { + "epoch": 7.1, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.5233, + "step": 440 + }, + { + "epoch": 7.23, + "learning_rate": 4.4800000000000005e-05, + "loss": 0.4984, + "step": 448 + }, + { + "epoch": 7.35, + "learning_rate": 4.5600000000000004e-05, + "loss": 0.4636, + "step": 456 + }, + { + "epoch": 7.48, + "learning_rate": 4.64e-05, + "loss": 0.5201, + "step": 464 + }, + { + "epoch": 7.61, + "learning_rate": 4.72e-05, + "loss": 0.5419, + "step": 472 + }, + { + "epoch": 7.74, + "learning_rate": 4.8e-05, + "loss": 0.5032, + "step": 480 + }, + { + "epoch": 7.87, + "learning_rate": 4.88e-05, + "loss": 0.4703, + "step": 488 + }, + { + "epoch": 8.0, + "learning_rate": 4.96e-05, + "loss": 0.4611, + "step": 496 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.7275, + "eval_loss": 0.5498225688934326, + "eval_macro_f1": 0.7273619770008567, + "eval_runtime": 5.7393, + "eval_samples_per_second": 278.778, + "eval_steps_per_second": 2.788, + "step": 496 + }, + { + "epoch": 8.13, + "learning_rate": 4.992307692307693e-05, + "loss": 0.4274, + "step": 504 + }, + { + "epoch": 8.26, + "learning_rate": 4.9769230769230775e-05, + "loss": 0.4369, + "step": 512 + }, + { + "epoch": 8.39, + "learning_rate": 4.961538461538462e-05, + "loss": 0.4427, + "step": 520 + }, + { + "epoch": 8.52, + "learning_rate": 4.9461538461538466e-05, + "loss": 0.416, + "step": 528 + }, + { + "epoch": 8.65, + "learning_rate": 4.930769230769231e-05, + "loss": 0.3764, + "step": 536 + }, + { + "epoch": 8.77, + "learning_rate": 4.9153846153846157e-05, + "loss": 0.3652, + "step": 544 + }, + { + "epoch": 8.9, + "learning_rate": 4.9e-05, + "loss": 0.3738, + "step": 552 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.759375, + "eval_loss": 0.4913812279701233, + "eval_macro_f1": 0.7589523191252161, + "eval_runtime": 5.797, + "eval_samples_per_second": 276.005, + "eval_steps_per_second": 2.76, + "step": 558 + }, + { + "epoch": 9.03, + "learning_rate": 4.884615384615385e-05, + "loss": 0.3838, + "step": 560 + }, + { + "epoch": 9.16, + "learning_rate": 4.8692307692307696e-05, + "loss": 0.3242, + "step": 568 + }, + { + "epoch": 9.29, + "learning_rate": 4.853846153846154e-05, + "loss": 0.3228, + "step": 576 + }, + { + "epoch": 9.42, + "learning_rate": 4.8384615384615386e-05, + "loss": 0.3785, + "step": 584 + }, + { + "epoch": 9.55, + "learning_rate": 4.8230769230769235e-05, + "loss": 0.3398, + "step": 592 + }, + { + "epoch": 9.68, + "learning_rate": 4.8076923076923084e-05, + "loss": 0.3217, + "step": 600 + }, + { + "epoch": 9.81, + "learning_rate": 4.7923076923076926e-05, + "loss": 0.3501, + "step": 608 + }, + { + "epoch": 9.94, + "learning_rate": 4.7769230769230774e-05, + "loss": 0.289, + "step": 616 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.755625, + "eval_loss": 0.5262109041213989, + "eval_macro_f1": 0.754796576402071, + "eval_runtime": 5.6035, + "eval_samples_per_second": 285.535, + "eval_steps_per_second": 2.855, + "step": 620 + }, + { + "epoch": 10.06, + "learning_rate": 4.7615384615384616e-05, + "loss": 0.3175, + "step": 624 + }, + { + "epoch": 10.19, + "learning_rate": 4.7461538461538465e-05, + "loss": 0.2702, + "step": 632 + }, + { + "epoch": 10.32, + "learning_rate": 4.730769230769231e-05, + "loss": 0.2641, + "step": 640 + }, + { + "epoch": 10.45, + "learning_rate": 4.7153846153846155e-05, + "loss": 0.2163, + "step": 648 + }, + { + "epoch": 10.58, + "learning_rate": 4.7e-05, + "loss": 0.2785, + "step": 656 + }, + { + "epoch": 10.71, + "learning_rate": 4.684615384615385e-05, + "loss": 0.3486, + "step": 664 + }, + { + "epoch": 10.84, + "learning_rate": 4.6692307692307695e-05, + "loss": 0.263, + "step": 672 + }, + { + "epoch": 10.97, + "learning_rate": 4.653846153846154e-05, + "loss": 0.2306, + "step": 680 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.783125, + "eval_loss": 0.5292184352874756, + "eval_macro_f1": 0.7768164914603767, + "eval_runtime": 5.6415, + "eval_samples_per_second": 283.612, + "eval_steps_per_second": 2.836, + "step": 682 + } + ], + "max_steps": 3100, + "num_train_epochs": 50, + "total_flos": 2793710184038400.0, + "trial_name": null, + "trial_params": null +} diff --git a/scaling_performance/8000/L6/fold3/config.json b/scaling_performance/8000/L6/fold3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..315d942e83135c8839ef6c5f099d4ed8632593c8 --- /dev/null +++ b/scaling_performance/8000/L6/fold3/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 6, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/8000/L6/fold3/eval_results.json b/scaling_performance/8000/L6/fold3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..605f92499785074ebb0606292374219f00cbe1c0 --- /dev/null +++ b/scaling_performance/8000/L6/fold3/eval_results.json @@ -0,0 +1,8 @@ +{ + "test_accuracy": 0.759375, + "test_loss": 0.4913812279701233, + "test_macro_f1": 0.7589523191252161, + "test_runtime": 5.8537, + "test_samples_per_second": 273.33, + "test_steps_per_second": 2.733 +} \ No newline at end of file diff --git a/scaling_performance/8000/L6/fold4/checkpoint-682/config.json b/scaling_performance/8000/L6/fold4/checkpoint-682/config.json new file mode 100644 index 0000000000000000000000000000000000000000..315d942e83135c8839ef6c5f099d4ed8632593c8 --- /dev/null +++ b/scaling_performance/8000/L6/fold4/checkpoint-682/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.02, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.02, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 512, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 2048, + "model_type": "bert", + "num_attention_heads": 4, + "num_hidden_layers": 6, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.28.0", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 30522 +} diff --git a/scaling_performance/8000/L6/fold4/checkpoint-682/trainer_state.json b/scaling_performance/8000/L6/fold4/checkpoint-682/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8cd0676830e308a68260f4ced6cd25582af5d840 --- /dev/null +++ b/scaling_performance/8000/L6/fold4/checkpoint-682/trainer_state.json @@ -0,0 +1,636 @@ +{ + "best_metric": 0.4765923321247101, + "best_model_checkpoint": "/vsphhome/fengguoqing/Geneformer/models/5folds_allmodels/8000samples/L6/fold4/checkpoint-558", + "epoch": 11.0, + "global_step": 682, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.13, + "learning_rate": 8.000000000000001e-07, + "loss": 0.6922, + "step": 8 + }, + { + "epoch": 0.26, + "learning_rate": 1.6000000000000001e-06, + "loss": 0.6914, + "step": 16 + }, + { + "epoch": 0.39, + "learning_rate": 2.4000000000000003e-06, + "loss": 0.6928, + "step": 24 + }, + { + "epoch": 0.52, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.6916, + "step": 32 + }, + { + "epoch": 0.65, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6955, + "step": 40 + }, + { + "epoch": 0.77, + "learning_rate": 4.800000000000001e-06, + "loss": 0.6921, + "step": 48 + }, + { + "epoch": 0.9, + "learning_rate": 5.600000000000001e-06, + "loss": 0.6932, + "step": 56 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.536875, + "eval_loss": 0.6912399530410767, + "eval_macro_f1": 0.4291698263098978, + "eval_runtime": 5.7778, + "eval_samples_per_second": 276.921, + "eval_steps_per_second": 2.769, + "step": 62 + }, + { + "epoch": 1.03, + "learning_rate": 6.4000000000000006e-06, + "loss": 0.6919, + "step": 64 + }, + { + "epoch": 1.16, + "learning_rate": 7.2e-06, + "loss": 0.6919, + "step": 72 + }, + { + "epoch": 1.29, + "learning_rate": 8.000000000000001e-06, + "loss": 0.691, + "step": 80 + }, + { + "epoch": 1.42, + "learning_rate": 8.8e-06, + "loss": 0.6893, + "step": 88 + }, + { + "epoch": 1.55, + "learning_rate": 9.600000000000001e-06, + "loss": 0.6904, + "step": 96 + }, + { + "epoch": 1.68, + "learning_rate": 1.04e-05, + "loss": 0.6918, + "step": 104 + }, + { + "epoch": 1.81, + "learning_rate": 1.1200000000000001e-05, + "loss": 0.6872, + "step": 112 + }, + { + "epoch": 1.94, + "learning_rate": 1.2e-05, + "loss": 0.6958, + "step": 120 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.5375, + "eval_loss": 0.6898691058158875, + "eval_macro_f1": 0.4175876717476895, + "eval_runtime": 5.6561, + "eval_samples_per_second": 282.882, + "eval_steps_per_second": 2.829, + "step": 124 + }, + { + "epoch": 2.06, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.6897, + "step": 128 + }, + { + "epoch": 2.19, + "learning_rate": 1.3600000000000002e-05, + "loss": 0.6889, + "step": 136 + }, + { + "epoch": 2.32, + "learning_rate": 1.44e-05, + "loss": 0.693, + "step": 144 + }, + { + "epoch": 2.45, + "learning_rate": 1.52e-05, + "loss": 0.6888, + "step": 152 + }, + { + "epoch": 2.58, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.6887, + "step": 160 + }, + { + "epoch": 2.71, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.6898, + "step": 168 + }, + { + "epoch": 2.84, + "learning_rate": 1.76e-05, + "loss": 0.6896, + "step": 176 + }, + { + "epoch": 2.97, + "learning_rate": 1.84e-05, + "loss": 0.6877, + "step": 184 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.544375, + "eval_loss": 0.6882508993148804, + "eval_macro_f1": 0.4689470135326542, + "eval_runtime": 6.1577, + "eval_samples_per_second": 259.837, + "eval_steps_per_second": 2.598, + "step": 186 + }, + { + "epoch": 3.1, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.6863, + "step": 192 + }, + { + "epoch": 3.23, + "learning_rate": 2e-05, + "loss": 0.6867, + "step": 200 + }, + { + "epoch": 3.35, + "learning_rate": 2.08e-05, + "loss": 0.6841, + "step": 208 + }, + { + "epoch": 3.48, + "learning_rate": 2.16e-05, + "loss": 0.6847, + "step": 216 + }, + { + "epoch": 3.61, + "learning_rate": 2.2400000000000002e-05, + "loss": 0.6873, + "step": 224 + }, + { + "epoch": 3.74, + "learning_rate": 2.32e-05, + "loss": 0.6843, + "step": 232 + }, + { + "epoch": 3.87, + "learning_rate": 2.4e-05, + "loss": 0.6851, + "step": 240 + }, + { + "epoch": 4.0, + "learning_rate": 2.48e-05, + "loss": 0.6818, + "step": 248 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.54875, + "eval_loss": 0.6853930950164795, + "eval_macro_f1": 0.4892753120849699, + "eval_runtime": 5.628, + "eval_samples_per_second": 284.295, + "eval_steps_per_second": 2.843, + "step": 248 + }, + { + "epoch": 4.13, + "learning_rate": 2.5600000000000002e-05, + "loss": 0.6805, + "step": 256 + }, + { + "epoch": 4.26, + "learning_rate": 2.64e-05, + "loss": 0.6778, + "step": 264 + }, + { + "epoch": 4.39, + "learning_rate": 2.7200000000000004e-05, + "loss": 0.6742, + "step": 272 + }, + { + "epoch": 4.52, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6733, + "step": 280 + }, + { + "epoch": 4.65, + "learning_rate": 2.88e-05, + "loss": 0.6747, + "step": 288 + }, + { + "epoch": 4.77, + "learning_rate": 2.96e-05, + "loss": 0.6772, + "step": 296 + }, + { + "epoch": 4.9, + "learning_rate": 3.04e-05, + "loss": 0.676, + "step": 304 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.505, + "eval_loss": 0.7046102285385132, + "eval_macro_f1": 0.44295081333049646, + "eval_runtime": 5.7971, + "eval_samples_per_second": 276.0, + "eval_steps_per_second": 2.76, + "step": 310 + }, + { + "epoch": 5.03, + "learning_rate": 3.12e-05, + "loss": 0.6738, + "step": 312 + }, + { + "epoch": 5.16, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6644, + "step": 320 + }, + { + "epoch": 5.29, + "learning_rate": 3.2800000000000004e-05, + "loss": 0.6634, + "step": 328 + }, + { + "epoch": 5.42, + "learning_rate": 3.3600000000000004e-05, + "loss": 0.6521, + "step": 336 + }, + { + "epoch": 5.55, + "learning_rate": 3.4399999999999996e-05, + "loss": 0.6552, + "step": 344 + }, + { + "epoch": 5.68, + "learning_rate": 3.52e-05, + "loss": 0.6489, + "step": 352 + }, + { + "epoch": 5.81, + "learning_rate": 3.6e-05, + "loss": 0.6551, + "step": 360 + }, + { + "epoch": 5.94, + "learning_rate": 3.68e-05, + "loss": 0.6438, + "step": 368 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.5875, + "eval_loss": 0.6736068725585938, + "eval_macro_f1": 0.5832274299498452, + "eval_runtime": 5.6924, + "eval_samples_per_second": 281.078, + "eval_steps_per_second": 2.811, + "step": 372 + }, + { + "epoch": 6.06, + "learning_rate": 3.76e-05, + "loss": 0.652, + "step": 376 + }, + { + "epoch": 6.19, + "learning_rate": 3.8400000000000005e-05, + "loss": 0.6048, + "step": 384 + }, + { + "epoch": 6.32, + "learning_rate": 3.9200000000000004e-05, + "loss": 0.6292, + "step": 392 + }, + { + "epoch": 6.45, + "learning_rate": 4e-05, + "loss": 0.5919, + "step": 400 + }, + { + "epoch": 6.58, + "learning_rate": 4.08e-05, + "loss": 0.6041, + "step": 408 + }, + { + "epoch": 6.71, + "learning_rate": 4.16e-05, + "loss": 0.5826, + "step": 416 + }, + { + "epoch": 6.84, + "learning_rate": 4.24e-05, + "loss": 0.5706, + "step": 424 + }, + { + "epoch": 6.97, + "learning_rate": 4.32e-05, + "loss": 0.5847, + "step": 432 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.651875, + "eval_loss": 0.6167514324188232, + "eval_macro_f1": 0.6353099517579558, + "eval_runtime": 5.6393, + "eval_samples_per_second": 283.723, + "eval_steps_per_second": 2.837, + "step": 434 + }, + { + "epoch": 7.1, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.5375, + "step": 440 + }, + { + "epoch": 7.23, + "learning_rate": 4.4800000000000005e-05, + "loss": 0.5206, + "step": 448 + }, + { + "epoch": 7.35, + "learning_rate": 4.5600000000000004e-05, + "loss": 0.5205, + "step": 456 + }, + { + "epoch": 7.48, + "learning_rate": 4.64e-05, + "loss": 0.4902, + "step": 464 + }, + { + "epoch": 7.61, + "learning_rate": 4.72e-05, + "loss": 0.5022, + "step": 472 + }, + { + "epoch": 7.74, + "learning_rate": 4.8e-05, + "loss": 0.4609, + "step": 480 + }, + { + "epoch": 7.87, + "learning_rate": 4.88e-05, + "loss": 0.4861, + "step": 488 + }, + { + "epoch": 8.0, + "learning_rate": 4.96e-05, + "loss": 0.4652, + "step": 496 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.744375, + "eval_loss": 0.5122479200363159, + "eval_macro_f1": 0.7404401332544817, + "eval_runtime": 5.7402, + "eval_samples_per_second": 278.736, + "eval_steps_per_second": 2.787, + "step": 496 + }, + { + "epoch": 8.13, + "learning_rate": 4.992307692307693e-05, + "loss": 0.3889, + "step": 504 + }, + { + "epoch": 8.26, + "learning_rate": 4.9769230769230775e-05, + "loss": 0.4058, + "step": 512 + }, + { + "epoch": 8.39, + "learning_rate": 4.961538461538462e-05, + "loss": 0.4642, + "step": 520 + }, + { + "epoch": 8.52, + "learning_rate": 4.9461538461538466e-05, + "loss": 0.3708, + "step": 528 + }, + { + "epoch": 8.65, + "learning_rate": 4.930769230769231e-05, + "loss": 0.4282, + "step": 536 + }, + { + "epoch": 8.77, + "learning_rate": 4.9153846153846157e-05, + "loss": 0.4161, + "step": 544 + }, + { + "epoch": 8.9, + "learning_rate": 4.9e-05, + "loss": 0.3991, + "step": 552 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.75625, + "eval_loss": 0.4765923321247101, + "eval_macro_f1": 0.7560483587215057, + "eval_runtime": 5.6547, + "eval_samples_per_second": 282.953, + "eval_steps_per_second": 2.83, + "step": 558 + }, + { + "epoch": 9.03, + "learning_rate": 4.884615384615385e-05, + "loss": 0.3795, + "step": 560 + }, + { + "epoch": 9.16, + "learning_rate": 4.8692307692307696e-05, + "loss": 0.3325, + "step": 568 + }, + { + "epoch": 9.29, + "learning_rate": 4.853846153846154e-05, + "loss": 0.3256, + "step": 576 + }, + { + "epoch": 9.42, + "learning_rate": 4.8384615384615386e-05, + "loss": 0.3337, + "step": 584 + }, + { + "epoch": 9.55, + "learning_rate": 4.8230769230769235e-05, + "loss": 0.3274, + "step": 592 + }, + { + "epoch": 9.68, + "learning_rate": 4.8076923076923084e-05, + "loss": 0.3205, + "step": 600 + }, + { + "epoch": 9.81, + "learning_rate": 4.7923076923076926e-05, + "loss": 0.2947, + "step": 608 + }, + { + "epoch": 9.94, + "learning_rate": 4.7769230769230774e-05, + "loss": 0.3874, + "step": 616 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.72125, + "eval_loss": 0.6052362322807312, + "eval_macro_f1": 0.7164581426101109, + "eval_runtime": 5.9135, + "eval_samples_per_second": 270.565, + "eval_steps_per_second": 2.706, + "step": 620 + }, + { + "epoch": 10.06, + "learning_rate": 4.7615384615384616e-05, + "loss": 0.3583, + "step": 624 + }, + { + "epoch": 10.19, + "learning_rate": 4.7461538461538465e-05, + "loss": 0.2576, + "step": 632 + }, + { + "epoch": 10.32, + "learning_rate": 4.730769230769231e-05, + "loss": 0.2726, + "step": 640 + }, + { + "epoch": 10.45, + "learning_rate": 4.7153846153846155e-05, + "loss": 0.3178, + "step": 648 + }, + { + "epoch": 10.58, + "learning_rate": 4.7e-05, + "loss": 0.3419, + "step": 656 + }, + { + "epoch": 10.71, + "learning_rate": 4.684615384615385e-05, + "loss": 0.2767, + "step": 664 + }, + { + "epoch": 10.84, + "learning_rate": 4.6692307692307695e-05, + "loss": 0.2723, + "step": 672 + }, + { + "epoch": 10.97, + "learning_rate": 4.653846153846154e-05, + "loss": 0.2386, + "step": 680 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.7775, + "eval_loss": 0.5208433866500854, + "eval_macro_f1": 0.7774996523432067, + "eval_runtime": 5.798, + "eval_samples_per_second": 275.956, + "eval_steps_per_second": 2.76, + "step": 682 + } + ], + "max_steps": 3100, + "num_train_epochs": 50, + "total_flos": 2793710184038400.0, + "trial_name": null, + "trial_params": null +}