diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/README.md b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/adapter_config.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5f407fbf6673901d6bbc4be088ec92bf34bb74 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/adapter_config.json @@ -0,0 +1,28 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "facebook/esm2_t33_650M_UR50D", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 1, + "lora_dropout": 0.5, + "modules_to_save": null, + "peft_type": "LORA", + "r": 2, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query", + "key", + "value", + "EsmSelfOutput.dense", + "EsmIntermediate.dense", + "EsmOutput.dense", + "classifier" + ], + "task_type": "TOKEN_CLS" +} \ No newline at end of file diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/adapter_model.bin b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b55b8589bce7a88e82b5734f90e333642abfac74 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4ed50a68c549fed28f1e2f2bc5b0c8a52fabaa8186ad7f5394582d716aee2ba +size 2133117 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/added_tokens.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..6527dabc1b083db2af29d5fad6902c18bf831b21 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/added_tokens.json @@ -0,0 +1,7 @@ +{ + "": 0, + "": 2, + "": 32, + "": 1, + "": 3 +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/optimizer.pt b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5a97ab363daeaf720dd159ed17c0c374d398f6f --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa2974e9f434d3dc3a520d6f9eaa69474d0b34f5f18d4d7d13c9517f1715f00e +size 4215045 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/rng_state.pth b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..23dfa768856d4606074a7bd6ef1da6118b71e537 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b035daa3bac4ead994af5800c9d0816bbd69afcbb567b553cd55a2e6ace5c861 +size 14575 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/scheduler.pt b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2edb10e6fbc6cf3fed6b5e09b0cd1fb42445df1f --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ef2c8960e6680e2c188e8caf9b0a9a659698c51b86a9fe73ed60cafe81dd580 +size 627 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/special_tokens_map.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ba0f9b53dbbf27934f7555e5d31e37bdea9317f1 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "cls_token": "", + "eos_token": "", + "mask_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/tokenizer_config.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ed0725465a1b7a8e4469cd162fbea147da56a1c2 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "tokenizer_class": "EsmTokenizer", + "tokenizer_file": null, + "unk_token": "" +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/trainer_state.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..40f4dc6ae3a52b02b6745e850dfa4e99886a1d30 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/trainer_state.json @@ -0,0 +1,619 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.19692792437967704, + "eval_steps": 500, + "global_step": 10000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.00017015520980041642, + "loss": 0.5575, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017015035788629153, + "loss": 0.3065, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017014224985982174, + "loss": 0.2229, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001701308860313402, + "loss": 0.1812, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017011626683579524, + "loss": 0.1572, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017009839283273364, + "loss": 0.1335, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017007726470627936, + "loss": 0.1193, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 0.00017005288326510734, + "loss": 0.1075, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001700252494424124, + "loss": 0.0986, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016999436429587366, + "loss": 0.0952, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016996022900761407, + "loss": 0.0865, + "step": 1100 + }, + { + "epoch": 0.02, + "learning_rate": 0.000169922844884155, + "loss": 0.0816, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016988221335636648, + "loss": 0.0775, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016983833597941224, + "loss": 0.0744, + "step": 1400 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016979121443269025, + "loss": 0.0694, + "step": 1500 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001697408505197684, + "loss": 0.0641, + "step": 1600 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016968724616831557, + "loss": 0.0629, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001696304034300278, + "loss": 0.0696, + "step": 1800 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016957032448054968, + "loss": 0.0591, + "step": 1900 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001695070116193912, + "loss": 0.06, + "step": 2000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001694404672698396, + "loss": 0.0558, + "step": 2100 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016937069397886687, + "loss": 0.0529, + "step": 2200 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016929769441703196, + "loss": 0.0536, + "step": 2300 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016922147137837868, + "loss": 0.0537, + "step": 2400 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016914202778032893, + "loss": 0.0508, + "step": 2500 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001690593666635707, + "loss": 0.0524, + "step": 2600 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016897349119194207, + "loss": 0.0494, + "step": 2700 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016888440465230977, + "loss": 0.047, + "step": 2800 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016879211045444354, + "loss": 0.0502, + "step": 2900 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016869661213088575, + "loss": 0.046, + "step": 3000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016859791333681583, + "loss": 0.0419, + "step": 3100 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001684960178499108, + "loss": 0.0456, + "step": 3200 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016839092957020028, + "loss": 0.0432, + "step": 3300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016828265251991761, + "loss": 0.0423, + "step": 3400 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016817119084334555, + "loss": 0.0407, + "step": 3500 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016805654880665776, + "loss": 0.0409, + "step": 3600 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016793873079775577, + "loss": 0.0397, + "step": 3700 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016781774132610059, + "loss": 0.0419, + "step": 3800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001676935850225405, + "loss": 0.0399, + "step": 3900 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016756626663913358, + "loss": 0.0389, + "step": 4000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016743579104896593, + "loss": 0.0385, + "step": 4100 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016730216324596504, + "loss": 0.0357, + "step": 4200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001671653883447088, + "loss": 0.0373, + "step": 4300 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016702547158022968, + "loss": 0.0375, + "step": 4400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001668824183078143, + "loss": 0.0393, + "step": 4500 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016673623400279849, + "loss": 0.0358, + "step": 4600 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016658692426035782, + "loss": 0.0334, + "step": 4700 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016643449479529325, + "loss": 0.035, + "step": 4800 + }, + { + "epoch": 0.1, + "learning_rate": 0.00016627895144181258, + "loss": 0.0343, + "step": 4900 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001661203001533071, + "loss": 0.0335, + "step": 5000 + }, + { + "epoch": 0.1, + "learning_rate": 0.00016595854700212362, + "loss": 0.0353, + "step": 5100 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001657936981793322, + "loss": 0.0331, + "step": 5200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001656257599944891, + "loss": 0.0387, + "step": 5300 + }, + { + "epoch": 0.11, + "learning_rate": 0.00016545473887539532, + "loss": 0.0327, + "step": 5400 + }, + { + "epoch": 0.11, + "learning_rate": 0.00016528064136785056, + "loss": 0.0316, + "step": 5500 + }, + { + "epoch": 0.11, + "learning_rate": 0.00016510347413540262, + "loss": 0.0319, + "step": 5600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001649232439590925, + "loss": 0.0314, + "step": 5700 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001647399577371947, + "loss": 0.0321, + "step": 5800 + }, + { + "epoch": 0.12, + "learning_rate": 0.00016455362248495338, + "loss": 0.0333, + "step": 5900 + }, + { + "epoch": 0.12, + "learning_rate": 0.00016436424533431362, + "loss": 0.0319, + "step": 6000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001641718335336486, + "loss": 0.0315, + "step": 6100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001639763944474821, + "loss": 0.0311, + "step": 6200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001637779355562068, + "loss": 0.031, + "step": 6300 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016357646445579763, + "loss": 0.0299, + "step": 6400 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016337198885752133, + "loss": 0.0317, + "step": 6500 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016316451658764122, + "loss": 0.0302, + "step": 6600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001629540555871176, + "loss": 0.0295, + "step": 6700 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016274061391130388, + "loss": 0.03, + "step": 6800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001625241997296382, + "loss": 0.0292, + "step": 6900 + }, + { + "epoch": 0.14, + "learning_rate": 0.00016230482132533077, + "loss": 0.0289, + "step": 7000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00016208472504084003, + "loss": 0.0318, + "step": 7100 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001618594729250462, + "loss": 0.0301, + "step": 7200 + }, + { + "epoch": 0.14, + "learning_rate": 0.00016163128202889828, + "loss": 0.0295, + "step": 7300 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016140016108635798, + "loss": 0.029, + "step": 7400 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016116611894353386, + "loss": 0.0291, + "step": 7500 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016092916455834295, + "loss": 0.0311, + "step": 7600 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016068930700016766, + "loss": 0.0285, + "step": 7700 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016044655544950889, + "loss": 0.0287, + "step": 7800 + }, + { + "epoch": 0.16, + "learning_rate": 0.00016020091919763445, + "loss": 0.0293, + "step": 7900 + }, + { + "epoch": 0.16, + "learning_rate": 0.00015995240764622357, + "loss": 0.0259, + "step": 8000 + }, + { + "epoch": 0.16, + "learning_rate": 0.000159701030307007, + "loss": 0.0293, + "step": 8100 + }, + { + "epoch": 0.16, + "learning_rate": 0.00015944679680140295, + "loss": 0.0277, + "step": 8200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001591897168601488, + "loss": 0.0304, + "step": 8300 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015892980032292876, + "loss": 0.026, + "step": 8400 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015866705713799714, + "loss": 0.0294, + "step": 8500 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015840149736179762, + "loss": 0.0321, + "step": 8600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001581331311585785, + "loss": 0.0279, + "step": 8700 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015786196880000325, + "loss": 0.0277, + "step": 8800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001575880206647579, + "loss": 0.0268, + "step": 8900 + }, + { + "epoch": 0.18, + "learning_rate": 0.00015731129723815343, + "loss": 0.0281, + "step": 9000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00015703180911172453, + "loss": 0.028, + "step": 9100 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001567495669828243, + "loss": 0.026, + "step": 9200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001564645816542146, + "loss": 0.0256, + "step": 9300 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001561768640336529, + "loss": 0.027, + "step": 9400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001558864251334745, + "loss": 0.0249, + "step": 9500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015559327607017119, + "loss": 0.0256, + "step": 9600 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015529742806396564, + "loss": 0.0251, + "step": 9700 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015499889243838211, + "loss": 0.0257, + "step": 9800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015469768061981295, + "loss": 0.0264, + "step": 9900 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015439685605389073, + "loss": 0.0247, + "step": 10000 + } + ], + "logging_steps": 100, + "max_steps": 50780, + "num_train_epochs": 1, + "save_steps": 10000, + "total_flos": 3.992837363712e+18, + "trial_name": null, + "trial_params": null +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/training_args.bin b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..100b6c56d3e6f8a91a89702971e1e66650fb8075 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d17a2c3c8a65881aa45bfd2997dfe652a61cb20d8b49594dc95bff2d7669f6f +size 4155 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/vocab.txt b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b946952cc35537226f07fd70957ee2f848880d2 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-10000/vocab.txt @@ -0,0 +1,33 @@ + + + + +L +A +G +V +S +E +R +T +I +D +P +K +Q +N +F +Y +M +H +W +C +X +B +U +Z +O +. +- + + \ No newline at end of file diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/README.md b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/adapter_config.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5f407fbf6673901d6bbc4be088ec92bf34bb74 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/adapter_config.json @@ -0,0 +1,28 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "facebook/esm2_t33_650M_UR50D", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 1, + "lora_dropout": 0.5, + "modules_to_save": null, + "peft_type": "LORA", + "r": 2, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query", + "key", + "value", + "EsmSelfOutput.dense", + "EsmIntermediate.dense", + "EsmOutput.dense", + "classifier" + ], + "task_type": "TOKEN_CLS" +} \ No newline at end of file diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/adapter_model.bin b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e39349d061867174318a1c8c0de23db9647d527e --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d0569a2930474e7eb8b4baa3794f4dc925230d27ff471fa125b41c55ec90002 +size 2133117 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/added_tokens.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..6527dabc1b083db2af29d5fad6902c18bf831b21 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/added_tokens.json @@ -0,0 +1,7 @@ +{ + "": 0, + "": 2, + "": 32, + "": 1, + "": 3 +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/optimizer.pt b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1b1126ea6a275e1f909d386defbb1a0b32654a2 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba123d3d5300c04df61a19f973041d9aba2f60b21e2fa6150a534ca5a07fd298 +size 4215045 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/rng_state.pth b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e2c9c29006e598265858a91f00c747e02c0d5619 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad4b33fd771a590bf68f30d331d8b8f0b3cae0df4abde1370ad68d21d8531255 +size 14575 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/scheduler.pt b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..250fe69af691150288d4678597a1fa20196bf8c3 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d4cc968ce155a88f3501215aa8a7e29778cb7885b3150bad2441fdd48871663 +size 627 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/special_tokens_map.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ba0f9b53dbbf27934f7555e5d31e37bdea9317f1 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "cls_token": "", + "eos_token": "", + "mask_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/tokenizer_config.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ed0725465a1b7a8e4469cd162fbea147da56a1c2 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "tokenizer_class": "EsmTokenizer", + "tokenizer_file": null, + "unk_token": "" +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/trainer_state.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..851844e8d9b96d58975df0afd574d9f0941c02c8 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/trainer_state.json @@ -0,0 +1,1219 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3938558487593541, + "eval_steps": 500, + "global_step": 20000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.00017015520980041642, + "loss": 0.5575, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017015035788629153, + "loss": 0.3065, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017014224985982174, + "loss": 0.2229, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001701308860313402, + "loss": 0.1812, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017011626683579524, + "loss": 0.1572, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017009839283273364, + "loss": 0.1335, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017007726470627936, + "loss": 0.1193, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 0.00017005288326510734, + "loss": 0.1075, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001700252494424124, + "loss": 0.0986, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016999436429587366, + "loss": 0.0952, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016996022900761407, + "loss": 0.0865, + "step": 1100 + }, + { + "epoch": 0.02, + "learning_rate": 0.000169922844884155, + "loss": 0.0816, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016988221335636648, + "loss": 0.0775, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016983833597941224, + "loss": 0.0744, + "step": 1400 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016979121443269025, + "loss": 0.0694, + "step": 1500 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001697408505197684, + "loss": 0.0641, + "step": 1600 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016968724616831557, + "loss": 0.0629, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001696304034300278, + "loss": 0.0696, + "step": 1800 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016957032448054968, + "loss": 0.0591, + "step": 1900 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001695070116193912, + "loss": 0.06, + "step": 2000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001694404672698396, + "loss": 0.0558, + "step": 2100 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016937069397886687, + "loss": 0.0529, + "step": 2200 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016929769441703196, + "loss": 0.0536, + "step": 2300 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016922147137837868, + "loss": 0.0537, + "step": 2400 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016914202778032893, + "loss": 0.0508, + "step": 2500 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001690593666635707, + "loss": 0.0524, + "step": 2600 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016897349119194207, + "loss": 0.0494, + "step": 2700 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016888440465230977, + "loss": 0.047, + "step": 2800 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016879211045444354, + "loss": 0.0502, + "step": 2900 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016869661213088575, + "loss": 0.046, + "step": 3000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016859791333681583, + "loss": 0.0419, + "step": 3100 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001684960178499108, + "loss": 0.0456, + "step": 3200 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016839092957020028, + "loss": 0.0432, + "step": 3300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016828265251991761, + "loss": 0.0423, + "step": 3400 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016817119084334555, + "loss": 0.0407, + "step": 3500 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016805654880665776, + "loss": 0.0409, + "step": 3600 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016793873079775577, + "loss": 0.0397, + "step": 3700 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016781774132610059, + "loss": 0.0419, + "step": 3800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001676935850225405, + "loss": 0.0399, + "step": 3900 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016756626663913358, + "loss": 0.0389, + "step": 4000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016743579104896593, + "loss": 0.0385, + "step": 4100 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016730216324596504, + "loss": 0.0357, + "step": 4200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001671653883447088, + "loss": 0.0373, + "step": 4300 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016702547158022968, + "loss": 0.0375, + "step": 4400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001668824183078143, + "loss": 0.0393, + "step": 4500 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016673623400279849, + "loss": 0.0358, + "step": 4600 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016658692426035782, + "loss": 0.0334, + "step": 4700 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016643449479529325, + "loss": 0.035, + "step": 4800 + }, + { + "epoch": 0.1, + "learning_rate": 0.00016627895144181258, + "loss": 0.0343, + "step": 4900 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001661203001533071, + "loss": 0.0335, + "step": 5000 + }, + { + "epoch": 0.1, + "learning_rate": 0.00016595854700212362, + "loss": 0.0353, + "step": 5100 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001657936981793322, + "loss": 0.0331, + "step": 5200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001656257599944891, + "loss": 0.0387, + "step": 5300 + }, + { + "epoch": 0.11, + "learning_rate": 0.00016545473887539532, + "loss": 0.0327, + "step": 5400 + }, + { + "epoch": 0.11, + "learning_rate": 0.00016528064136785056, + "loss": 0.0316, + "step": 5500 + }, + { + "epoch": 0.11, + "learning_rate": 0.00016510347413540262, + "loss": 0.0319, + "step": 5600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001649232439590925, + "loss": 0.0314, + "step": 5700 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001647399577371947, + "loss": 0.0321, + "step": 5800 + }, + { + "epoch": 0.12, + "learning_rate": 0.00016455362248495338, + "loss": 0.0333, + "step": 5900 + }, + { + "epoch": 0.12, + "learning_rate": 0.00016436424533431362, + "loss": 0.0319, + "step": 6000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001641718335336486, + "loss": 0.0315, + "step": 6100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001639763944474821, + "loss": 0.0311, + "step": 6200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001637779355562068, + "loss": 0.031, + "step": 6300 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016357646445579763, + "loss": 0.0299, + "step": 6400 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016337198885752133, + "loss": 0.0317, + "step": 6500 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016316451658764122, + "loss": 0.0302, + "step": 6600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001629540555871176, + "loss": 0.0295, + "step": 6700 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016274061391130388, + "loss": 0.03, + "step": 6800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001625241997296382, + "loss": 0.0292, + "step": 6900 + }, + { + "epoch": 0.14, + "learning_rate": 0.00016230482132533077, + "loss": 0.0289, + "step": 7000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00016208472504084003, + "loss": 0.0318, + "step": 7100 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001618594729250462, + "loss": 0.0301, + "step": 7200 + }, + { + "epoch": 0.14, + "learning_rate": 0.00016163128202889828, + "loss": 0.0295, + "step": 7300 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016140016108635798, + "loss": 0.029, + "step": 7400 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016116611894353386, + "loss": 0.0291, + "step": 7500 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016092916455834295, + "loss": 0.0311, + "step": 7600 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016068930700016766, + "loss": 0.0285, + "step": 7700 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016044655544950889, + "loss": 0.0287, + "step": 7800 + }, + { + "epoch": 0.16, + "learning_rate": 0.00016020091919763445, + "loss": 0.0293, + "step": 7900 + }, + { + "epoch": 0.16, + "learning_rate": 0.00015995240764622357, + "loss": 0.0259, + "step": 8000 + }, + { + "epoch": 0.16, + "learning_rate": 0.000159701030307007, + "loss": 0.0293, + "step": 8100 + }, + { + "epoch": 0.16, + "learning_rate": 0.00015944679680140295, + "loss": 0.0277, + "step": 8200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001591897168601488, + "loss": 0.0304, + "step": 8300 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015892980032292876, + "loss": 0.026, + "step": 8400 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015866705713799714, + "loss": 0.0294, + "step": 8500 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015840149736179762, + "loss": 0.0321, + "step": 8600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001581331311585785, + "loss": 0.0279, + "step": 8700 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015786196880000325, + "loss": 0.0277, + "step": 8800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001575880206647579, + "loss": 0.0268, + "step": 8900 + }, + { + "epoch": 0.18, + "learning_rate": 0.00015731129723815343, + "loss": 0.0281, + "step": 9000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00015703180911172453, + "loss": 0.028, + "step": 9100 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001567495669828243, + "loss": 0.026, + "step": 9200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001564645816542146, + "loss": 0.0256, + "step": 9300 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001561768640336529, + "loss": 0.027, + "step": 9400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001558864251334745, + "loss": 0.0249, + "step": 9500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015559327607017119, + "loss": 0.0256, + "step": 9600 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015529742806396564, + "loss": 0.0251, + "step": 9700 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015499889243838211, + "loss": 0.0257, + "step": 9800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015469768061981295, + "loss": 0.0264, + "step": 9900 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015439685605389073, + "loss": 0.0247, + "step": 10000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015409035301023626, + "loss": 0.0256, + "step": 10100 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015378120854776404, + "loss": 0.0247, + "step": 10200 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015346943449891755, + "loss": 0.0254, + "step": 10300 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015315504279678714, + "loss": 0.0246, + "step": 10400 + }, + { + "epoch": 0.21, + "learning_rate": 0.00015283804547465337, + "loss": 0.0249, + "step": 10500 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001525184546655264, + "loss": 0.026, + "step": 10600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001521962826016816, + "loss": 0.0254, + "step": 10700 + }, + { + "epoch": 0.21, + "learning_rate": 0.00015187154161419122, + "loss": 0.0272, + "step": 10800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001515442441324528, + "loss": 0.0253, + "step": 10900 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015121440268371297, + "loss": 0.0263, + "step": 11000 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015088202989258835, + "loss": 0.0256, + "step": 11100 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015054713848058212, + "loss": 0.023, + "step": 11200 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015020974126559716, + "loss": 0.0236, + "step": 11300 + }, + { + "epoch": 0.22, + "learning_rate": 0.00014986985116144554, + "loss": 0.0241, + "step": 11400 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014952748117735409, + "loss": 0.0235, + "step": 11500 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001491826444174666, + "loss": 0.0242, + "step": 11600 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014883535408034227, + "loss": 0.0233, + "step": 11700 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014848562345845032, + "loss": 0.0228, + "step": 11800 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014813346593766164, + "loss": 0.023, + "step": 11900 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001477788949967359, + "loss": 0.0236, + "step": 12000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00014742192420680626, + "loss": 0.0237, + "step": 12100 + }, + { + "epoch": 0.24, + "learning_rate": 0.00014706256723085937, + "loss": 0.0237, + "step": 12200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001467008378232128, + "loss": 0.024, + "step": 12300 + }, + { + "epoch": 0.24, + "learning_rate": 0.00014633674982898854, + "loss": 0.0227, + "step": 12400 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014597031718358285, + "loss": 0.0247, + "step": 12500 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014560155391213318, + "loss": 0.0233, + "step": 12600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001452304741289812, + "loss": 0.0234, + "step": 12700 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014485709203713263, + "loss": 0.0221, + "step": 12800 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014448142192771354, + "loss": 0.0244, + "step": 12900 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014410347817942347, + "loss": 0.0242, + "step": 13000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014372327525798503, + "loss": 0.0231, + "step": 13100 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014334082771559026, + "loss": 0.0238, + "step": 13200 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014295615019034358, + "loss": 0.0233, + "step": 13300 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001425692574057016, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014218016416990954, + "loss": 0.0249, + "step": 13500 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001417928089327897, + "loss": 0.0265, + "step": 13600 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014139938118717103, + "loss": 0.0206, + "step": 13700 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014100379776718443, + "loss": 0.0216, + "step": 13800 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014060607381370744, + "loss": 0.0239, + "step": 13900 + }, + { + "epoch": 0.28, + "learning_rate": 0.00014020622454954606, + "loss": 0.024, + "step": 14000 + }, + { + "epoch": 0.28, + "learning_rate": 0.00013980426527885237, + "loss": 0.0234, + "step": 14100 + }, + { + "epoch": 0.28, + "learning_rate": 0.00013940021138653824, + "loss": 0.0228, + "step": 14200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00013899407833768695, + "loss": 0.0228, + "step": 14300 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001385858816769611, + "loss": 0.0234, + "step": 14400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001381756370280075, + "loss": 0.0233, + "step": 14500 + }, + { + "epoch": 0.29, + "learning_rate": 0.00013776336009285937, + "loss": 0.0216, + "step": 14600 + }, + { + "epoch": 0.29, + "learning_rate": 0.00013734906665133537, + "loss": 0.0268, + "step": 14700 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001369327725604354, + "loss": 0.022, + "step": 14800 + }, + { + "epoch": 0.29, + "learning_rate": 0.00013651449375373396, + "loss": 0.0214, + "step": 14900 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013609424624077, + "loss": 0.0235, + "step": 15000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001356720461064345, + "loss": 0.0225, + "step": 15100 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013524790951035441, + "loss": 0.0218, + "step": 15200 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013482185268627465, + "loss": 0.0225, + "step": 15300 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013439389194143625, + "loss": 0.0216, + "step": 15400 + }, + { + "epoch": 0.31, + "learning_rate": 0.00013396404365595253, + "loss": 0.0223, + "step": 15500 + }, + { + "epoch": 0.31, + "learning_rate": 0.00013353232428218212, + "loss": 0.0212, + "step": 15600 + }, + { + "epoch": 0.31, + "learning_rate": 0.00013309875034409903, + "loss": 0.023, + "step": 15700 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001326633384366605, + "loss": 0.0214, + "step": 15800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001322261052251716, + "loss": 0.021, + "step": 15900 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013178706744464749, + "loss": 0.0211, + "step": 16000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013134624189917282, + "loss": 0.0212, + "step": 16100 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013090808013598785, + "loss": 0.0252, + "step": 16200 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013046374720137184, + "loss": 0.0214, + "step": 16300 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013001767715162726, + "loss": 0.0202, + "step": 16400 + }, + { + "epoch": 0.32, + "learning_rate": 0.00012956988705999754, + "loss": 0.0205, + "step": 16500 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012912039406556028, + "loss": 0.0235, + "step": 16600 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012866921537257149, + "loss": 0.0215, + "step": 16700 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012821636824980682, + "loss": 0.0198, + "step": 16800 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012776187002990082, + "loss": 0.0232, + "step": 16900 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012730573810868347, + "loss": 0.0235, + "step": 17000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012684798994451428, + "loss": 0.0207, + "step": 17100 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012638864305761417, + "loss": 0.0218, + "step": 17200 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012592771502939492, + "loss": 0.0204, + "step": 17300 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001254652235017861, + "loss": 0.0212, + "step": 17400 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012500118617655992, + "loss": 0.0207, + "step": 17500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00012453562081465375, + "loss": 0.0207, + "step": 17600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001240685452354902, + "loss": 0.0204, + "step": 17700 + }, + { + "epoch": 0.35, + "learning_rate": 0.00012359997731629517, + "loss": 0.0234, + "step": 17800 + }, + { + "epoch": 0.35, + "learning_rate": 0.00012312993499141364, + "loss": 0.02, + "step": 17900 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001226584362516231, + "loss": 0.0191, + "step": 18000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012218549914344497, + "loss": 0.0196, + "step": 18100 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012171114176845411, + "loss": 0.0203, + "step": 18200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001212353822825857, + "loss": 0.0224, + "step": 18300 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012075823889544048, + "loss": 0.0211, + "step": 18400 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012027972986958772, + "loss": 0.0188, + "step": 18500 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001197998735198662, + "loss": 0.0214, + "step": 18600 + }, + { + "epoch": 0.37, + "learning_rate": 0.00011931868821268337, + "loss": 0.0201, + "step": 18700 + }, + { + "epoch": 0.37, + "learning_rate": 0.00011883619236531218, + "loss": 0.0206, + "step": 18800 + }, + { + "epoch": 0.37, + "learning_rate": 0.00011835240444518623, + "loss": 0.0206, + "step": 18900 + }, + { + "epoch": 0.37, + "learning_rate": 0.000117867342969193, + "loss": 0.0197, + "step": 19000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011738102650296509, + "loss": 0.0198, + "step": 19100 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011689347366016955, + "loss": 0.0211, + "step": 19200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001164047031017955, + "loss": 0.0204, + "step": 19300 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011591473353543992, + "loss": 0.0206, + "step": 19400 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011542358371459156, + "loss": 0.0192, + "step": 19500 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001149312724379132, + "loss": 0.0202, + "step": 19600 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011443781854852212, + "loss": 0.0197, + "step": 19700 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001139432409332688, + "loss": 0.021, + "step": 19800 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011344755852201423, + "loss": 0.022, + "step": 19900 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011295079028690513, + "loss": 0.0196, + "step": 20000 + } + ], + "logging_steps": 100, + "max_steps": 50780, + "num_train_epochs": 1, + "save_steps": 10000, + "total_flos": 7.985674727424e+18, + "trial_name": null, + "trial_params": null +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/training_args.bin b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..100b6c56d3e6f8a91a89702971e1e66650fb8075 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d17a2c3c8a65881aa45bfd2997dfe652a61cb20d8b49594dc95bff2d7669f6f +size 4155 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/vocab.txt b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b946952cc35537226f07fd70957ee2f848880d2 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-20000/vocab.txt @@ -0,0 +1,33 @@ + + + + +L +A +G +V +S +E +R +T +I +D +P +K +Q +N +F +Y +M +H +W +C +X +B +U +Z +O +. +- + + \ No newline at end of file diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/README.md b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/adapter_config.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5f407fbf6673901d6bbc4be088ec92bf34bb74 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/adapter_config.json @@ -0,0 +1,28 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "facebook/esm2_t33_650M_UR50D", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 1, + "lora_dropout": 0.5, + "modules_to_save": null, + "peft_type": "LORA", + "r": 2, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query", + "key", + "value", + "EsmSelfOutput.dense", + "EsmIntermediate.dense", + "EsmOutput.dense", + "classifier" + ], + "task_type": "TOKEN_CLS" +} \ No newline at end of file diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/adapter_model.bin b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..1baacd6ad898dbe8d03a48ca3908e1fdcc634b48 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e69ef69f9b32fffd90f8b84442da5df6abcbe5b65c8920aa28b43bdf626b909 +size 2133117 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/added_tokens.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..6527dabc1b083db2af29d5fad6902c18bf831b21 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/added_tokens.json @@ -0,0 +1,7 @@ +{ + "": 0, + "": 2, + "": 32, + "": 1, + "": 3 +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/optimizer.pt b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d17f4208c7bd5c7b72f99d139eadb54837bdf43e --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a10e147fd00c89350f232720ea73d96536d1350be6ee6e230f682e6f9187feb4 +size 4215045 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/rng_state.pth b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ac6a342eba262a4e881a12ab6d2383088911ed92 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:601d282addfa26c7c4f37873b2314e1b151fe139d35103c2187ee704954f71d4 +size 14575 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/scheduler.pt b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0df26104c866c4db85024a27d77afe555e7cfc5c --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1697708f007c343ee0b9950cd53955c3e22105d9fb14648f4201068a99d7e02d +size 627 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/special_tokens_map.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ba0f9b53dbbf27934f7555e5d31e37bdea9317f1 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "cls_token": "", + "eos_token": "", + "mask_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/tokenizer_config.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ed0725465a1b7a8e4469cd162fbea147da56a1c2 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "tokenizer_class": "EsmTokenizer", + "tokenizer_file": null, + "unk_token": "" +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/trainer_state.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0ab0dba0ab6b0c4dd0da9a04f8991629651b5bed --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/trainer_state.json @@ -0,0 +1,1819 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5907837731390311, + "eval_steps": 500, + "global_step": 30000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.00017015520980041642, + "loss": 0.5575, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017015035788629153, + "loss": 0.3065, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017014224985982174, + "loss": 0.2229, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001701308860313402, + "loss": 0.1812, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017011626683579524, + "loss": 0.1572, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017009839283273364, + "loss": 0.1335, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017007726470627936, + "loss": 0.1193, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 0.00017005288326510734, + "loss": 0.1075, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001700252494424124, + "loss": 0.0986, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016999436429587366, + "loss": 0.0952, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016996022900761407, + "loss": 0.0865, + "step": 1100 + }, + { + "epoch": 0.02, + "learning_rate": 0.000169922844884155, + "loss": 0.0816, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016988221335636648, + "loss": 0.0775, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016983833597941224, + "loss": 0.0744, + "step": 1400 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016979121443269025, + "loss": 0.0694, + "step": 1500 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001697408505197684, + "loss": 0.0641, + "step": 1600 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016968724616831557, + "loss": 0.0629, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001696304034300278, + "loss": 0.0696, + "step": 1800 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016957032448054968, + "loss": 0.0591, + "step": 1900 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001695070116193912, + "loss": 0.06, + "step": 2000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001694404672698396, + "loss": 0.0558, + "step": 2100 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016937069397886687, + "loss": 0.0529, + "step": 2200 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016929769441703196, + "loss": 0.0536, + "step": 2300 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016922147137837868, + "loss": 0.0537, + "step": 2400 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016914202778032893, + "loss": 0.0508, + "step": 2500 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001690593666635707, + "loss": 0.0524, + "step": 2600 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016897349119194207, + "loss": 0.0494, + "step": 2700 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016888440465230977, + "loss": 0.047, + "step": 2800 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016879211045444354, + "loss": 0.0502, + "step": 2900 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016869661213088575, + "loss": 0.046, + "step": 3000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016859791333681583, + "loss": 0.0419, + "step": 3100 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001684960178499108, + "loss": 0.0456, + "step": 3200 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016839092957020028, + "loss": 0.0432, + "step": 3300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016828265251991761, + "loss": 0.0423, + "step": 3400 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016817119084334555, + "loss": 0.0407, + "step": 3500 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016805654880665776, + "loss": 0.0409, + "step": 3600 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016793873079775577, + "loss": 0.0397, + "step": 3700 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016781774132610059, + "loss": 0.0419, + "step": 3800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001676935850225405, + "loss": 0.0399, + "step": 3900 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016756626663913358, + "loss": 0.0389, + "step": 4000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016743579104896593, + "loss": 0.0385, + "step": 4100 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016730216324596504, + "loss": 0.0357, + "step": 4200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001671653883447088, + "loss": 0.0373, + "step": 4300 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016702547158022968, + "loss": 0.0375, + "step": 4400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001668824183078143, + "loss": 0.0393, + "step": 4500 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016673623400279849, + "loss": 0.0358, + "step": 4600 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016658692426035782, + "loss": 0.0334, + "step": 4700 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016643449479529325, + "loss": 0.035, + "step": 4800 + }, + { + "epoch": 0.1, + "learning_rate": 0.00016627895144181258, + "loss": 0.0343, + "step": 4900 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001661203001533071, + "loss": 0.0335, + "step": 5000 + }, + { + "epoch": 0.1, + "learning_rate": 0.00016595854700212362, + "loss": 0.0353, + "step": 5100 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001657936981793322, + "loss": 0.0331, + "step": 5200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001656257599944891, + "loss": 0.0387, + "step": 5300 + }, + { + "epoch": 0.11, + "learning_rate": 0.00016545473887539532, + "loss": 0.0327, + "step": 5400 + }, + { + "epoch": 0.11, + "learning_rate": 0.00016528064136785056, + "loss": 0.0316, + "step": 5500 + }, + { + "epoch": 0.11, + "learning_rate": 0.00016510347413540262, + "loss": 0.0319, + "step": 5600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001649232439590925, + "loss": 0.0314, + "step": 5700 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001647399577371947, + "loss": 0.0321, + "step": 5800 + }, + { + "epoch": 0.12, + "learning_rate": 0.00016455362248495338, + "loss": 0.0333, + "step": 5900 + }, + { + "epoch": 0.12, + "learning_rate": 0.00016436424533431362, + "loss": 0.0319, + "step": 6000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001641718335336486, + "loss": 0.0315, + "step": 6100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001639763944474821, + "loss": 0.0311, + "step": 6200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001637779355562068, + "loss": 0.031, + "step": 6300 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016357646445579763, + "loss": 0.0299, + "step": 6400 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016337198885752133, + "loss": 0.0317, + "step": 6500 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016316451658764122, + "loss": 0.0302, + "step": 6600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001629540555871176, + "loss": 0.0295, + "step": 6700 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016274061391130388, + "loss": 0.03, + "step": 6800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001625241997296382, + "loss": 0.0292, + "step": 6900 + }, + { + "epoch": 0.14, + "learning_rate": 0.00016230482132533077, + "loss": 0.0289, + "step": 7000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00016208472504084003, + "loss": 0.0318, + "step": 7100 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001618594729250462, + "loss": 0.0301, + "step": 7200 + }, + { + "epoch": 0.14, + "learning_rate": 0.00016163128202889828, + "loss": 0.0295, + "step": 7300 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016140016108635798, + "loss": 0.029, + "step": 7400 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016116611894353386, + "loss": 0.0291, + "step": 7500 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016092916455834295, + "loss": 0.0311, + "step": 7600 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016068930700016766, + "loss": 0.0285, + "step": 7700 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016044655544950889, + "loss": 0.0287, + "step": 7800 + }, + { + "epoch": 0.16, + "learning_rate": 0.00016020091919763445, + "loss": 0.0293, + "step": 7900 + }, + { + "epoch": 0.16, + "learning_rate": 0.00015995240764622357, + "loss": 0.0259, + "step": 8000 + }, + { + "epoch": 0.16, + "learning_rate": 0.000159701030307007, + "loss": 0.0293, + "step": 8100 + }, + { + "epoch": 0.16, + "learning_rate": 0.00015944679680140295, + "loss": 0.0277, + "step": 8200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001591897168601488, + "loss": 0.0304, + "step": 8300 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015892980032292876, + "loss": 0.026, + "step": 8400 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015866705713799714, + "loss": 0.0294, + "step": 8500 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015840149736179762, + "loss": 0.0321, + "step": 8600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001581331311585785, + "loss": 0.0279, + "step": 8700 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015786196880000325, + "loss": 0.0277, + "step": 8800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001575880206647579, + "loss": 0.0268, + "step": 8900 + }, + { + "epoch": 0.18, + "learning_rate": 0.00015731129723815343, + "loss": 0.0281, + "step": 9000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00015703180911172453, + "loss": 0.028, + "step": 9100 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001567495669828243, + "loss": 0.026, + "step": 9200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001564645816542146, + "loss": 0.0256, + "step": 9300 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001561768640336529, + "loss": 0.027, + "step": 9400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001558864251334745, + "loss": 0.0249, + "step": 9500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015559327607017119, + "loss": 0.0256, + "step": 9600 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015529742806396564, + "loss": 0.0251, + "step": 9700 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015499889243838211, + "loss": 0.0257, + "step": 9800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015469768061981295, + "loss": 0.0264, + "step": 9900 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015439685605389073, + "loss": 0.0247, + "step": 10000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015409035301023626, + "loss": 0.0256, + "step": 10100 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015378120854776404, + "loss": 0.0247, + "step": 10200 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015346943449891755, + "loss": 0.0254, + "step": 10300 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015315504279678714, + "loss": 0.0246, + "step": 10400 + }, + { + "epoch": 0.21, + "learning_rate": 0.00015283804547465337, + "loss": 0.0249, + "step": 10500 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001525184546655264, + "loss": 0.026, + "step": 10600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001521962826016816, + "loss": 0.0254, + "step": 10700 + }, + { + "epoch": 0.21, + "learning_rate": 0.00015187154161419122, + "loss": 0.0272, + "step": 10800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001515442441324528, + "loss": 0.0253, + "step": 10900 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015121440268371297, + "loss": 0.0263, + "step": 11000 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015088202989258835, + "loss": 0.0256, + "step": 11100 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015054713848058212, + "loss": 0.023, + "step": 11200 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015020974126559716, + "loss": 0.0236, + "step": 11300 + }, + { + "epoch": 0.22, + "learning_rate": 0.00014986985116144554, + "loss": 0.0241, + "step": 11400 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014952748117735409, + "loss": 0.0235, + "step": 11500 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001491826444174666, + "loss": 0.0242, + "step": 11600 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014883535408034227, + "loss": 0.0233, + "step": 11700 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014848562345845032, + "loss": 0.0228, + "step": 11800 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014813346593766164, + "loss": 0.023, + "step": 11900 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001477788949967359, + "loss": 0.0236, + "step": 12000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00014742192420680626, + "loss": 0.0237, + "step": 12100 + }, + { + "epoch": 0.24, + "learning_rate": 0.00014706256723085937, + "loss": 0.0237, + "step": 12200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001467008378232128, + "loss": 0.024, + "step": 12300 + }, + { + "epoch": 0.24, + "learning_rate": 0.00014633674982898854, + "loss": 0.0227, + "step": 12400 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014597031718358285, + "loss": 0.0247, + "step": 12500 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014560155391213318, + "loss": 0.0233, + "step": 12600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001452304741289812, + "loss": 0.0234, + "step": 12700 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014485709203713263, + "loss": 0.0221, + "step": 12800 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014448142192771354, + "loss": 0.0244, + "step": 12900 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014410347817942347, + "loss": 0.0242, + "step": 13000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014372327525798503, + "loss": 0.0231, + "step": 13100 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014334082771559026, + "loss": 0.0238, + "step": 13200 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014295615019034358, + "loss": 0.0233, + "step": 13300 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001425692574057016, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014218016416990954, + "loss": 0.0249, + "step": 13500 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001417928089327897, + "loss": 0.0265, + "step": 13600 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014139938118717103, + "loss": 0.0206, + "step": 13700 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014100379776718443, + "loss": 0.0216, + "step": 13800 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014060607381370744, + "loss": 0.0239, + "step": 13900 + }, + { + "epoch": 0.28, + "learning_rate": 0.00014020622454954606, + "loss": 0.024, + "step": 14000 + }, + { + "epoch": 0.28, + "learning_rate": 0.00013980426527885237, + "loss": 0.0234, + "step": 14100 + }, + { + "epoch": 0.28, + "learning_rate": 0.00013940021138653824, + "loss": 0.0228, + "step": 14200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00013899407833768695, + "loss": 0.0228, + "step": 14300 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001385858816769611, + "loss": 0.0234, + "step": 14400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001381756370280075, + "loss": 0.0233, + "step": 14500 + }, + { + "epoch": 0.29, + "learning_rate": 0.00013776336009285937, + "loss": 0.0216, + "step": 14600 + }, + { + "epoch": 0.29, + "learning_rate": 0.00013734906665133537, + "loss": 0.0268, + "step": 14700 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001369327725604354, + "loss": 0.022, + "step": 14800 + }, + { + "epoch": 0.29, + "learning_rate": 0.00013651449375373396, + "loss": 0.0214, + "step": 14900 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013609424624077, + "loss": 0.0235, + "step": 15000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001356720461064345, + "loss": 0.0225, + "step": 15100 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013524790951035441, + "loss": 0.0218, + "step": 15200 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013482185268627465, + "loss": 0.0225, + "step": 15300 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013439389194143625, + "loss": 0.0216, + "step": 15400 + }, + { + "epoch": 0.31, + "learning_rate": 0.00013396404365595253, + "loss": 0.0223, + "step": 15500 + }, + { + "epoch": 0.31, + "learning_rate": 0.00013353232428218212, + "loss": 0.0212, + "step": 15600 + }, + { + "epoch": 0.31, + "learning_rate": 0.00013309875034409903, + "loss": 0.023, + "step": 15700 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001326633384366605, + "loss": 0.0214, + "step": 15800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001322261052251716, + "loss": 0.021, + "step": 15900 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013178706744464749, + "loss": 0.0211, + "step": 16000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013134624189917282, + "loss": 0.0212, + "step": 16100 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013090808013598785, + "loss": 0.0252, + "step": 16200 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013046374720137184, + "loss": 0.0214, + "step": 16300 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013001767715162726, + "loss": 0.0202, + "step": 16400 + }, + { + "epoch": 0.32, + "learning_rate": 0.00012956988705999754, + "loss": 0.0205, + "step": 16500 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012912039406556028, + "loss": 0.0235, + "step": 16600 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012866921537257149, + "loss": 0.0215, + "step": 16700 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012821636824980682, + "loss": 0.0198, + "step": 16800 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012776187002990082, + "loss": 0.0232, + "step": 16900 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012730573810868347, + "loss": 0.0235, + "step": 17000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012684798994451428, + "loss": 0.0207, + "step": 17100 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012638864305761417, + "loss": 0.0218, + "step": 17200 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012592771502939492, + "loss": 0.0204, + "step": 17300 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001254652235017861, + "loss": 0.0212, + "step": 17400 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012500118617655992, + "loss": 0.0207, + "step": 17500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00012453562081465375, + "loss": 0.0207, + "step": 17600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001240685452354902, + "loss": 0.0204, + "step": 17700 + }, + { + "epoch": 0.35, + "learning_rate": 0.00012359997731629517, + "loss": 0.0234, + "step": 17800 + }, + { + "epoch": 0.35, + "learning_rate": 0.00012312993499141364, + "loss": 0.02, + "step": 17900 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001226584362516231, + "loss": 0.0191, + "step": 18000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012218549914344497, + "loss": 0.0196, + "step": 18100 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012171114176845411, + "loss": 0.0203, + "step": 18200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001212353822825857, + "loss": 0.0224, + "step": 18300 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012075823889544048, + "loss": 0.0211, + "step": 18400 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012027972986958772, + "loss": 0.0188, + "step": 18500 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001197998735198662, + "loss": 0.0214, + "step": 18600 + }, + { + "epoch": 0.37, + "learning_rate": 0.00011931868821268337, + "loss": 0.0201, + "step": 18700 + }, + { + "epoch": 0.37, + "learning_rate": 0.00011883619236531218, + "loss": 0.0206, + "step": 18800 + }, + { + "epoch": 0.37, + "learning_rate": 0.00011835240444518623, + "loss": 0.0206, + "step": 18900 + }, + { + "epoch": 0.37, + "learning_rate": 0.000117867342969193, + "loss": 0.0197, + "step": 19000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011738102650296509, + "loss": 0.0198, + "step": 19100 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011689347366016955, + "loss": 0.0211, + "step": 19200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001164047031017955, + "loss": 0.0204, + "step": 19300 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011591473353543992, + "loss": 0.0206, + "step": 19400 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011542358371459156, + "loss": 0.0192, + "step": 19500 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001149312724379132, + "loss": 0.0202, + "step": 19600 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011443781854852212, + "loss": 0.0197, + "step": 19700 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001139432409332688, + "loss": 0.021, + "step": 19800 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011344755852201423, + "loss": 0.022, + "step": 19900 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011295079028690513, + "loss": 0.0196, + "step": 20000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011245295524164799, + "loss": 0.0195, + "step": 20100 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011195407244078124, + "loss": 0.0197, + "step": 20200 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011145916512264182, + "loss": 0.0227, + "step": 20300 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001109582541342795, + "loss": 0.0197, + "step": 20400 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011045635259969838, + "loss": 0.0203, + "step": 20500 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001099585132027323, + "loss": 0.0216, + "step": 20600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00010945469766893083, + "loss": 0.0224, + "step": 20700 + }, + { + "epoch": 0.41, + "learning_rate": 0.00010894994913723849, + "loss": 0.0193, + "step": 20800 + }, + { + "epoch": 0.41, + "learning_rate": 0.00010844428692680601, + "loss": 0.0203, + "step": 20900 + }, + { + "epoch": 0.41, + "learning_rate": 0.000107937730391755, + "loss": 0.02, + "step": 21000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00010743029892043725, + "loss": 0.0197, + "step": 21100 + }, + { + "epoch": 0.42, + "learning_rate": 0.00010692201193469241, + "loss": 0.0192, + "step": 21200 + }, + { + "epoch": 0.42, + "learning_rate": 0.00010641288888910492, + "loss": 0.0189, + "step": 21300 + }, + { + "epoch": 0.42, + "learning_rate": 0.00010590294927025919, + "loss": 0.02, + "step": 21400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00010539221259599377, + "loss": 0.0182, + "step": 21500 + }, + { + "epoch": 0.43, + "learning_rate": 0.00010488069841465444, + "loss": 0.0191, + "step": 21600 + }, + { + "epoch": 0.43, + "learning_rate": 0.00010436842630434579, + "loss": 0.0188, + "step": 21700 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001038554158721821, + "loss": 0.023, + "step": 21800 + }, + { + "epoch": 0.43, + "learning_rate": 0.00010334168675353674, + "loss": 0.0206, + "step": 21900 + }, + { + "epoch": 0.43, + "learning_rate": 0.00010282725861129068, + "loss": 0.0187, + "step": 22000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00010231215113507977, + "loss": 0.018, + "step": 22100 + }, + { + "epoch": 0.44, + "learning_rate": 0.00010179638404054145, + "loss": 0.0202, + "step": 22200 + }, + { + "epoch": 0.44, + "learning_rate": 0.00010127997706855969, + "loss": 0.0199, + "step": 22300 + }, + { + "epoch": 0.44, + "learning_rate": 0.00010076294998450981, + "loss": 0.019, + "step": 22400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001002453225775017, + "loss": 0.0193, + "step": 22500 + }, + { + "epoch": 0.45, + "learning_rate": 9.972711465962257e-05, + "loss": 0.019, + "step": 22600 + }, + { + "epoch": 0.45, + "learning_rate": 9.920834606517847e-05, + "loss": 0.0195, + "step": 22700 + }, + { + "epoch": 0.45, + "learning_rate": 9.868903664993534e-05, + "loss": 0.018, + "step": 22800 + }, + { + "epoch": 0.45, + "learning_rate": 9.816920629035887e-05, + "loss": 0.0202, + "step": 22900 + }, + { + "epoch": 0.45, + "learning_rate": 9.764887488285381e-05, + "loss": 0.0185, + "step": 23000 + }, + { + "epoch": 0.45, + "learning_rate": 9.71280623430024e-05, + "loss": 0.0189, + "step": 23100 + }, + { + "epoch": 0.46, + "learning_rate": 9.660678860480215e-05, + "loss": 0.0186, + "step": 23200 + }, + { + "epoch": 0.46, + "learning_rate": 9.608507361990283e-05, + "loss": 0.0195, + "step": 23300 + }, + { + "epoch": 0.46, + "learning_rate": 9.556293735684288e-05, + "loss": 0.0206, + "step": 23400 + }, + { + "epoch": 0.46, + "learning_rate": 9.50456270965957e-05, + "loss": 0.0204, + "step": 23500 + }, + { + "epoch": 0.46, + "learning_rate": 9.452271196044817e-05, + "loss": 0.0189, + "step": 23600 + }, + { + "epoch": 0.47, + "learning_rate": 9.399943534522518e-05, + "loss": 0.0198, + "step": 23700 + }, + { + "epoch": 0.47, + "learning_rate": 9.347581727923617e-05, + "loss": 0.0197, + "step": 23800 + }, + { + "epoch": 0.47, + "learning_rate": 9.295187780385963e-05, + "loss": 0.0187, + "step": 23900 + }, + { + "epoch": 0.47, + "learning_rate": 9.242763697277588e-05, + "loss": 0.0201, + "step": 24000 + }, + { + "epoch": 0.47, + "learning_rate": 9.19031148511996e-05, + "loss": 0.0194, + "step": 24100 + }, + { + "epoch": 0.48, + "learning_rate": 9.137833151511182e-05, + "loss": 0.0186, + "step": 24200 + }, + { + "epoch": 0.48, + "learning_rate": 9.08533070504915e-05, + "loss": 0.0215, + "step": 24300 + }, + { + "epoch": 0.48, + "learning_rate": 9.032806155254666e-05, + "loss": 0.0205, + "step": 24400 + }, + { + "epoch": 0.48, + "learning_rate": 8.980261512494549e-05, + "loss": 0.0188, + "step": 24500 + }, + { + "epoch": 0.48, + "learning_rate": 8.927698787904661e-05, + "loss": 0.0188, + "step": 24600 + }, + { + "epoch": 0.49, + "learning_rate": 8.875119993312938e-05, + "loss": 0.0204, + "step": 24700 + }, + { + "epoch": 0.49, + "learning_rate": 8.822527141162407e-05, + "loss": 0.0191, + "step": 24800 + }, + { + "epoch": 0.49, + "learning_rate": 8.769922244434138e-05, + "loss": 0.017, + "step": 24900 + }, + { + "epoch": 0.49, + "learning_rate": 8.717307316570196e-05, + "loss": 0.0186, + "step": 25000 + }, + { + "epoch": 0.49, + "learning_rate": 8.664684371396603e-05, + "loss": 0.0195, + "step": 25100 + }, + { + "epoch": 0.5, + "learning_rate": 8.612055423046226e-05, + "loss": 0.0199, + "step": 25200 + }, + { + "epoch": 0.5, + "learning_rate": 8.559422485881711e-05, + "loss": 0.0191, + "step": 25300 + }, + { + "epoch": 0.5, + "learning_rate": 8.506787574418376e-05, + "loss": 0.0191, + "step": 25400 + }, + { + "epoch": 0.5, + "learning_rate": 8.4541527032471e-05, + "loss": 0.0196, + "step": 25500 + }, + { + "epoch": 0.5, + "learning_rate": 8.401519886957223e-05, + "loss": 0.0184, + "step": 25600 + }, + { + "epoch": 0.51, + "learning_rate": 8.348891140059429e-05, + "loss": 0.0182, + "step": 25700 + }, + { + "epoch": 0.51, + "learning_rate": 8.29626847690866e-05, + "loss": 0.0216, + "step": 25800 + }, + { + "epoch": 0.51, + "learning_rate": 8.243653911626992e-05, + "loss": 0.0177, + "step": 25900 + }, + { + "epoch": 0.51, + "learning_rate": 8.191575445898302e-05, + "loss": 0.019, + "step": 26000 + }, + { + "epoch": 0.51, + "learning_rate": 8.13898298618816e-05, + "loss": 0.0187, + "step": 26100 + }, + { + "epoch": 0.52, + "learning_rate": 8.086404644418334e-05, + "loss": 0.0189, + "step": 26200 + }, + { + "epoch": 0.52, + "learning_rate": 8.033842433014505e-05, + "loss": 0.0178, + "step": 26300 + }, + { + "epoch": 0.52, + "learning_rate": 7.981298363784983e-05, + "loss": 0.0182, + "step": 26400 + }, + { + "epoch": 0.52, + "learning_rate": 7.928774447843676e-05, + "loss": 0.0197, + "step": 26500 + }, + { + "epoch": 0.52, + "learning_rate": 7.876272695533133e-05, + "loss": 0.0182, + "step": 26600 + }, + { + "epoch": 0.53, + "learning_rate": 7.823795116347596e-05, + "loss": 0.0203, + "step": 26700 + }, + { + "epoch": 0.53, + "learning_rate": 7.771343718856085e-05, + "loss": 0.0193, + "step": 26800 + }, + { + "epoch": 0.53, + "learning_rate": 7.718920510625514e-05, + "loss": 0.0182, + "step": 26900 + }, + { + "epoch": 0.53, + "learning_rate": 7.666527498143874e-05, + "loss": 0.0202, + "step": 27000 + }, + { + "epoch": 0.53, + "learning_rate": 7.614166686743405e-05, + "loss": 0.019, + "step": 27100 + }, + { + "epoch": 0.54, + "learning_rate": 7.561840080523864e-05, + "loss": 0.0174, + "step": 27200 + }, + { + "epoch": 0.54, + "learning_rate": 7.509549682275809e-05, + "loss": 0.018, + "step": 27300 + }, + { + "epoch": 0.54, + "learning_rate": 7.457297493403944e-05, + "loss": 0.019, + "step": 27400 + }, + { + "epoch": 0.54, + "learning_rate": 7.405085513850521e-05, + "loss": 0.0205, + "step": 27500 + }, + { + "epoch": 0.54, + "learning_rate": 7.352915742018785e-05, + "loss": 0.0169, + "step": 27600 + }, + { + "epoch": 0.55, + "learning_rate": 7.30079017469649e-05, + "loss": 0.0183, + "step": 27700 + }, + { + "epoch": 0.55, + "learning_rate": 7.248710806979468e-05, + "loss": 0.0181, + "step": 27800 + }, + { + "epoch": 0.55, + "learning_rate": 7.196679632195274e-05, + "loss": 0.0171, + "step": 27900 + }, + { + "epoch": 0.55, + "learning_rate": 7.144698641826882e-05, + "loss": 0.0202, + "step": 28000 + }, + { + "epoch": 0.55, + "learning_rate": 7.092769825436483e-05, + "loss": 0.0184, + "step": 28100 + }, + { + "epoch": 0.56, + "learning_rate": 7.040895170589299e-05, + "loss": 0.0191, + "step": 28200 + }, + { + "epoch": 0.56, + "learning_rate": 6.989594563413708e-05, + "loss": 0.0206, + "step": 28300 + }, + { + "epoch": 0.56, + "learning_rate": 6.937833594866479e-05, + "loss": 0.0184, + "step": 28400 + }, + { + "epoch": 0.56, + "learning_rate": 6.886132718016212e-05, + "loss": 0.0213, + "step": 28500 + }, + { + "epoch": 0.56, + "learning_rate": 6.834493911703791e-05, + "loss": 0.0181, + "step": 28600 + }, + { + "epoch": 0.57, + "learning_rate": 6.78291915239438e-05, + "loss": 0.0205, + "step": 28700 + }, + { + "epoch": 0.57, + "learning_rate": 6.731410414101738e-05, + "loss": 0.0184, + "step": 28800 + }, + { + "epoch": 0.57, + "learning_rate": 6.679969668312693e-05, + "loss": 0.0179, + "step": 28900 + }, + { + "epoch": 0.57, + "learning_rate": 6.628598883911667e-05, + "loss": 0.0206, + "step": 29000 + }, + { + "epoch": 0.57, + "learning_rate": 6.577300027105329e-05, + "loss": 0.0172, + "step": 29100 + }, + { + "epoch": 0.58, + "learning_rate": 6.526075061347325e-05, + "loss": 0.0194, + "step": 29200 + }, + { + "epoch": 0.58, + "learning_rate": 6.474925947263152e-05, + "loss": 0.0195, + "step": 29300 + }, + { + "epoch": 0.58, + "learning_rate": 6.423854642575082e-05, + "loss": 0.0193, + "step": 29400 + }, + { + "epoch": 0.58, + "learning_rate": 6.372863102027257e-05, + "loss": 0.0182, + "step": 29500 + }, + { + "epoch": 0.58, + "learning_rate": 6.321953277310858e-05, + "loss": 0.0195, + "step": 29600 + }, + { + "epoch": 0.58, + "learning_rate": 6.27112711698941e-05, + "loss": 0.0174, + "step": 29700 + }, + { + "epoch": 0.59, + "learning_rate": 6.220386566424204e-05, + "loss": 0.0214, + "step": 29800 + }, + { + "epoch": 0.59, + "learning_rate": 6.169733567699826e-05, + "loss": 0.0176, + "step": 29900 + }, + { + "epoch": 0.59, + "learning_rate": 6.11917005954984e-05, + "loss": 0.017, + "step": 30000 + } + ], + "logging_steps": 100, + "max_steps": 50780, + "num_train_epochs": 1, + "save_steps": 10000, + "total_flos": 1.1978512091136e+19, + "trial_name": null, + "trial_params": null +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/training_args.bin b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..100b6c56d3e6f8a91a89702971e1e66650fb8075 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d17a2c3c8a65881aa45bfd2997dfe652a61cb20d8b49594dc95bff2d7669f6f +size 4155 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/vocab.txt b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b946952cc35537226f07fd70957ee2f848880d2 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-30000/vocab.txt @@ -0,0 +1,33 @@ + + + + +L +A +G +V +S +E +R +T +I +D +P +K +Q +N +F +Y +M +H +W +C +X +B +U +Z +O +. +- + + \ No newline at end of file diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/README.md b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/adapter_config.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5f407fbf6673901d6bbc4be088ec92bf34bb74 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/adapter_config.json @@ -0,0 +1,28 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "facebook/esm2_t33_650M_UR50D", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 1, + "lora_dropout": 0.5, + "modules_to_save": null, + "peft_type": "LORA", + "r": 2, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query", + "key", + "value", + "EsmSelfOutput.dense", + "EsmIntermediate.dense", + "EsmOutput.dense", + "classifier" + ], + "task_type": "TOKEN_CLS" +} \ No newline at end of file diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/adapter_model.bin b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a9d63d59d8f3d638bcf30be0605710a62c582010 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe0773ad58d9e904a3858633796c2742ac71afda0da63ebf6d4a178d951ce4fb +size 2133117 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/added_tokens.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..6527dabc1b083db2af29d5fad6902c18bf831b21 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/added_tokens.json @@ -0,0 +1,7 @@ +{ + "": 0, + "": 2, + "": 32, + "": 1, + "": 3 +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/optimizer.pt b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..79ea894e7a3812ae439d24fae8ba353cc4062f1f --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:369bf2563f08a76616fc3448106c6dfc038cb3a2fe6f80f47ede0bf272cd2095 +size 4215045 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/rng_state.pth b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..73f3a16d53a17f8b3e0f4681fb9ec7252e66788f --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac84843979a304380eaef982db4fd0aa01fb1a7dafd3cdf6688e2c300c6b3f23 +size 14575 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/scheduler.pt b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c05ac6acaa1ce16e11f8a0069576a8ea78cb318 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2963dcf1c9d9b00006d2790ed287a1d12d3120a01ec704c5500b55543bb05b48 +size 627 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/special_tokens_map.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ba0f9b53dbbf27934f7555e5d31e37bdea9317f1 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "cls_token": "", + "eos_token": "", + "mask_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/tokenizer_config.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ed0725465a1b7a8e4469cd162fbea147da56a1c2 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "tokenizer_class": "EsmTokenizer", + "tokenizer_file": null, + "unk_token": "" +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/trainer_state.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8cf6ed5b2037359905f6b373f73f2ba2d4782acb --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/trainer_state.json @@ -0,0 +1,2419 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7877116975187082, + "eval_steps": 500, + "global_step": 40000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.00017015520980041642, + "loss": 0.5575, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017015035788629153, + "loss": 0.3065, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017014224985982174, + "loss": 0.2229, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001701308860313402, + "loss": 0.1812, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017011626683579524, + "loss": 0.1572, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017009839283273364, + "loss": 0.1335, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017007726470627936, + "loss": 0.1193, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 0.00017005288326510734, + "loss": 0.1075, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001700252494424124, + "loss": 0.0986, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016999436429587366, + "loss": 0.0952, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016996022900761407, + "loss": 0.0865, + "step": 1100 + }, + { + "epoch": 0.02, + "learning_rate": 0.000169922844884155, + "loss": 0.0816, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016988221335636648, + "loss": 0.0775, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016983833597941224, + "loss": 0.0744, + "step": 1400 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016979121443269025, + "loss": 0.0694, + "step": 1500 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001697408505197684, + "loss": 0.0641, + "step": 1600 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016968724616831557, + "loss": 0.0629, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001696304034300278, + "loss": 0.0696, + "step": 1800 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016957032448054968, + "loss": 0.0591, + "step": 1900 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001695070116193912, + "loss": 0.06, + "step": 2000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001694404672698396, + "loss": 0.0558, + "step": 2100 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016937069397886687, + "loss": 0.0529, + "step": 2200 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016929769441703196, + "loss": 0.0536, + "step": 2300 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016922147137837868, + "loss": 0.0537, + "step": 2400 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016914202778032893, + "loss": 0.0508, + "step": 2500 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001690593666635707, + "loss": 0.0524, + "step": 2600 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016897349119194207, + "loss": 0.0494, + "step": 2700 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016888440465230977, + "loss": 0.047, + "step": 2800 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016879211045444354, + "loss": 0.0502, + "step": 2900 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016869661213088575, + "loss": 0.046, + "step": 3000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016859791333681583, + "loss": 0.0419, + "step": 3100 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001684960178499108, + "loss": 0.0456, + "step": 3200 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016839092957020028, + "loss": 0.0432, + "step": 3300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016828265251991761, + "loss": 0.0423, + "step": 3400 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016817119084334555, + "loss": 0.0407, + "step": 3500 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016805654880665776, + "loss": 0.0409, + "step": 3600 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016793873079775577, + "loss": 0.0397, + "step": 3700 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016781774132610059, + "loss": 0.0419, + "step": 3800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001676935850225405, + "loss": 0.0399, + "step": 3900 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016756626663913358, + "loss": 0.0389, + "step": 4000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016743579104896593, + "loss": 0.0385, + "step": 4100 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016730216324596504, + "loss": 0.0357, + "step": 4200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001671653883447088, + "loss": 0.0373, + "step": 4300 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016702547158022968, + "loss": 0.0375, + "step": 4400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001668824183078143, + "loss": 0.0393, + "step": 4500 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016673623400279849, + "loss": 0.0358, + "step": 4600 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016658692426035782, + "loss": 0.0334, + "step": 4700 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016643449479529325, + "loss": 0.035, + "step": 4800 + }, + { + "epoch": 0.1, + "learning_rate": 0.00016627895144181258, + "loss": 0.0343, + "step": 4900 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001661203001533071, + "loss": 0.0335, + "step": 5000 + }, + { + "epoch": 0.1, + "learning_rate": 0.00016595854700212362, + "loss": 0.0353, + "step": 5100 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001657936981793322, + "loss": 0.0331, + "step": 5200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001656257599944891, + "loss": 0.0387, + "step": 5300 + }, + { + "epoch": 0.11, + "learning_rate": 0.00016545473887539532, + "loss": 0.0327, + "step": 5400 + }, + { + "epoch": 0.11, + "learning_rate": 0.00016528064136785056, + "loss": 0.0316, + "step": 5500 + }, + { + "epoch": 0.11, + "learning_rate": 0.00016510347413540262, + "loss": 0.0319, + "step": 5600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001649232439590925, + "loss": 0.0314, + "step": 5700 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001647399577371947, + "loss": 0.0321, + "step": 5800 + }, + { + "epoch": 0.12, + "learning_rate": 0.00016455362248495338, + "loss": 0.0333, + "step": 5900 + }, + { + "epoch": 0.12, + "learning_rate": 0.00016436424533431362, + "loss": 0.0319, + "step": 6000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001641718335336486, + "loss": 0.0315, + "step": 6100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001639763944474821, + "loss": 0.0311, + "step": 6200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001637779355562068, + "loss": 0.031, + "step": 6300 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016357646445579763, + "loss": 0.0299, + "step": 6400 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016337198885752133, + "loss": 0.0317, + "step": 6500 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016316451658764122, + "loss": 0.0302, + "step": 6600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001629540555871176, + "loss": 0.0295, + "step": 6700 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016274061391130388, + "loss": 0.03, + "step": 6800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001625241997296382, + "loss": 0.0292, + "step": 6900 + }, + { + "epoch": 0.14, + "learning_rate": 0.00016230482132533077, + "loss": 0.0289, + "step": 7000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00016208472504084003, + "loss": 0.0318, + "step": 7100 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001618594729250462, + "loss": 0.0301, + "step": 7200 + }, + { + "epoch": 0.14, + "learning_rate": 0.00016163128202889828, + "loss": 0.0295, + "step": 7300 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016140016108635798, + "loss": 0.029, + "step": 7400 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016116611894353386, + "loss": 0.0291, + "step": 7500 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016092916455834295, + "loss": 0.0311, + "step": 7600 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016068930700016766, + "loss": 0.0285, + "step": 7700 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016044655544950889, + "loss": 0.0287, + "step": 7800 + }, + { + "epoch": 0.16, + "learning_rate": 0.00016020091919763445, + "loss": 0.0293, + "step": 7900 + }, + { + "epoch": 0.16, + "learning_rate": 0.00015995240764622357, + "loss": 0.0259, + "step": 8000 + }, + { + "epoch": 0.16, + "learning_rate": 0.000159701030307007, + "loss": 0.0293, + "step": 8100 + }, + { + "epoch": 0.16, + "learning_rate": 0.00015944679680140295, + "loss": 0.0277, + "step": 8200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001591897168601488, + "loss": 0.0304, + "step": 8300 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015892980032292876, + "loss": 0.026, + "step": 8400 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015866705713799714, + "loss": 0.0294, + "step": 8500 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015840149736179762, + "loss": 0.0321, + "step": 8600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001581331311585785, + "loss": 0.0279, + "step": 8700 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015786196880000325, + "loss": 0.0277, + "step": 8800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001575880206647579, + "loss": 0.0268, + "step": 8900 + }, + { + "epoch": 0.18, + "learning_rate": 0.00015731129723815343, + "loss": 0.0281, + "step": 9000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00015703180911172453, + "loss": 0.028, + "step": 9100 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001567495669828243, + "loss": 0.026, + "step": 9200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001564645816542146, + "loss": 0.0256, + "step": 9300 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001561768640336529, + "loss": 0.027, + "step": 9400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001558864251334745, + "loss": 0.0249, + "step": 9500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015559327607017119, + "loss": 0.0256, + "step": 9600 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015529742806396564, + "loss": 0.0251, + "step": 9700 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015499889243838211, + "loss": 0.0257, + "step": 9800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015469768061981295, + "loss": 0.0264, + "step": 9900 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015439685605389073, + "loss": 0.0247, + "step": 10000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015409035301023626, + "loss": 0.0256, + "step": 10100 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015378120854776404, + "loss": 0.0247, + "step": 10200 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015346943449891755, + "loss": 0.0254, + "step": 10300 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015315504279678714, + "loss": 0.0246, + "step": 10400 + }, + { + "epoch": 0.21, + "learning_rate": 0.00015283804547465337, + "loss": 0.0249, + "step": 10500 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001525184546655264, + "loss": 0.026, + "step": 10600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001521962826016816, + "loss": 0.0254, + "step": 10700 + }, + { + "epoch": 0.21, + "learning_rate": 0.00015187154161419122, + "loss": 0.0272, + "step": 10800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001515442441324528, + "loss": 0.0253, + "step": 10900 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015121440268371297, + "loss": 0.0263, + "step": 11000 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015088202989258835, + "loss": 0.0256, + "step": 11100 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015054713848058212, + "loss": 0.023, + "step": 11200 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015020974126559716, + "loss": 0.0236, + "step": 11300 + }, + { + "epoch": 0.22, + "learning_rate": 0.00014986985116144554, + "loss": 0.0241, + "step": 11400 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014952748117735409, + "loss": 0.0235, + "step": 11500 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001491826444174666, + "loss": 0.0242, + "step": 11600 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014883535408034227, + "loss": 0.0233, + "step": 11700 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014848562345845032, + "loss": 0.0228, + "step": 11800 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014813346593766164, + "loss": 0.023, + "step": 11900 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001477788949967359, + "loss": 0.0236, + "step": 12000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00014742192420680626, + "loss": 0.0237, + "step": 12100 + }, + { + "epoch": 0.24, + "learning_rate": 0.00014706256723085937, + "loss": 0.0237, + "step": 12200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001467008378232128, + "loss": 0.024, + "step": 12300 + }, + { + "epoch": 0.24, + "learning_rate": 0.00014633674982898854, + "loss": 0.0227, + "step": 12400 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014597031718358285, + "loss": 0.0247, + "step": 12500 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014560155391213318, + "loss": 0.0233, + "step": 12600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001452304741289812, + "loss": 0.0234, + "step": 12700 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014485709203713263, + "loss": 0.0221, + "step": 12800 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014448142192771354, + "loss": 0.0244, + "step": 12900 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014410347817942347, + "loss": 0.0242, + "step": 13000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014372327525798503, + "loss": 0.0231, + "step": 13100 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014334082771559026, + "loss": 0.0238, + "step": 13200 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014295615019034358, + "loss": 0.0233, + "step": 13300 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001425692574057016, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014218016416990954, + "loss": 0.0249, + "step": 13500 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001417928089327897, + "loss": 0.0265, + "step": 13600 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014139938118717103, + "loss": 0.0206, + "step": 13700 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014100379776718443, + "loss": 0.0216, + "step": 13800 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014060607381370744, + "loss": 0.0239, + "step": 13900 + }, + { + "epoch": 0.28, + "learning_rate": 0.00014020622454954606, + "loss": 0.024, + "step": 14000 + }, + { + "epoch": 0.28, + "learning_rate": 0.00013980426527885237, + "loss": 0.0234, + "step": 14100 + }, + { + "epoch": 0.28, + "learning_rate": 0.00013940021138653824, + "loss": 0.0228, + "step": 14200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00013899407833768695, + "loss": 0.0228, + "step": 14300 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001385858816769611, + "loss": 0.0234, + "step": 14400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001381756370280075, + "loss": 0.0233, + "step": 14500 + }, + { + "epoch": 0.29, + "learning_rate": 0.00013776336009285937, + "loss": 0.0216, + "step": 14600 + }, + { + "epoch": 0.29, + "learning_rate": 0.00013734906665133537, + "loss": 0.0268, + "step": 14700 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001369327725604354, + "loss": 0.022, + "step": 14800 + }, + { + "epoch": 0.29, + "learning_rate": 0.00013651449375373396, + "loss": 0.0214, + "step": 14900 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013609424624077, + "loss": 0.0235, + "step": 15000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001356720461064345, + "loss": 0.0225, + "step": 15100 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013524790951035441, + "loss": 0.0218, + "step": 15200 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013482185268627465, + "loss": 0.0225, + "step": 15300 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013439389194143625, + "loss": 0.0216, + "step": 15400 + }, + { + "epoch": 0.31, + "learning_rate": 0.00013396404365595253, + "loss": 0.0223, + "step": 15500 + }, + { + "epoch": 0.31, + "learning_rate": 0.00013353232428218212, + "loss": 0.0212, + "step": 15600 + }, + { + "epoch": 0.31, + "learning_rate": 0.00013309875034409903, + "loss": 0.023, + "step": 15700 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001326633384366605, + "loss": 0.0214, + "step": 15800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001322261052251716, + "loss": 0.021, + "step": 15900 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013178706744464749, + "loss": 0.0211, + "step": 16000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013134624189917282, + "loss": 0.0212, + "step": 16100 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013090808013598785, + "loss": 0.0252, + "step": 16200 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013046374720137184, + "loss": 0.0214, + "step": 16300 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013001767715162726, + "loss": 0.0202, + "step": 16400 + }, + { + "epoch": 0.32, + "learning_rate": 0.00012956988705999754, + "loss": 0.0205, + "step": 16500 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012912039406556028, + "loss": 0.0235, + "step": 16600 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012866921537257149, + "loss": 0.0215, + "step": 16700 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012821636824980682, + "loss": 0.0198, + "step": 16800 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012776187002990082, + "loss": 0.0232, + "step": 16900 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012730573810868347, + "loss": 0.0235, + "step": 17000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012684798994451428, + "loss": 0.0207, + "step": 17100 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012638864305761417, + "loss": 0.0218, + "step": 17200 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012592771502939492, + "loss": 0.0204, + "step": 17300 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001254652235017861, + "loss": 0.0212, + "step": 17400 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012500118617655992, + "loss": 0.0207, + "step": 17500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00012453562081465375, + "loss": 0.0207, + "step": 17600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001240685452354902, + "loss": 0.0204, + "step": 17700 + }, + { + "epoch": 0.35, + "learning_rate": 0.00012359997731629517, + "loss": 0.0234, + "step": 17800 + }, + { + "epoch": 0.35, + "learning_rate": 0.00012312993499141364, + "loss": 0.02, + "step": 17900 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001226584362516231, + "loss": 0.0191, + "step": 18000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012218549914344497, + "loss": 0.0196, + "step": 18100 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012171114176845411, + "loss": 0.0203, + "step": 18200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001212353822825857, + "loss": 0.0224, + "step": 18300 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012075823889544048, + "loss": 0.0211, + "step": 18400 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012027972986958772, + "loss": 0.0188, + "step": 18500 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001197998735198662, + "loss": 0.0214, + "step": 18600 + }, + { + "epoch": 0.37, + "learning_rate": 0.00011931868821268337, + "loss": 0.0201, + "step": 18700 + }, + { + "epoch": 0.37, + "learning_rate": 0.00011883619236531218, + "loss": 0.0206, + "step": 18800 + }, + { + "epoch": 0.37, + "learning_rate": 0.00011835240444518623, + "loss": 0.0206, + "step": 18900 + }, + { + "epoch": 0.37, + "learning_rate": 0.000117867342969193, + "loss": 0.0197, + "step": 19000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011738102650296509, + "loss": 0.0198, + "step": 19100 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011689347366016955, + "loss": 0.0211, + "step": 19200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001164047031017955, + "loss": 0.0204, + "step": 19300 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011591473353543992, + "loss": 0.0206, + "step": 19400 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011542358371459156, + "loss": 0.0192, + "step": 19500 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001149312724379132, + "loss": 0.0202, + "step": 19600 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011443781854852212, + "loss": 0.0197, + "step": 19700 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001139432409332688, + "loss": 0.021, + "step": 19800 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011344755852201423, + "loss": 0.022, + "step": 19900 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011295079028690513, + "loss": 0.0196, + "step": 20000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011245295524164799, + "loss": 0.0195, + "step": 20100 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011195407244078124, + "loss": 0.0197, + "step": 20200 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011145916512264182, + "loss": 0.0227, + "step": 20300 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001109582541342795, + "loss": 0.0197, + "step": 20400 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011045635259969838, + "loss": 0.0203, + "step": 20500 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001099585132027323, + "loss": 0.0216, + "step": 20600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00010945469766893083, + "loss": 0.0224, + "step": 20700 + }, + { + "epoch": 0.41, + "learning_rate": 0.00010894994913723849, + "loss": 0.0193, + "step": 20800 + }, + { + "epoch": 0.41, + "learning_rate": 0.00010844428692680601, + "loss": 0.0203, + "step": 20900 + }, + { + "epoch": 0.41, + "learning_rate": 0.000107937730391755, + "loss": 0.02, + "step": 21000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00010743029892043725, + "loss": 0.0197, + "step": 21100 + }, + { + "epoch": 0.42, + "learning_rate": 0.00010692201193469241, + "loss": 0.0192, + "step": 21200 + }, + { + "epoch": 0.42, + "learning_rate": 0.00010641288888910492, + "loss": 0.0189, + "step": 21300 + }, + { + "epoch": 0.42, + "learning_rate": 0.00010590294927025919, + "loss": 0.02, + "step": 21400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00010539221259599377, + "loss": 0.0182, + "step": 21500 + }, + { + "epoch": 0.43, + "learning_rate": 0.00010488069841465444, + "loss": 0.0191, + "step": 21600 + }, + { + "epoch": 0.43, + "learning_rate": 0.00010436842630434579, + "loss": 0.0188, + "step": 21700 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001038554158721821, + "loss": 0.023, + "step": 21800 + }, + { + "epoch": 0.43, + "learning_rate": 0.00010334168675353674, + "loss": 0.0206, + "step": 21900 + }, + { + "epoch": 0.43, + "learning_rate": 0.00010282725861129068, + "loss": 0.0187, + "step": 22000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00010231215113507977, + "loss": 0.018, + "step": 22100 + }, + { + "epoch": 0.44, + "learning_rate": 0.00010179638404054145, + "loss": 0.0202, + "step": 22200 + }, + { + "epoch": 0.44, + "learning_rate": 0.00010127997706855969, + "loss": 0.0199, + "step": 22300 + }, + { + "epoch": 0.44, + "learning_rate": 0.00010076294998450981, + "loss": 0.019, + "step": 22400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001002453225775017, + "loss": 0.0193, + "step": 22500 + }, + { + "epoch": 0.45, + "learning_rate": 9.972711465962257e-05, + "loss": 0.019, + "step": 22600 + }, + { + "epoch": 0.45, + "learning_rate": 9.920834606517847e-05, + "loss": 0.0195, + "step": 22700 + }, + { + "epoch": 0.45, + "learning_rate": 9.868903664993534e-05, + "loss": 0.018, + "step": 22800 + }, + { + "epoch": 0.45, + "learning_rate": 9.816920629035887e-05, + "loss": 0.0202, + "step": 22900 + }, + { + "epoch": 0.45, + "learning_rate": 9.764887488285381e-05, + "loss": 0.0185, + "step": 23000 + }, + { + "epoch": 0.45, + "learning_rate": 9.71280623430024e-05, + "loss": 0.0189, + "step": 23100 + }, + { + "epoch": 0.46, + "learning_rate": 9.660678860480215e-05, + "loss": 0.0186, + "step": 23200 + }, + { + "epoch": 0.46, + "learning_rate": 9.608507361990283e-05, + "loss": 0.0195, + "step": 23300 + }, + { + "epoch": 0.46, + "learning_rate": 9.556293735684288e-05, + "loss": 0.0206, + "step": 23400 + }, + { + "epoch": 0.46, + "learning_rate": 9.50456270965957e-05, + "loss": 0.0204, + "step": 23500 + }, + { + "epoch": 0.46, + "learning_rate": 9.452271196044817e-05, + "loss": 0.0189, + "step": 23600 + }, + { + "epoch": 0.47, + "learning_rate": 9.399943534522518e-05, + "loss": 0.0198, + "step": 23700 + }, + { + "epoch": 0.47, + "learning_rate": 9.347581727923617e-05, + "loss": 0.0197, + "step": 23800 + }, + { + "epoch": 0.47, + "learning_rate": 9.295187780385963e-05, + "loss": 0.0187, + "step": 23900 + }, + { + "epoch": 0.47, + "learning_rate": 9.242763697277588e-05, + "loss": 0.0201, + "step": 24000 + }, + { + "epoch": 0.47, + "learning_rate": 9.19031148511996e-05, + "loss": 0.0194, + "step": 24100 + }, + { + "epoch": 0.48, + "learning_rate": 9.137833151511182e-05, + "loss": 0.0186, + "step": 24200 + }, + { + "epoch": 0.48, + "learning_rate": 9.08533070504915e-05, + "loss": 0.0215, + "step": 24300 + }, + { + "epoch": 0.48, + "learning_rate": 9.032806155254666e-05, + "loss": 0.0205, + "step": 24400 + }, + { + "epoch": 0.48, + "learning_rate": 8.980261512494549e-05, + "loss": 0.0188, + "step": 24500 + }, + { + "epoch": 0.48, + "learning_rate": 8.927698787904661e-05, + "loss": 0.0188, + "step": 24600 + }, + { + "epoch": 0.49, + "learning_rate": 8.875119993312938e-05, + "loss": 0.0204, + "step": 24700 + }, + { + "epoch": 0.49, + "learning_rate": 8.822527141162407e-05, + "loss": 0.0191, + "step": 24800 + }, + { + "epoch": 0.49, + "learning_rate": 8.769922244434138e-05, + "loss": 0.017, + "step": 24900 + }, + { + "epoch": 0.49, + "learning_rate": 8.717307316570196e-05, + "loss": 0.0186, + "step": 25000 + }, + { + "epoch": 0.49, + "learning_rate": 8.664684371396603e-05, + "loss": 0.0195, + "step": 25100 + }, + { + "epoch": 0.5, + "learning_rate": 8.612055423046226e-05, + "loss": 0.0199, + "step": 25200 + }, + { + "epoch": 0.5, + "learning_rate": 8.559422485881711e-05, + "loss": 0.0191, + "step": 25300 + }, + { + "epoch": 0.5, + "learning_rate": 8.506787574418376e-05, + "loss": 0.0191, + "step": 25400 + }, + { + "epoch": 0.5, + "learning_rate": 8.4541527032471e-05, + "loss": 0.0196, + "step": 25500 + }, + { + "epoch": 0.5, + "learning_rate": 8.401519886957223e-05, + "loss": 0.0184, + "step": 25600 + }, + { + "epoch": 0.51, + "learning_rate": 8.348891140059429e-05, + "loss": 0.0182, + "step": 25700 + }, + { + "epoch": 0.51, + "learning_rate": 8.29626847690866e-05, + "loss": 0.0216, + "step": 25800 + }, + { + "epoch": 0.51, + "learning_rate": 8.243653911626992e-05, + "loss": 0.0177, + "step": 25900 + }, + { + "epoch": 0.51, + "learning_rate": 8.191575445898302e-05, + "loss": 0.019, + "step": 26000 + }, + { + "epoch": 0.51, + "learning_rate": 8.13898298618816e-05, + "loss": 0.0187, + "step": 26100 + }, + { + "epoch": 0.52, + "learning_rate": 8.086404644418334e-05, + "loss": 0.0189, + "step": 26200 + }, + { + "epoch": 0.52, + "learning_rate": 8.033842433014505e-05, + "loss": 0.0178, + "step": 26300 + }, + { + "epoch": 0.52, + "learning_rate": 7.981298363784983e-05, + "loss": 0.0182, + "step": 26400 + }, + { + "epoch": 0.52, + "learning_rate": 7.928774447843676e-05, + "loss": 0.0197, + "step": 26500 + }, + { + "epoch": 0.52, + "learning_rate": 7.876272695533133e-05, + "loss": 0.0182, + "step": 26600 + }, + { + "epoch": 0.53, + "learning_rate": 7.823795116347596e-05, + "loss": 0.0203, + "step": 26700 + }, + { + "epoch": 0.53, + "learning_rate": 7.771343718856085e-05, + "loss": 0.0193, + "step": 26800 + }, + { + "epoch": 0.53, + "learning_rate": 7.718920510625514e-05, + "loss": 0.0182, + "step": 26900 + }, + { + "epoch": 0.53, + "learning_rate": 7.666527498143874e-05, + "loss": 0.0202, + "step": 27000 + }, + { + "epoch": 0.53, + "learning_rate": 7.614166686743405e-05, + "loss": 0.019, + "step": 27100 + }, + { + "epoch": 0.54, + "learning_rate": 7.561840080523864e-05, + "loss": 0.0174, + "step": 27200 + }, + { + "epoch": 0.54, + "learning_rate": 7.509549682275809e-05, + "loss": 0.018, + "step": 27300 + }, + { + "epoch": 0.54, + "learning_rate": 7.457297493403944e-05, + "loss": 0.019, + "step": 27400 + }, + { + "epoch": 0.54, + "learning_rate": 7.405085513850521e-05, + "loss": 0.0205, + "step": 27500 + }, + { + "epoch": 0.54, + "learning_rate": 7.352915742018785e-05, + "loss": 0.0169, + "step": 27600 + }, + { + "epoch": 0.55, + "learning_rate": 7.30079017469649e-05, + "loss": 0.0183, + "step": 27700 + }, + { + "epoch": 0.55, + "learning_rate": 7.248710806979468e-05, + "loss": 0.0181, + "step": 27800 + }, + { + "epoch": 0.55, + "learning_rate": 7.196679632195274e-05, + "loss": 0.0171, + "step": 27900 + }, + { + "epoch": 0.55, + "learning_rate": 7.144698641826882e-05, + "loss": 0.0202, + "step": 28000 + }, + { + "epoch": 0.55, + "learning_rate": 7.092769825436483e-05, + "loss": 0.0184, + "step": 28100 + }, + { + "epoch": 0.56, + "learning_rate": 7.040895170589299e-05, + "loss": 0.0191, + "step": 28200 + }, + { + "epoch": 0.56, + "learning_rate": 6.989594563413708e-05, + "loss": 0.0206, + "step": 28300 + }, + { + "epoch": 0.56, + "learning_rate": 6.937833594866479e-05, + "loss": 0.0184, + "step": 28400 + }, + { + "epoch": 0.56, + "learning_rate": 6.886132718016212e-05, + "loss": 0.0213, + "step": 28500 + }, + { + "epoch": 0.56, + "learning_rate": 6.834493911703791e-05, + "loss": 0.0181, + "step": 28600 + }, + { + "epoch": 0.57, + "learning_rate": 6.78291915239438e-05, + "loss": 0.0205, + "step": 28700 + }, + { + "epoch": 0.57, + "learning_rate": 6.731410414101738e-05, + "loss": 0.0184, + "step": 28800 + }, + { + "epoch": 0.57, + "learning_rate": 6.679969668312693e-05, + "loss": 0.0179, + "step": 28900 + }, + { + "epoch": 0.57, + "learning_rate": 6.628598883911667e-05, + "loss": 0.0206, + "step": 29000 + }, + { + "epoch": 0.57, + "learning_rate": 6.577300027105329e-05, + "loss": 0.0172, + "step": 29100 + }, + { + "epoch": 0.58, + "learning_rate": 6.526075061347325e-05, + "loss": 0.0194, + "step": 29200 + }, + { + "epoch": 0.58, + "learning_rate": 6.474925947263152e-05, + "loss": 0.0195, + "step": 29300 + }, + { + "epoch": 0.58, + "learning_rate": 6.423854642575082e-05, + "loss": 0.0193, + "step": 29400 + }, + { + "epoch": 0.58, + "learning_rate": 6.372863102027257e-05, + "loss": 0.0182, + "step": 29500 + }, + { + "epoch": 0.58, + "learning_rate": 6.321953277310858e-05, + "loss": 0.0195, + "step": 29600 + }, + { + "epoch": 0.58, + "learning_rate": 6.27112711698941e-05, + "loss": 0.0174, + "step": 29700 + }, + { + "epoch": 0.59, + "learning_rate": 6.220386566424204e-05, + "loss": 0.0214, + "step": 29800 + }, + { + "epoch": 0.59, + "learning_rate": 6.169733567699826e-05, + "loss": 0.0176, + "step": 29900 + }, + { + "epoch": 0.59, + "learning_rate": 6.11917005954984e-05, + "loss": 0.017, + "step": 30000 + }, + { + "epoch": 0.59, + "learning_rate": 6.068697977282572e-05, + "loss": 0.0185, + "step": 30100 + }, + { + "epoch": 0.59, + "learning_rate": 6.018319252707041e-05, + "loss": 0.0175, + "step": 30200 + }, + { + "epoch": 0.6, + "learning_rate": 5.968035814059016e-05, + "loss": 0.0166, + "step": 30300 + }, + { + "epoch": 0.6, + "learning_rate": 5.9178495859272244e-05, + "loss": 0.0167, + "step": 30400 + }, + { + "epoch": 0.6, + "learning_rate": 5.8677624891796735e-05, + "loss": 0.018, + "step": 30500 + }, + { + "epoch": 0.6, + "learning_rate": 5.8177764408901366e-05, + "loss": 0.0182, + "step": 30600 + }, + { + "epoch": 0.6, + "learning_rate": 5.767893354264776e-05, + "loss": 0.019, + "step": 30700 + }, + { + "epoch": 0.61, + "learning_rate": 5.7186123953555236e-05, + "loss": 0.0198, + "step": 30800 + }, + { + "epoch": 0.61, + "learning_rate": 5.668939878661137e-05, + "loss": 0.0177, + "step": 30900 + }, + { + "epoch": 0.61, + "learning_rate": 5.619376020321073e-05, + "loss": 0.0165, + "step": 31000 + }, + { + "epoch": 0.61, + "learning_rate": 5.5699227173822495e-05, + "loss": 0.0205, + "step": 31100 + }, + { + "epoch": 0.61, + "learning_rate": 5.5205818626601115e-05, + "loss": 0.0181, + "step": 31200 + }, + { + "epoch": 0.62, + "learning_rate": 5.4713553446661644e-05, + "loss": 0.0187, + "step": 31300 + }, + { + "epoch": 0.62, + "learning_rate": 5.4222450475357026e-05, + "loss": 0.0169, + "step": 31400 + }, + { + "epoch": 0.62, + "learning_rate": 5.373252850955681e-05, + "loss": 0.0174, + "step": 31500 + }, + { + "epoch": 0.62, + "learning_rate": 5.3243806300927926e-05, + "loss": 0.0179, + "step": 31600 + }, + { + "epoch": 0.62, + "learning_rate": 5.2756302555216735e-05, + "loss": 0.0175, + "step": 31700 + }, + { + "epoch": 0.63, + "learning_rate": 5.2270035931533207e-05, + "loss": 0.0199, + "step": 31800 + }, + { + "epoch": 0.63, + "learning_rate": 5.1785025041636665e-05, + "loss": 0.0184, + "step": 31900 + }, + { + "epoch": 0.63, + "learning_rate": 5.130128844922355e-05, + "loss": 0.0185, + "step": 32000 + }, + { + "epoch": 0.63, + "learning_rate": 5.081884466921669e-05, + "loss": 0.0174, + "step": 32100 + }, + { + "epoch": 0.63, + "learning_rate": 5.0337712167056944e-05, + "loss": 0.0201, + "step": 32200 + }, + { + "epoch": 0.64, + "learning_rate": 4.985790935799614e-05, + "loss": 0.0178, + "step": 32300 + }, + { + "epoch": 0.64, + "learning_rate": 4.937945460639236e-05, + "loss": 0.0176, + "step": 32400 + }, + { + "epoch": 0.64, + "learning_rate": 4.890236622500717e-05, + "loss": 0.0239, + "step": 32500 + }, + { + "epoch": 0.64, + "learning_rate": 4.8426662474304465e-05, + "loss": 0.0177, + "step": 32600 + }, + { + "epoch": 0.64, + "learning_rate": 4.795236156175173e-05, + "loss": 0.0182, + "step": 32700 + }, + { + "epoch": 0.65, + "learning_rate": 4.7479481641123066e-05, + "loss": 0.016, + "step": 32800 + }, + { + "epoch": 0.65, + "learning_rate": 4.7008040811804424e-05, + "loss": 0.0175, + "step": 32900 + }, + { + "epoch": 0.65, + "learning_rate": 4.653805711810078e-05, + "loss": 0.0173, + "step": 33000 + }, + { + "epoch": 0.65, + "learning_rate": 4.6069548548545565e-05, + "loss": 0.0179, + "step": 33100 + }, + { + "epoch": 0.65, + "learning_rate": 4.560253303521206e-05, + "loss": 0.0186, + "step": 33200 + }, + { + "epoch": 0.66, + "learning_rate": 4.513702845302723e-05, + "loss": 0.019, + "step": 33300 + }, + { + "epoch": 0.66, + "learning_rate": 4.467305261908736e-05, + "loss": 0.0164, + "step": 33400 + }, + { + "epoch": 0.66, + "learning_rate": 4.42106232919762e-05, + "loss": 0.019, + "step": 33500 + }, + { + "epoch": 0.66, + "learning_rate": 4.374975817108527e-05, + "loss": 0.0174, + "step": 33600 + }, + { + "epoch": 0.66, + "learning_rate": 4.3295059840780387e-05, + "loss": 0.0187, + "step": 33700 + }, + { + "epoch": 0.67, + "learning_rate": 4.283735990928788e-05, + "loss": 0.019, + "step": 33800 + }, + { + "epoch": 0.67, + "learning_rate": 4.2381276745401456e-05, + "loss": 0.0175, + "step": 33900 + }, + { + "epoch": 0.67, + "learning_rate": 4.192682780561444e-05, + "loss": 0.0186, + "step": 34000 + }, + { + "epoch": 0.67, + "learning_rate": 4.147403048387059e-05, + "loss": 0.02, + "step": 34100 + }, + { + "epoch": 0.67, + "learning_rate": 4.1022902110898326e-05, + "loss": 0.0169, + "step": 34200 + }, + { + "epoch": 0.68, + "learning_rate": 4.057345995354738e-05, + "loss": 0.0176, + "step": 34300 + }, + { + "epoch": 0.68, + "learning_rate": 4.0125721214127854e-05, + "loss": 0.0167, + "step": 34400 + }, + { + "epoch": 0.68, + "learning_rate": 3.9679703029752e-05, + "loss": 0.0179, + "step": 34500 + }, + { + "epoch": 0.68, + "learning_rate": 3.923542247167802e-05, + "loss": 0.0162, + "step": 34600 + }, + { + "epoch": 0.68, + "learning_rate": 3.879289654465689e-05, + "loss": 0.0194, + "step": 34700 + }, + { + "epoch": 0.69, + "learning_rate": 3.835214218628141e-05, + "loss": 0.0187, + "step": 34800 + }, + { + "epoch": 0.69, + "learning_rate": 3.7913176266337885e-05, + "loss": 0.0165, + "step": 34900 + }, + { + "epoch": 0.69, + "learning_rate": 3.747601558616062e-05, + "loss": 0.0162, + "step": 35000 + }, + { + "epoch": 0.69, + "learning_rate": 3.70406768779886e-05, + "loss": 0.0169, + "step": 35100 + }, + { + "epoch": 0.69, + "learning_rate": 3.660717680432526e-05, + "loss": 0.0173, + "step": 35200 + }, + { + "epoch": 0.7, + "learning_rate": 3.61755319573006e-05, + "loss": 0.016, + "step": 35300 + }, + { + "epoch": 0.7, + "learning_rate": 3.5745758858036255e-05, + "loss": 0.0188, + "step": 35400 + }, + { + "epoch": 0.7, + "learning_rate": 3.5317873956012995e-05, + "loss": 0.0198, + "step": 35500 + }, + { + "epoch": 0.7, + "learning_rate": 3.4891893628441334e-05, + "loss": 0.0171, + "step": 35600 + }, + { + "epoch": 0.7, + "learning_rate": 3.446783417963448e-05, + "loss": 0.0179, + "step": 35700 + }, + { + "epoch": 0.71, + "learning_rate": 3.4045711840384404e-05, + "loss": 0.0158, + "step": 35800 + }, + { + "epoch": 0.71, + "learning_rate": 3.36255427673406e-05, + "loss": 0.0171, + "step": 35900 + }, + { + "epoch": 0.71, + "learning_rate": 3.320734304239168e-05, + "loss": 0.0162, + "step": 36000 + }, + { + "epoch": 0.71, + "learning_rate": 3.279112867204983e-05, + "loss": 0.0172, + "step": 36100 + }, + { + "epoch": 0.71, + "learning_rate": 3.2376915586838195e-05, + "loss": 0.0166, + "step": 36200 + }, + { + "epoch": 0.71, + "learning_rate": 3.196471964068113e-05, + "loss": 0.0171, + "step": 36300 + }, + { + "epoch": 0.72, + "learning_rate": 3.1554556610297393e-05, + "loss": 0.019, + "step": 36400 + }, + { + "epoch": 0.72, + "learning_rate": 3.1146442194596243e-05, + "loss": 0.0167, + "step": 36500 + }, + { + "epoch": 0.72, + "learning_rate": 3.074039201407669e-05, + "loss": 0.0172, + "step": 36600 + }, + { + "epoch": 0.72, + "learning_rate": 3.0336421610229454e-05, + "loss": 0.0182, + "step": 36700 + }, + { + "epoch": 0.72, + "learning_rate": 2.9934546444942337e-05, + "loss": 0.0171, + "step": 36800 + }, + { + "epoch": 0.73, + "learning_rate": 2.9534781899908195e-05, + "loss": 0.0165, + "step": 36900 + }, + { + "epoch": 0.73, + "learning_rate": 2.9141109088924195e-05, + "loss": 0.02, + "step": 37000 + }, + { + "epoch": 0.73, + "learning_rate": 2.87455901192796e-05, + "loss": 0.0174, + "step": 37100 + }, + { + "epoch": 0.73, + "learning_rate": 2.835222727695746e-05, + "loss": 0.0171, + "step": 37200 + }, + { + "epoch": 0.73, + "learning_rate": 2.7961035617843112e-05, + "loss": 0.0172, + "step": 37300 + }, + { + "epoch": 0.74, + "learning_rate": 2.757203011472043e-05, + "loss": 0.0164, + "step": 37400 + }, + { + "epoch": 0.74, + "learning_rate": 2.718522565669847e-05, + "loss": 0.017, + "step": 37500 + }, + { + "epoch": 0.74, + "learning_rate": 2.6800637048641775e-05, + "loss": 0.0167, + "step": 37600 + }, + { + "epoch": 0.74, + "learning_rate": 2.6418279010603663e-05, + "loss": 0.0184, + "step": 37700 + }, + { + "epoch": 0.74, + "learning_rate": 2.6038166177262784e-05, + "loss": 0.0158, + "step": 37800 + }, + { + "epoch": 0.75, + "learning_rate": 2.566031309736311e-05, + "loss": 0.0165, + "step": 37900 + }, + { + "epoch": 0.75, + "learning_rate": 2.5284734233156903e-05, + "loss": 0.0165, + "step": 38000 + }, + { + "epoch": 0.75, + "learning_rate": 2.4911443959851313e-05, + "loss": 0.017, + "step": 38100 + }, + { + "epoch": 0.75, + "learning_rate": 2.4540456565058115e-05, + "loss": 0.0177, + "step": 38200 + }, + { + "epoch": 0.75, + "learning_rate": 2.417178624824684e-05, + "loss": 0.0165, + "step": 38300 + }, + { + "epoch": 0.76, + "learning_rate": 2.3805447120201323e-05, + "loss": 0.0173, + "step": 38400 + }, + { + "epoch": 0.76, + "learning_rate": 2.344145320247968e-05, + "loss": 0.0189, + "step": 38500 + }, + { + "epoch": 0.76, + "learning_rate": 2.3079818426877448e-05, + "loss": 0.0183, + "step": 38600 + }, + { + "epoch": 0.76, + "learning_rate": 2.2720556634894542e-05, + "loss": 0.0161, + "step": 38700 + }, + { + "epoch": 0.76, + "learning_rate": 2.2363681577205363e-05, + "loss": 0.0161, + "step": 38800 + }, + { + "epoch": 0.77, + "learning_rate": 2.2009206913132545e-05, + "loss": 0.017, + "step": 38900 + }, + { + "epoch": 0.77, + "learning_rate": 2.1657146210124072e-05, + "loss": 0.0175, + "step": 39000 + }, + { + "epoch": 0.77, + "learning_rate": 2.1307512943234157e-05, + "loss": 0.0162, + "step": 39100 + }, + { + "epoch": 0.77, + "learning_rate": 2.0960320494607276e-05, + "loss": 0.0161, + "step": 39200 + }, + { + "epoch": 0.77, + "learning_rate": 2.0615582152966087e-05, + "loss": 0.0174, + "step": 39300 + }, + { + "epoch": 0.78, + "learning_rate": 2.027672156726409e-05, + "loss": 0.0194, + "step": 39400 + }, + { + "epoch": 0.78, + "learning_rate": 1.9936906060978636e-05, + "loss": 0.0185, + "step": 39500 + }, + { + "epoch": 0.78, + "learning_rate": 1.9599583832664966e-05, + "loss": 0.0169, + "step": 39600 + }, + { + "epoch": 0.78, + "learning_rate": 1.9264767793265046e-05, + "loss": 0.0174, + "step": 39700 + }, + { + "epoch": 0.78, + "learning_rate": 1.8932470757796908e-05, + "loss": 0.0172, + "step": 39800 + }, + { + "epoch": 0.79, + "learning_rate": 1.86027054448642e-05, + "loss": 0.0157, + "step": 39900 + }, + { + "epoch": 0.79, + "learning_rate": 1.8275484476169405e-05, + "loss": 0.0166, + "step": 40000 + } + ], + "logging_steps": 100, + "max_steps": 50780, + "num_train_epochs": 1, + "save_steps": 10000, + "total_flos": 1.5971349454848e+19, + "trial_name": null, + "trial_params": null +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/training_args.bin b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..100b6c56d3e6f8a91a89702971e1e66650fb8075 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d17a2c3c8a65881aa45bfd2997dfe652a61cb20d8b49594dc95bff2d7669f6f +size 4155 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/vocab.txt b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b946952cc35537226f07fd70957ee2f848880d2 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-40000/vocab.txt @@ -0,0 +1,33 @@ + + + + +L +A +G +V +S +E +R +T +I +D +P +K +Q +N +F +Y +M +H +W +C +X +B +U +Z +O +. +- + + \ No newline at end of file diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/README.md b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/adapter_config.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5f407fbf6673901d6bbc4be088ec92bf34bb74 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/adapter_config.json @@ -0,0 +1,28 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "facebook/esm2_t33_650M_UR50D", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 1, + "lora_dropout": 0.5, + "modules_to_save": null, + "peft_type": "LORA", + "r": 2, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query", + "key", + "value", + "EsmSelfOutput.dense", + "EsmIntermediate.dense", + "EsmOutput.dense", + "classifier" + ], + "task_type": "TOKEN_CLS" +} \ No newline at end of file diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/adapter_model.bin b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..f5443f32903fbebe98c0d50545d3aa9d64b87901 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85627b1dbb82966b232c4bafe1e4298d5dc5d76004054e090b29c2e464208c96 +size 2133117 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/added_tokens.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..6527dabc1b083db2af29d5fad6902c18bf831b21 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/added_tokens.json @@ -0,0 +1,7 @@ +{ + "": 0, + "": 2, + "": 32, + "": 1, + "": 3 +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/optimizer.pt b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..637e9473d83546eb44941d5387d6c96343a15abc --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37a34c9b53d25292f1aee71a58d518d4df3fff6b04e92ee811d46ffd9589f5fd +size 4215045 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/rng_state.pth b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..29ed9c74a5d769c11768f4474ad82ceebf829784 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20a3fd34a9983cda12786b82f949df19411e62828597524a59d2d16eadecea91 +size 14575 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/scheduler.pt b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1afa66a498073ad2d258e6493f161e1b5aa3e435 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:305612fe8af2c54795fed014a2f1fb5cb0ef51c94f66ac35956d4fc220f3f899 +size 627 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/special_tokens_map.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ba0f9b53dbbf27934f7555e5d31e37bdea9317f1 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "cls_token": "", + "eos_token": "", + "mask_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/tokenizer_config.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ed0725465a1b7a8e4469cd162fbea147da56a1c2 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "tokenizer_class": "EsmTokenizer", + "tokenizer_file": null, + "unk_token": "" +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/trainer_state.json b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..15b243bdc5ee7ed05e6892de282fe4c932ce365b --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/trainer_state.json @@ -0,0 +1,3019 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9846396218983852, + "eval_steps": 500, + "global_step": 50000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.00017015520980041642, + "loss": 0.5575, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017015035788629153, + "loss": 0.3065, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017014224985982174, + "loss": 0.2229, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001701308860313402, + "loss": 0.1812, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017011626683579524, + "loss": 0.1572, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017009839283273364, + "loss": 0.1335, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017007726470627936, + "loss": 0.1193, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 0.00017005288326510734, + "loss": 0.1075, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001700252494424124, + "loss": 0.0986, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016999436429587366, + "loss": 0.0952, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016996022900761407, + "loss": 0.0865, + "step": 1100 + }, + { + "epoch": 0.02, + "learning_rate": 0.000169922844884155, + "loss": 0.0816, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016988221335636648, + "loss": 0.0775, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016983833597941224, + "loss": 0.0744, + "step": 1400 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016979121443269025, + "loss": 0.0694, + "step": 1500 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001697408505197684, + "loss": 0.0641, + "step": 1600 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016968724616831557, + "loss": 0.0629, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001696304034300278, + "loss": 0.0696, + "step": 1800 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016957032448054968, + "loss": 0.0591, + "step": 1900 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001695070116193912, + "loss": 0.06, + "step": 2000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001694404672698396, + "loss": 0.0558, + "step": 2100 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016937069397886687, + "loss": 0.0529, + "step": 2200 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016929769441703196, + "loss": 0.0536, + "step": 2300 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016922147137837868, + "loss": 0.0537, + "step": 2400 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016914202778032893, + "loss": 0.0508, + "step": 2500 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001690593666635707, + "loss": 0.0524, + "step": 2600 + }, + { + "epoch": 0.05, + "learning_rate": 0.00016897349119194207, + "loss": 0.0494, + "step": 2700 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016888440465230977, + "loss": 0.047, + "step": 2800 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016879211045444354, + "loss": 0.0502, + "step": 2900 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016869661213088575, + "loss": 0.046, + "step": 3000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016859791333681583, + "loss": 0.0419, + "step": 3100 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001684960178499108, + "loss": 0.0456, + "step": 3200 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016839092957020028, + "loss": 0.0432, + "step": 3300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016828265251991761, + "loss": 0.0423, + "step": 3400 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016817119084334555, + "loss": 0.0407, + "step": 3500 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016805654880665776, + "loss": 0.0409, + "step": 3600 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016793873079775577, + "loss": 0.0397, + "step": 3700 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016781774132610059, + "loss": 0.0419, + "step": 3800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001676935850225405, + "loss": 0.0399, + "step": 3900 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016756626663913358, + "loss": 0.0389, + "step": 4000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016743579104896593, + "loss": 0.0385, + "step": 4100 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016730216324596504, + "loss": 0.0357, + "step": 4200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001671653883447088, + "loss": 0.0373, + "step": 4300 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016702547158022968, + "loss": 0.0375, + "step": 4400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001668824183078143, + "loss": 0.0393, + "step": 4500 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016673623400279849, + "loss": 0.0358, + "step": 4600 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016658692426035782, + "loss": 0.0334, + "step": 4700 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016643449479529325, + "loss": 0.035, + "step": 4800 + }, + { + "epoch": 0.1, + "learning_rate": 0.00016627895144181258, + "loss": 0.0343, + "step": 4900 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001661203001533071, + "loss": 0.0335, + "step": 5000 + }, + { + "epoch": 0.1, + "learning_rate": 0.00016595854700212362, + "loss": 0.0353, + "step": 5100 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001657936981793322, + "loss": 0.0331, + "step": 5200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001656257599944891, + "loss": 0.0387, + "step": 5300 + }, + { + "epoch": 0.11, + "learning_rate": 0.00016545473887539532, + "loss": 0.0327, + "step": 5400 + }, + { + "epoch": 0.11, + "learning_rate": 0.00016528064136785056, + "loss": 0.0316, + "step": 5500 + }, + { + "epoch": 0.11, + "learning_rate": 0.00016510347413540262, + "loss": 0.0319, + "step": 5600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001649232439590925, + "loss": 0.0314, + "step": 5700 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001647399577371947, + "loss": 0.0321, + "step": 5800 + }, + { + "epoch": 0.12, + "learning_rate": 0.00016455362248495338, + "loss": 0.0333, + "step": 5900 + }, + { + "epoch": 0.12, + "learning_rate": 0.00016436424533431362, + "loss": 0.0319, + "step": 6000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001641718335336486, + "loss": 0.0315, + "step": 6100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001639763944474821, + "loss": 0.0311, + "step": 6200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001637779355562068, + "loss": 0.031, + "step": 6300 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016357646445579763, + "loss": 0.0299, + "step": 6400 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016337198885752133, + "loss": 0.0317, + "step": 6500 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016316451658764122, + "loss": 0.0302, + "step": 6600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001629540555871176, + "loss": 0.0295, + "step": 6700 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016274061391130388, + "loss": 0.03, + "step": 6800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001625241997296382, + "loss": 0.0292, + "step": 6900 + }, + { + "epoch": 0.14, + "learning_rate": 0.00016230482132533077, + "loss": 0.0289, + "step": 7000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00016208472504084003, + "loss": 0.0318, + "step": 7100 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001618594729250462, + "loss": 0.0301, + "step": 7200 + }, + { + "epoch": 0.14, + "learning_rate": 0.00016163128202889828, + "loss": 0.0295, + "step": 7300 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016140016108635798, + "loss": 0.029, + "step": 7400 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016116611894353386, + "loss": 0.0291, + "step": 7500 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016092916455834295, + "loss": 0.0311, + "step": 7600 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016068930700016766, + "loss": 0.0285, + "step": 7700 + }, + { + "epoch": 0.15, + "learning_rate": 0.00016044655544950889, + "loss": 0.0287, + "step": 7800 + }, + { + "epoch": 0.16, + "learning_rate": 0.00016020091919763445, + "loss": 0.0293, + "step": 7900 + }, + { + "epoch": 0.16, + "learning_rate": 0.00015995240764622357, + "loss": 0.0259, + "step": 8000 + }, + { + "epoch": 0.16, + "learning_rate": 0.000159701030307007, + "loss": 0.0293, + "step": 8100 + }, + { + "epoch": 0.16, + "learning_rate": 0.00015944679680140295, + "loss": 0.0277, + "step": 8200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001591897168601488, + "loss": 0.0304, + "step": 8300 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015892980032292876, + "loss": 0.026, + "step": 8400 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015866705713799714, + "loss": 0.0294, + "step": 8500 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015840149736179762, + "loss": 0.0321, + "step": 8600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001581331311585785, + "loss": 0.0279, + "step": 8700 + }, + { + "epoch": 0.17, + "learning_rate": 0.00015786196880000325, + "loss": 0.0277, + "step": 8800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001575880206647579, + "loss": 0.0268, + "step": 8900 + }, + { + "epoch": 0.18, + "learning_rate": 0.00015731129723815343, + "loss": 0.0281, + "step": 9000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00015703180911172453, + "loss": 0.028, + "step": 9100 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001567495669828243, + "loss": 0.026, + "step": 9200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001564645816542146, + "loss": 0.0256, + "step": 9300 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001561768640336529, + "loss": 0.027, + "step": 9400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001558864251334745, + "loss": 0.0249, + "step": 9500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015559327607017119, + "loss": 0.0256, + "step": 9600 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015529742806396564, + "loss": 0.0251, + "step": 9700 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015499889243838211, + "loss": 0.0257, + "step": 9800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00015469768061981295, + "loss": 0.0264, + "step": 9900 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015439685605389073, + "loss": 0.0247, + "step": 10000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015409035301023626, + "loss": 0.0256, + "step": 10100 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015378120854776404, + "loss": 0.0247, + "step": 10200 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015346943449891755, + "loss": 0.0254, + "step": 10300 + }, + { + "epoch": 0.2, + "learning_rate": 0.00015315504279678714, + "loss": 0.0246, + "step": 10400 + }, + { + "epoch": 0.21, + "learning_rate": 0.00015283804547465337, + "loss": 0.0249, + "step": 10500 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001525184546655264, + "loss": 0.026, + "step": 10600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001521962826016816, + "loss": 0.0254, + "step": 10700 + }, + { + "epoch": 0.21, + "learning_rate": 0.00015187154161419122, + "loss": 0.0272, + "step": 10800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001515442441324528, + "loss": 0.0253, + "step": 10900 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015121440268371297, + "loss": 0.0263, + "step": 11000 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015088202989258835, + "loss": 0.0256, + "step": 11100 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015054713848058212, + "loss": 0.023, + "step": 11200 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015020974126559716, + "loss": 0.0236, + "step": 11300 + }, + { + "epoch": 0.22, + "learning_rate": 0.00014986985116144554, + "loss": 0.0241, + "step": 11400 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014952748117735409, + "loss": 0.0235, + "step": 11500 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001491826444174666, + "loss": 0.0242, + "step": 11600 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014883535408034227, + "loss": 0.0233, + "step": 11700 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014848562345845032, + "loss": 0.0228, + "step": 11800 + }, + { + "epoch": 0.23, + "learning_rate": 0.00014813346593766164, + "loss": 0.023, + "step": 11900 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001477788949967359, + "loss": 0.0236, + "step": 12000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00014742192420680626, + "loss": 0.0237, + "step": 12100 + }, + { + "epoch": 0.24, + "learning_rate": 0.00014706256723085937, + "loss": 0.0237, + "step": 12200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001467008378232128, + "loss": 0.024, + "step": 12300 + }, + { + "epoch": 0.24, + "learning_rate": 0.00014633674982898854, + "loss": 0.0227, + "step": 12400 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014597031718358285, + "loss": 0.0247, + "step": 12500 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014560155391213318, + "loss": 0.0233, + "step": 12600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001452304741289812, + "loss": 0.0234, + "step": 12700 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014485709203713263, + "loss": 0.0221, + "step": 12800 + }, + { + "epoch": 0.25, + "learning_rate": 0.00014448142192771354, + "loss": 0.0244, + "step": 12900 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014410347817942347, + "loss": 0.0242, + "step": 13000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014372327525798503, + "loss": 0.0231, + "step": 13100 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014334082771559026, + "loss": 0.0238, + "step": 13200 + }, + { + "epoch": 0.26, + "learning_rate": 0.00014295615019034358, + "loss": 0.0233, + "step": 13300 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001425692574057016, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014218016416990954, + "loss": 0.0249, + "step": 13500 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001417928089327897, + "loss": 0.0265, + "step": 13600 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014139938118717103, + "loss": 0.0206, + "step": 13700 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014100379776718443, + "loss": 0.0216, + "step": 13800 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014060607381370744, + "loss": 0.0239, + "step": 13900 + }, + { + "epoch": 0.28, + "learning_rate": 0.00014020622454954606, + "loss": 0.024, + "step": 14000 + }, + { + "epoch": 0.28, + "learning_rate": 0.00013980426527885237, + "loss": 0.0234, + "step": 14100 + }, + { + "epoch": 0.28, + "learning_rate": 0.00013940021138653824, + "loss": 0.0228, + "step": 14200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00013899407833768695, + "loss": 0.0228, + "step": 14300 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001385858816769611, + "loss": 0.0234, + "step": 14400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001381756370280075, + "loss": 0.0233, + "step": 14500 + }, + { + "epoch": 0.29, + "learning_rate": 0.00013776336009285937, + "loss": 0.0216, + "step": 14600 + }, + { + "epoch": 0.29, + "learning_rate": 0.00013734906665133537, + "loss": 0.0268, + "step": 14700 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001369327725604354, + "loss": 0.022, + "step": 14800 + }, + { + "epoch": 0.29, + "learning_rate": 0.00013651449375373396, + "loss": 0.0214, + "step": 14900 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013609424624077, + "loss": 0.0235, + "step": 15000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001356720461064345, + "loss": 0.0225, + "step": 15100 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013524790951035441, + "loss": 0.0218, + "step": 15200 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013482185268627465, + "loss": 0.0225, + "step": 15300 + }, + { + "epoch": 0.3, + "learning_rate": 0.00013439389194143625, + "loss": 0.0216, + "step": 15400 + }, + { + "epoch": 0.31, + "learning_rate": 0.00013396404365595253, + "loss": 0.0223, + "step": 15500 + }, + { + "epoch": 0.31, + "learning_rate": 0.00013353232428218212, + "loss": 0.0212, + "step": 15600 + }, + { + "epoch": 0.31, + "learning_rate": 0.00013309875034409903, + "loss": 0.023, + "step": 15700 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001326633384366605, + "loss": 0.0214, + "step": 15800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001322261052251716, + "loss": 0.021, + "step": 15900 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013178706744464749, + "loss": 0.0211, + "step": 16000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013134624189917282, + "loss": 0.0212, + "step": 16100 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013090808013598785, + "loss": 0.0252, + "step": 16200 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013046374720137184, + "loss": 0.0214, + "step": 16300 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013001767715162726, + "loss": 0.0202, + "step": 16400 + }, + { + "epoch": 0.32, + "learning_rate": 0.00012956988705999754, + "loss": 0.0205, + "step": 16500 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012912039406556028, + "loss": 0.0235, + "step": 16600 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012866921537257149, + "loss": 0.0215, + "step": 16700 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012821636824980682, + "loss": 0.0198, + "step": 16800 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012776187002990082, + "loss": 0.0232, + "step": 16900 + }, + { + "epoch": 0.33, + "learning_rate": 0.00012730573810868347, + "loss": 0.0235, + "step": 17000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012684798994451428, + "loss": 0.0207, + "step": 17100 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012638864305761417, + "loss": 0.0218, + "step": 17200 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012592771502939492, + "loss": 0.0204, + "step": 17300 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001254652235017861, + "loss": 0.0212, + "step": 17400 + }, + { + "epoch": 0.34, + "learning_rate": 0.00012500118617655992, + "loss": 0.0207, + "step": 17500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00012453562081465375, + "loss": 0.0207, + "step": 17600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001240685452354902, + "loss": 0.0204, + "step": 17700 + }, + { + "epoch": 0.35, + "learning_rate": 0.00012359997731629517, + "loss": 0.0234, + "step": 17800 + }, + { + "epoch": 0.35, + "learning_rate": 0.00012312993499141364, + "loss": 0.02, + "step": 17900 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001226584362516231, + "loss": 0.0191, + "step": 18000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012218549914344497, + "loss": 0.0196, + "step": 18100 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012171114176845411, + "loss": 0.0203, + "step": 18200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001212353822825857, + "loss": 0.0224, + "step": 18300 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012075823889544048, + "loss": 0.0211, + "step": 18400 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012027972986958772, + "loss": 0.0188, + "step": 18500 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001197998735198662, + "loss": 0.0214, + "step": 18600 + }, + { + "epoch": 0.37, + "learning_rate": 0.00011931868821268337, + "loss": 0.0201, + "step": 18700 + }, + { + "epoch": 0.37, + "learning_rate": 0.00011883619236531218, + "loss": 0.0206, + "step": 18800 + }, + { + "epoch": 0.37, + "learning_rate": 0.00011835240444518623, + "loss": 0.0206, + "step": 18900 + }, + { + "epoch": 0.37, + "learning_rate": 0.000117867342969193, + "loss": 0.0197, + "step": 19000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011738102650296509, + "loss": 0.0198, + "step": 19100 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011689347366016955, + "loss": 0.0211, + "step": 19200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001164047031017955, + "loss": 0.0204, + "step": 19300 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011591473353543992, + "loss": 0.0206, + "step": 19400 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011542358371459156, + "loss": 0.0192, + "step": 19500 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001149312724379132, + "loss": 0.0202, + "step": 19600 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011443781854852212, + "loss": 0.0197, + "step": 19700 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001139432409332688, + "loss": 0.021, + "step": 19800 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011344755852201423, + "loss": 0.022, + "step": 19900 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011295079028690513, + "loss": 0.0196, + "step": 20000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011245295524164799, + "loss": 0.0195, + "step": 20100 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011195407244078124, + "loss": 0.0197, + "step": 20200 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011145916512264182, + "loss": 0.0227, + "step": 20300 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001109582541342795, + "loss": 0.0197, + "step": 20400 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011045635259969838, + "loss": 0.0203, + "step": 20500 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001099585132027323, + "loss": 0.0216, + "step": 20600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00010945469766893083, + "loss": 0.0224, + "step": 20700 + }, + { + "epoch": 0.41, + "learning_rate": 0.00010894994913723849, + "loss": 0.0193, + "step": 20800 + }, + { + "epoch": 0.41, + "learning_rate": 0.00010844428692680601, + "loss": 0.0203, + "step": 20900 + }, + { + "epoch": 0.41, + "learning_rate": 0.000107937730391755, + "loss": 0.02, + "step": 21000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00010743029892043725, + "loss": 0.0197, + "step": 21100 + }, + { + "epoch": 0.42, + "learning_rate": 0.00010692201193469241, + "loss": 0.0192, + "step": 21200 + }, + { + "epoch": 0.42, + "learning_rate": 0.00010641288888910492, + "loss": 0.0189, + "step": 21300 + }, + { + "epoch": 0.42, + "learning_rate": 0.00010590294927025919, + "loss": 0.02, + "step": 21400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00010539221259599377, + "loss": 0.0182, + "step": 21500 + }, + { + "epoch": 0.43, + "learning_rate": 0.00010488069841465444, + "loss": 0.0191, + "step": 21600 + }, + { + "epoch": 0.43, + "learning_rate": 0.00010436842630434579, + "loss": 0.0188, + "step": 21700 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001038554158721821, + "loss": 0.023, + "step": 21800 + }, + { + "epoch": 0.43, + "learning_rate": 0.00010334168675353674, + "loss": 0.0206, + "step": 21900 + }, + { + "epoch": 0.43, + "learning_rate": 0.00010282725861129068, + "loss": 0.0187, + "step": 22000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00010231215113507977, + "loss": 0.018, + "step": 22100 + }, + { + "epoch": 0.44, + "learning_rate": 0.00010179638404054145, + "loss": 0.0202, + "step": 22200 + }, + { + "epoch": 0.44, + "learning_rate": 0.00010127997706855969, + "loss": 0.0199, + "step": 22300 + }, + { + "epoch": 0.44, + "learning_rate": 0.00010076294998450981, + "loss": 0.019, + "step": 22400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001002453225775017, + "loss": 0.0193, + "step": 22500 + }, + { + "epoch": 0.45, + "learning_rate": 9.972711465962257e-05, + "loss": 0.019, + "step": 22600 + }, + { + "epoch": 0.45, + "learning_rate": 9.920834606517847e-05, + "loss": 0.0195, + "step": 22700 + }, + { + "epoch": 0.45, + "learning_rate": 9.868903664993534e-05, + "loss": 0.018, + "step": 22800 + }, + { + "epoch": 0.45, + "learning_rate": 9.816920629035887e-05, + "loss": 0.0202, + "step": 22900 + }, + { + "epoch": 0.45, + "learning_rate": 9.764887488285381e-05, + "loss": 0.0185, + "step": 23000 + }, + { + "epoch": 0.45, + "learning_rate": 9.71280623430024e-05, + "loss": 0.0189, + "step": 23100 + }, + { + "epoch": 0.46, + "learning_rate": 9.660678860480215e-05, + "loss": 0.0186, + "step": 23200 + }, + { + "epoch": 0.46, + "learning_rate": 9.608507361990283e-05, + "loss": 0.0195, + "step": 23300 + }, + { + "epoch": 0.46, + "learning_rate": 9.556293735684288e-05, + "loss": 0.0206, + "step": 23400 + }, + { + "epoch": 0.46, + "learning_rate": 9.50456270965957e-05, + "loss": 0.0204, + "step": 23500 + }, + { + "epoch": 0.46, + "learning_rate": 9.452271196044817e-05, + "loss": 0.0189, + "step": 23600 + }, + { + "epoch": 0.47, + "learning_rate": 9.399943534522518e-05, + "loss": 0.0198, + "step": 23700 + }, + { + "epoch": 0.47, + "learning_rate": 9.347581727923617e-05, + "loss": 0.0197, + "step": 23800 + }, + { + "epoch": 0.47, + "learning_rate": 9.295187780385963e-05, + "loss": 0.0187, + "step": 23900 + }, + { + "epoch": 0.47, + "learning_rate": 9.242763697277588e-05, + "loss": 0.0201, + "step": 24000 + }, + { + "epoch": 0.47, + "learning_rate": 9.19031148511996e-05, + "loss": 0.0194, + "step": 24100 + }, + { + "epoch": 0.48, + "learning_rate": 9.137833151511182e-05, + "loss": 0.0186, + "step": 24200 + }, + { + "epoch": 0.48, + "learning_rate": 9.08533070504915e-05, + "loss": 0.0215, + "step": 24300 + }, + { + "epoch": 0.48, + "learning_rate": 9.032806155254666e-05, + "loss": 0.0205, + "step": 24400 + }, + { + "epoch": 0.48, + "learning_rate": 8.980261512494549e-05, + "loss": 0.0188, + "step": 24500 + }, + { + "epoch": 0.48, + "learning_rate": 8.927698787904661e-05, + "loss": 0.0188, + "step": 24600 + }, + { + "epoch": 0.49, + "learning_rate": 8.875119993312938e-05, + "loss": 0.0204, + "step": 24700 + }, + { + "epoch": 0.49, + "learning_rate": 8.822527141162407e-05, + "loss": 0.0191, + "step": 24800 + }, + { + "epoch": 0.49, + "learning_rate": 8.769922244434138e-05, + "loss": 0.017, + "step": 24900 + }, + { + "epoch": 0.49, + "learning_rate": 8.717307316570196e-05, + "loss": 0.0186, + "step": 25000 + }, + { + "epoch": 0.49, + "learning_rate": 8.664684371396603e-05, + "loss": 0.0195, + "step": 25100 + }, + { + "epoch": 0.5, + "learning_rate": 8.612055423046226e-05, + "loss": 0.0199, + "step": 25200 + }, + { + "epoch": 0.5, + "learning_rate": 8.559422485881711e-05, + "loss": 0.0191, + "step": 25300 + }, + { + "epoch": 0.5, + "learning_rate": 8.506787574418376e-05, + "loss": 0.0191, + "step": 25400 + }, + { + "epoch": 0.5, + "learning_rate": 8.4541527032471e-05, + "loss": 0.0196, + "step": 25500 + }, + { + "epoch": 0.5, + "learning_rate": 8.401519886957223e-05, + "loss": 0.0184, + "step": 25600 + }, + { + "epoch": 0.51, + "learning_rate": 8.348891140059429e-05, + "loss": 0.0182, + "step": 25700 + }, + { + "epoch": 0.51, + "learning_rate": 8.29626847690866e-05, + "loss": 0.0216, + "step": 25800 + }, + { + "epoch": 0.51, + "learning_rate": 8.243653911626992e-05, + "loss": 0.0177, + "step": 25900 + }, + { + "epoch": 0.51, + "learning_rate": 8.191575445898302e-05, + "loss": 0.019, + "step": 26000 + }, + { + "epoch": 0.51, + "learning_rate": 8.13898298618816e-05, + "loss": 0.0187, + "step": 26100 + }, + { + "epoch": 0.52, + "learning_rate": 8.086404644418334e-05, + "loss": 0.0189, + "step": 26200 + }, + { + "epoch": 0.52, + "learning_rate": 8.033842433014505e-05, + "loss": 0.0178, + "step": 26300 + }, + { + "epoch": 0.52, + "learning_rate": 7.981298363784983e-05, + "loss": 0.0182, + "step": 26400 + }, + { + "epoch": 0.52, + "learning_rate": 7.928774447843676e-05, + "loss": 0.0197, + "step": 26500 + }, + { + "epoch": 0.52, + "learning_rate": 7.876272695533133e-05, + "loss": 0.0182, + "step": 26600 + }, + { + "epoch": 0.53, + "learning_rate": 7.823795116347596e-05, + "loss": 0.0203, + "step": 26700 + }, + { + "epoch": 0.53, + "learning_rate": 7.771343718856085e-05, + "loss": 0.0193, + "step": 26800 + }, + { + "epoch": 0.53, + "learning_rate": 7.718920510625514e-05, + "loss": 0.0182, + "step": 26900 + }, + { + "epoch": 0.53, + "learning_rate": 7.666527498143874e-05, + "loss": 0.0202, + "step": 27000 + }, + { + "epoch": 0.53, + "learning_rate": 7.614166686743405e-05, + "loss": 0.019, + "step": 27100 + }, + { + "epoch": 0.54, + "learning_rate": 7.561840080523864e-05, + "loss": 0.0174, + "step": 27200 + }, + { + "epoch": 0.54, + "learning_rate": 7.509549682275809e-05, + "loss": 0.018, + "step": 27300 + }, + { + "epoch": 0.54, + "learning_rate": 7.457297493403944e-05, + "loss": 0.019, + "step": 27400 + }, + { + "epoch": 0.54, + "learning_rate": 7.405085513850521e-05, + "loss": 0.0205, + "step": 27500 + }, + { + "epoch": 0.54, + "learning_rate": 7.352915742018785e-05, + "loss": 0.0169, + "step": 27600 + }, + { + "epoch": 0.55, + "learning_rate": 7.30079017469649e-05, + "loss": 0.0183, + "step": 27700 + }, + { + "epoch": 0.55, + "learning_rate": 7.248710806979468e-05, + "loss": 0.0181, + "step": 27800 + }, + { + "epoch": 0.55, + "learning_rate": 7.196679632195274e-05, + "loss": 0.0171, + "step": 27900 + }, + { + "epoch": 0.55, + "learning_rate": 7.144698641826882e-05, + "loss": 0.0202, + "step": 28000 + }, + { + "epoch": 0.55, + "learning_rate": 7.092769825436483e-05, + "loss": 0.0184, + "step": 28100 + }, + { + "epoch": 0.56, + "learning_rate": 7.040895170589299e-05, + "loss": 0.0191, + "step": 28200 + }, + { + "epoch": 0.56, + "learning_rate": 6.989594563413708e-05, + "loss": 0.0206, + "step": 28300 + }, + { + "epoch": 0.56, + "learning_rate": 6.937833594866479e-05, + "loss": 0.0184, + "step": 28400 + }, + { + "epoch": 0.56, + "learning_rate": 6.886132718016212e-05, + "loss": 0.0213, + "step": 28500 + }, + { + "epoch": 0.56, + "learning_rate": 6.834493911703791e-05, + "loss": 0.0181, + "step": 28600 + }, + { + "epoch": 0.57, + "learning_rate": 6.78291915239438e-05, + "loss": 0.0205, + "step": 28700 + }, + { + "epoch": 0.57, + "learning_rate": 6.731410414101738e-05, + "loss": 0.0184, + "step": 28800 + }, + { + "epoch": 0.57, + "learning_rate": 6.679969668312693e-05, + "loss": 0.0179, + "step": 28900 + }, + { + "epoch": 0.57, + "learning_rate": 6.628598883911667e-05, + "loss": 0.0206, + "step": 29000 + }, + { + "epoch": 0.57, + "learning_rate": 6.577300027105329e-05, + "loss": 0.0172, + "step": 29100 + }, + { + "epoch": 0.58, + "learning_rate": 6.526075061347325e-05, + "loss": 0.0194, + "step": 29200 + }, + { + "epoch": 0.58, + "learning_rate": 6.474925947263152e-05, + "loss": 0.0195, + "step": 29300 + }, + { + "epoch": 0.58, + "learning_rate": 6.423854642575082e-05, + "loss": 0.0193, + "step": 29400 + }, + { + "epoch": 0.58, + "learning_rate": 6.372863102027257e-05, + "loss": 0.0182, + "step": 29500 + }, + { + "epoch": 0.58, + "learning_rate": 6.321953277310858e-05, + "loss": 0.0195, + "step": 29600 + }, + { + "epoch": 0.58, + "learning_rate": 6.27112711698941e-05, + "loss": 0.0174, + "step": 29700 + }, + { + "epoch": 0.59, + "learning_rate": 6.220386566424204e-05, + "loss": 0.0214, + "step": 29800 + }, + { + "epoch": 0.59, + "learning_rate": 6.169733567699826e-05, + "loss": 0.0176, + "step": 29900 + }, + { + "epoch": 0.59, + "learning_rate": 6.11917005954984e-05, + "loss": 0.017, + "step": 30000 + }, + { + "epoch": 0.59, + "learning_rate": 6.068697977282572e-05, + "loss": 0.0185, + "step": 30100 + }, + { + "epoch": 0.59, + "learning_rate": 6.018319252707041e-05, + "loss": 0.0175, + "step": 30200 + }, + { + "epoch": 0.6, + "learning_rate": 5.968035814059016e-05, + "loss": 0.0166, + "step": 30300 + }, + { + "epoch": 0.6, + "learning_rate": 5.9178495859272244e-05, + "loss": 0.0167, + "step": 30400 + }, + { + "epoch": 0.6, + "learning_rate": 5.8677624891796735e-05, + "loss": 0.018, + "step": 30500 + }, + { + "epoch": 0.6, + "learning_rate": 5.8177764408901366e-05, + "loss": 0.0182, + "step": 30600 + }, + { + "epoch": 0.6, + "learning_rate": 5.767893354264776e-05, + "loss": 0.019, + "step": 30700 + }, + { + "epoch": 0.61, + "learning_rate": 5.7186123953555236e-05, + "loss": 0.0198, + "step": 30800 + }, + { + "epoch": 0.61, + "learning_rate": 5.668939878661137e-05, + "loss": 0.0177, + "step": 30900 + }, + { + "epoch": 0.61, + "learning_rate": 5.619376020321073e-05, + "loss": 0.0165, + "step": 31000 + }, + { + "epoch": 0.61, + "learning_rate": 5.5699227173822495e-05, + "loss": 0.0205, + "step": 31100 + }, + { + "epoch": 0.61, + "learning_rate": 5.5205818626601115e-05, + "loss": 0.0181, + "step": 31200 + }, + { + "epoch": 0.62, + "learning_rate": 5.4713553446661644e-05, + "loss": 0.0187, + "step": 31300 + }, + { + "epoch": 0.62, + "learning_rate": 5.4222450475357026e-05, + "loss": 0.0169, + "step": 31400 + }, + { + "epoch": 0.62, + "learning_rate": 5.373252850955681e-05, + "loss": 0.0174, + "step": 31500 + }, + { + "epoch": 0.62, + "learning_rate": 5.3243806300927926e-05, + "loss": 0.0179, + "step": 31600 + }, + { + "epoch": 0.62, + "learning_rate": 5.2756302555216735e-05, + "loss": 0.0175, + "step": 31700 + }, + { + "epoch": 0.63, + "learning_rate": 5.2270035931533207e-05, + "loss": 0.0199, + "step": 31800 + }, + { + "epoch": 0.63, + "learning_rate": 5.1785025041636665e-05, + "loss": 0.0184, + "step": 31900 + }, + { + "epoch": 0.63, + "learning_rate": 5.130128844922355e-05, + "loss": 0.0185, + "step": 32000 + }, + { + "epoch": 0.63, + "learning_rate": 5.081884466921669e-05, + "loss": 0.0174, + "step": 32100 + }, + { + "epoch": 0.63, + "learning_rate": 5.0337712167056944e-05, + "loss": 0.0201, + "step": 32200 + }, + { + "epoch": 0.64, + "learning_rate": 4.985790935799614e-05, + "loss": 0.0178, + "step": 32300 + }, + { + "epoch": 0.64, + "learning_rate": 4.937945460639236e-05, + "loss": 0.0176, + "step": 32400 + }, + { + "epoch": 0.64, + "learning_rate": 4.890236622500717e-05, + "loss": 0.0239, + "step": 32500 + }, + { + "epoch": 0.64, + "learning_rate": 4.8426662474304465e-05, + "loss": 0.0177, + "step": 32600 + }, + { + "epoch": 0.64, + "learning_rate": 4.795236156175173e-05, + "loss": 0.0182, + "step": 32700 + }, + { + "epoch": 0.65, + "learning_rate": 4.7479481641123066e-05, + "loss": 0.016, + "step": 32800 + }, + { + "epoch": 0.65, + "learning_rate": 4.7008040811804424e-05, + "loss": 0.0175, + "step": 32900 + }, + { + "epoch": 0.65, + "learning_rate": 4.653805711810078e-05, + "loss": 0.0173, + "step": 33000 + }, + { + "epoch": 0.65, + "learning_rate": 4.6069548548545565e-05, + "loss": 0.0179, + "step": 33100 + }, + { + "epoch": 0.65, + "learning_rate": 4.560253303521206e-05, + "loss": 0.0186, + "step": 33200 + }, + { + "epoch": 0.66, + "learning_rate": 4.513702845302723e-05, + "loss": 0.019, + "step": 33300 + }, + { + "epoch": 0.66, + "learning_rate": 4.467305261908736e-05, + "loss": 0.0164, + "step": 33400 + }, + { + "epoch": 0.66, + "learning_rate": 4.42106232919762e-05, + "loss": 0.019, + "step": 33500 + }, + { + "epoch": 0.66, + "learning_rate": 4.374975817108527e-05, + "loss": 0.0174, + "step": 33600 + }, + { + "epoch": 0.66, + "learning_rate": 4.3295059840780387e-05, + "loss": 0.0187, + "step": 33700 + }, + { + "epoch": 0.67, + "learning_rate": 4.283735990928788e-05, + "loss": 0.019, + "step": 33800 + }, + { + "epoch": 0.67, + "learning_rate": 4.2381276745401456e-05, + "loss": 0.0175, + "step": 33900 + }, + { + "epoch": 0.67, + "learning_rate": 4.192682780561444e-05, + "loss": 0.0186, + "step": 34000 + }, + { + "epoch": 0.67, + "learning_rate": 4.147403048387059e-05, + "loss": 0.02, + "step": 34100 + }, + { + "epoch": 0.67, + "learning_rate": 4.1022902110898326e-05, + "loss": 0.0169, + "step": 34200 + }, + { + "epoch": 0.68, + "learning_rate": 4.057345995354738e-05, + "loss": 0.0176, + "step": 34300 + }, + { + "epoch": 0.68, + "learning_rate": 4.0125721214127854e-05, + "loss": 0.0167, + "step": 34400 + }, + { + "epoch": 0.68, + "learning_rate": 3.9679703029752e-05, + "loss": 0.0179, + "step": 34500 + }, + { + "epoch": 0.68, + "learning_rate": 3.923542247167802e-05, + "loss": 0.0162, + "step": 34600 + }, + { + "epoch": 0.68, + "learning_rate": 3.879289654465689e-05, + "loss": 0.0194, + "step": 34700 + }, + { + "epoch": 0.69, + "learning_rate": 3.835214218628141e-05, + "loss": 0.0187, + "step": 34800 + }, + { + "epoch": 0.69, + "learning_rate": 3.7913176266337885e-05, + "loss": 0.0165, + "step": 34900 + }, + { + "epoch": 0.69, + "learning_rate": 3.747601558616062e-05, + "loss": 0.0162, + "step": 35000 + }, + { + "epoch": 0.69, + "learning_rate": 3.70406768779886e-05, + "loss": 0.0169, + "step": 35100 + }, + { + "epoch": 0.69, + "learning_rate": 3.660717680432526e-05, + "loss": 0.0173, + "step": 35200 + }, + { + "epoch": 0.7, + "learning_rate": 3.61755319573006e-05, + "loss": 0.016, + "step": 35300 + }, + { + "epoch": 0.7, + "learning_rate": 3.5745758858036255e-05, + "loss": 0.0188, + "step": 35400 + }, + { + "epoch": 0.7, + "learning_rate": 3.5317873956012995e-05, + "loss": 0.0198, + "step": 35500 + }, + { + "epoch": 0.7, + "learning_rate": 3.4891893628441334e-05, + "loss": 0.0171, + "step": 35600 + }, + { + "epoch": 0.7, + "learning_rate": 3.446783417963448e-05, + "loss": 0.0179, + "step": 35700 + }, + { + "epoch": 0.71, + "learning_rate": 3.4045711840384404e-05, + "loss": 0.0158, + "step": 35800 + }, + { + "epoch": 0.71, + "learning_rate": 3.36255427673406e-05, + "loss": 0.0171, + "step": 35900 + }, + { + "epoch": 0.71, + "learning_rate": 3.320734304239168e-05, + "loss": 0.0162, + "step": 36000 + }, + { + "epoch": 0.71, + "learning_rate": 3.279112867204983e-05, + "loss": 0.0172, + "step": 36100 + }, + { + "epoch": 0.71, + "learning_rate": 3.2376915586838195e-05, + "loss": 0.0166, + "step": 36200 + }, + { + "epoch": 0.71, + "learning_rate": 3.196471964068113e-05, + "loss": 0.0171, + "step": 36300 + }, + { + "epoch": 0.72, + "learning_rate": 3.1554556610297393e-05, + "loss": 0.019, + "step": 36400 + }, + { + "epoch": 0.72, + "learning_rate": 3.1146442194596243e-05, + "loss": 0.0167, + "step": 36500 + }, + { + "epoch": 0.72, + "learning_rate": 3.074039201407669e-05, + "loss": 0.0172, + "step": 36600 + }, + { + "epoch": 0.72, + "learning_rate": 3.0336421610229454e-05, + "loss": 0.0182, + "step": 36700 + }, + { + "epoch": 0.72, + "learning_rate": 2.9934546444942337e-05, + "loss": 0.0171, + "step": 36800 + }, + { + "epoch": 0.73, + "learning_rate": 2.9534781899908195e-05, + "loss": 0.0165, + "step": 36900 + }, + { + "epoch": 0.73, + "learning_rate": 2.9141109088924195e-05, + "loss": 0.02, + "step": 37000 + }, + { + "epoch": 0.73, + "learning_rate": 2.87455901192796e-05, + "loss": 0.0174, + "step": 37100 + }, + { + "epoch": 0.73, + "learning_rate": 2.835222727695746e-05, + "loss": 0.0171, + "step": 37200 + }, + { + "epoch": 0.73, + "learning_rate": 2.7961035617843112e-05, + "loss": 0.0172, + "step": 37300 + }, + { + "epoch": 0.74, + "learning_rate": 2.757203011472043e-05, + "loss": 0.0164, + "step": 37400 + }, + { + "epoch": 0.74, + "learning_rate": 2.718522565669847e-05, + "loss": 0.017, + "step": 37500 + }, + { + "epoch": 0.74, + "learning_rate": 2.6800637048641775e-05, + "loss": 0.0167, + "step": 37600 + }, + { + "epoch": 0.74, + "learning_rate": 2.6418279010603663e-05, + "loss": 0.0184, + "step": 37700 + }, + { + "epoch": 0.74, + "learning_rate": 2.6038166177262784e-05, + "loss": 0.0158, + "step": 37800 + }, + { + "epoch": 0.75, + "learning_rate": 2.566031309736311e-05, + "loss": 0.0165, + "step": 37900 + }, + { + "epoch": 0.75, + "learning_rate": 2.5284734233156903e-05, + "loss": 0.0165, + "step": 38000 + }, + { + "epoch": 0.75, + "learning_rate": 2.4911443959851313e-05, + "loss": 0.017, + "step": 38100 + }, + { + "epoch": 0.75, + "learning_rate": 2.4540456565058115e-05, + "loss": 0.0177, + "step": 38200 + }, + { + "epoch": 0.75, + "learning_rate": 2.417178624824684e-05, + "loss": 0.0165, + "step": 38300 + }, + { + "epoch": 0.76, + "learning_rate": 2.3805447120201323e-05, + "loss": 0.0173, + "step": 38400 + }, + { + "epoch": 0.76, + "learning_rate": 2.344145320247968e-05, + "loss": 0.0189, + "step": 38500 + }, + { + "epoch": 0.76, + "learning_rate": 2.3079818426877448e-05, + "loss": 0.0183, + "step": 38600 + }, + { + "epoch": 0.76, + "learning_rate": 2.2720556634894542e-05, + "loss": 0.0161, + "step": 38700 + }, + { + "epoch": 0.76, + "learning_rate": 2.2363681577205363e-05, + "loss": 0.0161, + "step": 38800 + }, + { + "epoch": 0.77, + "learning_rate": 2.2009206913132545e-05, + "loss": 0.017, + "step": 38900 + }, + { + "epoch": 0.77, + "learning_rate": 2.1657146210124072e-05, + "loss": 0.0175, + "step": 39000 + }, + { + "epoch": 0.77, + "learning_rate": 2.1307512943234157e-05, + "loss": 0.0162, + "step": 39100 + }, + { + "epoch": 0.77, + "learning_rate": 2.0960320494607276e-05, + "loss": 0.0161, + "step": 39200 + }, + { + "epoch": 0.77, + "learning_rate": 2.0615582152966087e-05, + "loss": 0.0174, + "step": 39300 + }, + { + "epoch": 0.78, + "learning_rate": 2.027672156726409e-05, + "loss": 0.0194, + "step": 39400 + }, + { + "epoch": 0.78, + "learning_rate": 1.9936906060978636e-05, + "loss": 0.0185, + "step": 39500 + }, + { + "epoch": 0.78, + "learning_rate": 1.9599583832664966e-05, + "loss": 0.0169, + "step": 39600 + }, + { + "epoch": 0.78, + "learning_rate": 1.9264767793265046e-05, + "loss": 0.0174, + "step": 39700 + }, + { + "epoch": 0.78, + "learning_rate": 1.8932470757796908e-05, + "loss": 0.0172, + "step": 39800 + }, + { + "epoch": 0.79, + "learning_rate": 1.86027054448642e-05, + "loss": 0.0157, + "step": 39900 + }, + { + "epoch": 0.79, + "learning_rate": 1.8275484476169405e-05, + "loss": 0.0166, + "step": 40000 + }, + { + "epoch": 0.79, + "learning_rate": 1.7950820376030694e-05, + "loss": 0.0159, + "step": 40100 + }, + { + "epoch": 0.79, + "learning_rate": 1.7628725570902674e-05, + "loss": 0.0162, + "step": 40200 + }, + { + "epoch": 0.79, + "learning_rate": 1.7309212388900632e-05, + "loss": 0.0192, + "step": 40300 + }, + { + "epoch": 0.8, + "learning_rate": 1.6992293059328733e-05, + "loss": 0.0173, + "step": 40400 + }, + { + "epoch": 0.8, + "learning_rate": 1.6677979712211988e-05, + "loss": 0.0161, + "step": 40500 + }, + { + "epoch": 0.8, + "learning_rate": 1.6366284377831903e-05, + "loss": 0.0164, + "step": 40600 + }, + { + "epoch": 0.8, + "learning_rate": 1.60572189862661e-05, + "loss": 0.0178, + "step": 40700 + }, + { + "epoch": 0.8, + "learning_rate": 1.575079536693162e-05, + "loss": 0.0186, + "step": 40800 + }, + { + "epoch": 0.81, + "learning_rate": 1.5447025248132218e-05, + "loss": 0.019, + "step": 40900 + }, + { + "epoch": 0.81, + "learning_rate": 1.5145920256609429e-05, + "loss": 0.0167, + "step": 41000 + }, + { + "epoch": 0.81, + "learning_rate": 1.4847491917097565e-05, + "loss": 0.0159, + "step": 41100 + }, + { + "epoch": 0.81, + "learning_rate": 1.455175165188261e-05, + "loss": 0.0181, + "step": 41200 + }, + { + "epoch": 0.81, + "learning_rate": 1.4258710780365024e-05, + "loss": 0.0166, + "step": 41300 + }, + { + "epoch": 0.82, + "learning_rate": 1.3968380518626529e-05, + "loss": 0.0182, + "step": 41400 + }, + { + "epoch": 0.82, + "learning_rate": 1.3680771979000779e-05, + "loss": 0.017, + "step": 41500 + }, + { + "epoch": 0.82, + "learning_rate": 1.3395896169648037e-05, + "loss": 0.0173, + "step": 41600 + }, + { + "epoch": 0.82, + "learning_rate": 1.3113763994133861e-05, + "loss": 0.0172, + "step": 41700 + }, + { + "epoch": 0.82, + "learning_rate": 1.2834386251011762e-05, + "loss": 0.0173, + "step": 41800 + }, + { + "epoch": 0.83, + "learning_rate": 1.2557773633409876e-05, + "loss": 0.0174, + "step": 41900 + }, + { + "epoch": 0.83, + "learning_rate": 1.2283936728621768e-05, + "loss": 0.0159, + "step": 42000 + }, + { + "epoch": 0.83, + "learning_rate": 1.2015582698997656e-05, + "loss": 0.0186, + "step": 42100 + }, + { + "epoch": 0.83, + "learning_rate": 1.1747300539671992e-05, + "loss": 0.0179, + "step": 42200 + }, + { + "epoch": 0.83, + "learning_rate": 1.148182511385831e-05, + "loss": 0.0168, + "step": 42300 + }, + { + "epoch": 0.83, + "learning_rate": 1.121916658257627e-05, + "loss": 0.0177, + "step": 42400 + }, + { + "epoch": 0.84, + "learning_rate": 1.095933499902953e-05, + "loss": 0.0161, + "step": 42500 + }, + { + "epoch": 0.84, + "learning_rate": 1.0702340308220812e-05, + "loss": 0.017, + "step": 42600 + }, + { + "epoch": 0.84, + "learning_rate": 1.0448192346571332e-05, + "loss": 0.0159, + "step": 42700 + }, + { + "epoch": 0.84, + "learning_rate": 1.0196900841544354e-05, + "loss": 0.0206, + "step": 42800 + }, + { + "epoch": 0.84, + "learning_rate": 9.948475411272763e-06, + "loss": 0.0177, + "step": 42900 + }, + { + "epoch": 0.85, + "learning_rate": 9.702925564191031e-06, + "loss": 0.017, + "step": 43000 + }, + { + "epoch": 0.85, + "learning_rate": 9.460260698671288e-06, + "loss": 0.0169, + "step": 43100 + }, + { + "epoch": 0.85, + "learning_rate": 9.220490102663478e-06, + "loss": 0.0173, + "step": 43200 + }, + { + "epoch": 0.85, + "learning_rate": 8.983622953340022e-06, + "loss": 0.0172, + "step": 43300 + }, + { + "epoch": 0.85, + "learning_rate": 8.749668316744438e-06, + "loss": 0.0166, + "step": 43400 + }, + { + "epoch": 0.86, + "learning_rate": 8.518635147444429e-06, + "loss": 0.0158, + "step": 43500 + }, + { + "epoch": 0.86, + "learning_rate": 8.290532288189078e-06, + "loss": 0.0184, + "step": 43600 + }, + { + "epoch": 0.86, + "learning_rate": 8.065368469570475e-06, + "loss": 0.0167, + "step": 43700 + }, + { + "epoch": 0.86, + "learning_rate": 7.843152309689469e-06, + "loss": 0.0155, + "step": 43800 + }, + { + "epoch": 0.86, + "learning_rate": 7.623892313825859e-06, + "loss": 0.0159, + "step": 43900 + }, + { + "epoch": 0.87, + "learning_rate": 7.407596874112859e-06, + "loss": 0.0178, + "step": 44000 + }, + { + "epoch": 0.87, + "learning_rate": 7.194274269215867e-06, + "loss": 0.0189, + "step": 44100 + }, + { + "epoch": 0.87, + "learning_rate": 6.983932664015617e-06, + "loss": 0.0158, + "step": 44200 + }, + { + "epoch": 0.87, + "learning_rate": 6.77658010929566e-06, + "loss": 0.0168, + "step": 44300 + }, + { + "epoch": 0.87, + "learning_rate": 6.572224541434249e-06, + "loss": 0.0174, + "step": 44400 + }, + { + "epoch": 0.88, + "learning_rate": 6.3708737821005295e-06, + "loss": 0.0175, + "step": 44500 + }, + { + "epoch": 0.88, + "learning_rate": 6.172535537955211e-06, + "loss": 0.0168, + "step": 44600 + }, + { + "epoch": 0.88, + "learning_rate": 5.977217400355541e-06, + "loss": 0.0171, + "step": 44700 + }, + { + "epoch": 0.88, + "learning_rate": 5.786834739822933e-06, + "loss": 0.0175, + "step": 44800 + }, + { + "epoch": 0.88, + "learning_rate": 5.597548741252224e-06, + "loss": 0.0163, + "step": 44900 + }, + { + "epoch": 0.89, + "learning_rate": 5.411304856733716e-06, + "loss": 0.0168, + "step": 45000 + }, + { + "epoch": 0.89, + "learning_rate": 5.228110214715438e-06, + "loss": 0.0161, + "step": 45100 + }, + { + "epoch": 0.89, + "learning_rate": 5.047971826936275e-06, + "loss": 0.0187, + "step": 45200 + }, + { + "epoch": 0.89, + "learning_rate": 4.8708965881575966e-06, + "loss": 0.0165, + "step": 45300 + }, + { + "epoch": 0.89, + "learning_rate": 4.696891275899309e-06, + "loss": 0.0188, + "step": 45400 + }, + { + "epoch": 0.9, + "learning_rate": 4.52596255018051e-06, + "loss": 0.0167, + "step": 45500 + }, + { + "epoch": 0.9, + "learning_rate": 4.358116953264567e-06, + "loss": 0.0168, + "step": 45600 + }, + { + "epoch": 0.9, + "learning_rate": 4.1933609094086535e-06, + "loss": 0.0155, + "step": 45700 + }, + { + "epoch": 0.9, + "learning_rate": 4.03170072461796e-06, + "loss": 0.0154, + "step": 45800 + }, + { + "epoch": 0.9, + "learning_rate": 3.8731425864042375e-06, + "loss": 0.0162, + "step": 45900 + }, + { + "epoch": 0.91, + "learning_rate": 3.7176925635490457e-06, + "loss": 0.0173, + "step": 46000 + }, + { + "epoch": 0.91, + "learning_rate": 3.5653566058714167e-06, + "loss": 0.0166, + "step": 46100 + }, + { + "epoch": 0.91, + "learning_rate": 3.4161405440001803e-06, + "loss": 0.017, + "step": 46200 + }, + { + "epoch": 0.91, + "learning_rate": 3.2700500891507256e-06, + "loss": 0.017, + "step": 46300 + }, + { + "epoch": 0.91, + "learning_rate": 3.1270908329064776e-06, + "loss": 0.0178, + "step": 46400 + }, + { + "epoch": 0.92, + "learning_rate": 2.987268247004841e-06, + "loss": 0.0155, + "step": 46500 + }, + { + "epoch": 0.92, + "learning_rate": 2.8505876831278e-06, + "loss": 0.02, + "step": 46600 + }, + { + "epoch": 0.92, + "learning_rate": 2.717054372697044e-06, + "loss": 0.0165, + "step": 46700 + }, + { + "epoch": 0.92, + "learning_rate": 2.586673426673739e-06, + "loss": 0.0174, + "step": 46800 + }, + { + "epoch": 0.92, + "learning_rate": 2.4594498353629673e-06, + "loss": 0.0161, + "step": 46900 + }, + { + "epoch": 0.93, + "learning_rate": 2.3353884682226504e-06, + "loss": 0.0185, + "step": 47000 + }, + { + "epoch": 0.93, + "learning_rate": 2.21449407367719e-06, + "loss": 0.0161, + "step": 47100 + }, + { + "epoch": 0.93, + "learning_rate": 2.096771278935778e-06, + "loss": 0.0169, + "step": 47200 + }, + { + "epoch": 0.93, + "learning_rate": 1.983354320487033e-06, + "loss": 0.021, + "step": 47300 + }, + { + "epoch": 0.93, + "learning_rate": 1.8719562950381076e-06, + "loss": 0.0165, + "step": 47400 + }, + { + "epoch": 0.94, + "learning_rate": 1.7637429799593062e-06, + "loss": 0.0169, + "step": 47500 + }, + { + "epoch": 0.94, + "learning_rate": 1.6587185170939741e-06, + "loss": 0.0177, + "step": 47600 + }, + { + "epoch": 0.94, + "learning_rate": 1.5568869262327734e-06, + "loss": 0.0172, + "step": 47700 + }, + { + "epoch": 0.94, + "learning_rate": 1.4582521049598147e-06, + "loss": 0.0174, + "step": 47800 + }, + { + "epoch": 0.94, + "learning_rate": 1.3628178285034639e-06, + "loss": 0.0182, + "step": 47900 + }, + { + "epoch": 0.95, + "learning_rate": 1.2705877495918805e-06, + "loss": 0.0162, + "step": 48000 + }, + { + "epoch": 0.95, + "learning_rate": 1.1815653983131956e-06, + "loss": 0.0186, + "step": 48100 + }, + { + "epoch": 0.95, + "learning_rate": 1.0957541819803916e-06, + "loss": 0.0168, + "step": 48200 + }, + { + "epoch": 0.95, + "learning_rate": 1.0131573850008875e-06, + "loss": 0.0164, + "step": 48300 + }, + { + "epoch": 0.95, + "learning_rate": 9.337781687508361e-07, + "loss": 0.0154, + "step": 48400 + }, + { + "epoch": 0.96, + "learning_rate": 8.576195714541266e-07, + "loss": 0.0166, + "step": 48500 + }, + { + "epoch": 0.96, + "learning_rate": 7.846845080660896e-07, + "loss": 0.0163, + "step": 48600 + }, + { + "epoch": 0.96, + "learning_rate": 7.149757701619266e-07, + "loss": 0.0175, + "step": 48700 + }, + { + "epoch": 0.96, + "learning_rate": 6.484960258298606e-07, + "loss": 0.016, + "step": 48800 + }, + { + "epoch": 0.96, + "learning_rate": 5.852478195690389e-07, + "loss": 0.0165, + "step": 48900 + }, + { + "epoch": 0.96, + "learning_rate": 5.252335721920922e-07, + "loss": 0.016, + "step": 49000 + }, + { + "epoch": 0.97, + "learning_rate": 4.684555807325587e-07, + "loss": 0.0177, + "step": 49100 + }, + { + "epoch": 0.97, + "learning_rate": 4.149160183568786e-07, + "loss": 0.0163, + "step": 49200 + }, + { + "epoch": 0.97, + "learning_rate": 3.6461693428126416e-07, + "loss": 0.0177, + "step": 49300 + }, + { + "epoch": 0.97, + "learning_rate": 3.1756025369326307e-07, + "loss": 0.0168, + "step": 49400 + }, + { + "epoch": 0.97, + "learning_rate": 2.737477776780734e-07, + "loss": 0.0165, + "step": 49500 + }, + { + "epoch": 0.98, + "learning_rate": 2.335707767802713e-07, + "loss": 0.0181, + "step": 49600 + }, + { + "epoch": 0.98, + "learning_rate": 1.9621913479759026e-07, + "loss": 0.0167, + "step": 49700 + }, + { + "epoch": 0.98, + "learning_rate": 1.621163416951787e-07, + "loss": 0.0165, + "step": 49800 + }, + { + "epoch": 0.98, + "learning_rate": 1.312637027507287e-07, + "loss": 0.0188, + "step": 49900 + }, + { + "epoch": 0.98, + "learning_rate": 1.0366239884292474e-07, + "loss": 0.0171, + "step": 50000 + } + ], + "logging_steps": 100, + "max_steps": 50780, + "num_train_epochs": 1, + "save_steps": 10000, + "total_flos": 1.996418681856e+19, + "trial_name": null, + "trial_params": null +} diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/training_args.bin b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..100b6c56d3e6f8a91a89702971e1e66650fb8075 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d17a2c3c8a65881aa45bfd2997dfe652a61cb20d8b49594dc95bff2d7669f6f +size 4155 diff --git a/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/vocab.txt b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b946952cc35537226f07fd70957ee2f848880d2 --- /dev/null +++ b/esm2_t33_650M_qlora_binding_sites_2023-10-18_02-14-48/checkpoint-50000/vocab.txt @@ -0,0 +1,33 @@ + + + + +L +A +G +V +S +E +R +T +I +D +P +K +Q +N +F +Y +M +H +W +C +X +B +U +Z +O +. +- + + \ No newline at end of file