diff --git a/checkpoint-5700/adapter_model/README.md b/checkpoint-5700/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c --- /dev/null +++ b/checkpoint-5700/adapter_model/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/checkpoint-5700/adapter_model/adapter_config.json b/checkpoint-5700/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7052646debaf453de93d6176727714122c31b64a --- /dev/null +++ b/checkpoint-5700/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "gate_proj", + "up_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5700/adapter_model/adapter_model.bin b/checkpoint-5700/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..02526e93c7101a3c9d0727acfe024831696398f1 --- /dev/null +++ b/checkpoint-5700/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bdd5402d919c62a264000ec96b1dd621956e56ddd66679cf8a429f111552d95 +size 500897101 diff --git a/checkpoint-5800/README.md b/checkpoint-5800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c --- /dev/null +++ b/checkpoint-5800/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/checkpoint-5800/adapter_config.json b/checkpoint-5800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7052646debaf453de93d6176727714122c31b64a --- /dev/null +++ b/checkpoint-5800/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "gate_proj", + "up_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": 
"CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5800/adapter_model.bin b/checkpoint-5800/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b25d3eaace758c6638b956d27554c51cfd758425 --- /dev/null +++ b/checkpoint-5800/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6c8948b163d7faf80bcb4ea3c791311da3d9b14d4257794aeb4a2b35935a025 +size 500897101 diff --git a/checkpoint-5800/adapter_model/README.md b/checkpoint-5800/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c --- /dev/null +++ b/checkpoint-5800/adapter_model/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/checkpoint-5800/adapter_model/adapter_config.json b/checkpoint-5800/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7052646debaf453de93d6176727714122c31b64a --- /dev/null +++ b/checkpoint-5800/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "gate_proj", + "up_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5800/adapter_model/adapter_model.bin b/checkpoint-5800/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b25d3eaace758c6638b956d27554c51cfd758425 --- /dev/null +++ b/checkpoint-5800/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6c8948b163d7faf80bcb4ea3c791311da3d9b14d4257794aeb4a2b35935a025 +size 500897101 diff --git a/checkpoint-5800/optimizer.pt b/checkpoint-5800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ff99dd4e8de03fbb3706e831716b20cdc36b029 --- /dev/null +++ b/checkpoint-5800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdafcf17dcbaf493ce7420b5efc55cc37f121d6d18e479f66229dfb7d3ebfe9a +size 1001752701 diff --git a/checkpoint-5800/rng_state_0.pth b/checkpoint-5800/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..32576f7fea9a909e430c2379d78ec5bf846b1367 --- /dev/null +++ b/checkpoint-5800/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdcc97c3d5ae4ead7b5285c5c5b8dddcbec730d6aced698514214e40163f6c80 +size 27772 diff --git a/checkpoint-5800/rng_state_1.pth b/checkpoint-5800/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..4427827aec35702513d131a1ce0ecb968ec72c8f --- /dev/null +++ b/checkpoint-5800/rng_state_1.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:8f0dfb5773126adda2e927345797eb8babdfc8aa673a963e413f06bda803f6b8 +size 27772 diff --git a/checkpoint-5800/rng_state_10.pth b/checkpoint-5800/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..d7d02c96cf7f3da06779c0b9becde5f1e0927402 --- /dev/null +++ b/checkpoint-5800/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:481ee2d6343e3ee249f8f13ba72e97689ec745d34ca3eb731c5977d455e68087 +size 27789 diff --git a/checkpoint-5800/rng_state_11.pth b/checkpoint-5800/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..02f266ae42f742e7af0f68aa48bb5e1d97f9854a --- /dev/null +++ b/checkpoint-5800/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56d4288a9e908f82added428e5a14ff5dd6e86473ebd372733e241f3c2a4e833 +size 27789 diff --git a/checkpoint-5800/rng_state_12.pth b/checkpoint-5800/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5bf2100846d316c907c004cdcf4f53dec78dbc4 --- /dev/null +++ b/checkpoint-5800/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5749fda2bb629d9c3e4a0bb894676f98047fc123c7ada64e2385c1692f194369 +size 27789 diff --git a/checkpoint-5800/rng_state_13.pth b/checkpoint-5800/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..6e4c8ec65ba23f1127297f14df1c0e5a815972bc --- /dev/null +++ b/checkpoint-5800/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ac848792be0fda751cfdebaa1419f845e441f823e55d97dc332cb6ebecab888 +size 27789 diff --git a/checkpoint-5800/rng_state_2.pth b/checkpoint-5800/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..339b13aaf629629acd363207d06b803f3164cc48 --- /dev/null +++ b/checkpoint-5800/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:004ca3f2557bec1000ed01ad0ac091380c145ff4d9054e495042d9673b164cd3 +size 27772 diff --git a/checkpoint-5800/rng_state_3.pth b/checkpoint-5800/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..fdc8655bc3b73b1d48fe9069eff355b73e1cd4de --- /dev/null +++ b/checkpoint-5800/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83a6e03a2771bdfefb81f6b70a469b27bf46d9bac7c91cec25fbd8fc2e1bb9fd +size 27772 diff --git a/checkpoint-5800/rng_state_4.pth b/checkpoint-5800/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..a33983d78492d1d9d9a320f02bab9fd7437fd447 --- /dev/null +++ b/checkpoint-5800/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:027cc230cd1577ece8a43abc1ed88fcc51ac874f84b8bcbe865b8338b17825dc +size 27772 diff --git a/checkpoint-5800/rng_state_5.pth b/checkpoint-5800/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..9c12ce4de318110b659e68db3eb100c4aa128f72 --- /dev/null +++ b/checkpoint-5800/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e15c6848b49b0375761f21215e2d56ea5a16e27885192cc4af7b1a3f9c325a2 +size 27772 diff --git a/checkpoint-5800/rng_state_6.pth b/checkpoint-5800/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..2fcdb815d79ea791d2753a0db4b146458c95e3d7 --- /dev/null +++ b/checkpoint-5800/rng_state_6.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:0290c211df0a25ca2b739876f096befd7a2f50010f23fcfb700c648fc7c37b88 +size 27772 diff --git a/checkpoint-5800/rng_state_7.pth b/checkpoint-5800/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..c97a750c2dea4a44ea85d28ef92f4d91c42180b9 --- /dev/null +++ b/checkpoint-5800/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe7bdf51b5f2867802a2660bdbfa211be18d28a9f5e7325d4957f7ee65895a3a +size 27772 diff --git a/checkpoint-5800/rng_state_8.pth b/checkpoint-5800/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..eb02047c3a256a4e8dc65a9507f137abe27d4048 --- /dev/null +++ b/checkpoint-5800/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c7043986c3cfa7014abbd2eb87d4f8ccda04f6a243deda922c9522e5c66f2fd +size 27772 diff --git a/checkpoint-5800/rng_state_9.pth b/checkpoint-5800/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..45ac65bb884854298d454760ce2735e8ecec7624 --- /dev/null +++ b/checkpoint-5800/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3371033bac6e8e8c6fb2001ca45e33bd941b91fd370cce2d2edc6bac7df0e55 +size 27772 diff --git a/checkpoint-5800/scheduler.pt b/checkpoint-5800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f744f85c5c20ec4e390e284fde44a23de16db37c --- /dev/null +++ b/checkpoint-5800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7e2a15463250851f8f0353fd93191418e2b4e589eb68be6185d12012957a33f +size 627 diff --git a/checkpoint-5800/trainer_state.json b/checkpoint-5800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..253a82686a6ec59beff239685621e7860be41e45 --- /dev/null +++ b/checkpoint-5800/trainer_state.json @@ -0,0 +1,1424 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.2489336952307095, + "global_step": 5800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "learning_rate": 0.0001999867761371633, + "loss": 1.0435, + "step": 50 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019993306018843102, + "loss": 0.8918, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019983804784290833, + "loss": 0.8874, + "step": 150 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019970177836355307, + "loss": 0.8839, + "step": 200 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019961818913082012, + "loss": 0.8801, + "step": 225 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019952430806244534, + "loss": 0.8753, + "step": 250 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019942014485754635, + "loss": 0.8754, + "step": 275 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019930571027751713, + "loss": 0.8751, + "step": 300 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001991810161449164, + "loss": 0.8819, + "step": 325 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019904607534224612, + "loss": 0.8744, + "step": 350 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019890090181062063, + "loss": 0.8735, + "step": 375 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019874551054832625, + "loss": 0.8703, + "step": 400 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019857991760927193, + "loss": 0.8715, + "step": 425 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019840414010133045, + 
"loss": 0.8714, + "step": 450 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019821819618457114, + "loss": 0.8653, + "step": 475 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001980221050693837, + "loss": 0.8716, + "step": 500 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019781588701449338, + "loss": 0.8695, + "step": 525 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001975995633248682, + "loss": 0.8746, + "step": 550 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019737315634951762, + "loss": 0.8731, + "step": 575 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019713668947918386, + "loss": 0.867, + "step": 600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001968901871439252, + "loss": 0.8706, + "step": 625 + }, + { + "epoch": 0.25, + "learning_rate": 0.000196633674810592, + "loss": 0.8595, + "step": 650 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001963671789801958, + "loss": 0.8627, + "step": 675 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001960907271851712, + "loss": 0.8607, + "step": 700 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019580434798653173, + "loss": 0.858, + "step": 725 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019550807097091876, + "loss": 0.8589, + "step": 750 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019520192674754515, + "loss": 0.8561, + "step": 775 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019488594694503264, + "loss": 0.8576, + "step": 800 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019456016420814446, + "loss": 0.8597, + "step": 825 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019422461219441254, + "loss": 0.862, + "step": 850 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019387932557066035, + "loss": 0.8577, + "step": 875 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019352434000942127, + "loss": 0.8632, + "step": 900 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019315969218525333, + "loss": 0.8567, + "step": 925 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019278541977095005, + "loss": 0.8501, + "step": 950 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019240156143364844, + "loss": 0.8596, + "step": 975 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019200815683083434, + "loss": 0.8556, + "step": 1000 + }, + { + "epoch": 0.39, + "eval_loss": 0.8521950244903564, + "eval_runtime": 59.8838, + "eval_samples_per_second": 12.19, + "eval_steps_per_second": 0.885, + "step": 1000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019160524660624505, + "loss": 0.8531, + "step": 1025 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019119287238567045, + "loss": 0.8513, + "step": 1050 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019077107677265253, + "loss": 0.8502, + "step": 1075 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019033990334408384, + "loss": 0.8469, + "step": 1100 + }, + { + "epoch": 0.44, + "learning_rate": 0.00018989939664570545, + "loss": 0.8495, + "step": 1125 + }, + { + "epoch": 0.45, + "learning_rate": 0.00018944960218750484, + "loss": 0.8485, + "step": 1150 + }, + { + "epoch": 0.46, + "learning_rate": 0.00018899056643901404, + "loss": 0.8534, + "step": 1175 + }, + { + "epoch": 0.47, + "learning_rate": 0.00018852233682450893, + "loss": 0.8531, + "step": 1200 + }, + { + "epoch": 0.47, + "learning_rate": 0.00018804496171810948, + "loss": 0.8509, + "step": 1225 + }, + { + "epoch": 0.48, + "learning_rate": 0.00018755849043878222, + "loss": 0.8445, + "step": 1250 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001870629732452449, + "loss": 0.8548, + "step": 1275 + }, + { + "epoch": 
0.5, + "learning_rate": 0.00018655846133077417, + "loss": 0.8441, + "step": 1300 + }, + { + "epoch": 0.51, + "learning_rate": 0.00018604500681791656, + "loss": 0.8533, + "step": 1325 + }, + { + "epoch": 0.52, + "learning_rate": 0.00018552266275310373, + "loss": 0.8505, + "step": 1350 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001849914831011719, + "loss": 0.8544, + "step": 1375 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018445152273978668, + "loss": 0.845, + "step": 1400 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018390283745377354, + "loss": 0.8376, + "step": 1425 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001833454839293545, + "loss": 0.847, + "step": 1450 + }, + { + "epoch": 0.57, + "learning_rate": 0.00018277951974829163, + "loss": 0.8473, + "step": 1475 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001822050033819382, + "loss": 0.8438, + "step": 1500 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018162199418519785, + "loss": 0.8418, + "step": 1525 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018103055239039243, + "loss": 0.842, + "step": 1550 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001804307391010393, + "loss": 0.8435, + "step": 1575 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017982261628553842, + "loss": 0.8349, + "step": 1600 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001792062467707703, + "loss": 0.8483, + "step": 1625 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001785816942356052, + "loss": 0.8387, + "step": 1650 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017794902320432429, + "loss": 0.843, + "step": 1675 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017730829903995333, + "loss": 0.8424, + "step": 1700 + }, + { + "epoch": 0.67, + "learning_rate": 0.00017665958793751006, + "loss": 0.8418, + "step": 1725 + }, + { + "epoch": 0.68, + "learning_rate": 0.00017600295691716522, + "loss": 0.8384, + "step": 1750 + }, + { + "epoch": 0.69, + "learning_rate": 0.00017533847381731856, + "loss": 0.8445, + "step": 1775 + }, + { + "epoch": 0.7, + "learning_rate": 0.00017466620728759033, + "loss": 0.8446, + "step": 1800 + }, + { + "epoch": 0.71, + "learning_rate": 0.00017398622678172878, + "loss": 0.838, + "step": 1825 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001732986025504348, + "loss": 0.8415, + "step": 1850 + }, + { + "epoch": 0.73, + "learning_rate": 0.000172603405634104, + "loss": 0.8357, + "step": 1875 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017190070785548755, + "loss": 0.8311, + "step": 1900 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001711905818122717, + "loss": 0.8333, + "step": 1925 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001704731008695777, + "loss": 0.8387, + "step": 1950 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001697483391523821, + "loss": 0.8442, + "step": 1975 + }, + { + "epoch": 0.78, + "learning_rate": 0.00016901637153785885, + "loss": 0.8399, + "step": 2000 + }, + { + "epoch": 0.78, + "eval_loss": 0.8339959383010864, + "eval_runtime": 58.5829, + "eval_samples_per_second": 12.461, + "eval_steps_per_second": 0.905, + "step": 2000 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001682772736476434, + "loss": 0.8334, + "step": 2025 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001675311218400201, + "loss": 0.835, + "step": 2050 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016677799320203332, + "loss": 0.8368, + "step": 2075 + }, + { + "epoch": 0.81, + "learning_rate": 0.00016601796554152344, + "loss": 0.8278, + "step": 2100 + }, + { + "epoch": 0.82, + "learning_rate": 
0.00016525111737908827, + "loss": 0.8334, + "step": 2125 + }, + { + "epoch": 0.83, + "learning_rate": 0.00016447752793997096, + "loss": 0.8416, + "step": 2150 + }, + { + "epoch": 0.84, + "learning_rate": 0.00016369727714587483, + "loss": 0.8297, + "step": 2175 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001629104456067066, + "loss": 0.8327, + "step": 2200 + }, + { + "epoch": 0.86, + "learning_rate": 0.00016211711461224825, + "loss": 0.8324, + "step": 2225 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001613173661237589, + "loss": 0.8313, + "step": 2250 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001605112827655069, + "loss": 0.8292, + "step": 2275 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001596989478162339, + "loss": 0.8334, + "step": 2300 + }, + { + "epoch": 0.9, + "learning_rate": 0.00015888044520055106, + "loss": 0.8352, + "step": 2325 + }, + { + "epoch": 0.91, + "learning_rate": 0.00015805585948026852, + "loss": 0.823, + "step": 2350 + }, + { + "epoch": 0.92, + "learning_rate": 0.000157225275845659, + "loss": 0.8293, + "step": 2375 + }, + { + "epoch": 0.93, + "learning_rate": 0.00015638878010665672, + "loss": 0.8289, + "step": 2400 + }, + { + "epoch": 0.94, + "learning_rate": 0.00015554645868399205, + "loss": 0.832, + "step": 2425 + }, + { + "epoch": 0.95, + "learning_rate": 0.00015469839860026308, + "loss": 0.8294, + "step": 2450 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001538446874709452, + "loss": 0.8281, + "step": 2475 + }, + { + "epoch": 0.97, + "learning_rate": 0.00015298541349533925, + "loss": 0.8314, + "step": 2500 + }, + { + "epoch": 0.98, + "learning_rate": 0.00015212066544745926, + "loss": 0.831, + "step": 2525 + }, + { + "epoch": 0.99, + "learning_rate": 0.00015125053266686124, + "loss": 0.8319, + "step": 2550 + }, + { + "epoch": 1.0, + "learning_rate": 0.00015037510504941303, + "loss": 0.8259, + "step": 2575 + }, + { + "epoch": 1.01, + "learning_rate": 0.00014949447303800695, + "loss": 0.8133, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 0.00014860872761321593, + "loss": 0.8139, + "step": 2625 + }, + { + "epoch": 1.03, + "learning_rate": 0.00014771796028389405, + "loss": 0.804, + "step": 2650 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001468222630777225, + "loss": 0.8011, + "step": 2675 + }, + { + "epoch": 1.05, + "learning_rate": 0.00014592172853170193, + "loss": 0.8037, + "step": 2700 + }, + { + "epoch": 1.06, + "learning_rate": 0.00014501644968259212, + "loss": 0.8063, + "step": 2725 + }, + { + "epoch": 1.07, + "learning_rate": 0.00014410652005730025, + "loss": 0.8155, + "step": 2750 + }, + { + "epoch": 1.08, + "learning_rate": 0.00014319203366321826, + "loss": 0.8066, + "step": 2775 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001422730849785107, + "loss": 0.8091, + "step": 2800 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001413497689423539, + "loss": 0.8067, + "step": 2825 + }, + { + "epoch": 1.11, + "learning_rate": 0.00014042218094512755, + "loss": 0.8046, + "step": 2850 + }, + { + "epoch": 1.11, + "learning_rate": 0.00013949041681855985, + "loss": 0.8053, + "step": 2875 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001385545728258264, + "loss": 0.8075, + "step": 2900 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001376147456516055, + "loss": 0.8015, + "step": 2925 + }, + { + "epoch": 1.14, + "learning_rate": 0.00013667103239208903, + "loss": 0.8016, + "step": 2950 + }, + { + "epoch": 1.15, + "learning_rate": 0.00013572353054495126, + "loss": 0.8029, + "step": 2975 + }, + { + "epoch": 1.16, + "learning_rate": 
0.0001347723379992762, + "loss": 0.8017, + "step": 3000 + }, + { + "epoch": 1.16, + "eval_loss": 0.8229297995567322, + "eval_runtime": 59.3398, + "eval_samples_per_second": 12.302, + "eval_steps_per_second": 0.893, + "step": 3000 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001338175530254443, + "loss": 0.8049, + "step": 3025 + }, + { + "epoch": 1.18, + "learning_rate": 0.00013285927426497985, + "loss": 0.8027, + "step": 3050 + }, + { + "epoch": 1.19, + "learning_rate": 0.00013189760072036008, + "loss": 0.8028, + "step": 3075 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001309326317447869, + "loss": 0.8021, + "step": 3100 + }, + { + "epoch": 1.21, + "learning_rate": 0.00012996446703192257, + "loss": 0.8033, + "step": 3125 + }, + { + "epoch": 1.22, + "learning_rate": 0.00012899320660558986, + "loss": 0.8016, + "step": 3150 + }, + { + "epoch": 1.23, + "learning_rate": 0.00012801895080943846, + "loss": 0.7995, + "step": 3175 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001270418002965782, + "loss": 0.799, + "step": 3200 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001260618560191802, + "loss": 0.8002, + "step": 3225 + }, + { + "epoch": 1.26, + "learning_rate": 0.00012507921921804717, + "loss": 0.8068, + "step": 3250 + }, + { + "epoch": 1.27, + "learning_rate": 0.00012409399141215423, + "loss": 0.8041, + "step": 3275 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001231062743881603, + "loss": 0.7999, + "step": 3300 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001221161701898926, + "loss": 0.7995, + "step": 3325 + }, + { + "epoch": 1.3, + "learning_rate": 0.00012112378110780391, + "loss": 0.7959, + "step": 3350 + }, + { + "epoch": 1.31, + "learning_rate": 0.00012012920966840486, + "loss": 0.7999, + "step": 3375 + }, + { + "epoch": 1.32, + "learning_rate": 0.00011913255862367151, + "loss": 0.8016, + "step": 3400 + }, + { + "epoch": 1.33, + "learning_rate": 0.00011813393094042993, + "loss": 0.7944, + "step": 3425 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001171334297897181, + "loss": 0.8026, + "step": 3450 + }, + { + "epoch": 1.35, + "learning_rate": 0.00011613115853612734, + "loss": 0.8004, + "step": 3475 + }, + { + "epoch": 1.36, + "learning_rate": 0.00011512722072712321, + "loss": 0.7992, + "step": 3500 + }, + { + "epoch": 1.37, + "learning_rate": 0.00011412172008234785, + "loss": 0.8004, + "step": 3525 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001131147604829043, + "loss": 0.8009, + "step": 3550 + }, + { + "epoch": 1.39, + "learning_rate": 0.00011210644596062439, + "loss": 0.7993, + "step": 3575 + }, + { + "epoch": 1.4, + "learning_rate": 0.00011109688068732081, + "loss": 0.7965, + "step": 3600 + }, + { + "epoch": 1.41, + "learning_rate": 0.00011008616896402482, + "loss": 0.7991, + "step": 3625 + }, + { + "epoch": 1.42, + "learning_rate": 0.00010907441521021072, + "loss": 0.8026, + "step": 3650 + }, + { + "epoch": 1.42, + "learning_rate": 0.00010806172395300789, + "loss": 0.7941, + "step": 3675 + }, + { + "epoch": 1.43, + "learning_rate": 0.00010704819981640186, + "loss": 0.7989, + "step": 3700 + }, + { + "epoch": 1.44, + "learning_rate": 0.00010603394751042522, + "loss": 0.7981, + "step": 3725 + }, + { + "epoch": 1.45, + "learning_rate": 0.00010501907182033979, + "loss": 0.7985, + "step": 3750 + }, + { + "epoch": 1.46, + "learning_rate": 0.000104003677595811, + "loss": 0.7921, + "step": 3775 + }, + { + "epoch": 1.47, + "learning_rate": 0.00010298786974007555, + "loss": 0.8012, + "step": 3800 + }, + { + "epoch": 1.48, + "learning_rate": 0.00010197175319910343, + 
"loss": 0.7906, + "step": 3825 + }, + { + "epoch": 1.49, + "learning_rate": 0.00010095543295075593, + "loss": 0.7928, + "step": 3850 + }, + { + "epoch": 1.5, + "learning_rate": 9.993901399393979e-05, + "loss": 0.8018, + "step": 3875 + }, + { + "epoch": 1.51, + "learning_rate": 9.892260133775968e-05, + "loss": 0.7991, + "step": 3900 + }, + { + "epoch": 1.52, + "learning_rate": 9.79062999906693e-05, + "loss": 0.795, + "step": 3925 + }, + { + "epoch": 1.53, + "learning_rate": 9.68902149496227e-05, + "loss": 0.7977, + "step": 3950 + }, + { + "epoch": 1.54, + "learning_rate": 9.587445118922674e-05, + "loss": 0.8013, + "step": 3975 + }, + { + "epoch": 1.55, + "learning_rate": 9.485911365089589e-05, + "loss": 0.7978, + "step": 4000 + }, + { + "epoch": 1.55, + "eval_loss": 0.8142631649971008, + "eval_runtime": 59.4108, + "eval_samples_per_second": 12.287, + "eval_steps_per_second": 0.892, + "step": 4000 + }, + { + "epoch": 1.56, + "learning_rate": 9.384430723201036e-05, + "loss": 0.7912, + "step": 4025 + }, + { + "epoch": 1.57, + "learning_rate": 9.283013677507902e-05, + "loss": 0.7919, + "step": 4050 + }, + { + "epoch": 1.58, + "learning_rate": 9.181670705690761e-05, + "loss": 0.7919, + "step": 4075 + }, + { + "epoch": 1.59, + "learning_rate": 9.080412277777413e-05, + "loss": 0.8018, + "step": 4100 + }, + { + "epoch": 1.6, + "learning_rate": 8.979248855061188e-05, + "loss": 0.7811, + "step": 4125 + }, + { + "epoch": 1.61, + "learning_rate": 8.878190889020159e-05, + "loss": 0.7919, + "step": 4150 + }, + { + "epoch": 1.62, + "learning_rate": 8.777248820237376e-05, + "loss": 0.7994, + "step": 4175 + }, + { + "epoch": 1.63, + "learning_rate": 8.676433077322215e-05, + "loss": 0.7956, + "step": 4200 + }, + { + "epoch": 1.64, + "learning_rate": 8.575754075832973e-05, + "loss": 0.7968, + "step": 4225 + }, + { + "epoch": 1.65, + "learning_rate": 8.475222217200801e-05, + "loss": 0.7905, + "step": 4250 + }, + { + "epoch": 1.66, + "learning_rate": 8.374847887655112e-05, + "loss": 0.7889, + "step": 4275 + }, + { + "epoch": 1.67, + "learning_rate": 8.274641457150543e-05, + "loss": 0.7988, + "step": 4300 + }, + { + "epoch": 1.68, + "learning_rate": 8.174613278295608e-05, + "loss": 0.7947, + "step": 4325 + }, + { + "epoch": 1.69, + "learning_rate": 8.074773685283137e-05, + "loss": 0.7929, + "step": 4350 + }, + { + "epoch": 1.7, + "learning_rate": 7.97513299282264e-05, + "loss": 0.7949, + "step": 4375 + }, + { + "epoch": 1.71, + "learning_rate": 7.875701495074638e-05, + "loss": 0.7925, + "step": 4400 + }, + { + "epoch": 1.72, + "learning_rate": 7.776489464587158e-05, + "loss": 0.7917, + "step": 4425 + }, + { + "epoch": 1.73, + "learning_rate": 7.677507151234448e-05, + "loss": 0.7905, + "step": 4450 + }, + { + "epoch": 1.74, + "learning_rate": 7.578764781158034e-05, + "loss": 0.7912, + "step": 4475 + }, + { + "epoch": 1.74, + "learning_rate": 7.480272555710227e-05, + "loss": 0.8006, + "step": 4500 + }, + { + "epoch": 1.75, + "learning_rate": 7.382040650400185e-05, + "loss": 0.7937, + "step": 4525 + }, + { + "epoch": 1.76, + "learning_rate": 7.28407921384267e-05, + "loss": 0.794, + "step": 4550 + }, + { + "epoch": 1.77, + "learning_rate": 7.186398366709545e-05, + "loss": 0.7931, + "step": 4575 + }, + { + "epoch": 1.78, + "learning_rate": 7.089008200684197e-05, + "loss": 0.7982, + "step": 4600 + }, + { + "epoch": 1.79, + "learning_rate": 6.991918777418928e-05, + "loss": 0.7916, + "step": 4625 + }, + { + "epoch": 1.8, + "learning_rate": 6.895140127495455e-05, + "loss": 0.7919, + "step": 4650 + }, + { + "epoch": 1.81, 
+ "learning_rate": 6.798682249388631e-05, + "loss": 0.7863, + "step": 4675 + }, + { + "epoch": 1.82, + "learning_rate": 6.702555108433461e-05, + "loss": 0.789, + "step": 4700 + }, + { + "epoch": 1.83, + "learning_rate": 6.606768635795574e-05, + "loss": 0.7902, + "step": 4725 + }, + { + "epoch": 1.84, + "learning_rate": 6.511332727445191e-05, + "loss": 0.7924, + "step": 4750 + }, + { + "epoch": 1.85, + "learning_rate": 6.416257243134747e-05, + "loss": 0.7957, + "step": 4775 + }, + { + "epoch": 1.86, + "learning_rate": 6.321552005380256e-05, + "loss": 0.7916, + "step": 4800 + }, + { + "epoch": 1.87, + "learning_rate": 6.22722679844652e-05, + "loss": 0.7867, + "step": 4825 + }, + { + "epoch": 1.88, + "learning_rate": 6.133291367336284e-05, + "loss": 0.7944, + "step": 4850 + }, + { + "epoch": 1.89, + "learning_rate": 6.039755416783457e-05, + "loss": 0.7982, + "step": 4875 + }, + { + "epoch": 1.9, + "learning_rate": 5.946628610250484e-05, + "loss": 0.7918, + "step": 4900 + }, + { + "epoch": 1.91, + "learning_rate": 5.853920568929996e-05, + "loss": 0.7921, + "step": 4925 + }, + { + "epoch": 1.92, + "learning_rate": 5.761640870750799e-05, + "loss": 0.7878, + "step": 4950 + }, + { + "epoch": 1.93, + "learning_rate": 5.669799049388375e-05, + "loss": 0.7901, + "step": 4975 + }, + { + "epoch": 1.94, + "learning_rate": 5.578404593279911e-05, + "loss": 0.7858, + "step": 5000 + }, + { + "epoch": 1.94, + "eval_loss": 0.807844877243042, + "eval_runtime": 59.586, + "eval_samples_per_second": 12.251, + "eval_steps_per_second": 0.889, + "step": 5000 + }, + { + "epoch": 1.95, + "learning_rate": 5.487466944644033e-05, + "loss": 0.7902, + "step": 5025 + }, + { + "epoch": 1.96, + "learning_rate": 5.3969954985052996e-05, + "loss": 0.7979, + "step": 5050 + }, + { + "epoch": 1.97, + "learning_rate": 5.306999601723579e-05, + "loss": 0.7931, + "step": 5075 + }, + { + "epoch": 1.98, + "learning_rate": 5.21748855202839e-05, + "loss": 0.7868, + "step": 5100 + }, + { + "epoch": 1.99, + "learning_rate": 5.128471597058342e-05, + "loss": 0.7993, + "step": 5125 + }, + { + "epoch": 2.0, + "learning_rate": 5.03995793340572e-05, + "loss": 0.7892, + "step": 5150 + }, + { + "epoch": 2.01, + "learning_rate": 4.9519567056663694e-05, + "loss": 0.7788, + "step": 5175 + }, + { + "epoch": 2.02, + "learning_rate": 4.864477005494938e-05, + "loss": 0.7654, + "step": 5200 + }, + { + "epoch": 2.03, + "learning_rate": 4.777527870665592e-05, + "loss": 0.7468, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 4.691118284138296e-05, + "loss": 0.7359, + "step": 5250 + }, + { + "epoch": 2.05, + "learning_rate": 4.605257173130763e-05, + "loss": 0.7422, + "step": 5275 + }, + { + "epoch": 2.06, + "learning_rate": 4.519953408196152e-05, + "loss": 0.7424, + "step": 5300 + }, + { + "epoch": 2.06, + "learning_rate": 4.435215802306635e-05, + "loss": 0.7521, + "step": 5325 + }, + { + "epoch": 2.07, + "learning_rate": 4.351053109942894e-05, + "loss": 0.7477, + "step": 5350 + }, + { + "epoch": 2.08, + "learning_rate": 4.2674740261896776e-05, + "loss": 0.7456, + "step": 5375 + }, + { + "epoch": 2.09, + "learning_rate": 4.1844871858374844e-05, + "loss": 0.766, + "step": 5400 + }, + { + "epoch": 2.1, + "learning_rate": 4.1021011624904814e-05, + "loss": 0.7664, + "step": 5425 + }, + { + "epoch": 2.11, + "learning_rate": 4.0203244676807353e-05, + "loss": 0.7703, + "step": 5450 + }, + { + "epoch": 2.12, + "learning_rate": 3.939165549988873e-05, + "loss": 0.7674, + "step": 5475 + }, + { + "epoch": 2.13, + "learning_rate": 3.858632794171222e-05, + "loss": 
0.7722, + "step": 5500 + }, + { + "epoch": 2.14, + "learning_rate": 3.778734520293562e-05, + "loss": 0.7716, + "step": 5525 + }, + { + "epoch": 2.15, + "learning_rate": 3.699478982871561e-05, + "loss": 0.7795, + "step": 5550 + }, + { + "epoch": 2.16, + "learning_rate": 3.62087437001797e-05, + "loss": 0.7728, + "step": 5575 + }, + { + "epoch": 2.17, + "learning_rate": 3.5429288025966944e-05, + "loss": 0.7709, + "step": 5600 + }, + { + "epoch": 2.18, + "learning_rate": 3.4656503333837956e-05, + "loss": 0.7682, + "step": 5625 + }, + { + "epoch": 2.19, + "learning_rate": 3.389046946235542e-05, + "loss": 0.7734, + "step": 5650 + }, + { + "epoch": 2.2, + "learning_rate": 3.313126555263576e-05, + "loss": 0.7716, + "step": 5675 + }, + { + "epoch": 2.21, + "learning_rate": 3.237897004017276e-05, + "loss": 0.7716, + "step": 5700 + }, + { + "epoch": 2.22, + "learning_rate": 3.163366064673427e-05, + "loss": 0.7721, + "step": 5725 + }, + { + "epoch": 2.23, + "learning_rate": 3.089541437233252e-05, + "loss": 0.7658, + "step": 5750 + }, + { + "epoch": 2.24, + "learning_rate": 3.0164307487268996e-05, + "loss": 0.7716, + "step": 5775 + }, + { + "epoch": 2.25, + "learning_rate": 2.944041552425475e-05, + "loss": 0.7687, + "step": 5800 + } + ], + "max_steps": 7737, + "num_train_epochs": 3, + "total_flos": 2.4980935510562177e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5800/training_args.bin b/checkpoint-5800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c456116f688fe6cb6deecb1e0a1cf8d153d349fb --- /dev/null +++ b/checkpoint-5800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df5d13b3f1b9942f80afde79010ef0947feee3df761d245fef1699bc397648b2 +size 4027 diff --git a/checkpoint-5900/README.md b/checkpoint-5900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c --- /dev/null +++ b/checkpoint-5900/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/checkpoint-5900/adapter_config.json b/checkpoint-5900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7052646debaf453de93d6176727714122c31b64a --- /dev/null +++ b/checkpoint-5900/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "gate_proj", + "up_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5900/adapter_model.bin b/checkpoint-5900/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a5a6e355eff258e831ac8972e31e0b7b97f5a0a4 --- /dev/null +++ 
b/checkpoint-5900/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3b3da1c54329907c1913ad730b0e8c5d24d4c2d49e4f95aa7976664afc0a98c +size 500897101 diff --git a/checkpoint-5900/adapter_model/README.md b/checkpoint-5900/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c --- /dev/null +++ b/checkpoint-5900/adapter_model/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/checkpoint-5900/adapter_model/adapter_config.json b/checkpoint-5900/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7052646debaf453de93d6176727714122c31b64a --- /dev/null +++ b/checkpoint-5900/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "gate_proj", + "up_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5900/adapter_model/adapter_model.bin b/checkpoint-5900/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a5a6e355eff258e831ac8972e31e0b7b97f5a0a4 --- /dev/null +++ b/checkpoint-5900/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3b3da1c54329907c1913ad730b0e8c5d24d4c2d49e4f95aa7976664afc0a98c +size 500897101 diff --git a/checkpoint-5900/optimizer.pt b/checkpoint-5900/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..72266aa65efccd48202e3fb60bdb4aeb59f6ba26 --- /dev/null +++ b/checkpoint-5900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1197bafa9ae84ea7a453b741f03a67406157df8c322c0f2e87f2e9c99c7e6415 +size 1001752701 diff --git a/checkpoint-5900/rng_state_0.pth b/checkpoint-5900/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..18663756587b8bd910f595fe23fb24123cbf1bed --- /dev/null +++ b/checkpoint-5900/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6b970d7fefc137c6fa7c1f3ec5751405f46258d274a85e6d4372f5c1b430100 +size 27772 diff --git a/checkpoint-5900/rng_state_1.pth b/checkpoint-5900/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..e6dead9aa0a1048087f6cdc6949859fc9a9bd458 --- /dev/null +++ b/checkpoint-5900/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c346a11c7d7489d10f88696f09c70a4abe9b4d22e70d081daf287152ba6758 +size 27772 diff --git a/checkpoint-5900/rng_state_10.pth b/checkpoint-5900/rng_state_10.pth new file mode 100644 index 
0000000000000000000000000000000000000000..3e1f2dffc2f5d8d9447fbb85330e4bfd2af94428 --- /dev/null +++ b/checkpoint-5900/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28cc2f86fa3f892a60fcd12ec1c513fe4892758b164d8b6dcaf3ffad379de4bf +size 27789 diff --git a/checkpoint-5900/rng_state_11.pth b/checkpoint-5900/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..d0a94b7884f61885024b4ce786c9cd49560ff4a8 --- /dev/null +++ b/checkpoint-5900/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7e9eaa5d2c6ecd694c6005f784826c1ef125cd869dc4ae397dc6622d332750e +size 27789 diff --git a/checkpoint-5900/rng_state_12.pth b/checkpoint-5900/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..46ee92749473d23751444f701c8fdd98c7b6b03a --- /dev/null +++ b/checkpoint-5900/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38acd1e9cddbb15c52d3524a7f3e3b0813e9d4cd4aa950fe53314f0b60178040 +size 27789 diff --git a/checkpoint-5900/rng_state_13.pth b/checkpoint-5900/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..ec9105e08842cbba5b65f8457b1b03541bb29903 --- /dev/null +++ b/checkpoint-5900/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0c474d8b008c60069849057cd5597c90d8f9e8db24500adaab180041a48449f +size 27789 diff --git a/checkpoint-5900/rng_state_2.pth b/checkpoint-5900/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..6a9b5e7010270c29b101f59decb40b4f8e99955a --- /dev/null +++ b/checkpoint-5900/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b70d819dbeb60759d92edd51269d8cee2700d47a9ed7c6a47d97f93849e55bb0 +size 27772 diff --git a/checkpoint-5900/rng_state_3.pth b/checkpoint-5900/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..766c27d9f7bfd190376f77d0549e323ac4dafb34 --- /dev/null +++ b/checkpoint-5900/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d28ba4337f3d0ed31c728bf8fbbd7c00c8014b3f57ffbf29863e421ce59080c +size 27772 diff --git a/checkpoint-5900/rng_state_4.pth b/checkpoint-5900/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..58d9ca8b3ee6e9a5b21de233887d2752e6347a7d --- /dev/null +++ b/checkpoint-5900/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00c43f507f234617c2e9bcf5dd72636cec19310803bce07ad6514e0e90814858 +size 27772 diff --git a/checkpoint-5900/rng_state_5.pth b/checkpoint-5900/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..091228cf96b09b25b5d2a6eb2e968700c19f61b8 --- /dev/null +++ b/checkpoint-5900/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29979a24bd6bef289a10c8e120d56f27b5f058b309d98caf16f123a7a861dce9 +size 27772 diff --git a/checkpoint-5900/rng_state_6.pth b/checkpoint-5900/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f7862d3d3ffda1d283d94272f9bd3dc88e7c55c --- /dev/null +++ b/checkpoint-5900/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78aa8d7c2497285c4d5739872ca5ceac97b5b636a89d97a0185297b5bbd049d6 +size 27772 diff --git a/checkpoint-5900/rng_state_7.pth b/checkpoint-5900/rng_state_7.pth new file mode 100644 index 
0000000000000000000000000000000000000000..bf5c1d123706c7d2647e69f7e1af9b77b9fb1599 --- /dev/null +++ b/checkpoint-5900/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04a7b8a1abaea1776e2ddd83a1b3d050ed23897ede63223f377eb777702f7a0f +size 27772 diff --git a/checkpoint-5900/rng_state_8.pth b/checkpoint-5900/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..f202d39b150ac8803b4f06ea54b6b25b9cb7a07e --- /dev/null +++ b/checkpoint-5900/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acca6763654c1dff02cf8af98fff17739233e457ad75b6ff4297b68efc173984 +size 27772 diff --git a/checkpoint-5900/rng_state_9.pth b/checkpoint-5900/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..e5b73e77dc5a4623884fa5dc6cc8865042a65e03 --- /dev/null +++ b/checkpoint-5900/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a71a0b91f398dff81646dcd42cd8df3dfca54a277c75bab4cc51332fdf4a4768 +size 27772 diff --git a/checkpoint-5900/scheduler.pt b/checkpoint-5900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6398293689882cc1555627d627d6ecde7fca623c --- /dev/null +++ b/checkpoint-5900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f769241bb104f866dbb417f19005672d382c2f1d72ce0cc8284c37b0a8dd3e0e +size 627 diff --git a/checkpoint-5900/trainer_state.json b/checkpoint-5900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f25e9351d5d160b59e18ca4b6901b53e3e6a665f --- /dev/null +++ b/checkpoint-5900/trainer_state.json @@ -0,0 +1,1448 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.2877084141139976, + "global_step": 5900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "learning_rate": 0.0001999867761371633, + "loss": 1.0435, + "step": 50 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019993306018843102, + "loss": 0.8918, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019983804784290833, + "loss": 0.8874, + "step": 150 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019970177836355307, + "loss": 0.8839, + "step": 200 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019961818913082012, + "loss": 0.8801, + "step": 225 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019952430806244534, + "loss": 0.8753, + "step": 250 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019942014485754635, + "loss": 0.8754, + "step": 275 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019930571027751713, + "loss": 0.8751, + "step": 300 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001991810161449164, + "loss": 0.8819, + "step": 325 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019904607534224612, + "loss": 0.8744, + "step": 350 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019890090181062063, + "loss": 0.8735, + "step": 375 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019874551054832625, + "loss": 0.8703, + "step": 400 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019857991760927193, + "loss": 0.8715, + "step": 425 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019840414010133045, + "loss": 0.8714, + "step": 450 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019821819618457114, + "loss": 0.8653, + "step": 475 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001980221050693837, + "loss": 0.8716, + "step": 500 + }, 
+ { + "epoch": 0.2, + "learning_rate": 0.00019781588701449338, + "loss": 0.8695, + "step": 525 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001975995633248682, + "loss": 0.8746, + "step": 550 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019737315634951762, + "loss": 0.8731, + "step": 575 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019713668947918386, + "loss": 0.867, + "step": 600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001968901871439252, + "loss": 0.8706, + "step": 625 + }, + { + "epoch": 0.25, + "learning_rate": 0.000196633674810592, + "loss": 0.8595, + "step": 650 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001963671789801958, + "loss": 0.8627, + "step": 675 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001960907271851712, + "loss": 0.8607, + "step": 700 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019580434798653173, + "loss": 0.858, + "step": 725 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019550807097091876, + "loss": 0.8589, + "step": 750 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019520192674754515, + "loss": 0.8561, + "step": 775 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019488594694503264, + "loss": 0.8576, + "step": 800 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019456016420814446, + "loss": 0.8597, + "step": 825 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019422461219441254, + "loss": 0.862, + "step": 850 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019387932557066035, + "loss": 0.8577, + "step": 875 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019352434000942127, + "loss": 0.8632, + "step": 900 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019315969218525333, + "loss": 0.8567, + "step": 925 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019278541977095005, + "loss": 0.8501, + "step": 950 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019240156143364844, + "loss": 0.8596, + "step": 975 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019200815683083434, + "loss": 0.8556, + "step": 1000 + }, + { + "epoch": 0.39, + "eval_loss": 0.8521950244903564, + "eval_runtime": 59.8838, + "eval_samples_per_second": 12.19, + "eval_steps_per_second": 0.885, + "step": 1000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019160524660624505, + "loss": 0.8531, + "step": 1025 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019119287238567045, + "loss": 0.8513, + "step": 1050 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019077107677265253, + "loss": 0.8502, + "step": 1075 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019033990334408384, + "loss": 0.8469, + "step": 1100 + }, + { + "epoch": 0.44, + "learning_rate": 0.00018989939664570545, + "loss": 0.8495, + "step": 1125 + }, + { + "epoch": 0.45, + "learning_rate": 0.00018944960218750484, + "loss": 0.8485, + "step": 1150 + }, + { + "epoch": 0.46, + "learning_rate": 0.00018899056643901404, + "loss": 0.8534, + "step": 1175 + }, + { + "epoch": 0.47, + "learning_rate": 0.00018852233682450893, + "loss": 0.8531, + "step": 1200 + }, + { + "epoch": 0.47, + "learning_rate": 0.00018804496171810948, + "loss": 0.8509, + "step": 1225 + }, + { + "epoch": 0.48, + "learning_rate": 0.00018755849043878222, + "loss": 0.8445, + "step": 1250 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001870629732452449, + "loss": 0.8548, + "step": 1275 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018655846133077417, + "loss": 0.8441, + "step": 1300 + }, + { + "epoch": 0.51, + "learning_rate": 0.00018604500681791656, + "loss": 0.8533, + "step": 1325 + }, + { + "epoch": 0.52, + "learning_rate": 
0.00018552266275310373, + "loss": 0.8505, + "step": 1350 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001849914831011719, + "loss": 0.8544, + "step": 1375 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018445152273978668, + "loss": 0.845, + "step": 1400 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018390283745377354, + "loss": 0.8376, + "step": 1425 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001833454839293545, + "loss": 0.847, + "step": 1450 + }, + { + "epoch": 0.57, + "learning_rate": 0.00018277951974829163, + "loss": 0.8473, + "step": 1475 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001822050033819382, + "loss": 0.8438, + "step": 1500 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018162199418519785, + "loss": 0.8418, + "step": 1525 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018103055239039243, + "loss": 0.842, + "step": 1550 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001804307391010393, + "loss": 0.8435, + "step": 1575 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017982261628553842, + "loss": 0.8349, + "step": 1600 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001792062467707703, + "loss": 0.8483, + "step": 1625 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001785816942356052, + "loss": 0.8387, + "step": 1650 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017794902320432429, + "loss": 0.843, + "step": 1675 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017730829903995333, + "loss": 0.8424, + "step": 1700 + }, + { + "epoch": 0.67, + "learning_rate": 0.00017665958793751006, + "loss": 0.8418, + "step": 1725 + }, + { + "epoch": 0.68, + "learning_rate": 0.00017600295691716522, + "loss": 0.8384, + "step": 1750 + }, + { + "epoch": 0.69, + "learning_rate": 0.00017533847381731856, + "loss": 0.8445, + "step": 1775 + }, + { + "epoch": 0.7, + "learning_rate": 0.00017466620728759033, + "loss": 0.8446, + "step": 1800 + }, + { + "epoch": 0.71, + "learning_rate": 0.00017398622678172878, + "loss": 0.838, + "step": 1825 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001732986025504348, + "loss": 0.8415, + "step": 1850 + }, + { + "epoch": 0.73, + "learning_rate": 0.000172603405634104, + "loss": 0.8357, + "step": 1875 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017190070785548755, + "loss": 0.8311, + "step": 1900 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001711905818122717, + "loss": 0.8333, + "step": 1925 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001704731008695777, + "loss": 0.8387, + "step": 1950 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001697483391523821, + "loss": 0.8442, + "step": 1975 + }, + { + "epoch": 0.78, + "learning_rate": 0.00016901637153785885, + "loss": 0.8399, + "step": 2000 + }, + { + "epoch": 0.78, + "eval_loss": 0.8339959383010864, + "eval_runtime": 58.5829, + "eval_samples_per_second": 12.461, + "eval_steps_per_second": 0.905, + "step": 2000 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001682772736476434, + "loss": 0.8334, + "step": 2025 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001675311218400201, + "loss": 0.835, + "step": 2050 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016677799320203332, + "loss": 0.8368, + "step": 2075 + }, + { + "epoch": 0.81, + "learning_rate": 0.00016601796554152344, + "loss": 0.8278, + "step": 2100 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016525111737908827, + "loss": 0.8334, + "step": 2125 + }, + { + "epoch": 0.83, + "learning_rate": 0.00016447752793997096, + "loss": 0.8416, + "step": 2150 + }, + { + "epoch": 0.84, + "learning_rate": 0.00016369727714587483, + "loss": 
0.8297, + "step": 2175 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001629104456067066, + "loss": 0.8327, + "step": 2200 + }, + { + "epoch": 0.86, + "learning_rate": 0.00016211711461224825, + "loss": 0.8324, + "step": 2225 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001613173661237589, + "loss": 0.8313, + "step": 2250 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001605112827655069, + "loss": 0.8292, + "step": 2275 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001596989478162339, + "loss": 0.8334, + "step": 2300 + }, + { + "epoch": 0.9, + "learning_rate": 0.00015888044520055106, + "loss": 0.8352, + "step": 2325 + }, + { + "epoch": 0.91, + "learning_rate": 0.00015805585948026852, + "loss": 0.823, + "step": 2350 + }, + { + "epoch": 0.92, + "learning_rate": 0.000157225275845659, + "loss": 0.8293, + "step": 2375 + }, + { + "epoch": 0.93, + "learning_rate": 0.00015638878010665672, + "loss": 0.8289, + "step": 2400 + }, + { + "epoch": 0.94, + "learning_rate": 0.00015554645868399205, + "loss": 0.832, + "step": 2425 + }, + { + "epoch": 0.95, + "learning_rate": 0.00015469839860026308, + "loss": 0.8294, + "step": 2450 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001538446874709452, + "loss": 0.8281, + "step": 2475 + }, + { + "epoch": 0.97, + "learning_rate": 0.00015298541349533925, + "loss": 0.8314, + "step": 2500 + }, + { + "epoch": 0.98, + "learning_rate": 0.00015212066544745926, + "loss": 0.831, + "step": 2525 + }, + { + "epoch": 0.99, + "learning_rate": 0.00015125053266686124, + "loss": 0.8319, + "step": 2550 + }, + { + "epoch": 1.0, + "learning_rate": 0.00015037510504941303, + "loss": 0.8259, + "step": 2575 + }, + { + "epoch": 1.01, + "learning_rate": 0.00014949447303800695, + "loss": 0.8133, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 0.00014860872761321593, + "loss": 0.8139, + "step": 2625 + }, + { + "epoch": 1.03, + "learning_rate": 0.00014771796028389405, + "loss": 0.804, + "step": 2650 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001468222630777225, + "loss": 0.8011, + "step": 2675 + }, + { + "epoch": 1.05, + "learning_rate": 0.00014592172853170193, + "loss": 0.8037, + "step": 2700 + }, + { + "epoch": 1.06, + "learning_rate": 0.00014501644968259212, + "loss": 0.8063, + "step": 2725 + }, + { + "epoch": 1.07, + "learning_rate": 0.00014410652005730025, + "loss": 0.8155, + "step": 2750 + }, + { + "epoch": 1.08, + "learning_rate": 0.00014319203366321826, + "loss": 0.8066, + "step": 2775 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001422730849785107, + "loss": 0.8091, + "step": 2800 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001413497689423539, + "loss": 0.8067, + "step": 2825 + }, + { + "epoch": 1.11, + "learning_rate": 0.00014042218094512755, + "loss": 0.8046, + "step": 2850 + }, + { + "epoch": 1.11, + "learning_rate": 0.00013949041681855985, + "loss": 0.8053, + "step": 2875 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001385545728258264, + "loss": 0.8075, + "step": 2900 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001376147456516055, + "loss": 0.8015, + "step": 2925 + }, + { + "epoch": 1.14, + "learning_rate": 0.00013667103239208903, + "loss": 0.8016, + "step": 2950 + }, + { + "epoch": 1.15, + "learning_rate": 0.00013572353054495126, + "loss": 0.8029, + "step": 2975 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001347723379992762, + "loss": 0.8017, + "step": 3000 + }, + { + "epoch": 1.16, + "eval_loss": 0.8229297995567322, + "eval_runtime": 59.3398, + "eval_samples_per_second": 12.302, + "eval_steps_per_second": 0.893, + "step": 3000 + }, + { + 
"epoch": 1.17, + "learning_rate": 0.0001338175530254443, + "loss": 0.8049, + "step": 3025 + }, + { + "epoch": 1.18, + "learning_rate": 0.00013285927426497985, + "loss": 0.8027, + "step": 3050 + }, + { + "epoch": 1.19, + "learning_rate": 0.00013189760072036008, + "loss": 0.8028, + "step": 3075 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001309326317447869, + "loss": 0.8021, + "step": 3100 + }, + { + "epoch": 1.21, + "learning_rate": 0.00012996446703192257, + "loss": 0.8033, + "step": 3125 + }, + { + "epoch": 1.22, + "learning_rate": 0.00012899320660558986, + "loss": 0.8016, + "step": 3150 + }, + { + "epoch": 1.23, + "learning_rate": 0.00012801895080943846, + "loss": 0.7995, + "step": 3175 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001270418002965782, + "loss": 0.799, + "step": 3200 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001260618560191802, + "loss": 0.8002, + "step": 3225 + }, + { + "epoch": 1.26, + "learning_rate": 0.00012507921921804717, + "loss": 0.8068, + "step": 3250 + }, + { + "epoch": 1.27, + "learning_rate": 0.00012409399141215423, + "loss": 0.8041, + "step": 3275 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001231062743881603, + "loss": 0.7999, + "step": 3300 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001221161701898926, + "loss": 0.7995, + "step": 3325 + }, + { + "epoch": 1.3, + "learning_rate": 0.00012112378110780391, + "loss": 0.7959, + "step": 3350 + }, + { + "epoch": 1.31, + "learning_rate": 0.00012012920966840486, + "loss": 0.7999, + "step": 3375 + }, + { + "epoch": 1.32, + "learning_rate": 0.00011913255862367151, + "loss": 0.8016, + "step": 3400 + }, + { + "epoch": 1.33, + "learning_rate": 0.00011813393094042993, + "loss": 0.7944, + "step": 3425 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001171334297897181, + "loss": 0.8026, + "step": 3450 + }, + { + "epoch": 1.35, + "learning_rate": 0.00011613115853612734, + "loss": 0.8004, + "step": 3475 + }, + { + "epoch": 1.36, + "learning_rate": 0.00011512722072712321, + "loss": 0.7992, + "step": 3500 + }, + { + "epoch": 1.37, + "learning_rate": 0.00011412172008234785, + "loss": 0.8004, + "step": 3525 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001131147604829043, + "loss": 0.8009, + "step": 3550 + }, + { + "epoch": 1.39, + "learning_rate": 0.00011210644596062439, + "loss": 0.7993, + "step": 3575 + }, + { + "epoch": 1.4, + "learning_rate": 0.00011109688068732081, + "loss": 0.7965, + "step": 3600 + }, + { + "epoch": 1.41, + "learning_rate": 0.00011008616896402482, + "loss": 0.7991, + "step": 3625 + }, + { + "epoch": 1.42, + "learning_rate": 0.00010907441521021072, + "loss": 0.8026, + "step": 3650 + }, + { + "epoch": 1.42, + "learning_rate": 0.00010806172395300789, + "loss": 0.7941, + "step": 3675 + }, + { + "epoch": 1.43, + "learning_rate": 0.00010704819981640186, + "loss": 0.7989, + "step": 3700 + }, + { + "epoch": 1.44, + "learning_rate": 0.00010603394751042522, + "loss": 0.7981, + "step": 3725 + }, + { + "epoch": 1.45, + "learning_rate": 0.00010501907182033979, + "loss": 0.7985, + "step": 3750 + }, + { + "epoch": 1.46, + "learning_rate": 0.000104003677595811, + "loss": 0.7921, + "step": 3775 + }, + { + "epoch": 1.47, + "learning_rate": 0.00010298786974007555, + "loss": 0.8012, + "step": 3800 + }, + { + "epoch": 1.48, + "learning_rate": 0.00010197175319910343, + "loss": 0.7906, + "step": 3825 + }, + { + "epoch": 1.49, + "learning_rate": 0.00010095543295075593, + "loss": 0.7928, + "step": 3850 + }, + { + "epoch": 1.5, + "learning_rate": 9.993901399393979e-05, + "loss": 0.8018, + "step": 3875 + }, + { + 
"epoch": 1.51, + "learning_rate": 9.892260133775968e-05, + "loss": 0.7991, + "step": 3900 + }, + { + "epoch": 1.52, + "learning_rate": 9.79062999906693e-05, + "loss": 0.795, + "step": 3925 + }, + { + "epoch": 1.53, + "learning_rate": 9.68902149496227e-05, + "loss": 0.7977, + "step": 3950 + }, + { + "epoch": 1.54, + "learning_rate": 9.587445118922674e-05, + "loss": 0.8013, + "step": 3975 + }, + { + "epoch": 1.55, + "learning_rate": 9.485911365089589e-05, + "loss": 0.7978, + "step": 4000 + }, + { + "epoch": 1.55, + "eval_loss": 0.8142631649971008, + "eval_runtime": 59.4108, + "eval_samples_per_second": 12.287, + "eval_steps_per_second": 0.892, + "step": 4000 + }, + { + "epoch": 1.56, + "learning_rate": 9.384430723201036e-05, + "loss": 0.7912, + "step": 4025 + }, + { + "epoch": 1.57, + "learning_rate": 9.283013677507902e-05, + "loss": 0.7919, + "step": 4050 + }, + { + "epoch": 1.58, + "learning_rate": 9.181670705690761e-05, + "loss": 0.7919, + "step": 4075 + }, + { + "epoch": 1.59, + "learning_rate": 9.080412277777413e-05, + "loss": 0.8018, + "step": 4100 + }, + { + "epoch": 1.6, + "learning_rate": 8.979248855061188e-05, + "loss": 0.7811, + "step": 4125 + }, + { + "epoch": 1.61, + "learning_rate": 8.878190889020159e-05, + "loss": 0.7919, + "step": 4150 + }, + { + "epoch": 1.62, + "learning_rate": 8.777248820237376e-05, + "loss": 0.7994, + "step": 4175 + }, + { + "epoch": 1.63, + "learning_rate": 8.676433077322215e-05, + "loss": 0.7956, + "step": 4200 + }, + { + "epoch": 1.64, + "learning_rate": 8.575754075832973e-05, + "loss": 0.7968, + "step": 4225 + }, + { + "epoch": 1.65, + "learning_rate": 8.475222217200801e-05, + "loss": 0.7905, + "step": 4250 + }, + { + "epoch": 1.66, + "learning_rate": 8.374847887655112e-05, + "loss": 0.7889, + "step": 4275 + }, + { + "epoch": 1.67, + "learning_rate": 8.274641457150543e-05, + "loss": 0.7988, + "step": 4300 + }, + { + "epoch": 1.68, + "learning_rate": 8.174613278295608e-05, + "loss": 0.7947, + "step": 4325 + }, + { + "epoch": 1.69, + "learning_rate": 8.074773685283137e-05, + "loss": 0.7929, + "step": 4350 + }, + { + "epoch": 1.7, + "learning_rate": 7.97513299282264e-05, + "loss": 0.7949, + "step": 4375 + }, + { + "epoch": 1.71, + "learning_rate": 7.875701495074638e-05, + "loss": 0.7925, + "step": 4400 + }, + { + "epoch": 1.72, + "learning_rate": 7.776489464587158e-05, + "loss": 0.7917, + "step": 4425 + }, + { + "epoch": 1.73, + "learning_rate": 7.677507151234448e-05, + "loss": 0.7905, + "step": 4450 + }, + { + "epoch": 1.74, + "learning_rate": 7.578764781158034e-05, + "loss": 0.7912, + "step": 4475 + }, + { + "epoch": 1.74, + "learning_rate": 7.480272555710227e-05, + "loss": 0.8006, + "step": 4500 + }, + { + "epoch": 1.75, + "learning_rate": 7.382040650400185e-05, + "loss": 0.7937, + "step": 4525 + }, + { + "epoch": 1.76, + "learning_rate": 7.28407921384267e-05, + "loss": 0.794, + "step": 4550 + }, + { + "epoch": 1.77, + "learning_rate": 7.186398366709545e-05, + "loss": 0.7931, + "step": 4575 + }, + { + "epoch": 1.78, + "learning_rate": 7.089008200684197e-05, + "loss": 0.7982, + "step": 4600 + }, + { + "epoch": 1.79, + "learning_rate": 6.991918777418928e-05, + "loss": 0.7916, + "step": 4625 + }, + { + "epoch": 1.8, + "learning_rate": 6.895140127495455e-05, + "loss": 0.7919, + "step": 4650 + }, + { + "epoch": 1.81, + "learning_rate": 6.798682249388631e-05, + "loss": 0.7863, + "step": 4675 + }, + { + "epoch": 1.82, + "learning_rate": 6.702555108433461e-05, + "loss": 0.789, + "step": 4700 + }, + { + "epoch": 1.83, + "learning_rate": 6.606768635795574e-05, 
+ "loss": 0.7902, + "step": 4725 + }, + { + "epoch": 1.84, + "learning_rate": 6.511332727445191e-05, + "loss": 0.7924, + "step": 4750 + }, + { + "epoch": 1.85, + "learning_rate": 6.416257243134747e-05, + "loss": 0.7957, + "step": 4775 + }, + { + "epoch": 1.86, + "learning_rate": 6.321552005380256e-05, + "loss": 0.7916, + "step": 4800 + }, + { + "epoch": 1.87, + "learning_rate": 6.22722679844652e-05, + "loss": 0.7867, + "step": 4825 + }, + { + "epoch": 1.88, + "learning_rate": 6.133291367336284e-05, + "loss": 0.7944, + "step": 4850 + }, + { + "epoch": 1.89, + "learning_rate": 6.039755416783457e-05, + "loss": 0.7982, + "step": 4875 + }, + { + "epoch": 1.9, + "learning_rate": 5.946628610250484e-05, + "loss": 0.7918, + "step": 4900 + }, + { + "epoch": 1.91, + "learning_rate": 5.853920568929996e-05, + "loss": 0.7921, + "step": 4925 + }, + { + "epoch": 1.92, + "learning_rate": 5.761640870750799e-05, + "loss": 0.7878, + "step": 4950 + }, + { + "epoch": 1.93, + "learning_rate": 5.669799049388375e-05, + "loss": 0.7901, + "step": 4975 + }, + { + "epoch": 1.94, + "learning_rate": 5.578404593279911e-05, + "loss": 0.7858, + "step": 5000 + }, + { + "epoch": 1.94, + "eval_loss": 0.807844877243042, + "eval_runtime": 59.586, + "eval_samples_per_second": 12.251, + "eval_steps_per_second": 0.889, + "step": 5000 + }, + { + "epoch": 1.95, + "learning_rate": 5.487466944644033e-05, + "loss": 0.7902, + "step": 5025 + }, + { + "epoch": 1.96, + "learning_rate": 5.3969954985052996e-05, + "loss": 0.7979, + "step": 5050 + }, + { + "epoch": 1.97, + "learning_rate": 5.306999601723579e-05, + "loss": 0.7931, + "step": 5075 + }, + { + "epoch": 1.98, + "learning_rate": 5.21748855202839e-05, + "loss": 0.7868, + "step": 5100 + }, + { + "epoch": 1.99, + "learning_rate": 5.128471597058342e-05, + "loss": 0.7993, + "step": 5125 + }, + { + "epoch": 2.0, + "learning_rate": 5.03995793340572e-05, + "loss": 0.7892, + "step": 5150 + }, + { + "epoch": 2.01, + "learning_rate": 4.9519567056663694e-05, + "loss": 0.7788, + "step": 5175 + }, + { + "epoch": 2.02, + "learning_rate": 4.864477005494938e-05, + "loss": 0.7654, + "step": 5200 + }, + { + "epoch": 2.03, + "learning_rate": 4.777527870665592e-05, + "loss": 0.7468, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 4.691118284138296e-05, + "loss": 0.7359, + "step": 5250 + }, + { + "epoch": 2.05, + "learning_rate": 4.605257173130763e-05, + "loss": 0.7422, + "step": 5275 + }, + { + "epoch": 2.06, + "learning_rate": 4.519953408196152e-05, + "loss": 0.7424, + "step": 5300 + }, + { + "epoch": 2.06, + "learning_rate": 4.435215802306635e-05, + "loss": 0.7521, + "step": 5325 + }, + { + "epoch": 2.07, + "learning_rate": 4.351053109942894e-05, + "loss": 0.7477, + "step": 5350 + }, + { + "epoch": 2.08, + "learning_rate": 4.2674740261896776e-05, + "loss": 0.7456, + "step": 5375 + }, + { + "epoch": 2.09, + "learning_rate": 4.1844871858374844e-05, + "loss": 0.766, + "step": 5400 + }, + { + "epoch": 2.1, + "learning_rate": 4.1021011624904814e-05, + "loss": 0.7664, + "step": 5425 + }, + { + "epoch": 2.11, + "learning_rate": 4.0203244676807353e-05, + "loss": 0.7703, + "step": 5450 + }, + { + "epoch": 2.12, + "learning_rate": 3.939165549988873e-05, + "loss": 0.7674, + "step": 5475 + }, + { + "epoch": 2.13, + "learning_rate": 3.858632794171222e-05, + "loss": 0.7722, + "step": 5500 + }, + { + "epoch": 2.14, + "learning_rate": 3.778734520293562e-05, + "loss": 0.7716, + "step": 5525 + }, + { + "epoch": 2.15, + "learning_rate": 3.699478982871561e-05, + "loss": 0.7795, + "step": 5550 + }, + { + 
"epoch": 2.16, + "learning_rate": 3.62087437001797e-05, + "loss": 0.7728, + "step": 5575 + }, + { + "epoch": 2.17, + "learning_rate": 3.5429288025966944e-05, + "loss": 0.7709, + "step": 5600 + }, + { + "epoch": 2.18, + "learning_rate": 3.4656503333837956e-05, + "loss": 0.7682, + "step": 5625 + }, + { + "epoch": 2.19, + "learning_rate": 3.389046946235542e-05, + "loss": 0.7734, + "step": 5650 + }, + { + "epoch": 2.2, + "learning_rate": 3.313126555263576e-05, + "loss": 0.7716, + "step": 5675 + }, + { + "epoch": 2.21, + "learning_rate": 3.237897004017276e-05, + "loss": 0.7716, + "step": 5700 + }, + { + "epoch": 2.22, + "learning_rate": 3.163366064673427e-05, + "loss": 0.7721, + "step": 5725 + }, + { + "epoch": 2.23, + "learning_rate": 3.089541437233252e-05, + "loss": 0.7658, + "step": 5750 + }, + { + "epoch": 2.24, + "learning_rate": 3.0164307487268996e-05, + "loss": 0.7716, + "step": 5775 + }, + { + "epoch": 2.25, + "learning_rate": 2.944041552425475e-05, + "loss": 0.7687, + "step": 5800 + }, + { + "epoch": 2.26, + "learning_rate": 2.8723813270606982e-05, + "loss": 0.7698, + "step": 5825 + }, + { + "epoch": 2.27, + "learning_rate": 2.8014574760522416e-05, + "loss": 0.7641, + "step": 5850 + }, + { + "epoch": 2.28, + "learning_rate": 2.731277326742876e-05, + "loss": 0.7746, + "step": 5875 + }, + { + "epoch": 2.29, + "learning_rate": 2.6618481296414522e-05, + "loss": 0.7722, + "step": 5900 + } + ], + "max_steps": 7737, + "num_train_epochs": 3, + "total_flos": 2.5410887248997515e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5900/training_args.bin b/checkpoint-5900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c456116f688fe6cb6deecb1e0a1cf8d153d349fb --- /dev/null +++ b/checkpoint-5900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df5d13b3f1b9942f80afde79010ef0947feee3df761d245fef1699bc397648b2 +size 4027