diff --git a/.gitattributes b/.gitattributes index 77526c326f89be0d2ec51131eeaaac0d9486f5b5..c6b0620300147b21f556318fc908e68b7908df30 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3440,3 +3440,12 @@ Meta-Llama-3-8B-Instruct_int4_arc_challenge-routerbench-0shot_lr-0.0002_e-8_seq- Meta-Llama-3-8B-Instruct_int4_arc_challenge-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.3-num-308-sd-3/checkpoint-62/tokenizer.json filter=lfs diff=lfs merge=lfs -text Meta-Llama-3-8B-Instruct_int4_arc_challenge-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.3-num-308-sd-3/checkpoint-93/tokenizer.json filter=lfs diff=lfs merge=lfs -text Meta-Llama-3-8B-Instruct_int4_arc_challenge-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.3-num-308-sd-3/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd07b136186b6cab7a95c56cee29b819ea8abc60 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd12938c2921e88ddbf8cf072ef9cf5ff10438c3aa24d2d488c7e0194b13eaf2 +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef1dc0229bb3439679f0215cc95aa8e0c746af91 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fd526591ad5c1389b6b81e053957f16f2106cc8774bcbc55e47def710aaf89c +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..22abdd9465b5c1c5a3f1097ab09980ee5b2ede86 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e496b861e771bd57baa63406da4d09dc13f6aa0d8580dec56b0b9532a8635423 +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4be3ad8bb6ee5532355718d8dfe9041b33b12d67 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef949a062f3361962a45bd4a0afbfb6a1c1fcf81966c5d8fc05fbbdf99bd36fe +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c88ee9ebcb61ebf2d59ba261a2518715c5304abb --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7100d0e256df65cf97f5479cde3d4ed8f39ca565b35da687997e23532600ce4 +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b7a4e57640d79e1174e41d006cd3ac78a89068c2 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/trainer_state.json @@ -0,0 +1,891 @@ +{ + "best_metric": 1.83539617061615, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592", + "epoch": 4.0, + "eval_steps": 10, + "global_step": 1184, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.033783783783783786, + "grad_norm": 0.5820087194442749, + "learning_rate": 0.0002, + "loss": 2.6467, + "step": 10 + }, + { + "epoch": 0.06756756756756757, + "grad_norm": 0.4625075161457062, + "learning_rate": 0.0002, + "loss": 2.2808, + "step": 20 + }, + { + "epoch": 0.10135135135135136, + "grad_norm": 0.46946242451667786, + "learning_rate": 0.0002, + "loss": 2.0116, + "step": 30 + }, + { + "epoch": 0.13513513513513514, + "grad_norm": 0.5181305408477783, + "learning_rate": 0.0002, + "loss": 1.9089, + "step": 40 + }, + { + "epoch": 0.16891891891891891, + "grad_norm": 0.7439630627632141, + "learning_rate": 0.0002, + "loss": 1.9232, + "step": 50 + }, + { + "epoch": 0.20270270270270271, + "grad_norm": 0.5144319534301758, + "learning_rate": 0.0002, + "loss": 1.9646, + "step": 60 + }, + { + "epoch": 0.23648648648648649, + "grad_norm": 0.46696192026138306, + "learning_rate": 0.0002, + "loss": 1.921, + "step": 70 + }, + { + "epoch": 0.2702702702702703, + "grad_norm": 0.4330582022666931, + "learning_rate": 0.0002, + "loss": 1.8794, + "step": 80 + }, + { + "epoch": 0.30405405405405406, + "grad_norm": 0.502414882183075, + "learning_rate": 0.0002, + "loss": 1.8897, + "step": 90 + }, + { + "epoch": 0.33783783783783783, + "grad_norm": 0.4174366295337677, + "learning_rate": 0.0002, + "loss": 1.8166, + "step": 100 + }, + { + "epoch": 0.3716216216216216, + "grad_norm": 0.4296933710575104, + "learning_rate": 0.0002, + "loss": 1.8264, + "step": 110 + }, + { + "epoch": 0.40540540540540543, + "grad_norm": 0.4299834668636322, + "learning_rate": 0.0002, + "loss": 1.9223, + "step": 120 + }, + { + "epoch": 0.4391891891891892, + "grad_norm": 0.5583795309066772, + "learning_rate": 0.0002, + "loss": 1.8708, + "step": 130 + }, + { + "epoch": 0.47297297297297297, + "grad_norm": 0.5205192565917969, + "learning_rate": 0.0002, + "loss": 1.7786, + "step": 140 + }, + { + "epoch": 0.5067567567567568, + "grad_norm": 0.4683739244937897, + "learning_rate": 0.0002, + "loss": 1.8293, + "step": 150 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 0.497546523809433, + "learning_rate": 0.0002, + "loss": 1.9102, + "step": 160 + }, + { + "epoch": 0.5743243243243243, + "grad_norm": 0.40443721413612366, + "learning_rate": 0.0002, + "loss": 1.8077, + "step": 170 + }, + { + "epoch": 0.6081081081081081, + "grad_norm": 0.39056605100631714, + "learning_rate": 0.0002, + "loss": 1.8446, + "step": 180 + }, + { + "epoch": 0.6418918918918919, + "grad_norm": 0.42397141456604004, + "learning_rate": 0.0002, + "loss": 1.8747, + "step": 190 + }, + { + "epoch": 0.6756756756756757, + "grad_norm": 0.4679499566555023, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 200 + }, + { + "epoch": 0.7094594594594594, + "grad_norm": 0.39300158619880676, + "learning_rate": 0.0002, + "loss": 1.8401, + "step": 210 + }, + { + "epoch": 0.7432432432432432, + "grad_norm": 0.4001515805721283, + "learning_rate": 0.0002, + "loss": 1.8345, + "step": 220 + }, + { + "epoch": 0.777027027027027, + "grad_norm": 0.4094320833683014, + "learning_rate": 0.0002, + "loss": 1.7571, + "step": 230 + }, + { + "epoch": 0.8108108108108109, + "grad_norm": 0.37315094470977783, + "learning_rate": 0.0002, + "loss": 1.8972, + "step": 240 + }, + { + "epoch": 0.8445945945945946, + "grad_norm": 0.4331067204475403, + "learning_rate": 0.0002, + "loss": 1.8337, + "step": 250 + }, + { + "epoch": 0.8783783783783784, + "grad_norm": 0.39758574962615967, + "learning_rate": 0.0002, + "loss": 1.8555, + "step": 260 + }, + { + "epoch": 0.9121621621621622, + "grad_norm": 0.38240519165992737, + "learning_rate": 0.0002, + "loss": 1.8624, + "step": 270 + }, + { + "epoch": 0.9459459459459459, + "grad_norm": 0.40907856822013855, + "learning_rate": 0.0002, + "loss": 1.7531, + "step": 280 + }, + { + "epoch": 0.9797297297297297, + "grad_norm": 0.34108003973960876, + "learning_rate": 0.0002, + "loss": 1.8435, + "step": 290 + }, + { + "epoch": 1.0, + "eval_loss": 1.8428829908370972, + "eval_runtime": 62.3963, + "eval_samples_per_second": 8.254, + "eval_steps_per_second": 1.042, + "step": 296 + }, + { + "epoch": 1.0135135135135136, + "grad_norm": 0.3993101716041565, + "learning_rate": 0.0002, + "loss": 1.7254, + "step": 300 + }, + { + "epoch": 1.0472972972972974, + "grad_norm": 0.45567989349365234, + "learning_rate": 0.0002, + "loss": 1.7985, + "step": 310 + }, + { + "epoch": 1.0810810810810811, + "grad_norm": 0.3767794072628021, + "learning_rate": 0.0002, + "loss": 1.757, + "step": 320 + }, + { + "epoch": 1.114864864864865, + "grad_norm": 0.5181908011436462, + "learning_rate": 0.0002, + "loss": 1.7917, + "step": 330 + }, + { + "epoch": 1.1486486486486487, + "grad_norm": 0.4213193356990814, + "learning_rate": 0.0002, + "loss": 1.7723, + "step": 340 + }, + { + "epoch": 1.1824324324324325, + "grad_norm": 0.45519495010375977, + "learning_rate": 0.0002, + "loss": 1.8203, + "step": 350 + }, + { + "epoch": 1.2162162162162162, + "grad_norm": 0.35332638025283813, + "learning_rate": 0.0002, + "loss": 1.6946, + "step": 360 + }, + { + "epoch": 1.25, + "grad_norm": 0.3675481677055359, + "learning_rate": 0.0002, + "loss": 1.7541, + "step": 370 + }, + { + "epoch": 1.2837837837837838, + "grad_norm": 0.4569270610809326, + "learning_rate": 0.0002, + "loss": 1.7458, + "step": 380 + }, + { + "epoch": 1.3175675675675675, + "grad_norm": 0.37950295209884644, + "learning_rate": 0.0002, + "loss": 1.7988, + "step": 390 + }, + { + "epoch": 1.3513513513513513, + "grad_norm": 0.5744572877883911, + "learning_rate": 0.0002, + "loss": 1.7032, + "step": 400 + }, + { + "epoch": 1.385135135135135, + "grad_norm": 0.44380778074264526, + "learning_rate": 0.0002, + "loss": 1.7437, + "step": 410 + }, + { + "epoch": 1.4189189189189189, + "grad_norm": 0.43328171968460083, + "learning_rate": 0.0002, + "loss": 1.7454, + "step": 420 + }, + { + "epoch": 1.4527027027027026, + "grad_norm": 0.41290056705474854, + "learning_rate": 0.0002, + "loss": 1.7636, + "step": 430 + }, + { + "epoch": 1.4864864864864864, + "grad_norm": 0.3771473169326782, + "learning_rate": 0.0002, + "loss": 1.7332, + "step": 440 + }, + { + "epoch": 1.5202702702702702, + "grad_norm": 0.42537811398506165, + "learning_rate": 0.0002, + "loss": 1.7618, + "step": 450 + }, + { + "epoch": 1.554054054054054, + "grad_norm": 0.39705610275268555, + "learning_rate": 0.0002, + "loss": 1.8523, + "step": 460 + }, + { + "epoch": 1.5878378378378377, + "grad_norm": 0.4178248643875122, + "learning_rate": 0.0002, + "loss": 1.7673, + "step": 470 + }, + { + "epoch": 1.6216216216216215, + "grad_norm": 0.39107105135917664, + "learning_rate": 0.0002, + "loss": 1.742, + "step": 480 + }, + { + "epoch": 1.6554054054054053, + "grad_norm": 0.38505619764328003, + "learning_rate": 0.0002, + "loss": 1.6984, + "step": 490 + }, + { + "epoch": 1.689189189189189, + "grad_norm": 0.43590813875198364, + "learning_rate": 0.0002, + "loss": 1.7382, + "step": 500 + }, + { + "epoch": 1.722972972972973, + "grad_norm": 0.42785948514938354, + "learning_rate": 0.0002, + "loss": 1.7139, + "step": 510 + }, + { + "epoch": 1.7567567567567568, + "grad_norm": 0.3829004168510437, + "learning_rate": 0.0002, + "loss": 1.7551, + "step": 520 + }, + { + "epoch": 1.7905405405405406, + "grad_norm": 0.35287904739379883, + "learning_rate": 0.0002, + "loss": 1.7744, + "step": 530 + }, + { + "epoch": 1.8243243243243243, + "grad_norm": 0.38657888770103455, + "learning_rate": 0.0002, + "loss": 1.7714, + "step": 540 + }, + { + "epoch": 1.8581081081081081, + "grad_norm": 0.41452157497406006, + "learning_rate": 0.0002, + "loss": 1.7535, + "step": 550 + }, + { + "epoch": 1.8918918918918919, + "grad_norm": 0.3898279070854187, + "learning_rate": 0.0002, + "loss": 1.7627, + "step": 560 + }, + { + "epoch": 1.9256756756756757, + "grad_norm": 0.4147624373435974, + "learning_rate": 0.0002, + "loss": 1.7494, + "step": 570 + }, + { + "epoch": 1.9594594594594594, + "grad_norm": 0.4374851584434509, + "learning_rate": 0.0002, + "loss": 1.7693, + "step": 580 + }, + { + "epoch": 1.9932432432432432, + "grad_norm": 0.48530328273773193, + "learning_rate": 0.0002, + "loss": 1.7796, + "step": 590 + }, + { + "epoch": 2.0, + "eval_loss": 1.83539617061615, + "eval_runtime": 70.9865, + "eval_samples_per_second": 7.255, + "eval_steps_per_second": 0.916, + "step": 592 + }, + { + "epoch": 2.027027027027027, + "grad_norm": 0.40344223380088806, + "learning_rate": 0.0002, + "loss": 1.7294, + "step": 600 + }, + { + "epoch": 2.060810810810811, + "grad_norm": 0.48268747329711914, + "learning_rate": 0.0002, + "loss": 1.6693, + "step": 610 + }, + { + "epoch": 2.0945945945945947, + "grad_norm": 0.4675706923007965, + "learning_rate": 0.0002, + "loss": 1.6315, + "step": 620 + }, + { + "epoch": 2.1283783783783785, + "grad_norm": 0.47494322061538696, + "learning_rate": 0.0002, + "loss": 1.6627, + "step": 630 + }, + { + "epoch": 2.1621621621621623, + "grad_norm": 0.4555308520793915, + "learning_rate": 0.0002, + "loss": 1.5668, + "step": 640 + }, + { + "epoch": 2.195945945945946, + "grad_norm": 0.43085595965385437, + "learning_rate": 0.0002, + "loss": 1.6537, + "step": 650 + }, + { + "epoch": 2.22972972972973, + "grad_norm": 0.4364128112792969, + "learning_rate": 0.0002, + "loss": 1.6316, + "step": 660 + }, + { + "epoch": 2.2635135135135136, + "grad_norm": 0.4711395800113678, + "learning_rate": 0.0002, + "loss": 1.669, + "step": 670 + }, + { + "epoch": 2.2972972972972974, + "grad_norm": 0.5109705328941345, + "learning_rate": 0.0002, + "loss": 1.5758, + "step": 680 + }, + { + "epoch": 2.331081081081081, + "grad_norm": 0.5185648798942566, + "learning_rate": 0.0002, + "loss": 1.5912, + "step": 690 + }, + { + "epoch": 2.364864864864865, + "grad_norm": 0.49192842841148376, + "learning_rate": 0.0002, + "loss": 1.6605, + "step": 700 + }, + { + "epoch": 2.3986486486486487, + "grad_norm": 0.5619909763336182, + "learning_rate": 0.0002, + "loss": 1.6688, + "step": 710 + }, + { + "epoch": 2.4324324324324325, + "grad_norm": 0.4932861328125, + "learning_rate": 0.0002, + "loss": 1.7836, + "step": 720 + }, + { + "epoch": 2.4662162162162162, + "grad_norm": 0.5211932063102722, + "learning_rate": 0.0002, + "loss": 1.6532, + "step": 730 + }, + { + "epoch": 2.5, + "grad_norm": 0.4138050377368927, + "learning_rate": 0.0002, + "loss": 1.667, + "step": 740 + }, + { + "epoch": 2.5337837837837838, + "grad_norm": 0.4644908010959625, + "learning_rate": 0.0002, + "loss": 1.658, + "step": 750 + }, + { + "epoch": 2.5675675675675675, + "grad_norm": 0.4513227641582489, + "learning_rate": 0.0002, + "loss": 1.6451, + "step": 760 + }, + { + "epoch": 2.6013513513513513, + "grad_norm": 0.4735109508037567, + "learning_rate": 0.0002, + "loss": 1.7071, + "step": 770 + }, + { + "epoch": 2.635135135135135, + "grad_norm": 0.5453559756278992, + "learning_rate": 0.0002, + "loss": 1.6659, + "step": 780 + }, + { + "epoch": 2.668918918918919, + "grad_norm": 0.5422565937042236, + "learning_rate": 0.0002, + "loss": 1.7211, + "step": 790 + }, + { + "epoch": 2.7027027027027026, + "grad_norm": 0.4288518726825714, + "learning_rate": 0.0002, + "loss": 1.6623, + "step": 800 + }, + { + "epoch": 2.7364864864864864, + "grad_norm": 0.4085204005241394, + "learning_rate": 0.0002, + "loss": 1.7197, + "step": 810 + }, + { + "epoch": 2.77027027027027, + "grad_norm": 0.49770182371139526, + "learning_rate": 0.0002, + "loss": 1.6376, + "step": 820 + }, + { + "epoch": 2.804054054054054, + "grad_norm": 0.5005106329917908, + "learning_rate": 0.0002, + "loss": 1.6332, + "step": 830 + }, + { + "epoch": 2.8378378378378377, + "grad_norm": 0.4763440489768982, + "learning_rate": 0.0002, + "loss": 1.6675, + "step": 840 + }, + { + "epoch": 2.8716216216216215, + "grad_norm": 0.44995108246803284, + "learning_rate": 0.0002, + "loss": 1.7149, + "step": 850 + }, + { + "epoch": 2.9054054054054053, + "grad_norm": 0.5299676656723022, + "learning_rate": 0.0002, + "loss": 1.6438, + "step": 860 + }, + { + "epoch": 2.939189189189189, + "grad_norm": 0.49627119302749634, + "learning_rate": 0.0002, + "loss": 1.6457, + "step": 870 + }, + { + "epoch": 2.972972972972973, + "grad_norm": 0.502545177936554, + "learning_rate": 0.0002, + "loss": 1.6517, + "step": 880 + }, + { + "epoch": 3.0, + "eval_loss": 1.8520468473434448, + "eval_runtime": 70.9917, + "eval_samples_per_second": 7.254, + "eval_steps_per_second": 0.916, + "step": 888 + }, + { + "epoch": 3.0067567567567566, + "grad_norm": 0.4756380319595337, + "learning_rate": 0.0002, + "loss": 1.6271, + "step": 890 + }, + { + "epoch": 3.0405405405405403, + "grad_norm": 0.5167421102523804, + "learning_rate": 0.0002, + "loss": 1.563, + "step": 900 + }, + { + "epoch": 3.074324324324324, + "grad_norm": 0.5524939298629761, + "learning_rate": 0.0002, + "loss": 1.48, + "step": 910 + }, + { + "epoch": 3.108108108108108, + "grad_norm": 0.7045221924781799, + "learning_rate": 0.0002, + "loss": 1.5297, + "step": 920 + }, + { + "epoch": 3.141891891891892, + "grad_norm": 0.5692355036735535, + "learning_rate": 0.0002, + "loss": 1.5548, + "step": 930 + }, + { + "epoch": 3.175675675675676, + "grad_norm": 0.5467017292976379, + "learning_rate": 0.0002, + "loss": 1.5297, + "step": 940 + }, + { + "epoch": 3.2094594594594597, + "grad_norm": 0.6004040241241455, + "learning_rate": 0.0002, + "loss": 1.5559, + "step": 950 + }, + { + "epoch": 3.2432432432432434, + "grad_norm": 0.5713295936584473, + "learning_rate": 0.0002, + "loss": 1.5255, + "step": 960 + }, + { + "epoch": 3.277027027027027, + "grad_norm": 0.6054869890213013, + "learning_rate": 0.0002, + "loss": 1.5412, + "step": 970 + }, + { + "epoch": 3.310810810810811, + "grad_norm": 0.6304576992988586, + "learning_rate": 0.0002, + "loss": 1.5167, + "step": 980 + }, + { + "epoch": 3.3445945945945947, + "grad_norm": 0.5347281694412231, + "learning_rate": 0.0002, + "loss": 1.52, + "step": 990 + }, + { + "epoch": 3.3783783783783785, + "grad_norm": 0.598211407661438, + "learning_rate": 0.0002, + "loss": 1.5707, + "step": 1000 + }, + { + "epoch": 3.4121621621621623, + "grad_norm": 0.637312650680542, + "learning_rate": 0.0002, + "loss": 1.5243, + "step": 1010 + }, + { + "epoch": 3.445945945945946, + "grad_norm": 0.6092430949211121, + "learning_rate": 0.0002, + "loss": 1.5356, + "step": 1020 + }, + { + "epoch": 3.47972972972973, + "grad_norm": 0.6421037912368774, + "learning_rate": 0.0002, + "loss": 1.5856, + "step": 1030 + }, + { + "epoch": 3.5135135135135136, + "grad_norm": 0.6712167263031006, + "learning_rate": 0.0002, + "loss": 1.5553, + "step": 1040 + }, + { + "epoch": 3.5472972972972974, + "grad_norm": 0.6466017365455627, + "learning_rate": 0.0002, + "loss": 1.4708, + "step": 1050 + }, + { + "epoch": 3.581081081081081, + "grad_norm": 1.418167233467102, + "learning_rate": 0.0002, + "loss": 1.5159, + "step": 1060 + }, + { + "epoch": 3.614864864864865, + "grad_norm": 0.6092377305030823, + "learning_rate": 0.0002, + "loss": 1.5264, + "step": 1070 + }, + { + "epoch": 3.6486486486486487, + "grad_norm": 0.5632478594779968, + "learning_rate": 0.0002, + "loss": 1.5227, + "step": 1080 + }, + { + "epoch": 3.6824324324324325, + "grad_norm": 0.6007736921310425, + "learning_rate": 0.0002, + "loss": 1.5492, + "step": 1090 + }, + { + "epoch": 3.7162162162162162, + "grad_norm": 0.6031264066696167, + "learning_rate": 0.0002, + "loss": 1.5002, + "step": 1100 + }, + { + "epoch": 3.75, + "grad_norm": 0.5440598726272583, + "learning_rate": 0.0002, + "loss": 1.4797, + "step": 1110 + }, + { + "epoch": 3.7837837837837838, + "grad_norm": 0.6304370760917664, + "learning_rate": 0.0002, + "loss": 1.5743, + "step": 1120 + }, + { + "epoch": 3.8175675675675675, + "grad_norm": 0.6729280948638916, + "learning_rate": 0.0002, + "loss": 1.6429, + "step": 1130 + }, + { + "epoch": 3.8513513513513513, + "grad_norm": 0.5881901979446411, + "learning_rate": 0.0002, + "loss": 1.594, + "step": 1140 + }, + { + "epoch": 3.885135135135135, + "grad_norm": 0.5508038997650146, + "learning_rate": 0.0002, + "loss": 1.5008, + "step": 1150 + }, + { + "epoch": 3.918918918918919, + "grad_norm": 0.5926295518875122, + "learning_rate": 0.0002, + "loss": 1.5045, + "step": 1160 + }, + { + "epoch": 3.9527027027027026, + "grad_norm": 0.5882043838500977, + "learning_rate": 0.0002, + "loss": 1.5223, + "step": 1170 + }, + { + "epoch": 3.9864864864864864, + "grad_norm": 0.604119598865509, + "learning_rate": 0.0002, + "loss": 1.5874, + "step": 1180 + }, + { + "epoch": 4.0, + "eval_loss": 1.8923152685165405, + "eval_runtime": 62.9577, + "eval_samples_per_second": 8.18, + "eval_steps_per_second": 1.032, + "step": 1184 + } + ], + "logging_steps": 10, + "max_steps": 2368, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.479286891662541e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353b8c3d36532f1ad17da6f41538722c26cdcddf --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1184/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:385e5da1817ab8ad8dd9d82e0f198663245ef77a2a0bf6cf06d1105171be7411 +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11a30a930cd91d3f1eabb69deb8fbb42c499862c --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d11a93d33526a57833f067fa016e062473b617c9d62a03e4e132cd16d65aaaa5 +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8b4e64a6d83d56224f2304094a186bf38525422 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3236807cdf34e5eca2b9f65d974853bf6b108230feb7b2f98c4b9708eae83434 +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..151fb372093c8de40336deedc47b6e1532e6b640 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9dbb6a1466e6e2370a3c73dab7185393ac509933ad28433f0f58263d553299f +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dfeba4f8f5faf8843250e778cc9e088ac50ba7ef --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:685e7d52da73e70a47812f1d2d6c2fcea04b7af2d1423dc796dae124b2ef94af +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9c3142e812a2df65a74e9bf7b32fec8969e4812e --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/trainer_state.json @@ -0,0 +1,1109 @@ +{ + "best_metric": 1.83539617061615, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592", + "epoch": 5.0, + "eval_steps": 10, + "global_step": 1480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.033783783783783786, + "grad_norm": 0.5820087194442749, + "learning_rate": 0.0002, + "loss": 2.6467, + "step": 10 + }, + { + "epoch": 0.06756756756756757, + "grad_norm": 0.4625075161457062, + "learning_rate": 0.0002, + "loss": 2.2808, + "step": 20 + }, + { + "epoch": 0.10135135135135136, + "grad_norm": 0.46946242451667786, + "learning_rate": 0.0002, + "loss": 2.0116, + "step": 30 + }, + { + "epoch": 0.13513513513513514, + "grad_norm": 0.5181305408477783, + "learning_rate": 0.0002, + "loss": 1.9089, + "step": 40 + }, + { + "epoch": 0.16891891891891891, + "grad_norm": 0.7439630627632141, + "learning_rate": 0.0002, + "loss": 1.9232, + "step": 50 + }, + { + "epoch": 0.20270270270270271, + "grad_norm": 0.5144319534301758, + "learning_rate": 0.0002, + "loss": 1.9646, + "step": 60 + }, + { + "epoch": 0.23648648648648649, + "grad_norm": 0.46696192026138306, + "learning_rate": 0.0002, + "loss": 1.921, + "step": 70 + }, + { + "epoch": 0.2702702702702703, + "grad_norm": 0.4330582022666931, + "learning_rate": 0.0002, + "loss": 1.8794, + "step": 80 + }, + { + "epoch": 0.30405405405405406, + "grad_norm": 0.502414882183075, + "learning_rate": 0.0002, + "loss": 1.8897, + "step": 90 + }, + { + "epoch": 0.33783783783783783, + "grad_norm": 0.4174366295337677, + "learning_rate": 0.0002, + "loss": 1.8166, + "step": 100 + }, + { + "epoch": 0.3716216216216216, + "grad_norm": 0.4296933710575104, + "learning_rate": 0.0002, + "loss": 1.8264, + "step": 110 + }, + { + "epoch": 0.40540540540540543, + "grad_norm": 0.4299834668636322, + "learning_rate": 0.0002, + "loss": 1.9223, + "step": 120 + }, + { + "epoch": 0.4391891891891892, + "grad_norm": 0.5583795309066772, + "learning_rate": 0.0002, + "loss": 1.8708, + "step": 130 + }, + { + "epoch": 0.47297297297297297, + "grad_norm": 0.5205192565917969, + "learning_rate": 0.0002, + "loss": 1.7786, + "step": 140 + }, + { + "epoch": 0.5067567567567568, + "grad_norm": 0.4683739244937897, + "learning_rate": 0.0002, + "loss": 1.8293, + "step": 150 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 0.497546523809433, + "learning_rate": 0.0002, + "loss": 1.9102, + "step": 160 + }, + { + "epoch": 0.5743243243243243, + "grad_norm": 0.40443721413612366, + "learning_rate": 0.0002, + "loss": 1.8077, + "step": 170 + }, + { + "epoch": 0.6081081081081081, + "grad_norm": 0.39056605100631714, + "learning_rate": 0.0002, + "loss": 1.8446, + "step": 180 + }, + { + "epoch": 0.6418918918918919, + "grad_norm": 0.42397141456604004, + "learning_rate": 0.0002, + "loss": 1.8747, + "step": 190 + }, + { + "epoch": 0.6756756756756757, + "grad_norm": 0.4679499566555023, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 200 + }, + { + "epoch": 0.7094594594594594, + "grad_norm": 0.39300158619880676, + "learning_rate": 0.0002, + "loss": 1.8401, + "step": 210 + }, + { + "epoch": 0.7432432432432432, + "grad_norm": 0.4001515805721283, + "learning_rate": 0.0002, + "loss": 1.8345, + "step": 220 + }, + { + "epoch": 0.777027027027027, + "grad_norm": 0.4094320833683014, + "learning_rate": 0.0002, + "loss": 1.7571, + "step": 230 + }, + { + "epoch": 0.8108108108108109, + "grad_norm": 0.37315094470977783, + "learning_rate": 0.0002, + "loss": 1.8972, + "step": 240 + }, + { + "epoch": 0.8445945945945946, + "grad_norm": 0.4331067204475403, + "learning_rate": 0.0002, + "loss": 1.8337, + "step": 250 + }, + { + "epoch": 0.8783783783783784, + "grad_norm": 0.39758574962615967, + "learning_rate": 0.0002, + "loss": 1.8555, + "step": 260 + }, + { + "epoch": 0.9121621621621622, + "grad_norm": 0.38240519165992737, + "learning_rate": 0.0002, + "loss": 1.8624, + "step": 270 + }, + { + "epoch": 0.9459459459459459, + "grad_norm": 0.40907856822013855, + "learning_rate": 0.0002, + "loss": 1.7531, + "step": 280 + }, + { + "epoch": 0.9797297297297297, + "grad_norm": 0.34108003973960876, + "learning_rate": 0.0002, + "loss": 1.8435, + "step": 290 + }, + { + "epoch": 1.0, + "eval_loss": 1.8428829908370972, + "eval_runtime": 62.3963, + "eval_samples_per_second": 8.254, + "eval_steps_per_second": 1.042, + "step": 296 + }, + { + "epoch": 1.0135135135135136, + "grad_norm": 0.3993101716041565, + "learning_rate": 0.0002, + "loss": 1.7254, + "step": 300 + }, + { + "epoch": 1.0472972972972974, + "grad_norm": 0.45567989349365234, + "learning_rate": 0.0002, + "loss": 1.7985, + "step": 310 + }, + { + "epoch": 1.0810810810810811, + "grad_norm": 0.3767794072628021, + "learning_rate": 0.0002, + "loss": 1.757, + "step": 320 + }, + { + "epoch": 1.114864864864865, + "grad_norm": 0.5181908011436462, + "learning_rate": 0.0002, + "loss": 1.7917, + "step": 330 + }, + { + "epoch": 1.1486486486486487, + "grad_norm": 0.4213193356990814, + "learning_rate": 0.0002, + "loss": 1.7723, + "step": 340 + }, + { + "epoch": 1.1824324324324325, + "grad_norm": 0.45519495010375977, + "learning_rate": 0.0002, + "loss": 1.8203, + "step": 350 + }, + { + "epoch": 1.2162162162162162, + "grad_norm": 0.35332638025283813, + "learning_rate": 0.0002, + "loss": 1.6946, + "step": 360 + }, + { + "epoch": 1.25, + "grad_norm": 0.3675481677055359, + "learning_rate": 0.0002, + "loss": 1.7541, + "step": 370 + }, + { + "epoch": 1.2837837837837838, + "grad_norm": 0.4569270610809326, + "learning_rate": 0.0002, + "loss": 1.7458, + "step": 380 + }, + { + "epoch": 1.3175675675675675, + "grad_norm": 0.37950295209884644, + "learning_rate": 0.0002, + "loss": 1.7988, + "step": 390 + }, + { + "epoch": 1.3513513513513513, + "grad_norm": 0.5744572877883911, + "learning_rate": 0.0002, + "loss": 1.7032, + "step": 400 + }, + { + "epoch": 1.385135135135135, + "grad_norm": 0.44380778074264526, + "learning_rate": 0.0002, + "loss": 1.7437, + "step": 410 + }, + { + "epoch": 1.4189189189189189, + "grad_norm": 0.43328171968460083, + "learning_rate": 0.0002, + "loss": 1.7454, + "step": 420 + }, + { + "epoch": 1.4527027027027026, + "grad_norm": 0.41290056705474854, + "learning_rate": 0.0002, + "loss": 1.7636, + "step": 430 + }, + { + "epoch": 1.4864864864864864, + "grad_norm": 0.3771473169326782, + "learning_rate": 0.0002, + "loss": 1.7332, + "step": 440 + }, + { + "epoch": 1.5202702702702702, + "grad_norm": 0.42537811398506165, + "learning_rate": 0.0002, + "loss": 1.7618, + "step": 450 + }, + { + "epoch": 1.554054054054054, + "grad_norm": 0.39705610275268555, + "learning_rate": 0.0002, + "loss": 1.8523, + "step": 460 + }, + { + "epoch": 1.5878378378378377, + "grad_norm": 0.4178248643875122, + "learning_rate": 0.0002, + "loss": 1.7673, + "step": 470 + }, + { + "epoch": 1.6216216216216215, + "grad_norm": 0.39107105135917664, + "learning_rate": 0.0002, + "loss": 1.742, + "step": 480 + }, + { + "epoch": 1.6554054054054053, + "grad_norm": 0.38505619764328003, + "learning_rate": 0.0002, + "loss": 1.6984, + "step": 490 + }, + { + "epoch": 1.689189189189189, + "grad_norm": 0.43590813875198364, + "learning_rate": 0.0002, + "loss": 1.7382, + "step": 500 + }, + { + "epoch": 1.722972972972973, + "grad_norm": 0.42785948514938354, + "learning_rate": 0.0002, + "loss": 1.7139, + "step": 510 + }, + { + "epoch": 1.7567567567567568, + "grad_norm": 0.3829004168510437, + "learning_rate": 0.0002, + "loss": 1.7551, + "step": 520 + }, + { + "epoch": 1.7905405405405406, + "grad_norm": 0.35287904739379883, + "learning_rate": 0.0002, + "loss": 1.7744, + "step": 530 + }, + { + "epoch": 1.8243243243243243, + "grad_norm": 0.38657888770103455, + "learning_rate": 0.0002, + "loss": 1.7714, + "step": 540 + }, + { + "epoch": 1.8581081081081081, + "grad_norm": 0.41452157497406006, + "learning_rate": 0.0002, + "loss": 1.7535, + "step": 550 + }, + { + "epoch": 1.8918918918918919, + "grad_norm": 0.3898279070854187, + "learning_rate": 0.0002, + "loss": 1.7627, + "step": 560 + }, + { + "epoch": 1.9256756756756757, + "grad_norm": 0.4147624373435974, + "learning_rate": 0.0002, + "loss": 1.7494, + "step": 570 + }, + { + "epoch": 1.9594594594594594, + "grad_norm": 0.4374851584434509, + "learning_rate": 0.0002, + "loss": 1.7693, + "step": 580 + }, + { + "epoch": 1.9932432432432432, + "grad_norm": 0.48530328273773193, + "learning_rate": 0.0002, + "loss": 1.7796, + "step": 590 + }, + { + "epoch": 2.0, + "eval_loss": 1.83539617061615, + "eval_runtime": 70.9865, + "eval_samples_per_second": 7.255, + "eval_steps_per_second": 0.916, + "step": 592 + }, + { + "epoch": 2.027027027027027, + "grad_norm": 0.40344223380088806, + "learning_rate": 0.0002, + "loss": 1.7294, + "step": 600 + }, + { + "epoch": 2.060810810810811, + "grad_norm": 0.48268747329711914, + "learning_rate": 0.0002, + "loss": 1.6693, + "step": 610 + }, + { + "epoch": 2.0945945945945947, + "grad_norm": 0.4675706923007965, + "learning_rate": 0.0002, + "loss": 1.6315, + "step": 620 + }, + { + "epoch": 2.1283783783783785, + "grad_norm": 0.47494322061538696, + "learning_rate": 0.0002, + "loss": 1.6627, + "step": 630 + }, + { + "epoch": 2.1621621621621623, + "grad_norm": 0.4555308520793915, + "learning_rate": 0.0002, + "loss": 1.5668, + "step": 640 + }, + { + "epoch": 2.195945945945946, + "grad_norm": 0.43085595965385437, + "learning_rate": 0.0002, + "loss": 1.6537, + "step": 650 + }, + { + "epoch": 2.22972972972973, + "grad_norm": 0.4364128112792969, + "learning_rate": 0.0002, + "loss": 1.6316, + "step": 660 + }, + { + "epoch": 2.2635135135135136, + "grad_norm": 0.4711395800113678, + "learning_rate": 0.0002, + "loss": 1.669, + "step": 670 + }, + { + "epoch": 2.2972972972972974, + "grad_norm": 0.5109705328941345, + "learning_rate": 0.0002, + "loss": 1.5758, + "step": 680 + }, + { + "epoch": 2.331081081081081, + "grad_norm": 0.5185648798942566, + "learning_rate": 0.0002, + "loss": 1.5912, + "step": 690 + }, + { + "epoch": 2.364864864864865, + "grad_norm": 0.49192842841148376, + "learning_rate": 0.0002, + "loss": 1.6605, + "step": 700 + }, + { + "epoch": 2.3986486486486487, + "grad_norm": 0.5619909763336182, + "learning_rate": 0.0002, + "loss": 1.6688, + "step": 710 + }, + { + "epoch": 2.4324324324324325, + "grad_norm": 0.4932861328125, + "learning_rate": 0.0002, + "loss": 1.7836, + "step": 720 + }, + { + "epoch": 2.4662162162162162, + "grad_norm": 0.5211932063102722, + "learning_rate": 0.0002, + "loss": 1.6532, + "step": 730 + }, + { + "epoch": 2.5, + "grad_norm": 0.4138050377368927, + "learning_rate": 0.0002, + "loss": 1.667, + "step": 740 + }, + { + "epoch": 2.5337837837837838, + "grad_norm": 0.4644908010959625, + "learning_rate": 0.0002, + "loss": 1.658, + "step": 750 + }, + { + "epoch": 2.5675675675675675, + "grad_norm": 0.4513227641582489, + "learning_rate": 0.0002, + "loss": 1.6451, + "step": 760 + }, + { + "epoch": 2.6013513513513513, + "grad_norm": 0.4735109508037567, + "learning_rate": 0.0002, + "loss": 1.7071, + "step": 770 + }, + { + "epoch": 2.635135135135135, + "grad_norm": 0.5453559756278992, + "learning_rate": 0.0002, + "loss": 1.6659, + "step": 780 + }, + { + "epoch": 2.668918918918919, + "grad_norm": 0.5422565937042236, + "learning_rate": 0.0002, + "loss": 1.7211, + "step": 790 + }, + { + "epoch": 2.7027027027027026, + "grad_norm": 0.4288518726825714, + "learning_rate": 0.0002, + "loss": 1.6623, + "step": 800 + }, + { + "epoch": 2.7364864864864864, + "grad_norm": 0.4085204005241394, + "learning_rate": 0.0002, + "loss": 1.7197, + "step": 810 + }, + { + "epoch": 2.77027027027027, + "grad_norm": 0.49770182371139526, + "learning_rate": 0.0002, + "loss": 1.6376, + "step": 820 + }, + { + "epoch": 2.804054054054054, + "grad_norm": 0.5005106329917908, + "learning_rate": 0.0002, + "loss": 1.6332, + "step": 830 + }, + { + "epoch": 2.8378378378378377, + "grad_norm": 0.4763440489768982, + "learning_rate": 0.0002, + "loss": 1.6675, + "step": 840 + }, + { + "epoch": 2.8716216216216215, + "grad_norm": 0.44995108246803284, + "learning_rate": 0.0002, + "loss": 1.7149, + "step": 850 + }, + { + "epoch": 2.9054054054054053, + "grad_norm": 0.5299676656723022, + "learning_rate": 0.0002, + "loss": 1.6438, + "step": 860 + }, + { + "epoch": 2.939189189189189, + "grad_norm": 0.49627119302749634, + "learning_rate": 0.0002, + "loss": 1.6457, + "step": 870 + }, + { + "epoch": 2.972972972972973, + "grad_norm": 0.502545177936554, + "learning_rate": 0.0002, + "loss": 1.6517, + "step": 880 + }, + { + "epoch": 3.0, + "eval_loss": 1.8520468473434448, + "eval_runtime": 70.9917, + "eval_samples_per_second": 7.254, + "eval_steps_per_second": 0.916, + "step": 888 + }, + { + "epoch": 3.0067567567567566, + "grad_norm": 0.4756380319595337, + "learning_rate": 0.0002, + "loss": 1.6271, + "step": 890 + }, + { + "epoch": 3.0405405405405403, + "grad_norm": 0.5167421102523804, + "learning_rate": 0.0002, + "loss": 1.563, + "step": 900 + }, + { + "epoch": 3.074324324324324, + "grad_norm": 0.5524939298629761, + "learning_rate": 0.0002, + "loss": 1.48, + "step": 910 + }, + { + "epoch": 3.108108108108108, + "grad_norm": 0.7045221924781799, + "learning_rate": 0.0002, + "loss": 1.5297, + "step": 920 + }, + { + "epoch": 3.141891891891892, + "grad_norm": 0.5692355036735535, + "learning_rate": 0.0002, + "loss": 1.5548, + "step": 930 + }, + { + "epoch": 3.175675675675676, + "grad_norm": 0.5467017292976379, + "learning_rate": 0.0002, + "loss": 1.5297, + "step": 940 + }, + { + "epoch": 3.2094594594594597, + "grad_norm": 0.6004040241241455, + "learning_rate": 0.0002, + "loss": 1.5559, + "step": 950 + }, + { + "epoch": 3.2432432432432434, + "grad_norm": 0.5713295936584473, + "learning_rate": 0.0002, + "loss": 1.5255, + "step": 960 + }, + { + "epoch": 3.277027027027027, + "grad_norm": 0.6054869890213013, + "learning_rate": 0.0002, + "loss": 1.5412, + "step": 970 + }, + { + "epoch": 3.310810810810811, + "grad_norm": 0.6304576992988586, + "learning_rate": 0.0002, + "loss": 1.5167, + "step": 980 + }, + { + "epoch": 3.3445945945945947, + "grad_norm": 0.5347281694412231, + "learning_rate": 0.0002, + "loss": 1.52, + "step": 990 + }, + { + "epoch": 3.3783783783783785, + "grad_norm": 0.598211407661438, + "learning_rate": 0.0002, + "loss": 1.5707, + "step": 1000 + }, + { + "epoch": 3.4121621621621623, + "grad_norm": 0.637312650680542, + "learning_rate": 0.0002, + "loss": 1.5243, + "step": 1010 + }, + { + "epoch": 3.445945945945946, + "grad_norm": 0.6092430949211121, + "learning_rate": 0.0002, + "loss": 1.5356, + "step": 1020 + }, + { + "epoch": 3.47972972972973, + "grad_norm": 0.6421037912368774, + "learning_rate": 0.0002, + "loss": 1.5856, + "step": 1030 + }, + { + "epoch": 3.5135135135135136, + "grad_norm": 0.6712167263031006, + "learning_rate": 0.0002, + "loss": 1.5553, + "step": 1040 + }, + { + "epoch": 3.5472972972972974, + "grad_norm": 0.6466017365455627, + "learning_rate": 0.0002, + "loss": 1.4708, + "step": 1050 + }, + { + "epoch": 3.581081081081081, + "grad_norm": 1.418167233467102, + "learning_rate": 0.0002, + "loss": 1.5159, + "step": 1060 + }, + { + "epoch": 3.614864864864865, + "grad_norm": 0.6092377305030823, + "learning_rate": 0.0002, + "loss": 1.5264, + "step": 1070 + }, + { + "epoch": 3.6486486486486487, + "grad_norm": 0.5632478594779968, + "learning_rate": 0.0002, + "loss": 1.5227, + "step": 1080 + }, + { + "epoch": 3.6824324324324325, + "grad_norm": 0.6007736921310425, + "learning_rate": 0.0002, + "loss": 1.5492, + "step": 1090 + }, + { + "epoch": 3.7162162162162162, + "grad_norm": 0.6031264066696167, + "learning_rate": 0.0002, + "loss": 1.5002, + "step": 1100 + }, + { + "epoch": 3.75, + "grad_norm": 0.5440598726272583, + "learning_rate": 0.0002, + "loss": 1.4797, + "step": 1110 + }, + { + "epoch": 3.7837837837837838, + "grad_norm": 0.6304370760917664, + "learning_rate": 0.0002, + "loss": 1.5743, + "step": 1120 + }, + { + "epoch": 3.8175675675675675, + "grad_norm": 0.6729280948638916, + "learning_rate": 0.0002, + "loss": 1.6429, + "step": 1130 + }, + { + "epoch": 3.8513513513513513, + "grad_norm": 0.5881901979446411, + "learning_rate": 0.0002, + "loss": 1.594, + "step": 1140 + }, + { + "epoch": 3.885135135135135, + "grad_norm": 0.5508038997650146, + "learning_rate": 0.0002, + "loss": 1.5008, + "step": 1150 + }, + { + "epoch": 3.918918918918919, + "grad_norm": 0.5926295518875122, + "learning_rate": 0.0002, + "loss": 1.5045, + "step": 1160 + }, + { + "epoch": 3.9527027027027026, + "grad_norm": 0.5882043838500977, + "learning_rate": 0.0002, + "loss": 1.5223, + "step": 1170 + }, + { + "epoch": 3.9864864864864864, + "grad_norm": 0.604119598865509, + "learning_rate": 0.0002, + "loss": 1.5874, + "step": 1180 + }, + { + "epoch": 4.0, + "eval_loss": 1.8923152685165405, + "eval_runtime": 62.9577, + "eval_samples_per_second": 8.18, + "eval_steps_per_second": 1.032, + "step": 1184 + }, + { + "epoch": 4.02027027027027, + "grad_norm": 0.696061909198761, + "learning_rate": 0.0002, + "loss": 1.4183, + "step": 1190 + }, + { + "epoch": 4.054054054054054, + "grad_norm": 0.751200258731842, + "learning_rate": 0.0002, + "loss": 1.3455, + "step": 1200 + }, + { + "epoch": 4.087837837837838, + "grad_norm": 0.9667422771453857, + "learning_rate": 0.0002, + "loss": 1.35, + "step": 1210 + }, + { + "epoch": 4.121621621621622, + "grad_norm": 0.7374204397201538, + "learning_rate": 0.0002, + "loss": 1.4058, + "step": 1220 + }, + { + "epoch": 4.155405405405405, + "grad_norm": 0.8050723075866699, + "learning_rate": 0.0002, + "loss": 1.4454, + "step": 1230 + }, + { + "epoch": 4.1891891891891895, + "grad_norm": 0.7360416054725647, + "learning_rate": 0.0002, + "loss": 1.4129, + "step": 1240 + }, + { + "epoch": 4.222972972972973, + "grad_norm": 0.7947028279304504, + "learning_rate": 0.0002, + "loss": 1.3899, + "step": 1250 + }, + { + "epoch": 4.256756756756757, + "grad_norm": 0.7336545586585999, + "learning_rate": 0.0002, + "loss": 1.4264, + "step": 1260 + }, + { + "epoch": 4.29054054054054, + "grad_norm": 0.7051223516464233, + "learning_rate": 0.0002, + "loss": 1.4047, + "step": 1270 + }, + { + "epoch": 4.324324324324325, + "grad_norm": 0.7939404845237732, + "learning_rate": 0.0002, + "loss": 1.3507, + "step": 1280 + }, + { + "epoch": 4.358108108108108, + "grad_norm": 0.7818657755851746, + "learning_rate": 0.0002, + "loss": 1.387, + "step": 1290 + }, + { + "epoch": 4.391891891891892, + "grad_norm": 0.7490634918212891, + "learning_rate": 0.0002, + "loss": 1.3533, + "step": 1300 + }, + { + "epoch": 4.425675675675675, + "grad_norm": 0.9319770932197571, + "learning_rate": 0.0002, + "loss": 1.3912, + "step": 1310 + }, + { + "epoch": 4.45945945945946, + "grad_norm": 0.7811282873153687, + "learning_rate": 0.0002, + "loss": 1.439, + "step": 1320 + }, + { + "epoch": 4.493243243243243, + "grad_norm": 0.7785378694534302, + "learning_rate": 0.0002, + "loss": 1.3973, + "step": 1330 + }, + { + "epoch": 4.527027027027027, + "grad_norm": 0.8697562217712402, + "learning_rate": 0.0002, + "loss": 1.3931, + "step": 1340 + }, + { + "epoch": 4.5608108108108105, + "grad_norm": 0.7927497625350952, + "learning_rate": 0.0002, + "loss": 1.3846, + "step": 1350 + }, + { + "epoch": 4.594594594594595, + "grad_norm": 0.9746347665786743, + "learning_rate": 0.0002, + "loss": 1.3188, + "step": 1360 + }, + { + "epoch": 4.628378378378378, + "grad_norm": 0.7353375554084778, + "learning_rate": 0.0002, + "loss": 1.4611, + "step": 1370 + }, + { + "epoch": 4.662162162162162, + "grad_norm": 0.8139469027519226, + "learning_rate": 0.0002, + "loss": 1.4067, + "step": 1380 + }, + { + "epoch": 4.695945945945946, + "grad_norm": 1.728020429611206, + "learning_rate": 0.0002, + "loss": 1.3727, + "step": 1390 + }, + { + "epoch": 4.72972972972973, + "grad_norm": 0.8249040246009827, + "learning_rate": 0.0002, + "loss": 1.3971, + "step": 1400 + }, + { + "epoch": 4.763513513513513, + "grad_norm": 0.7916110157966614, + "learning_rate": 0.0002, + "loss": 1.4238, + "step": 1410 + }, + { + "epoch": 4.797297297297297, + "grad_norm": 0.7286198735237122, + "learning_rate": 0.0002, + "loss": 1.4064, + "step": 1420 + }, + { + "epoch": 4.831081081081081, + "grad_norm": 0.7969672083854675, + "learning_rate": 0.0002, + "loss": 1.305, + "step": 1430 + }, + { + "epoch": 4.864864864864865, + "grad_norm": 0.9593119621276855, + "learning_rate": 0.0002, + "loss": 1.4109, + "step": 1440 + }, + { + "epoch": 4.898648648648649, + "grad_norm": 0.8609084486961365, + "learning_rate": 0.0002, + "loss": 1.4112, + "step": 1450 + }, + { + "epoch": 4.9324324324324325, + "grad_norm": 0.705203115940094, + "learning_rate": 0.0002, + "loss": 1.3126, + "step": 1460 + }, + { + "epoch": 4.966216216216216, + "grad_norm": 0.9503173232078552, + "learning_rate": 0.0002, + "loss": 1.4226, + "step": 1470 + }, + { + "epoch": 5.0, + "grad_norm": 0.7174800038337708, + "learning_rate": 0.0002, + "loss": 1.4457, + "step": 1480 + }, + { + "epoch": 5.0, + "eval_loss": 1.9753261804580688, + "eval_runtime": 70.4622, + "eval_samples_per_second": 7.309, + "eval_steps_per_second": 0.922, + "step": 1480 + } + ], + "logging_steps": 10, + "max_steps": 2368, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.849108614578176e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353b8c3d36532f1ad17da6f41538722c26cdcddf --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:385e5da1817ab8ad8dd9d82e0f198663245ef77a2a0bf6cf06d1105171be7411 +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0aad1c82cb66abb02a09a1af0f5377fee1fd284c --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdd5b9ebeea2e3750a7f59b81b9c8407e36944b6eacc3de95bcad4cd056f8df7 +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6c3f77eb6ff48f6fd6d3fa6db0ad9b32d415f7d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6228fe30404d60eb2654ffaccd8256d3da0874c94bb151165129f0f23071c3d +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8e6804d71eff70810e0ea126718b26cc0b4015c3 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c81e39f762841f15e1a2dedac143e00276fc4141236d263b18991bfb6b8883ea +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..74f7ae1ed2cdb1f4207faa61596a763fdc1ff58d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63cb6b85b91ddf0d697565a43728f8831849c7adbdd20021e13e09aceb248657 +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2a227b22fef246ec079e6ccc07e84aba3f833ee3 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/trainer_state.json @@ -0,0 +1,1320 @@ +{ + "best_metric": 1.83539617061615, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592", + "epoch": 6.0, + "eval_steps": 10, + "global_step": 1776, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.033783783783783786, + "grad_norm": 0.5820087194442749, + "learning_rate": 0.0002, + "loss": 2.6467, + "step": 10 + }, + { + "epoch": 0.06756756756756757, + "grad_norm": 0.4625075161457062, + "learning_rate": 0.0002, + "loss": 2.2808, + "step": 20 + }, + { + "epoch": 0.10135135135135136, + "grad_norm": 0.46946242451667786, + "learning_rate": 0.0002, + "loss": 2.0116, + "step": 30 + }, + { + "epoch": 0.13513513513513514, + "grad_norm": 0.5181305408477783, + "learning_rate": 0.0002, + "loss": 1.9089, + "step": 40 + }, + { + "epoch": 0.16891891891891891, + "grad_norm": 0.7439630627632141, + "learning_rate": 0.0002, + "loss": 1.9232, + "step": 50 + }, + { + "epoch": 0.20270270270270271, + "grad_norm": 0.5144319534301758, + "learning_rate": 0.0002, + "loss": 1.9646, + "step": 60 + }, + { + "epoch": 0.23648648648648649, + "grad_norm": 0.46696192026138306, + "learning_rate": 0.0002, + "loss": 1.921, + "step": 70 + }, + { + "epoch": 0.2702702702702703, + "grad_norm": 0.4330582022666931, + "learning_rate": 0.0002, + "loss": 1.8794, + "step": 80 + }, + { + "epoch": 0.30405405405405406, + "grad_norm": 0.502414882183075, + "learning_rate": 0.0002, + "loss": 1.8897, + "step": 90 + }, + { + "epoch": 0.33783783783783783, + "grad_norm": 0.4174366295337677, + "learning_rate": 0.0002, + "loss": 1.8166, + "step": 100 + }, + { + "epoch": 0.3716216216216216, + "grad_norm": 0.4296933710575104, + "learning_rate": 0.0002, + "loss": 1.8264, + "step": 110 + }, + { + "epoch": 0.40540540540540543, + "grad_norm": 0.4299834668636322, + "learning_rate": 0.0002, + "loss": 1.9223, + "step": 120 + }, + { + "epoch": 0.4391891891891892, + "grad_norm": 0.5583795309066772, + "learning_rate": 0.0002, + "loss": 1.8708, + "step": 130 + }, + { + "epoch": 0.47297297297297297, + "grad_norm": 0.5205192565917969, + "learning_rate": 0.0002, + "loss": 1.7786, + "step": 140 + }, + { + "epoch": 0.5067567567567568, + "grad_norm": 0.4683739244937897, + "learning_rate": 0.0002, + "loss": 1.8293, + "step": 150 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 0.497546523809433, + "learning_rate": 0.0002, + "loss": 1.9102, + "step": 160 + }, + { + "epoch": 0.5743243243243243, + "grad_norm": 0.40443721413612366, + "learning_rate": 0.0002, + "loss": 1.8077, + "step": 170 + }, + { + "epoch": 0.6081081081081081, + "grad_norm": 0.39056605100631714, + "learning_rate": 0.0002, + "loss": 1.8446, + "step": 180 + }, + { + "epoch": 0.6418918918918919, + "grad_norm": 0.42397141456604004, + "learning_rate": 0.0002, + "loss": 1.8747, + "step": 190 + }, + { + "epoch": 0.6756756756756757, + "grad_norm": 0.4679499566555023, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 200 + }, + { + "epoch": 0.7094594594594594, + "grad_norm": 0.39300158619880676, + "learning_rate": 0.0002, + "loss": 1.8401, + "step": 210 + }, + { + "epoch": 0.7432432432432432, + "grad_norm": 0.4001515805721283, + "learning_rate": 0.0002, + "loss": 1.8345, + "step": 220 + }, + { + "epoch": 0.777027027027027, + "grad_norm": 0.4094320833683014, + "learning_rate": 0.0002, + "loss": 1.7571, + "step": 230 + }, + { + "epoch": 0.8108108108108109, + "grad_norm": 0.37315094470977783, + "learning_rate": 0.0002, + "loss": 1.8972, + "step": 240 + }, + { + "epoch": 0.8445945945945946, + "grad_norm": 0.4331067204475403, + "learning_rate": 0.0002, + "loss": 1.8337, + "step": 250 + }, + { + "epoch": 0.8783783783783784, + "grad_norm": 0.39758574962615967, + "learning_rate": 0.0002, + "loss": 1.8555, + "step": 260 + }, + { + "epoch": 0.9121621621621622, + "grad_norm": 0.38240519165992737, + "learning_rate": 0.0002, + "loss": 1.8624, + "step": 270 + }, + { + "epoch": 0.9459459459459459, + "grad_norm": 0.40907856822013855, + "learning_rate": 0.0002, + "loss": 1.7531, + "step": 280 + }, + { + "epoch": 0.9797297297297297, + "grad_norm": 0.34108003973960876, + "learning_rate": 0.0002, + "loss": 1.8435, + "step": 290 + }, + { + "epoch": 1.0, + "eval_loss": 1.8428829908370972, + "eval_runtime": 62.3963, + "eval_samples_per_second": 8.254, + "eval_steps_per_second": 1.042, + "step": 296 + }, + { + "epoch": 1.0135135135135136, + "grad_norm": 0.3993101716041565, + "learning_rate": 0.0002, + "loss": 1.7254, + "step": 300 + }, + { + "epoch": 1.0472972972972974, + "grad_norm": 0.45567989349365234, + "learning_rate": 0.0002, + "loss": 1.7985, + "step": 310 + }, + { + "epoch": 1.0810810810810811, + "grad_norm": 0.3767794072628021, + "learning_rate": 0.0002, + "loss": 1.757, + "step": 320 + }, + { + "epoch": 1.114864864864865, + "grad_norm": 0.5181908011436462, + "learning_rate": 0.0002, + "loss": 1.7917, + "step": 330 + }, + { + "epoch": 1.1486486486486487, + "grad_norm": 0.4213193356990814, + "learning_rate": 0.0002, + "loss": 1.7723, + "step": 340 + }, + { + "epoch": 1.1824324324324325, + "grad_norm": 0.45519495010375977, + "learning_rate": 0.0002, + "loss": 1.8203, + "step": 350 + }, + { + "epoch": 1.2162162162162162, + "grad_norm": 0.35332638025283813, + "learning_rate": 0.0002, + "loss": 1.6946, + "step": 360 + }, + { + "epoch": 1.25, + "grad_norm": 0.3675481677055359, + "learning_rate": 0.0002, + "loss": 1.7541, + "step": 370 + }, + { + "epoch": 1.2837837837837838, + "grad_norm": 0.4569270610809326, + "learning_rate": 0.0002, + "loss": 1.7458, + "step": 380 + }, + { + "epoch": 1.3175675675675675, + "grad_norm": 0.37950295209884644, + "learning_rate": 0.0002, + "loss": 1.7988, + "step": 390 + }, + { + "epoch": 1.3513513513513513, + "grad_norm": 0.5744572877883911, + "learning_rate": 0.0002, + "loss": 1.7032, + "step": 400 + }, + { + "epoch": 1.385135135135135, + "grad_norm": 0.44380778074264526, + "learning_rate": 0.0002, + "loss": 1.7437, + "step": 410 + }, + { + "epoch": 1.4189189189189189, + "grad_norm": 0.43328171968460083, + "learning_rate": 0.0002, + "loss": 1.7454, + "step": 420 + }, + { + "epoch": 1.4527027027027026, + "grad_norm": 0.41290056705474854, + "learning_rate": 0.0002, + "loss": 1.7636, + "step": 430 + }, + { + "epoch": 1.4864864864864864, + "grad_norm": 0.3771473169326782, + "learning_rate": 0.0002, + "loss": 1.7332, + "step": 440 + }, + { + "epoch": 1.5202702702702702, + "grad_norm": 0.42537811398506165, + "learning_rate": 0.0002, + "loss": 1.7618, + "step": 450 + }, + { + "epoch": 1.554054054054054, + "grad_norm": 0.39705610275268555, + "learning_rate": 0.0002, + "loss": 1.8523, + "step": 460 + }, + { + "epoch": 1.5878378378378377, + "grad_norm": 0.4178248643875122, + "learning_rate": 0.0002, + "loss": 1.7673, + "step": 470 + }, + { + "epoch": 1.6216216216216215, + "grad_norm": 0.39107105135917664, + "learning_rate": 0.0002, + "loss": 1.742, + "step": 480 + }, + { + "epoch": 1.6554054054054053, + "grad_norm": 0.38505619764328003, + "learning_rate": 0.0002, + "loss": 1.6984, + "step": 490 + }, + { + "epoch": 1.689189189189189, + "grad_norm": 0.43590813875198364, + "learning_rate": 0.0002, + "loss": 1.7382, + "step": 500 + }, + { + "epoch": 1.722972972972973, + "grad_norm": 0.42785948514938354, + "learning_rate": 0.0002, + "loss": 1.7139, + "step": 510 + }, + { + "epoch": 1.7567567567567568, + "grad_norm": 0.3829004168510437, + "learning_rate": 0.0002, + "loss": 1.7551, + "step": 520 + }, + { + "epoch": 1.7905405405405406, + "grad_norm": 0.35287904739379883, + "learning_rate": 0.0002, + "loss": 1.7744, + "step": 530 + }, + { + "epoch": 1.8243243243243243, + "grad_norm": 0.38657888770103455, + "learning_rate": 0.0002, + "loss": 1.7714, + "step": 540 + }, + { + "epoch": 1.8581081081081081, + "grad_norm": 0.41452157497406006, + "learning_rate": 0.0002, + "loss": 1.7535, + "step": 550 + }, + { + "epoch": 1.8918918918918919, + "grad_norm": 0.3898279070854187, + "learning_rate": 0.0002, + "loss": 1.7627, + "step": 560 + }, + { + "epoch": 1.9256756756756757, + "grad_norm": 0.4147624373435974, + "learning_rate": 0.0002, + "loss": 1.7494, + "step": 570 + }, + { + "epoch": 1.9594594594594594, + "grad_norm": 0.4374851584434509, + "learning_rate": 0.0002, + "loss": 1.7693, + "step": 580 + }, + { + "epoch": 1.9932432432432432, + "grad_norm": 0.48530328273773193, + "learning_rate": 0.0002, + "loss": 1.7796, + "step": 590 + }, + { + "epoch": 2.0, + "eval_loss": 1.83539617061615, + "eval_runtime": 70.9865, + "eval_samples_per_second": 7.255, + "eval_steps_per_second": 0.916, + "step": 592 + }, + { + "epoch": 2.027027027027027, + "grad_norm": 0.40344223380088806, + "learning_rate": 0.0002, + "loss": 1.7294, + "step": 600 + }, + { + "epoch": 2.060810810810811, + "grad_norm": 0.48268747329711914, + "learning_rate": 0.0002, + "loss": 1.6693, + "step": 610 + }, + { + "epoch": 2.0945945945945947, + "grad_norm": 0.4675706923007965, + "learning_rate": 0.0002, + "loss": 1.6315, + "step": 620 + }, + { + "epoch": 2.1283783783783785, + "grad_norm": 0.47494322061538696, + "learning_rate": 0.0002, + "loss": 1.6627, + "step": 630 + }, + { + "epoch": 2.1621621621621623, + "grad_norm": 0.4555308520793915, + "learning_rate": 0.0002, + "loss": 1.5668, + "step": 640 + }, + { + "epoch": 2.195945945945946, + "grad_norm": 0.43085595965385437, + "learning_rate": 0.0002, + "loss": 1.6537, + "step": 650 + }, + { + "epoch": 2.22972972972973, + "grad_norm": 0.4364128112792969, + "learning_rate": 0.0002, + "loss": 1.6316, + "step": 660 + }, + { + "epoch": 2.2635135135135136, + "grad_norm": 0.4711395800113678, + "learning_rate": 0.0002, + "loss": 1.669, + "step": 670 + }, + { + "epoch": 2.2972972972972974, + "grad_norm": 0.5109705328941345, + "learning_rate": 0.0002, + "loss": 1.5758, + "step": 680 + }, + { + "epoch": 2.331081081081081, + "grad_norm": 0.5185648798942566, + "learning_rate": 0.0002, + "loss": 1.5912, + "step": 690 + }, + { + "epoch": 2.364864864864865, + "grad_norm": 0.49192842841148376, + "learning_rate": 0.0002, + "loss": 1.6605, + "step": 700 + }, + { + "epoch": 2.3986486486486487, + "grad_norm": 0.5619909763336182, + "learning_rate": 0.0002, + "loss": 1.6688, + "step": 710 + }, + { + "epoch": 2.4324324324324325, + "grad_norm": 0.4932861328125, + "learning_rate": 0.0002, + "loss": 1.7836, + "step": 720 + }, + { + "epoch": 2.4662162162162162, + "grad_norm": 0.5211932063102722, + "learning_rate": 0.0002, + "loss": 1.6532, + "step": 730 + }, + { + "epoch": 2.5, + "grad_norm": 0.4138050377368927, + "learning_rate": 0.0002, + "loss": 1.667, + "step": 740 + }, + { + "epoch": 2.5337837837837838, + "grad_norm": 0.4644908010959625, + "learning_rate": 0.0002, + "loss": 1.658, + "step": 750 + }, + { + "epoch": 2.5675675675675675, + "grad_norm": 0.4513227641582489, + "learning_rate": 0.0002, + "loss": 1.6451, + "step": 760 + }, + { + "epoch": 2.6013513513513513, + "grad_norm": 0.4735109508037567, + "learning_rate": 0.0002, + "loss": 1.7071, + "step": 770 + }, + { + "epoch": 2.635135135135135, + "grad_norm": 0.5453559756278992, + "learning_rate": 0.0002, + "loss": 1.6659, + "step": 780 + }, + { + "epoch": 2.668918918918919, + "grad_norm": 0.5422565937042236, + "learning_rate": 0.0002, + "loss": 1.7211, + "step": 790 + }, + { + "epoch": 2.7027027027027026, + "grad_norm": 0.4288518726825714, + "learning_rate": 0.0002, + "loss": 1.6623, + "step": 800 + }, + { + "epoch": 2.7364864864864864, + "grad_norm": 0.4085204005241394, + "learning_rate": 0.0002, + "loss": 1.7197, + "step": 810 + }, + { + "epoch": 2.77027027027027, + "grad_norm": 0.49770182371139526, + "learning_rate": 0.0002, + "loss": 1.6376, + "step": 820 + }, + { + "epoch": 2.804054054054054, + "grad_norm": 0.5005106329917908, + "learning_rate": 0.0002, + "loss": 1.6332, + "step": 830 + }, + { + "epoch": 2.8378378378378377, + "grad_norm": 0.4763440489768982, + "learning_rate": 0.0002, + "loss": 1.6675, + "step": 840 + }, + { + "epoch": 2.8716216216216215, + "grad_norm": 0.44995108246803284, + "learning_rate": 0.0002, + "loss": 1.7149, + "step": 850 + }, + { + "epoch": 2.9054054054054053, + "grad_norm": 0.5299676656723022, + "learning_rate": 0.0002, + "loss": 1.6438, + "step": 860 + }, + { + "epoch": 2.939189189189189, + "grad_norm": 0.49627119302749634, + "learning_rate": 0.0002, + "loss": 1.6457, + "step": 870 + }, + { + "epoch": 2.972972972972973, + "grad_norm": 0.502545177936554, + "learning_rate": 0.0002, + "loss": 1.6517, + "step": 880 + }, + { + "epoch": 3.0, + "eval_loss": 1.8520468473434448, + "eval_runtime": 70.9917, + "eval_samples_per_second": 7.254, + "eval_steps_per_second": 0.916, + "step": 888 + }, + { + "epoch": 3.0067567567567566, + "grad_norm": 0.4756380319595337, + "learning_rate": 0.0002, + "loss": 1.6271, + "step": 890 + }, + { + "epoch": 3.0405405405405403, + "grad_norm": 0.5167421102523804, + "learning_rate": 0.0002, + "loss": 1.563, + "step": 900 + }, + { + "epoch": 3.074324324324324, + "grad_norm": 0.5524939298629761, + "learning_rate": 0.0002, + "loss": 1.48, + "step": 910 + }, + { + "epoch": 3.108108108108108, + "grad_norm": 0.7045221924781799, + "learning_rate": 0.0002, + "loss": 1.5297, + "step": 920 + }, + { + "epoch": 3.141891891891892, + "grad_norm": 0.5692355036735535, + "learning_rate": 0.0002, + "loss": 1.5548, + "step": 930 + }, + { + "epoch": 3.175675675675676, + "grad_norm": 0.5467017292976379, + "learning_rate": 0.0002, + "loss": 1.5297, + "step": 940 + }, + { + "epoch": 3.2094594594594597, + "grad_norm": 0.6004040241241455, + "learning_rate": 0.0002, + "loss": 1.5559, + "step": 950 + }, + { + "epoch": 3.2432432432432434, + "grad_norm": 0.5713295936584473, + "learning_rate": 0.0002, + "loss": 1.5255, + "step": 960 + }, + { + "epoch": 3.277027027027027, + "grad_norm": 0.6054869890213013, + "learning_rate": 0.0002, + "loss": 1.5412, + "step": 970 + }, + { + "epoch": 3.310810810810811, + "grad_norm": 0.6304576992988586, + "learning_rate": 0.0002, + "loss": 1.5167, + "step": 980 + }, + { + "epoch": 3.3445945945945947, + "grad_norm": 0.5347281694412231, + "learning_rate": 0.0002, + "loss": 1.52, + "step": 990 + }, + { + "epoch": 3.3783783783783785, + "grad_norm": 0.598211407661438, + "learning_rate": 0.0002, + "loss": 1.5707, + "step": 1000 + }, + { + "epoch": 3.4121621621621623, + "grad_norm": 0.637312650680542, + "learning_rate": 0.0002, + "loss": 1.5243, + "step": 1010 + }, + { + "epoch": 3.445945945945946, + "grad_norm": 0.6092430949211121, + "learning_rate": 0.0002, + "loss": 1.5356, + "step": 1020 + }, + { + "epoch": 3.47972972972973, + "grad_norm": 0.6421037912368774, + "learning_rate": 0.0002, + "loss": 1.5856, + "step": 1030 + }, + { + "epoch": 3.5135135135135136, + "grad_norm": 0.6712167263031006, + "learning_rate": 0.0002, + "loss": 1.5553, + "step": 1040 + }, + { + "epoch": 3.5472972972972974, + "grad_norm": 0.6466017365455627, + "learning_rate": 0.0002, + "loss": 1.4708, + "step": 1050 + }, + { + "epoch": 3.581081081081081, + "grad_norm": 1.418167233467102, + "learning_rate": 0.0002, + "loss": 1.5159, + "step": 1060 + }, + { + "epoch": 3.614864864864865, + "grad_norm": 0.6092377305030823, + "learning_rate": 0.0002, + "loss": 1.5264, + "step": 1070 + }, + { + "epoch": 3.6486486486486487, + "grad_norm": 0.5632478594779968, + "learning_rate": 0.0002, + "loss": 1.5227, + "step": 1080 + }, + { + "epoch": 3.6824324324324325, + "grad_norm": 0.6007736921310425, + "learning_rate": 0.0002, + "loss": 1.5492, + "step": 1090 + }, + { + "epoch": 3.7162162162162162, + "grad_norm": 0.6031264066696167, + "learning_rate": 0.0002, + "loss": 1.5002, + "step": 1100 + }, + { + "epoch": 3.75, + "grad_norm": 0.5440598726272583, + "learning_rate": 0.0002, + "loss": 1.4797, + "step": 1110 + }, + { + "epoch": 3.7837837837837838, + "grad_norm": 0.6304370760917664, + "learning_rate": 0.0002, + "loss": 1.5743, + "step": 1120 + }, + { + "epoch": 3.8175675675675675, + "grad_norm": 0.6729280948638916, + "learning_rate": 0.0002, + "loss": 1.6429, + "step": 1130 + }, + { + "epoch": 3.8513513513513513, + "grad_norm": 0.5881901979446411, + "learning_rate": 0.0002, + "loss": 1.594, + "step": 1140 + }, + { + "epoch": 3.885135135135135, + "grad_norm": 0.5508038997650146, + "learning_rate": 0.0002, + "loss": 1.5008, + "step": 1150 + }, + { + "epoch": 3.918918918918919, + "grad_norm": 0.5926295518875122, + "learning_rate": 0.0002, + "loss": 1.5045, + "step": 1160 + }, + { + "epoch": 3.9527027027027026, + "grad_norm": 0.5882043838500977, + "learning_rate": 0.0002, + "loss": 1.5223, + "step": 1170 + }, + { + "epoch": 3.9864864864864864, + "grad_norm": 0.604119598865509, + "learning_rate": 0.0002, + "loss": 1.5874, + "step": 1180 + }, + { + "epoch": 4.0, + "eval_loss": 1.8923152685165405, + "eval_runtime": 62.9577, + "eval_samples_per_second": 8.18, + "eval_steps_per_second": 1.032, + "step": 1184 + }, + { + "epoch": 4.02027027027027, + "grad_norm": 0.696061909198761, + "learning_rate": 0.0002, + "loss": 1.4183, + "step": 1190 + }, + { + "epoch": 4.054054054054054, + "grad_norm": 0.751200258731842, + "learning_rate": 0.0002, + "loss": 1.3455, + "step": 1200 + }, + { + "epoch": 4.087837837837838, + "grad_norm": 0.9667422771453857, + "learning_rate": 0.0002, + "loss": 1.35, + "step": 1210 + }, + { + "epoch": 4.121621621621622, + "grad_norm": 0.7374204397201538, + "learning_rate": 0.0002, + "loss": 1.4058, + "step": 1220 + }, + { + "epoch": 4.155405405405405, + "grad_norm": 0.8050723075866699, + "learning_rate": 0.0002, + "loss": 1.4454, + "step": 1230 + }, + { + "epoch": 4.1891891891891895, + "grad_norm": 0.7360416054725647, + "learning_rate": 0.0002, + "loss": 1.4129, + "step": 1240 + }, + { + "epoch": 4.222972972972973, + "grad_norm": 0.7947028279304504, + "learning_rate": 0.0002, + "loss": 1.3899, + "step": 1250 + }, + { + "epoch": 4.256756756756757, + "grad_norm": 0.7336545586585999, + "learning_rate": 0.0002, + "loss": 1.4264, + "step": 1260 + }, + { + "epoch": 4.29054054054054, + "grad_norm": 0.7051223516464233, + "learning_rate": 0.0002, + "loss": 1.4047, + "step": 1270 + }, + { + "epoch": 4.324324324324325, + "grad_norm": 0.7939404845237732, + "learning_rate": 0.0002, + "loss": 1.3507, + "step": 1280 + }, + { + "epoch": 4.358108108108108, + "grad_norm": 0.7818657755851746, + "learning_rate": 0.0002, + "loss": 1.387, + "step": 1290 + }, + { + "epoch": 4.391891891891892, + "grad_norm": 0.7490634918212891, + "learning_rate": 0.0002, + "loss": 1.3533, + "step": 1300 + }, + { + "epoch": 4.425675675675675, + "grad_norm": 0.9319770932197571, + "learning_rate": 0.0002, + "loss": 1.3912, + "step": 1310 + }, + { + "epoch": 4.45945945945946, + "grad_norm": 0.7811282873153687, + "learning_rate": 0.0002, + "loss": 1.439, + "step": 1320 + }, + { + "epoch": 4.493243243243243, + "grad_norm": 0.7785378694534302, + "learning_rate": 0.0002, + "loss": 1.3973, + "step": 1330 + }, + { + "epoch": 4.527027027027027, + "grad_norm": 0.8697562217712402, + "learning_rate": 0.0002, + "loss": 1.3931, + "step": 1340 + }, + { + "epoch": 4.5608108108108105, + "grad_norm": 0.7927497625350952, + "learning_rate": 0.0002, + "loss": 1.3846, + "step": 1350 + }, + { + "epoch": 4.594594594594595, + "grad_norm": 0.9746347665786743, + "learning_rate": 0.0002, + "loss": 1.3188, + "step": 1360 + }, + { + "epoch": 4.628378378378378, + "grad_norm": 0.7353375554084778, + "learning_rate": 0.0002, + "loss": 1.4611, + "step": 1370 + }, + { + "epoch": 4.662162162162162, + "grad_norm": 0.8139469027519226, + "learning_rate": 0.0002, + "loss": 1.4067, + "step": 1380 + }, + { + "epoch": 4.695945945945946, + "grad_norm": 1.728020429611206, + "learning_rate": 0.0002, + "loss": 1.3727, + "step": 1390 + }, + { + "epoch": 4.72972972972973, + "grad_norm": 0.8249040246009827, + "learning_rate": 0.0002, + "loss": 1.3971, + "step": 1400 + }, + { + "epoch": 4.763513513513513, + "grad_norm": 0.7916110157966614, + "learning_rate": 0.0002, + "loss": 1.4238, + "step": 1410 + }, + { + "epoch": 4.797297297297297, + "grad_norm": 0.7286198735237122, + "learning_rate": 0.0002, + "loss": 1.4064, + "step": 1420 + }, + { + "epoch": 4.831081081081081, + "grad_norm": 0.7969672083854675, + "learning_rate": 0.0002, + "loss": 1.305, + "step": 1430 + }, + { + "epoch": 4.864864864864865, + "grad_norm": 0.9593119621276855, + "learning_rate": 0.0002, + "loss": 1.4109, + "step": 1440 + }, + { + "epoch": 4.898648648648649, + "grad_norm": 0.8609084486961365, + "learning_rate": 0.0002, + "loss": 1.4112, + "step": 1450 + }, + { + "epoch": 4.9324324324324325, + "grad_norm": 0.705203115940094, + "learning_rate": 0.0002, + "loss": 1.3126, + "step": 1460 + }, + { + "epoch": 4.966216216216216, + "grad_norm": 0.9503173232078552, + "learning_rate": 0.0002, + "loss": 1.4226, + "step": 1470 + }, + { + "epoch": 5.0, + "grad_norm": 0.7174800038337708, + "learning_rate": 0.0002, + "loss": 1.4457, + "step": 1480 + }, + { + "epoch": 5.0, + "eval_loss": 1.9753261804580688, + "eval_runtime": 70.4622, + "eval_samples_per_second": 7.309, + "eval_steps_per_second": 0.922, + "step": 1480 + }, + { + "epoch": 5.033783783783784, + "grad_norm": 1.450723648071289, + "learning_rate": 0.0002, + "loss": 1.2986, + "step": 1490 + }, + { + "epoch": 5.0675675675675675, + "grad_norm": 0.9207791686058044, + "learning_rate": 0.0002, + "loss": 1.2184, + "step": 1500 + }, + { + "epoch": 5.101351351351352, + "grad_norm": 1.0742532014846802, + "learning_rate": 0.0002, + "loss": 1.1628, + "step": 1510 + }, + { + "epoch": 5.135135135135135, + "grad_norm": 1.1070902347564697, + "learning_rate": 0.0002, + "loss": 1.2221, + "step": 1520 + }, + { + "epoch": 5.168918918918919, + "grad_norm": 0.9838612079620361, + "learning_rate": 0.0002, + "loss": 1.1737, + "step": 1530 + }, + { + "epoch": 5.202702702702703, + "grad_norm": 0.9286013245582581, + "learning_rate": 0.0002, + "loss": 1.2095, + "step": 1540 + }, + { + "epoch": 5.236486486486487, + "grad_norm": 0.9755229949951172, + "learning_rate": 0.0002, + "loss": 1.2243, + "step": 1550 + }, + { + "epoch": 5.27027027027027, + "grad_norm": 0.9734522104263306, + "learning_rate": 0.0002, + "loss": 1.1729, + "step": 1560 + }, + { + "epoch": 5.304054054054054, + "grad_norm": 1.1838241815567017, + "learning_rate": 0.0002, + "loss": 1.1528, + "step": 1570 + }, + { + "epoch": 5.337837837837838, + "grad_norm": 1.1389052867889404, + "learning_rate": 0.0002, + "loss": 1.2135, + "step": 1580 + }, + { + "epoch": 5.371621621621622, + "grad_norm": 1.2093408107757568, + "learning_rate": 0.0002, + "loss": 1.2486, + "step": 1590 + }, + { + "epoch": 5.405405405405405, + "grad_norm": 0.9418244361877441, + "learning_rate": 0.0002, + "loss": 1.2017, + "step": 1600 + }, + { + "epoch": 5.4391891891891895, + "grad_norm": 0.9843172430992126, + "learning_rate": 0.0002, + "loss": 1.2554, + "step": 1610 + }, + { + "epoch": 5.472972972972973, + "grad_norm": 1.0316557884216309, + "learning_rate": 0.0002, + "loss": 1.2967, + "step": 1620 + }, + { + "epoch": 5.506756756756757, + "grad_norm": 1.0008920431137085, + "learning_rate": 0.0002, + "loss": 1.2509, + "step": 1630 + }, + { + "epoch": 5.54054054054054, + "grad_norm": 1.1854851245880127, + "learning_rate": 0.0002, + "loss": 1.2908, + "step": 1640 + }, + { + "epoch": 5.574324324324325, + "grad_norm": 0.9324101209640503, + "learning_rate": 0.0002, + "loss": 1.2679, + "step": 1650 + }, + { + "epoch": 5.608108108108108, + "grad_norm": 0.993882954120636, + "learning_rate": 0.0002, + "loss": 1.202, + "step": 1660 + }, + { + "epoch": 5.641891891891892, + "grad_norm": 0.8795919418334961, + "learning_rate": 0.0002, + "loss": 1.2498, + "step": 1670 + }, + { + "epoch": 5.675675675675675, + "grad_norm": 1.203471064567566, + "learning_rate": 0.0002, + "loss": 1.2244, + "step": 1680 + }, + { + "epoch": 5.70945945945946, + "grad_norm": 0.916689932346344, + "learning_rate": 0.0002, + "loss": 1.2452, + "step": 1690 + }, + { + "epoch": 5.743243243243243, + "grad_norm": 0.8567600846290588, + "learning_rate": 0.0002, + "loss": 1.1774, + "step": 1700 + }, + { + "epoch": 5.777027027027027, + "grad_norm": 0.9426271319389343, + "learning_rate": 0.0002, + "loss": 1.2585, + "step": 1710 + }, + { + "epoch": 5.8108108108108105, + "grad_norm": 1.0812019109725952, + "learning_rate": 0.0002, + "loss": 1.239, + "step": 1720 + }, + { + "epoch": 5.844594594594595, + "grad_norm": 1.0045292377471924, + "learning_rate": 0.0002, + "loss": 1.1527, + "step": 1730 + }, + { + "epoch": 5.878378378378378, + "grad_norm": 1.0750256776809692, + "learning_rate": 0.0002, + "loss": 1.2949, + "step": 1740 + }, + { + "epoch": 5.912162162162162, + "grad_norm": 1.0471885204315186, + "learning_rate": 0.0002, + "loss": 1.3052, + "step": 1750 + }, + { + "epoch": 5.945945945945946, + "grad_norm": 0.9119327664375305, + "learning_rate": 0.0002, + "loss": 1.2318, + "step": 1760 + }, + { + "epoch": 5.97972972972973, + "grad_norm": 1.0975338220596313, + "learning_rate": 0.0002, + "loss": 1.2652, + "step": 1770 + }, + { + "epoch": 6.0, + "eval_loss": 2.104356527328491, + "eval_runtime": 71.0586, + "eval_samples_per_second": 7.248, + "eval_steps_per_second": 0.915, + "step": 1776 + } + ], + "logging_steps": 10, + "max_steps": 2368, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8.218930337493811e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353b8c3d36532f1ad17da6f41538722c26cdcddf --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-1776/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:385e5da1817ab8ad8dd9d82e0f198663245ef77a2a0bf6cf06d1105171be7411 +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f000b4b5e7f0d4ea9c8289d3471a2d959f115626 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75bdea5989e01471ada637d1c39a801f9c95badfda3739d93b077fb5409ebd75 +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b7dcebecb2b9d44756bc8abb6b980ea151b458e --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a28ef90bd56661b445501754ff087e2e643b1b3be7573906a963a69f0c644fd +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d9b6e6f75af49b3a3f664cce66951e78b3ea1f9e --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59ee17289f1216ec9bf1354b97810ae1e47101e5ba614daeb6eb946b07884767 +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0c231b7f07e25a1840f5124a49ff8a47c7b87cd --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0610f8918476b1d351f0177fee60bb0152ca6e8cfd14e3c3690fa7ee6815191 +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8a02ee4b2f75681eb2658642bac580deab367db7 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/trainer_state.json @@ -0,0 +1,1538 @@ +{ + "best_metric": 1.83539617061615, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592", + "epoch": 7.0, + "eval_steps": 10, + "global_step": 2072, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.033783783783783786, + "grad_norm": 0.5820087194442749, + "learning_rate": 0.0002, + "loss": 2.6467, + "step": 10 + }, + { + "epoch": 0.06756756756756757, + "grad_norm": 0.4625075161457062, + "learning_rate": 0.0002, + "loss": 2.2808, + "step": 20 + }, + { + "epoch": 0.10135135135135136, + "grad_norm": 0.46946242451667786, + "learning_rate": 0.0002, + "loss": 2.0116, + "step": 30 + }, + { + "epoch": 0.13513513513513514, + "grad_norm": 0.5181305408477783, + "learning_rate": 0.0002, + "loss": 1.9089, + "step": 40 + }, + { + "epoch": 0.16891891891891891, + "grad_norm": 0.7439630627632141, + "learning_rate": 0.0002, + "loss": 1.9232, + "step": 50 + }, + { + "epoch": 0.20270270270270271, + "grad_norm": 0.5144319534301758, + "learning_rate": 0.0002, + "loss": 1.9646, + "step": 60 + }, + { + "epoch": 0.23648648648648649, + "grad_norm": 0.46696192026138306, + "learning_rate": 0.0002, + "loss": 1.921, + "step": 70 + }, + { + "epoch": 0.2702702702702703, + "grad_norm": 0.4330582022666931, + "learning_rate": 0.0002, + "loss": 1.8794, + "step": 80 + }, + { + "epoch": 0.30405405405405406, + "grad_norm": 0.502414882183075, + "learning_rate": 0.0002, + "loss": 1.8897, + "step": 90 + }, + { + "epoch": 0.33783783783783783, + "grad_norm": 0.4174366295337677, + "learning_rate": 0.0002, + "loss": 1.8166, + "step": 100 + }, + { + "epoch": 0.3716216216216216, + "grad_norm": 0.4296933710575104, + "learning_rate": 0.0002, + "loss": 1.8264, + "step": 110 + }, + { + "epoch": 0.40540540540540543, + "grad_norm": 0.4299834668636322, + "learning_rate": 0.0002, + "loss": 1.9223, + "step": 120 + }, + { + "epoch": 0.4391891891891892, + "grad_norm": 0.5583795309066772, + "learning_rate": 0.0002, + "loss": 1.8708, + "step": 130 + }, + { + "epoch": 0.47297297297297297, + "grad_norm": 0.5205192565917969, + "learning_rate": 0.0002, + "loss": 1.7786, + "step": 140 + }, + { + "epoch": 0.5067567567567568, + "grad_norm": 0.4683739244937897, + "learning_rate": 0.0002, + "loss": 1.8293, + "step": 150 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 0.497546523809433, + "learning_rate": 0.0002, + "loss": 1.9102, + "step": 160 + }, + { + "epoch": 0.5743243243243243, + "grad_norm": 0.40443721413612366, + "learning_rate": 0.0002, + "loss": 1.8077, + "step": 170 + }, + { + "epoch": 0.6081081081081081, + "grad_norm": 0.39056605100631714, + "learning_rate": 0.0002, + "loss": 1.8446, + "step": 180 + }, + { + "epoch": 0.6418918918918919, + "grad_norm": 0.42397141456604004, + "learning_rate": 0.0002, + "loss": 1.8747, + "step": 190 + }, + { + "epoch": 0.6756756756756757, + "grad_norm": 0.4679499566555023, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 200 + }, + { + "epoch": 0.7094594594594594, + "grad_norm": 0.39300158619880676, + "learning_rate": 0.0002, + "loss": 1.8401, + "step": 210 + }, + { + "epoch": 0.7432432432432432, + "grad_norm": 0.4001515805721283, + "learning_rate": 0.0002, + "loss": 1.8345, + "step": 220 + }, + { + "epoch": 0.777027027027027, + "grad_norm": 0.4094320833683014, + "learning_rate": 0.0002, + "loss": 1.7571, + "step": 230 + }, + { + "epoch": 0.8108108108108109, + "grad_norm": 0.37315094470977783, + "learning_rate": 0.0002, + "loss": 1.8972, + "step": 240 + }, + { + "epoch": 0.8445945945945946, + "grad_norm": 0.4331067204475403, + "learning_rate": 0.0002, + "loss": 1.8337, + "step": 250 + }, + { + "epoch": 0.8783783783783784, + "grad_norm": 0.39758574962615967, + "learning_rate": 0.0002, + "loss": 1.8555, + "step": 260 + }, + { + "epoch": 0.9121621621621622, + "grad_norm": 0.38240519165992737, + "learning_rate": 0.0002, + "loss": 1.8624, + "step": 270 + }, + { + "epoch": 0.9459459459459459, + "grad_norm": 0.40907856822013855, + "learning_rate": 0.0002, + "loss": 1.7531, + "step": 280 + }, + { + "epoch": 0.9797297297297297, + "grad_norm": 0.34108003973960876, + "learning_rate": 0.0002, + "loss": 1.8435, + "step": 290 + }, + { + "epoch": 1.0, + "eval_loss": 1.8428829908370972, + "eval_runtime": 62.3963, + "eval_samples_per_second": 8.254, + "eval_steps_per_second": 1.042, + "step": 296 + }, + { + "epoch": 1.0135135135135136, + "grad_norm": 0.3993101716041565, + "learning_rate": 0.0002, + "loss": 1.7254, + "step": 300 + }, + { + "epoch": 1.0472972972972974, + "grad_norm": 0.45567989349365234, + "learning_rate": 0.0002, + "loss": 1.7985, + "step": 310 + }, + { + "epoch": 1.0810810810810811, + "grad_norm": 0.3767794072628021, + "learning_rate": 0.0002, + "loss": 1.757, + "step": 320 + }, + { + "epoch": 1.114864864864865, + "grad_norm": 0.5181908011436462, + "learning_rate": 0.0002, + "loss": 1.7917, + "step": 330 + }, + { + "epoch": 1.1486486486486487, + "grad_norm": 0.4213193356990814, + "learning_rate": 0.0002, + "loss": 1.7723, + "step": 340 + }, + { + "epoch": 1.1824324324324325, + "grad_norm": 0.45519495010375977, + "learning_rate": 0.0002, + "loss": 1.8203, + "step": 350 + }, + { + "epoch": 1.2162162162162162, + "grad_norm": 0.35332638025283813, + "learning_rate": 0.0002, + "loss": 1.6946, + "step": 360 + }, + { + "epoch": 1.25, + "grad_norm": 0.3675481677055359, + "learning_rate": 0.0002, + "loss": 1.7541, + "step": 370 + }, + { + "epoch": 1.2837837837837838, + "grad_norm": 0.4569270610809326, + "learning_rate": 0.0002, + "loss": 1.7458, + "step": 380 + }, + { + "epoch": 1.3175675675675675, + "grad_norm": 0.37950295209884644, + "learning_rate": 0.0002, + "loss": 1.7988, + "step": 390 + }, + { + "epoch": 1.3513513513513513, + "grad_norm": 0.5744572877883911, + "learning_rate": 0.0002, + "loss": 1.7032, + "step": 400 + }, + { + "epoch": 1.385135135135135, + "grad_norm": 0.44380778074264526, + "learning_rate": 0.0002, + "loss": 1.7437, + "step": 410 + }, + { + "epoch": 1.4189189189189189, + "grad_norm": 0.43328171968460083, + "learning_rate": 0.0002, + "loss": 1.7454, + "step": 420 + }, + { + "epoch": 1.4527027027027026, + "grad_norm": 0.41290056705474854, + "learning_rate": 0.0002, + "loss": 1.7636, + "step": 430 + }, + { + "epoch": 1.4864864864864864, + "grad_norm": 0.3771473169326782, + "learning_rate": 0.0002, + "loss": 1.7332, + "step": 440 + }, + { + "epoch": 1.5202702702702702, + "grad_norm": 0.42537811398506165, + "learning_rate": 0.0002, + "loss": 1.7618, + "step": 450 + }, + { + "epoch": 1.554054054054054, + "grad_norm": 0.39705610275268555, + "learning_rate": 0.0002, + "loss": 1.8523, + "step": 460 + }, + { + "epoch": 1.5878378378378377, + "grad_norm": 0.4178248643875122, + "learning_rate": 0.0002, + "loss": 1.7673, + "step": 470 + }, + { + "epoch": 1.6216216216216215, + "grad_norm": 0.39107105135917664, + "learning_rate": 0.0002, + "loss": 1.742, + "step": 480 + }, + { + "epoch": 1.6554054054054053, + "grad_norm": 0.38505619764328003, + "learning_rate": 0.0002, + "loss": 1.6984, + "step": 490 + }, + { + "epoch": 1.689189189189189, + "grad_norm": 0.43590813875198364, + "learning_rate": 0.0002, + "loss": 1.7382, + "step": 500 + }, + { + "epoch": 1.722972972972973, + "grad_norm": 0.42785948514938354, + "learning_rate": 0.0002, + "loss": 1.7139, + "step": 510 + }, + { + "epoch": 1.7567567567567568, + "grad_norm": 0.3829004168510437, + "learning_rate": 0.0002, + "loss": 1.7551, + "step": 520 + }, + { + "epoch": 1.7905405405405406, + "grad_norm": 0.35287904739379883, + "learning_rate": 0.0002, + "loss": 1.7744, + "step": 530 + }, + { + "epoch": 1.8243243243243243, + "grad_norm": 0.38657888770103455, + "learning_rate": 0.0002, + "loss": 1.7714, + "step": 540 + }, + { + "epoch": 1.8581081081081081, + "grad_norm": 0.41452157497406006, + "learning_rate": 0.0002, + "loss": 1.7535, + "step": 550 + }, + { + "epoch": 1.8918918918918919, + "grad_norm": 0.3898279070854187, + "learning_rate": 0.0002, + "loss": 1.7627, + "step": 560 + }, + { + "epoch": 1.9256756756756757, + "grad_norm": 0.4147624373435974, + "learning_rate": 0.0002, + "loss": 1.7494, + "step": 570 + }, + { + "epoch": 1.9594594594594594, + "grad_norm": 0.4374851584434509, + "learning_rate": 0.0002, + "loss": 1.7693, + "step": 580 + }, + { + "epoch": 1.9932432432432432, + "grad_norm": 0.48530328273773193, + "learning_rate": 0.0002, + "loss": 1.7796, + "step": 590 + }, + { + "epoch": 2.0, + "eval_loss": 1.83539617061615, + "eval_runtime": 70.9865, + "eval_samples_per_second": 7.255, + "eval_steps_per_second": 0.916, + "step": 592 + }, + { + "epoch": 2.027027027027027, + "grad_norm": 0.40344223380088806, + "learning_rate": 0.0002, + "loss": 1.7294, + "step": 600 + }, + { + "epoch": 2.060810810810811, + "grad_norm": 0.48268747329711914, + "learning_rate": 0.0002, + "loss": 1.6693, + "step": 610 + }, + { + "epoch": 2.0945945945945947, + "grad_norm": 0.4675706923007965, + "learning_rate": 0.0002, + "loss": 1.6315, + "step": 620 + }, + { + "epoch": 2.1283783783783785, + "grad_norm": 0.47494322061538696, + "learning_rate": 0.0002, + "loss": 1.6627, + "step": 630 + }, + { + "epoch": 2.1621621621621623, + "grad_norm": 0.4555308520793915, + "learning_rate": 0.0002, + "loss": 1.5668, + "step": 640 + }, + { + "epoch": 2.195945945945946, + "grad_norm": 0.43085595965385437, + "learning_rate": 0.0002, + "loss": 1.6537, + "step": 650 + }, + { + "epoch": 2.22972972972973, + "grad_norm": 0.4364128112792969, + "learning_rate": 0.0002, + "loss": 1.6316, + "step": 660 + }, + { + "epoch": 2.2635135135135136, + "grad_norm": 0.4711395800113678, + "learning_rate": 0.0002, + "loss": 1.669, + "step": 670 + }, + { + "epoch": 2.2972972972972974, + "grad_norm": 0.5109705328941345, + "learning_rate": 0.0002, + "loss": 1.5758, + "step": 680 + }, + { + "epoch": 2.331081081081081, + "grad_norm": 0.5185648798942566, + "learning_rate": 0.0002, + "loss": 1.5912, + "step": 690 + }, + { + "epoch": 2.364864864864865, + "grad_norm": 0.49192842841148376, + "learning_rate": 0.0002, + "loss": 1.6605, + "step": 700 + }, + { + "epoch": 2.3986486486486487, + "grad_norm": 0.5619909763336182, + "learning_rate": 0.0002, + "loss": 1.6688, + "step": 710 + }, + { + "epoch": 2.4324324324324325, + "grad_norm": 0.4932861328125, + "learning_rate": 0.0002, + "loss": 1.7836, + "step": 720 + }, + { + "epoch": 2.4662162162162162, + "grad_norm": 0.5211932063102722, + "learning_rate": 0.0002, + "loss": 1.6532, + "step": 730 + }, + { + "epoch": 2.5, + "grad_norm": 0.4138050377368927, + "learning_rate": 0.0002, + "loss": 1.667, + "step": 740 + }, + { + "epoch": 2.5337837837837838, + "grad_norm": 0.4644908010959625, + "learning_rate": 0.0002, + "loss": 1.658, + "step": 750 + }, + { + "epoch": 2.5675675675675675, + "grad_norm": 0.4513227641582489, + "learning_rate": 0.0002, + "loss": 1.6451, + "step": 760 + }, + { + "epoch": 2.6013513513513513, + "grad_norm": 0.4735109508037567, + "learning_rate": 0.0002, + "loss": 1.7071, + "step": 770 + }, + { + "epoch": 2.635135135135135, + "grad_norm": 0.5453559756278992, + "learning_rate": 0.0002, + "loss": 1.6659, + "step": 780 + }, + { + "epoch": 2.668918918918919, + "grad_norm": 0.5422565937042236, + "learning_rate": 0.0002, + "loss": 1.7211, + "step": 790 + }, + { + "epoch": 2.7027027027027026, + "grad_norm": 0.4288518726825714, + "learning_rate": 0.0002, + "loss": 1.6623, + "step": 800 + }, + { + "epoch": 2.7364864864864864, + "grad_norm": 0.4085204005241394, + "learning_rate": 0.0002, + "loss": 1.7197, + "step": 810 + }, + { + "epoch": 2.77027027027027, + "grad_norm": 0.49770182371139526, + "learning_rate": 0.0002, + "loss": 1.6376, + "step": 820 + }, + { + "epoch": 2.804054054054054, + "grad_norm": 0.5005106329917908, + "learning_rate": 0.0002, + "loss": 1.6332, + "step": 830 + }, + { + "epoch": 2.8378378378378377, + "grad_norm": 0.4763440489768982, + "learning_rate": 0.0002, + "loss": 1.6675, + "step": 840 + }, + { + "epoch": 2.8716216216216215, + "grad_norm": 0.44995108246803284, + "learning_rate": 0.0002, + "loss": 1.7149, + "step": 850 + }, + { + "epoch": 2.9054054054054053, + "grad_norm": 0.5299676656723022, + "learning_rate": 0.0002, + "loss": 1.6438, + "step": 860 + }, + { + "epoch": 2.939189189189189, + "grad_norm": 0.49627119302749634, + "learning_rate": 0.0002, + "loss": 1.6457, + "step": 870 + }, + { + "epoch": 2.972972972972973, + "grad_norm": 0.502545177936554, + "learning_rate": 0.0002, + "loss": 1.6517, + "step": 880 + }, + { + "epoch": 3.0, + "eval_loss": 1.8520468473434448, + "eval_runtime": 70.9917, + "eval_samples_per_second": 7.254, + "eval_steps_per_second": 0.916, + "step": 888 + }, + { + "epoch": 3.0067567567567566, + "grad_norm": 0.4756380319595337, + "learning_rate": 0.0002, + "loss": 1.6271, + "step": 890 + }, + { + "epoch": 3.0405405405405403, + "grad_norm": 0.5167421102523804, + "learning_rate": 0.0002, + "loss": 1.563, + "step": 900 + }, + { + "epoch": 3.074324324324324, + "grad_norm": 0.5524939298629761, + "learning_rate": 0.0002, + "loss": 1.48, + "step": 910 + }, + { + "epoch": 3.108108108108108, + "grad_norm": 0.7045221924781799, + "learning_rate": 0.0002, + "loss": 1.5297, + "step": 920 + }, + { + "epoch": 3.141891891891892, + "grad_norm": 0.5692355036735535, + "learning_rate": 0.0002, + "loss": 1.5548, + "step": 930 + }, + { + "epoch": 3.175675675675676, + "grad_norm": 0.5467017292976379, + "learning_rate": 0.0002, + "loss": 1.5297, + "step": 940 + }, + { + "epoch": 3.2094594594594597, + "grad_norm": 0.6004040241241455, + "learning_rate": 0.0002, + "loss": 1.5559, + "step": 950 + }, + { + "epoch": 3.2432432432432434, + "grad_norm": 0.5713295936584473, + "learning_rate": 0.0002, + "loss": 1.5255, + "step": 960 + }, + { + "epoch": 3.277027027027027, + "grad_norm": 0.6054869890213013, + "learning_rate": 0.0002, + "loss": 1.5412, + "step": 970 + }, + { + "epoch": 3.310810810810811, + "grad_norm": 0.6304576992988586, + "learning_rate": 0.0002, + "loss": 1.5167, + "step": 980 + }, + { + "epoch": 3.3445945945945947, + "grad_norm": 0.5347281694412231, + "learning_rate": 0.0002, + "loss": 1.52, + "step": 990 + }, + { + "epoch": 3.3783783783783785, + "grad_norm": 0.598211407661438, + "learning_rate": 0.0002, + "loss": 1.5707, + "step": 1000 + }, + { + "epoch": 3.4121621621621623, + "grad_norm": 0.637312650680542, + "learning_rate": 0.0002, + "loss": 1.5243, + "step": 1010 + }, + { + "epoch": 3.445945945945946, + "grad_norm": 0.6092430949211121, + "learning_rate": 0.0002, + "loss": 1.5356, + "step": 1020 + }, + { + "epoch": 3.47972972972973, + "grad_norm": 0.6421037912368774, + "learning_rate": 0.0002, + "loss": 1.5856, + "step": 1030 + }, + { + "epoch": 3.5135135135135136, + "grad_norm": 0.6712167263031006, + "learning_rate": 0.0002, + "loss": 1.5553, + "step": 1040 + }, + { + "epoch": 3.5472972972972974, + "grad_norm": 0.6466017365455627, + "learning_rate": 0.0002, + "loss": 1.4708, + "step": 1050 + }, + { + "epoch": 3.581081081081081, + "grad_norm": 1.418167233467102, + "learning_rate": 0.0002, + "loss": 1.5159, + "step": 1060 + }, + { + "epoch": 3.614864864864865, + "grad_norm": 0.6092377305030823, + "learning_rate": 0.0002, + "loss": 1.5264, + "step": 1070 + }, + { + "epoch": 3.6486486486486487, + "grad_norm": 0.5632478594779968, + "learning_rate": 0.0002, + "loss": 1.5227, + "step": 1080 + }, + { + "epoch": 3.6824324324324325, + "grad_norm": 0.6007736921310425, + "learning_rate": 0.0002, + "loss": 1.5492, + "step": 1090 + }, + { + "epoch": 3.7162162162162162, + "grad_norm": 0.6031264066696167, + "learning_rate": 0.0002, + "loss": 1.5002, + "step": 1100 + }, + { + "epoch": 3.75, + "grad_norm": 0.5440598726272583, + "learning_rate": 0.0002, + "loss": 1.4797, + "step": 1110 + }, + { + "epoch": 3.7837837837837838, + "grad_norm": 0.6304370760917664, + "learning_rate": 0.0002, + "loss": 1.5743, + "step": 1120 + }, + { + "epoch": 3.8175675675675675, + "grad_norm": 0.6729280948638916, + "learning_rate": 0.0002, + "loss": 1.6429, + "step": 1130 + }, + { + "epoch": 3.8513513513513513, + "grad_norm": 0.5881901979446411, + "learning_rate": 0.0002, + "loss": 1.594, + "step": 1140 + }, + { + "epoch": 3.885135135135135, + "grad_norm": 0.5508038997650146, + "learning_rate": 0.0002, + "loss": 1.5008, + "step": 1150 + }, + { + "epoch": 3.918918918918919, + "grad_norm": 0.5926295518875122, + "learning_rate": 0.0002, + "loss": 1.5045, + "step": 1160 + }, + { + "epoch": 3.9527027027027026, + "grad_norm": 0.5882043838500977, + "learning_rate": 0.0002, + "loss": 1.5223, + "step": 1170 + }, + { + "epoch": 3.9864864864864864, + "grad_norm": 0.604119598865509, + "learning_rate": 0.0002, + "loss": 1.5874, + "step": 1180 + }, + { + "epoch": 4.0, + "eval_loss": 1.8923152685165405, + "eval_runtime": 62.9577, + "eval_samples_per_second": 8.18, + "eval_steps_per_second": 1.032, + "step": 1184 + }, + { + "epoch": 4.02027027027027, + "grad_norm": 0.696061909198761, + "learning_rate": 0.0002, + "loss": 1.4183, + "step": 1190 + }, + { + "epoch": 4.054054054054054, + "grad_norm": 0.751200258731842, + "learning_rate": 0.0002, + "loss": 1.3455, + "step": 1200 + }, + { + "epoch": 4.087837837837838, + "grad_norm": 0.9667422771453857, + "learning_rate": 0.0002, + "loss": 1.35, + "step": 1210 + }, + { + "epoch": 4.121621621621622, + "grad_norm": 0.7374204397201538, + "learning_rate": 0.0002, + "loss": 1.4058, + "step": 1220 + }, + { + "epoch": 4.155405405405405, + "grad_norm": 0.8050723075866699, + "learning_rate": 0.0002, + "loss": 1.4454, + "step": 1230 + }, + { + "epoch": 4.1891891891891895, + "grad_norm": 0.7360416054725647, + "learning_rate": 0.0002, + "loss": 1.4129, + "step": 1240 + }, + { + "epoch": 4.222972972972973, + "grad_norm": 0.7947028279304504, + "learning_rate": 0.0002, + "loss": 1.3899, + "step": 1250 + }, + { + "epoch": 4.256756756756757, + "grad_norm": 0.7336545586585999, + "learning_rate": 0.0002, + "loss": 1.4264, + "step": 1260 + }, + { + "epoch": 4.29054054054054, + "grad_norm": 0.7051223516464233, + "learning_rate": 0.0002, + "loss": 1.4047, + "step": 1270 + }, + { + "epoch": 4.324324324324325, + "grad_norm": 0.7939404845237732, + "learning_rate": 0.0002, + "loss": 1.3507, + "step": 1280 + }, + { + "epoch": 4.358108108108108, + "grad_norm": 0.7818657755851746, + "learning_rate": 0.0002, + "loss": 1.387, + "step": 1290 + }, + { + "epoch": 4.391891891891892, + "grad_norm": 0.7490634918212891, + "learning_rate": 0.0002, + "loss": 1.3533, + "step": 1300 + }, + { + "epoch": 4.425675675675675, + "grad_norm": 0.9319770932197571, + "learning_rate": 0.0002, + "loss": 1.3912, + "step": 1310 + }, + { + "epoch": 4.45945945945946, + "grad_norm": 0.7811282873153687, + "learning_rate": 0.0002, + "loss": 1.439, + "step": 1320 + }, + { + "epoch": 4.493243243243243, + "grad_norm": 0.7785378694534302, + "learning_rate": 0.0002, + "loss": 1.3973, + "step": 1330 + }, + { + "epoch": 4.527027027027027, + "grad_norm": 0.8697562217712402, + "learning_rate": 0.0002, + "loss": 1.3931, + "step": 1340 + }, + { + "epoch": 4.5608108108108105, + "grad_norm": 0.7927497625350952, + "learning_rate": 0.0002, + "loss": 1.3846, + "step": 1350 + }, + { + "epoch": 4.594594594594595, + "grad_norm": 0.9746347665786743, + "learning_rate": 0.0002, + "loss": 1.3188, + "step": 1360 + }, + { + "epoch": 4.628378378378378, + "grad_norm": 0.7353375554084778, + "learning_rate": 0.0002, + "loss": 1.4611, + "step": 1370 + }, + { + "epoch": 4.662162162162162, + "grad_norm": 0.8139469027519226, + "learning_rate": 0.0002, + "loss": 1.4067, + "step": 1380 + }, + { + "epoch": 4.695945945945946, + "grad_norm": 1.728020429611206, + "learning_rate": 0.0002, + "loss": 1.3727, + "step": 1390 + }, + { + "epoch": 4.72972972972973, + "grad_norm": 0.8249040246009827, + "learning_rate": 0.0002, + "loss": 1.3971, + "step": 1400 + }, + { + "epoch": 4.763513513513513, + "grad_norm": 0.7916110157966614, + "learning_rate": 0.0002, + "loss": 1.4238, + "step": 1410 + }, + { + "epoch": 4.797297297297297, + "grad_norm": 0.7286198735237122, + "learning_rate": 0.0002, + "loss": 1.4064, + "step": 1420 + }, + { + "epoch": 4.831081081081081, + "grad_norm": 0.7969672083854675, + "learning_rate": 0.0002, + "loss": 1.305, + "step": 1430 + }, + { + "epoch": 4.864864864864865, + "grad_norm": 0.9593119621276855, + "learning_rate": 0.0002, + "loss": 1.4109, + "step": 1440 + }, + { + "epoch": 4.898648648648649, + "grad_norm": 0.8609084486961365, + "learning_rate": 0.0002, + "loss": 1.4112, + "step": 1450 + }, + { + "epoch": 4.9324324324324325, + "grad_norm": 0.705203115940094, + "learning_rate": 0.0002, + "loss": 1.3126, + "step": 1460 + }, + { + "epoch": 4.966216216216216, + "grad_norm": 0.9503173232078552, + "learning_rate": 0.0002, + "loss": 1.4226, + "step": 1470 + }, + { + "epoch": 5.0, + "grad_norm": 0.7174800038337708, + "learning_rate": 0.0002, + "loss": 1.4457, + "step": 1480 + }, + { + "epoch": 5.0, + "eval_loss": 1.9753261804580688, + "eval_runtime": 70.4622, + "eval_samples_per_second": 7.309, + "eval_steps_per_second": 0.922, + "step": 1480 + }, + { + "epoch": 5.033783783783784, + "grad_norm": 1.450723648071289, + "learning_rate": 0.0002, + "loss": 1.2986, + "step": 1490 + }, + { + "epoch": 5.0675675675675675, + "grad_norm": 0.9207791686058044, + "learning_rate": 0.0002, + "loss": 1.2184, + "step": 1500 + }, + { + "epoch": 5.101351351351352, + "grad_norm": 1.0742532014846802, + "learning_rate": 0.0002, + "loss": 1.1628, + "step": 1510 + }, + { + "epoch": 5.135135135135135, + "grad_norm": 1.1070902347564697, + "learning_rate": 0.0002, + "loss": 1.2221, + "step": 1520 + }, + { + "epoch": 5.168918918918919, + "grad_norm": 0.9838612079620361, + "learning_rate": 0.0002, + "loss": 1.1737, + "step": 1530 + }, + { + "epoch": 5.202702702702703, + "grad_norm": 0.9286013245582581, + "learning_rate": 0.0002, + "loss": 1.2095, + "step": 1540 + }, + { + "epoch": 5.236486486486487, + "grad_norm": 0.9755229949951172, + "learning_rate": 0.0002, + "loss": 1.2243, + "step": 1550 + }, + { + "epoch": 5.27027027027027, + "grad_norm": 0.9734522104263306, + "learning_rate": 0.0002, + "loss": 1.1729, + "step": 1560 + }, + { + "epoch": 5.304054054054054, + "grad_norm": 1.1838241815567017, + "learning_rate": 0.0002, + "loss": 1.1528, + "step": 1570 + }, + { + "epoch": 5.337837837837838, + "grad_norm": 1.1389052867889404, + "learning_rate": 0.0002, + "loss": 1.2135, + "step": 1580 + }, + { + "epoch": 5.371621621621622, + "grad_norm": 1.2093408107757568, + "learning_rate": 0.0002, + "loss": 1.2486, + "step": 1590 + }, + { + "epoch": 5.405405405405405, + "grad_norm": 0.9418244361877441, + "learning_rate": 0.0002, + "loss": 1.2017, + "step": 1600 + }, + { + "epoch": 5.4391891891891895, + "grad_norm": 0.9843172430992126, + "learning_rate": 0.0002, + "loss": 1.2554, + "step": 1610 + }, + { + "epoch": 5.472972972972973, + "grad_norm": 1.0316557884216309, + "learning_rate": 0.0002, + "loss": 1.2967, + "step": 1620 + }, + { + "epoch": 5.506756756756757, + "grad_norm": 1.0008920431137085, + "learning_rate": 0.0002, + "loss": 1.2509, + "step": 1630 + }, + { + "epoch": 5.54054054054054, + "grad_norm": 1.1854851245880127, + "learning_rate": 0.0002, + "loss": 1.2908, + "step": 1640 + }, + { + "epoch": 5.574324324324325, + "grad_norm": 0.9324101209640503, + "learning_rate": 0.0002, + "loss": 1.2679, + "step": 1650 + }, + { + "epoch": 5.608108108108108, + "grad_norm": 0.993882954120636, + "learning_rate": 0.0002, + "loss": 1.202, + "step": 1660 + }, + { + "epoch": 5.641891891891892, + "grad_norm": 0.8795919418334961, + "learning_rate": 0.0002, + "loss": 1.2498, + "step": 1670 + }, + { + "epoch": 5.675675675675675, + "grad_norm": 1.203471064567566, + "learning_rate": 0.0002, + "loss": 1.2244, + "step": 1680 + }, + { + "epoch": 5.70945945945946, + "grad_norm": 0.916689932346344, + "learning_rate": 0.0002, + "loss": 1.2452, + "step": 1690 + }, + { + "epoch": 5.743243243243243, + "grad_norm": 0.8567600846290588, + "learning_rate": 0.0002, + "loss": 1.1774, + "step": 1700 + }, + { + "epoch": 5.777027027027027, + "grad_norm": 0.9426271319389343, + "learning_rate": 0.0002, + "loss": 1.2585, + "step": 1710 + }, + { + "epoch": 5.8108108108108105, + "grad_norm": 1.0812019109725952, + "learning_rate": 0.0002, + "loss": 1.239, + "step": 1720 + }, + { + "epoch": 5.844594594594595, + "grad_norm": 1.0045292377471924, + "learning_rate": 0.0002, + "loss": 1.1527, + "step": 1730 + }, + { + "epoch": 5.878378378378378, + "grad_norm": 1.0750256776809692, + "learning_rate": 0.0002, + "loss": 1.2949, + "step": 1740 + }, + { + "epoch": 5.912162162162162, + "grad_norm": 1.0471885204315186, + "learning_rate": 0.0002, + "loss": 1.3052, + "step": 1750 + }, + { + "epoch": 5.945945945945946, + "grad_norm": 0.9119327664375305, + "learning_rate": 0.0002, + "loss": 1.2318, + "step": 1760 + }, + { + "epoch": 5.97972972972973, + "grad_norm": 1.0975338220596313, + "learning_rate": 0.0002, + "loss": 1.2652, + "step": 1770 + }, + { + "epoch": 6.0, + "eval_loss": 2.104356527328491, + "eval_runtime": 71.0586, + "eval_samples_per_second": 7.248, + "eval_steps_per_second": 0.915, + "step": 1776 + }, + { + "epoch": 6.013513513513513, + "grad_norm": 0.9404756426811218, + "learning_rate": 0.0002, + "loss": 1.1342, + "step": 1780 + }, + { + "epoch": 6.047297297297297, + "grad_norm": 1.3757696151733398, + "learning_rate": 0.0002, + "loss": 0.9892, + "step": 1790 + }, + { + "epoch": 6.081081081081081, + "grad_norm": 1.5798641443252563, + "learning_rate": 0.0002, + "loss": 1.0826, + "step": 1800 + }, + { + "epoch": 6.114864864864865, + "grad_norm": 1.3777581453323364, + "learning_rate": 0.0002, + "loss": 0.9929, + "step": 1810 + }, + { + "epoch": 6.148648648648648, + "grad_norm": 1.136362910270691, + "learning_rate": 0.0002, + "loss": 1.0257, + "step": 1820 + }, + { + "epoch": 6.1824324324324325, + "grad_norm": 1.3719290494918823, + "learning_rate": 0.0002, + "loss": 1.0623, + "step": 1830 + }, + { + "epoch": 6.216216216216216, + "grad_norm": 1.375697374343872, + "learning_rate": 0.0002, + "loss": 1.0144, + "step": 1840 + }, + { + "epoch": 6.25, + "grad_norm": 1.3208998441696167, + "learning_rate": 0.0002, + "loss": 1.0307, + "step": 1850 + }, + { + "epoch": 6.283783783783784, + "grad_norm": 1.3176994323730469, + "learning_rate": 0.0002, + "loss": 1.0298, + "step": 1860 + }, + { + "epoch": 6.3175675675675675, + "grad_norm": 1.3333075046539307, + "learning_rate": 0.0002, + "loss": 1.0809, + "step": 1870 + }, + { + "epoch": 6.351351351351352, + "grad_norm": 1.6315182447433472, + "learning_rate": 0.0002, + "loss": 1.1059, + "step": 1880 + }, + { + "epoch": 6.385135135135135, + "grad_norm": 1.1802350282669067, + "learning_rate": 0.0002, + "loss": 1.0139, + "step": 1890 + }, + { + "epoch": 6.418918918918919, + "grad_norm": 1.0628817081451416, + "learning_rate": 0.0002, + "loss": 1.07, + "step": 1900 + }, + { + "epoch": 6.452702702702703, + "grad_norm": 1.3136482238769531, + "learning_rate": 0.0002, + "loss": 1.0916, + "step": 1910 + }, + { + "epoch": 6.486486486486487, + "grad_norm": 1.4804624319076538, + "learning_rate": 0.0002, + "loss": 1.0453, + "step": 1920 + }, + { + "epoch": 6.52027027027027, + "grad_norm": 1.1129399538040161, + "learning_rate": 0.0002, + "loss": 1.1146, + "step": 1930 + }, + { + "epoch": 6.554054054054054, + "grad_norm": 1.324576497077942, + "learning_rate": 0.0002, + "loss": 1.0645, + "step": 1940 + }, + { + "epoch": 6.587837837837838, + "grad_norm": 1.3321561813354492, + "learning_rate": 0.0002, + "loss": 1.0534, + "step": 1950 + }, + { + "epoch": 6.621621621621622, + "grad_norm": 1.2377620935440063, + "learning_rate": 0.0002, + "loss": 1.027, + "step": 1960 + }, + { + "epoch": 6.655405405405405, + "grad_norm": 1.1174288988113403, + "learning_rate": 0.0002, + "loss": 1.0144, + "step": 1970 + }, + { + "epoch": 6.6891891891891895, + "grad_norm": 1.2291412353515625, + "learning_rate": 0.0002, + "loss": 1.1074, + "step": 1980 + }, + { + "epoch": 6.722972972972973, + "grad_norm": 1.2079328298568726, + "learning_rate": 0.0002, + "loss": 1.101, + "step": 1990 + }, + { + "epoch": 6.756756756756757, + "grad_norm": 1.125183343887329, + "learning_rate": 0.0002, + "loss": 1.1603, + "step": 2000 + }, + { + "epoch": 6.79054054054054, + "grad_norm": 1.1737638711929321, + "learning_rate": 0.0002, + "loss": 1.008, + "step": 2010 + }, + { + "epoch": 6.824324324324325, + "grad_norm": 1.3917324542999268, + "learning_rate": 0.0002, + "loss": 1.1211, + "step": 2020 + }, + { + "epoch": 6.858108108108108, + "grad_norm": 1.1063282489776611, + "learning_rate": 0.0002, + "loss": 1.1436, + "step": 2030 + }, + { + "epoch": 6.891891891891892, + "grad_norm": 1.2951769828796387, + "learning_rate": 0.0002, + "loss": 1.0888, + "step": 2040 + }, + { + "epoch": 6.925675675675675, + "grad_norm": 1.2272734642028809, + "learning_rate": 0.0002, + "loss": 1.153, + "step": 2050 + }, + { + "epoch": 6.95945945945946, + "grad_norm": 1.5298433303833008, + "learning_rate": 0.0002, + "loss": 1.1156, + "step": 2060 + }, + { + "epoch": 6.993243243243243, + "grad_norm": 1.1478265523910522, + "learning_rate": 0.0002, + "loss": 1.1214, + "step": 2070 + }, + { + "epoch": 7.0, + "eval_loss": 2.238886594772339, + "eval_runtime": 62.8821, + "eval_samples_per_second": 8.19, + "eval_steps_per_second": 1.034, + "step": 2072 + } + ], + "logging_steps": 10, + "max_steps": 2368, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 9.588752060409446e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353b8c3d36532f1ad17da6f41538722c26cdcddf --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2072/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:385e5da1817ab8ad8dd9d82e0f198663245ef77a2a0bf6cf06d1105171be7411 +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6011bc3fc6bfcd608285a3a7de0ffb9a3ada6263 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:708eaeb3f5942f8e9e3d82fb548f2777483fb39f3e0a2276156e7a9c512c5fbd +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a317d53ee810115b933ad49de47d01c4ffb3f592 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab25170e22dc72e46aac358dce5161d250e28bbaa73f7c45d62b662032c14d46 +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1991be6f7e481c0265453663e6a66edb5145ba59 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:390c4b62d183660743f8fbf4bc57e1ab0acbb9a235ebb6836e364214f347c312 +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2410aaf1e9ce33ab5a2e5af6bd7cce5dfe90c8ac --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c7cb725423554e22087f3d3ffc6c2cc1410e42a4b38f40fba21230b96dc4db9 +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..93f706541927097b1f211280f5330458d1ab630d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/trainer_state.json @@ -0,0 +1,1749 @@ +{ + "best_metric": 1.83539617061615, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592", + "epoch": 8.0, + "eval_steps": 10, + "global_step": 2368, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.033783783783783786, + "grad_norm": 0.5820087194442749, + "learning_rate": 0.0002, + "loss": 2.6467, + "step": 10 + }, + { + "epoch": 0.06756756756756757, + "grad_norm": 0.4625075161457062, + "learning_rate": 0.0002, + "loss": 2.2808, + "step": 20 + }, + { + "epoch": 0.10135135135135136, + "grad_norm": 0.46946242451667786, + "learning_rate": 0.0002, + "loss": 2.0116, + "step": 30 + }, + { + "epoch": 0.13513513513513514, + "grad_norm": 0.5181305408477783, + "learning_rate": 0.0002, + "loss": 1.9089, + "step": 40 + }, + { + "epoch": 0.16891891891891891, + "grad_norm": 0.7439630627632141, + "learning_rate": 0.0002, + "loss": 1.9232, + "step": 50 + }, + { + "epoch": 0.20270270270270271, + "grad_norm": 0.5144319534301758, + "learning_rate": 0.0002, + "loss": 1.9646, + "step": 60 + }, + { + "epoch": 0.23648648648648649, + "grad_norm": 0.46696192026138306, + "learning_rate": 0.0002, + "loss": 1.921, + "step": 70 + }, + { + "epoch": 0.2702702702702703, + "grad_norm": 0.4330582022666931, + "learning_rate": 0.0002, + "loss": 1.8794, + "step": 80 + }, + { + "epoch": 0.30405405405405406, + "grad_norm": 0.502414882183075, + "learning_rate": 0.0002, + "loss": 1.8897, + "step": 90 + }, + { + "epoch": 0.33783783783783783, + "grad_norm": 0.4174366295337677, + "learning_rate": 0.0002, + "loss": 1.8166, + "step": 100 + }, + { + "epoch": 0.3716216216216216, + "grad_norm": 0.4296933710575104, + "learning_rate": 0.0002, + "loss": 1.8264, + "step": 110 + }, + { + "epoch": 0.40540540540540543, + "grad_norm": 0.4299834668636322, + "learning_rate": 0.0002, + "loss": 1.9223, + "step": 120 + }, + { + "epoch": 0.4391891891891892, + "grad_norm": 0.5583795309066772, + "learning_rate": 0.0002, + "loss": 1.8708, + "step": 130 + }, + { + "epoch": 0.47297297297297297, + "grad_norm": 0.5205192565917969, + "learning_rate": 0.0002, + "loss": 1.7786, + "step": 140 + }, + { + "epoch": 0.5067567567567568, + "grad_norm": 0.4683739244937897, + "learning_rate": 0.0002, + "loss": 1.8293, + "step": 150 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 0.497546523809433, + "learning_rate": 0.0002, + "loss": 1.9102, + "step": 160 + }, + { + "epoch": 0.5743243243243243, + "grad_norm": 0.40443721413612366, + "learning_rate": 0.0002, + "loss": 1.8077, + "step": 170 + }, + { + "epoch": 0.6081081081081081, + "grad_norm": 0.39056605100631714, + "learning_rate": 0.0002, + "loss": 1.8446, + "step": 180 + }, + { + "epoch": 0.6418918918918919, + "grad_norm": 0.42397141456604004, + "learning_rate": 0.0002, + "loss": 1.8747, + "step": 190 + }, + { + "epoch": 0.6756756756756757, + "grad_norm": 0.4679499566555023, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 200 + }, + { + "epoch": 0.7094594594594594, + "grad_norm": 0.39300158619880676, + "learning_rate": 0.0002, + "loss": 1.8401, + "step": 210 + }, + { + "epoch": 0.7432432432432432, + "grad_norm": 0.4001515805721283, + "learning_rate": 0.0002, + "loss": 1.8345, + "step": 220 + }, + { + "epoch": 0.777027027027027, + "grad_norm": 0.4094320833683014, + "learning_rate": 0.0002, + "loss": 1.7571, + "step": 230 + }, + { + "epoch": 0.8108108108108109, + "grad_norm": 0.37315094470977783, + "learning_rate": 0.0002, + "loss": 1.8972, + "step": 240 + }, + { + "epoch": 0.8445945945945946, + "grad_norm": 0.4331067204475403, + "learning_rate": 0.0002, + "loss": 1.8337, + "step": 250 + }, + { + "epoch": 0.8783783783783784, + "grad_norm": 0.39758574962615967, + "learning_rate": 0.0002, + "loss": 1.8555, + "step": 260 + }, + { + "epoch": 0.9121621621621622, + "grad_norm": 0.38240519165992737, + "learning_rate": 0.0002, + "loss": 1.8624, + "step": 270 + }, + { + "epoch": 0.9459459459459459, + "grad_norm": 0.40907856822013855, + "learning_rate": 0.0002, + "loss": 1.7531, + "step": 280 + }, + { + "epoch": 0.9797297297297297, + "grad_norm": 0.34108003973960876, + "learning_rate": 0.0002, + "loss": 1.8435, + "step": 290 + }, + { + "epoch": 1.0, + "eval_loss": 1.8428829908370972, + "eval_runtime": 62.3963, + "eval_samples_per_second": 8.254, + "eval_steps_per_second": 1.042, + "step": 296 + }, + { + "epoch": 1.0135135135135136, + "grad_norm": 0.3993101716041565, + "learning_rate": 0.0002, + "loss": 1.7254, + "step": 300 + }, + { + "epoch": 1.0472972972972974, + "grad_norm": 0.45567989349365234, + "learning_rate": 0.0002, + "loss": 1.7985, + "step": 310 + }, + { + "epoch": 1.0810810810810811, + "grad_norm": 0.3767794072628021, + "learning_rate": 0.0002, + "loss": 1.757, + "step": 320 + }, + { + "epoch": 1.114864864864865, + "grad_norm": 0.5181908011436462, + "learning_rate": 0.0002, + "loss": 1.7917, + "step": 330 + }, + { + "epoch": 1.1486486486486487, + "grad_norm": 0.4213193356990814, + "learning_rate": 0.0002, + "loss": 1.7723, + "step": 340 + }, + { + "epoch": 1.1824324324324325, + "grad_norm": 0.45519495010375977, + "learning_rate": 0.0002, + "loss": 1.8203, + "step": 350 + }, + { + "epoch": 1.2162162162162162, + "grad_norm": 0.35332638025283813, + "learning_rate": 0.0002, + "loss": 1.6946, + "step": 360 + }, + { + "epoch": 1.25, + "grad_norm": 0.3675481677055359, + "learning_rate": 0.0002, + "loss": 1.7541, + "step": 370 + }, + { + "epoch": 1.2837837837837838, + "grad_norm": 0.4569270610809326, + "learning_rate": 0.0002, + "loss": 1.7458, + "step": 380 + }, + { + "epoch": 1.3175675675675675, + "grad_norm": 0.37950295209884644, + "learning_rate": 0.0002, + "loss": 1.7988, + "step": 390 + }, + { + "epoch": 1.3513513513513513, + "grad_norm": 0.5744572877883911, + "learning_rate": 0.0002, + "loss": 1.7032, + "step": 400 + }, + { + "epoch": 1.385135135135135, + "grad_norm": 0.44380778074264526, + "learning_rate": 0.0002, + "loss": 1.7437, + "step": 410 + }, + { + "epoch": 1.4189189189189189, + "grad_norm": 0.43328171968460083, + "learning_rate": 0.0002, + "loss": 1.7454, + "step": 420 + }, + { + "epoch": 1.4527027027027026, + "grad_norm": 0.41290056705474854, + "learning_rate": 0.0002, + "loss": 1.7636, + "step": 430 + }, + { + "epoch": 1.4864864864864864, + "grad_norm": 0.3771473169326782, + "learning_rate": 0.0002, + "loss": 1.7332, + "step": 440 + }, + { + "epoch": 1.5202702702702702, + "grad_norm": 0.42537811398506165, + "learning_rate": 0.0002, + "loss": 1.7618, + "step": 450 + }, + { + "epoch": 1.554054054054054, + "grad_norm": 0.39705610275268555, + "learning_rate": 0.0002, + "loss": 1.8523, + "step": 460 + }, + { + "epoch": 1.5878378378378377, + "grad_norm": 0.4178248643875122, + "learning_rate": 0.0002, + "loss": 1.7673, + "step": 470 + }, + { + "epoch": 1.6216216216216215, + "grad_norm": 0.39107105135917664, + "learning_rate": 0.0002, + "loss": 1.742, + "step": 480 + }, + { + "epoch": 1.6554054054054053, + "grad_norm": 0.38505619764328003, + "learning_rate": 0.0002, + "loss": 1.6984, + "step": 490 + }, + { + "epoch": 1.689189189189189, + "grad_norm": 0.43590813875198364, + "learning_rate": 0.0002, + "loss": 1.7382, + "step": 500 + }, + { + "epoch": 1.722972972972973, + "grad_norm": 0.42785948514938354, + "learning_rate": 0.0002, + "loss": 1.7139, + "step": 510 + }, + { + "epoch": 1.7567567567567568, + "grad_norm": 0.3829004168510437, + "learning_rate": 0.0002, + "loss": 1.7551, + "step": 520 + }, + { + "epoch": 1.7905405405405406, + "grad_norm": 0.35287904739379883, + "learning_rate": 0.0002, + "loss": 1.7744, + "step": 530 + }, + { + "epoch": 1.8243243243243243, + "grad_norm": 0.38657888770103455, + "learning_rate": 0.0002, + "loss": 1.7714, + "step": 540 + }, + { + "epoch": 1.8581081081081081, + "grad_norm": 0.41452157497406006, + "learning_rate": 0.0002, + "loss": 1.7535, + "step": 550 + }, + { + "epoch": 1.8918918918918919, + "grad_norm": 0.3898279070854187, + "learning_rate": 0.0002, + "loss": 1.7627, + "step": 560 + }, + { + "epoch": 1.9256756756756757, + "grad_norm": 0.4147624373435974, + "learning_rate": 0.0002, + "loss": 1.7494, + "step": 570 + }, + { + "epoch": 1.9594594594594594, + "grad_norm": 0.4374851584434509, + "learning_rate": 0.0002, + "loss": 1.7693, + "step": 580 + }, + { + "epoch": 1.9932432432432432, + "grad_norm": 0.48530328273773193, + "learning_rate": 0.0002, + "loss": 1.7796, + "step": 590 + }, + { + "epoch": 2.0, + "eval_loss": 1.83539617061615, + "eval_runtime": 70.9865, + "eval_samples_per_second": 7.255, + "eval_steps_per_second": 0.916, + "step": 592 + }, + { + "epoch": 2.027027027027027, + "grad_norm": 0.40344223380088806, + "learning_rate": 0.0002, + "loss": 1.7294, + "step": 600 + }, + { + "epoch": 2.060810810810811, + "grad_norm": 0.48268747329711914, + "learning_rate": 0.0002, + "loss": 1.6693, + "step": 610 + }, + { + "epoch": 2.0945945945945947, + "grad_norm": 0.4675706923007965, + "learning_rate": 0.0002, + "loss": 1.6315, + "step": 620 + }, + { + "epoch": 2.1283783783783785, + "grad_norm": 0.47494322061538696, + "learning_rate": 0.0002, + "loss": 1.6627, + "step": 630 + }, + { + "epoch": 2.1621621621621623, + "grad_norm": 0.4555308520793915, + "learning_rate": 0.0002, + "loss": 1.5668, + "step": 640 + }, + { + "epoch": 2.195945945945946, + "grad_norm": 0.43085595965385437, + "learning_rate": 0.0002, + "loss": 1.6537, + "step": 650 + }, + { + "epoch": 2.22972972972973, + "grad_norm": 0.4364128112792969, + "learning_rate": 0.0002, + "loss": 1.6316, + "step": 660 + }, + { + "epoch": 2.2635135135135136, + "grad_norm": 0.4711395800113678, + "learning_rate": 0.0002, + "loss": 1.669, + "step": 670 + }, + { + "epoch": 2.2972972972972974, + "grad_norm": 0.5109705328941345, + "learning_rate": 0.0002, + "loss": 1.5758, + "step": 680 + }, + { + "epoch": 2.331081081081081, + "grad_norm": 0.5185648798942566, + "learning_rate": 0.0002, + "loss": 1.5912, + "step": 690 + }, + { + "epoch": 2.364864864864865, + "grad_norm": 0.49192842841148376, + "learning_rate": 0.0002, + "loss": 1.6605, + "step": 700 + }, + { + "epoch": 2.3986486486486487, + "grad_norm": 0.5619909763336182, + "learning_rate": 0.0002, + "loss": 1.6688, + "step": 710 + }, + { + "epoch": 2.4324324324324325, + "grad_norm": 0.4932861328125, + "learning_rate": 0.0002, + "loss": 1.7836, + "step": 720 + }, + { + "epoch": 2.4662162162162162, + "grad_norm": 0.5211932063102722, + "learning_rate": 0.0002, + "loss": 1.6532, + "step": 730 + }, + { + "epoch": 2.5, + "grad_norm": 0.4138050377368927, + "learning_rate": 0.0002, + "loss": 1.667, + "step": 740 + }, + { + "epoch": 2.5337837837837838, + "grad_norm": 0.4644908010959625, + "learning_rate": 0.0002, + "loss": 1.658, + "step": 750 + }, + { + "epoch": 2.5675675675675675, + "grad_norm": 0.4513227641582489, + "learning_rate": 0.0002, + "loss": 1.6451, + "step": 760 + }, + { + "epoch": 2.6013513513513513, + "grad_norm": 0.4735109508037567, + "learning_rate": 0.0002, + "loss": 1.7071, + "step": 770 + }, + { + "epoch": 2.635135135135135, + "grad_norm": 0.5453559756278992, + "learning_rate": 0.0002, + "loss": 1.6659, + "step": 780 + }, + { + "epoch": 2.668918918918919, + "grad_norm": 0.5422565937042236, + "learning_rate": 0.0002, + "loss": 1.7211, + "step": 790 + }, + { + "epoch": 2.7027027027027026, + "grad_norm": 0.4288518726825714, + "learning_rate": 0.0002, + "loss": 1.6623, + "step": 800 + }, + { + "epoch": 2.7364864864864864, + "grad_norm": 0.4085204005241394, + "learning_rate": 0.0002, + "loss": 1.7197, + "step": 810 + }, + { + "epoch": 2.77027027027027, + "grad_norm": 0.49770182371139526, + "learning_rate": 0.0002, + "loss": 1.6376, + "step": 820 + }, + { + "epoch": 2.804054054054054, + "grad_norm": 0.5005106329917908, + "learning_rate": 0.0002, + "loss": 1.6332, + "step": 830 + }, + { + "epoch": 2.8378378378378377, + "grad_norm": 0.4763440489768982, + "learning_rate": 0.0002, + "loss": 1.6675, + "step": 840 + }, + { + "epoch": 2.8716216216216215, + "grad_norm": 0.44995108246803284, + "learning_rate": 0.0002, + "loss": 1.7149, + "step": 850 + }, + { + "epoch": 2.9054054054054053, + "grad_norm": 0.5299676656723022, + "learning_rate": 0.0002, + "loss": 1.6438, + "step": 860 + }, + { + "epoch": 2.939189189189189, + "grad_norm": 0.49627119302749634, + "learning_rate": 0.0002, + "loss": 1.6457, + "step": 870 + }, + { + "epoch": 2.972972972972973, + "grad_norm": 0.502545177936554, + "learning_rate": 0.0002, + "loss": 1.6517, + "step": 880 + }, + { + "epoch": 3.0, + "eval_loss": 1.8520468473434448, + "eval_runtime": 70.9917, + "eval_samples_per_second": 7.254, + "eval_steps_per_second": 0.916, + "step": 888 + }, + { + "epoch": 3.0067567567567566, + "grad_norm": 0.4756380319595337, + "learning_rate": 0.0002, + "loss": 1.6271, + "step": 890 + }, + { + "epoch": 3.0405405405405403, + "grad_norm": 0.5167421102523804, + "learning_rate": 0.0002, + "loss": 1.563, + "step": 900 + }, + { + "epoch": 3.074324324324324, + "grad_norm": 0.5524939298629761, + "learning_rate": 0.0002, + "loss": 1.48, + "step": 910 + }, + { + "epoch": 3.108108108108108, + "grad_norm": 0.7045221924781799, + "learning_rate": 0.0002, + "loss": 1.5297, + "step": 920 + }, + { + "epoch": 3.141891891891892, + "grad_norm": 0.5692355036735535, + "learning_rate": 0.0002, + "loss": 1.5548, + "step": 930 + }, + { + "epoch": 3.175675675675676, + "grad_norm": 0.5467017292976379, + "learning_rate": 0.0002, + "loss": 1.5297, + "step": 940 + }, + { + "epoch": 3.2094594594594597, + "grad_norm": 0.6004040241241455, + "learning_rate": 0.0002, + "loss": 1.5559, + "step": 950 + }, + { + "epoch": 3.2432432432432434, + "grad_norm": 0.5713295936584473, + "learning_rate": 0.0002, + "loss": 1.5255, + "step": 960 + }, + { + "epoch": 3.277027027027027, + "grad_norm": 0.6054869890213013, + "learning_rate": 0.0002, + "loss": 1.5412, + "step": 970 + }, + { + "epoch": 3.310810810810811, + "grad_norm": 0.6304576992988586, + "learning_rate": 0.0002, + "loss": 1.5167, + "step": 980 + }, + { + "epoch": 3.3445945945945947, + "grad_norm": 0.5347281694412231, + "learning_rate": 0.0002, + "loss": 1.52, + "step": 990 + }, + { + "epoch": 3.3783783783783785, + "grad_norm": 0.598211407661438, + "learning_rate": 0.0002, + "loss": 1.5707, + "step": 1000 + }, + { + "epoch": 3.4121621621621623, + "grad_norm": 0.637312650680542, + "learning_rate": 0.0002, + "loss": 1.5243, + "step": 1010 + }, + { + "epoch": 3.445945945945946, + "grad_norm": 0.6092430949211121, + "learning_rate": 0.0002, + "loss": 1.5356, + "step": 1020 + }, + { + "epoch": 3.47972972972973, + "grad_norm": 0.6421037912368774, + "learning_rate": 0.0002, + "loss": 1.5856, + "step": 1030 + }, + { + "epoch": 3.5135135135135136, + "grad_norm": 0.6712167263031006, + "learning_rate": 0.0002, + "loss": 1.5553, + "step": 1040 + }, + { + "epoch": 3.5472972972972974, + "grad_norm": 0.6466017365455627, + "learning_rate": 0.0002, + "loss": 1.4708, + "step": 1050 + }, + { + "epoch": 3.581081081081081, + "grad_norm": 1.418167233467102, + "learning_rate": 0.0002, + "loss": 1.5159, + "step": 1060 + }, + { + "epoch": 3.614864864864865, + "grad_norm": 0.6092377305030823, + "learning_rate": 0.0002, + "loss": 1.5264, + "step": 1070 + }, + { + "epoch": 3.6486486486486487, + "grad_norm": 0.5632478594779968, + "learning_rate": 0.0002, + "loss": 1.5227, + "step": 1080 + }, + { + "epoch": 3.6824324324324325, + "grad_norm": 0.6007736921310425, + "learning_rate": 0.0002, + "loss": 1.5492, + "step": 1090 + }, + { + "epoch": 3.7162162162162162, + "grad_norm": 0.6031264066696167, + "learning_rate": 0.0002, + "loss": 1.5002, + "step": 1100 + }, + { + "epoch": 3.75, + "grad_norm": 0.5440598726272583, + "learning_rate": 0.0002, + "loss": 1.4797, + "step": 1110 + }, + { + "epoch": 3.7837837837837838, + "grad_norm": 0.6304370760917664, + "learning_rate": 0.0002, + "loss": 1.5743, + "step": 1120 + }, + { + "epoch": 3.8175675675675675, + "grad_norm": 0.6729280948638916, + "learning_rate": 0.0002, + "loss": 1.6429, + "step": 1130 + }, + { + "epoch": 3.8513513513513513, + "grad_norm": 0.5881901979446411, + "learning_rate": 0.0002, + "loss": 1.594, + "step": 1140 + }, + { + "epoch": 3.885135135135135, + "grad_norm": 0.5508038997650146, + "learning_rate": 0.0002, + "loss": 1.5008, + "step": 1150 + }, + { + "epoch": 3.918918918918919, + "grad_norm": 0.5926295518875122, + "learning_rate": 0.0002, + "loss": 1.5045, + "step": 1160 + }, + { + "epoch": 3.9527027027027026, + "grad_norm": 0.5882043838500977, + "learning_rate": 0.0002, + "loss": 1.5223, + "step": 1170 + }, + { + "epoch": 3.9864864864864864, + "grad_norm": 0.604119598865509, + "learning_rate": 0.0002, + "loss": 1.5874, + "step": 1180 + }, + { + "epoch": 4.0, + "eval_loss": 1.8923152685165405, + "eval_runtime": 62.9577, + "eval_samples_per_second": 8.18, + "eval_steps_per_second": 1.032, + "step": 1184 + }, + { + "epoch": 4.02027027027027, + "grad_norm": 0.696061909198761, + "learning_rate": 0.0002, + "loss": 1.4183, + "step": 1190 + }, + { + "epoch": 4.054054054054054, + "grad_norm": 0.751200258731842, + "learning_rate": 0.0002, + "loss": 1.3455, + "step": 1200 + }, + { + "epoch": 4.087837837837838, + "grad_norm": 0.9667422771453857, + "learning_rate": 0.0002, + "loss": 1.35, + "step": 1210 + }, + { + "epoch": 4.121621621621622, + "grad_norm": 0.7374204397201538, + "learning_rate": 0.0002, + "loss": 1.4058, + "step": 1220 + }, + { + "epoch": 4.155405405405405, + "grad_norm": 0.8050723075866699, + "learning_rate": 0.0002, + "loss": 1.4454, + "step": 1230 + }, + { + "epoch": 4.1891891891891895, + "grad_norm": 0.7360416054725647, + "learning_rate": 0.0002, + "loss": 1.4129, + "step": 1240 + }, + { + "epoch": 4.222972972972973, + "grad_norm": 0.7947028279304504, + "learning_rate": 0.0002, + "loss": 1.3899, + "step": 1250 + }, + { + "epoch": 4.256756756756757, + "grad_norm": 0.7336545586585999, + "learning_rate": 0.0002, + "loss": 1.4264, + "step": 1260 + }, + { + "epoch": 4.29054054054054, + "grad_norm": 0.7051223516464233, + "learning_rate": 0.0002, + "loss": 1.4047, + "step": 1270 + }, + { + "epoch": 4.324324324324325, + "grad_norm": 0.7939404845237732, + "learning_rate": 0.0002, + "loss": 1.3507, + "step": 1280 + }, + { + "epoch": 4.358108108108108, + "grad_norm": 0.7818657755851746, + "learning_rate": 0.0002, + "loss": 1.387, + "step": 1290 + }, + { + "epoch": 4.391891891891892, + "grad_norm": 0.7490634918212891, + "learning_rate": 0.0002, + "loss": 1.3533, + "step": 1300 + }, + { + "epoch": 4.425675675675675, + "grad_norm": 0.9319770932197571, + "learning_rate": 0.0002, + "loss": 1.3912, + "step": 1310 + }, + { + "epoch": 4.45945945945946, + "grad_norm": 0.7811282873153687, + "learning_rate": 0.0002, + "loss": 1.439, + "step": 1320 + }, + { + "epoch": 4.493243243243243, + "grad_norm": 0.7785378694534302, + "learning_rate": 0.0002, + "loss": 1.3973, + "step": 1330 + }, + { + "epoch": 4.527027027027027, + "grad_norm": 0.8697562217712402, + "learning_rate": 0.0002, + "loss": 1.3931, + "step": 1340 + }, + { + "epoch": 4.5608108108108105, + "grad_norm": 0.7927497625350952, + "learning_rate": 0.0002, + "loss": 1.3846, + "step": 1350 + }, + { + "epoch": 4.594594594594595, + "grad_norm": 0.9746347665786743, + "learning_rate": 0.0002, + "loss": 1.3188, + "step": 1360 + }, + { + "epoch": 4.628378378378378, + "grad_norm": 0.7353375554084778, + "learning_rate": 0.0002, + "loss": 1.4611, + "step": 1370 + }, + { + "epoch": 4.662162162162162, + "grad_norm": 0.8139469027519226, + "learning_rate": 0.0002, + "loss": 1.4067, + "step": 1380 + }, + { + "epoch": 4.695945945945946, + "grad_norm": 1.728020429611206, + "learning_rate": 0.0002, + "loss": 1.3727, + "step": 1390 + }, + { + "epoch": 4.72972972972973, + "grad_norm": 0.8249040246009827, + "learning_rate": 0.0002, + "loss": 1.3971, + "step": 1400 + }, + { + "epoch": 4.763513513513513, + "grad_norm": 0.7916110157966614, + "learning_rate": 0.0002, + "loss": 1.4238, + "step": 1410 + }, + { + "epoch": 4.797297297297297, + "grad_norm": 0.7286198735237122, + "learning_rate": 0.0002, + "loss": 1.4064, + "step": 1420 + }, + { + "epoch": 4.831081081081081, + "grad_norm": 0.7969672083854675, + "learning_rate": 0.0002, + "loss": 1.305, + "step": 1430 + }, + { + "epoch": 4.864864864864865, + "grad_norm": 0.9593119621276855, + "learning_rate": 0.0002, + "loss": 1.4109, + "step": 1440 + }, + { + "epoch": 4.898648648648649, + "grad_norm": 0.8609084486961365, + "learning_rate": 0.0002, + "loss": 1.4112, + "step": 1450 + }, + { + "epoch": 4.9324324324324325, + "grad_norm": 0.705203115940094, + "learning_rate": 0.0002, + "loss": 1.3126, + "step": 1460 + }, + { + "epoch": 4.966216216216216, + "grad_norm": 0.9503173232078552, + "learning_rate": 0.0002, + "loss": 1.4226, + "step": 1470 + }, + { + "epoch": 5.0, + "grad_norm": 0.7174800038337708, + "learning_rate": 0.0002, + "loss": 1.4457, + "step": 1480 + }, + { + "epoch": 5.0, + "eval_loss": 1.9753261804580688, + "eval_runtime": 70.4622, + "eval_samples_per_second": 7.309, + "eval_steps_per_second": 0.922, + "step": 1480 + }, + { + "epoch": 5.033783783783784, + "grad_norm": 1.450723648071289, + "learning_rate": 0.0002, + "loss": 1.2986, + "step": 1490 + }, + { + "epoch": 5.0675675675675675, + "grad_norm": 0.9207791686058044, + "learning_rate": 0.0002, + "loss": 1.2184, + "step": 1500 + }, + { + "epoch": 5.101351351351352, + "grad_norm": 1.0742532014846802, + "learning_rate": 0.0002, + "loss": 1.1628, + "step": 1510 + }, + { + "epoch": 5.135135135135135, + "grad_norm": 1.1070902347564697, + "learning_rate": 0.0002, + "loss": 1.2221, + "step": 1520 + }, + { + "epoch": 5.168918918918919, + "grad_norm": 0.9838612079620361, + "learning_rate": 0.0002, + "loss": 1.1737, + "step": 1530 + }, + { + "epoch": 5.202702702702703, + "grad_norm": 0.9286013245582581, + "learning_rate": 0.0002, + "loss": 1.2095, + "step": 1540 + }, + { + "epoch": 5.236486486486487, + "grad_norm": 0.9755229949951172, + "learning_rate": 0.0002, + "loss": 1.2243, + "step": 1550 + }, + { + "epoch": 5.27027027027027, + "grad_norm": 0.9734522104263306, + "learning_rate": 0.0002, + "loss": 1.1729, + "step": 1560 + }, + { + "epoch": 5.304054054054054, + "grad_norm": 1.1838241815567017, + "learning_rate": 0.0002, + "loss": 1.1528, + "step": 1570 + }, + { + "epoch": 5.337837837837838, + "grad_norm": 1.1389052867889404, + "learning_rate": 0.0002, + "loss": 1.2135, + "step": 1580 + }, + { + "epoch": 5.371621621621622, + "grad_norm": 1.2093408107757568, + "learning_rate": 0.0002, + "loss": 1.2486, + "step": 1590 + }, + { + "epoch": 5.405405405405405, + "grad_norm": 0.9418244361877441, + "learning_rate": 0.0002, + "loss": 1.2017, + "step": 1600 + }, + { + "epoch": 5.4391891891891895, + "grad_norm": 0.9843172430992126, + "learning_rate": 0.0002, + "loss": 1.2554, + "step": 1610 + }, + { + "epoch": 5.472972972972973, + "grad_norm": 1.0316557884216309, + "learning_rate": 0.0002, + "loss": 1.2967, + "step": 1620 + }, + { + "epoch": 5.506756756756757, + "grad_norm": 1.0008920431137085, + "learning_rate": 0.0002, + "loss": 1.2509, + "step": 1630 + }, + { + "epoch": 5.54054054054054, + "grad_norm": 1.1854851245880127, + "learning_rate": 0.0002, + "loss": 1.2908, + "step": 1640 + }, + { + "epoch": 5.574324324324325, + "grad_norm": 0.9324101209640503, + "learning_rate": 0.0002, + "loss": 1.2679, + "step": 1650 + }, + { + "epoch": 5.608108108108108, + "grad_norm": 0.993882954120636, + "learning_rate": 0.0002, + "loss": 1.202, + "step": 1660 + }, + { + "epoch": 5.641891891891892, + "grad_norm": 0.8795919418334961, + "learning_rate": 0.0002, + "loss": 1.2498, + "step": 1670 + }, + { + "epoch": 5.675675675675675, + "grad_norm": 1.203471064567566, + "learning_rate": 0.0002, + "loss": 1.2244, + "step": 1680 + }, + { + "epoch": 5.70945945945946, + "grad_norm": 0.916689932346344, + "learning_rate": 0.0002, + "loss": 1.2452, + "step": 1690 + }, + { + "epoch": 5.743243243243243, + "grad_norm": 0.8567600846290588, + "learning_rate": 0.0002, + "loss": 1.1774, + "step": 1700 + }, + { + "epoch": 5.777027027027027, + "grad_norm": 0.9426271319389343, + "learning_rate": 0.0002, + "loss": 1.2585, + "step": 1710 + }, + { + "epoch": 5.8108108108108105, + "grad_norm": 1.0812019109725952, + "learning_rate": 0.0002, + "loss": 1.239, + "step": 1720 + }, + { + "epoch": 5.844594594594595, + "grad_norm": 1.0045292377471924, + "learning_rate": 0.0002, + "loss": 1.1527, + "step": 1730 + }, + { + "epoch": 5.878378378378378, + "grad_norm": 1.0750256776809692, + "learning_rate": 0.0002, + "loss": 1.2949, + "step": 1740 + }, + { + "epoch": 5.912162162162162, + "grad_norm": 1.0471885204315186, + "learning_rate": 0.0002, + "loss": 1.3052, + "step": 1750 + }, + { + "epoch": 5.945945945945946, + "grad_norm": 0.9119327664375305, + "learning_rate": 0.0002, + "loss": 1.2318, + "step": 1760 + }, + { + "epoch": 5.97972972972973, + "grad_norm": 1.0975338220596313, + "learning_rate": 0.0002, + "loss": 1.2652, + "step": 1770 + }, + { + "epoch": 6.0, + "eval_loss": 2.104356527328491, + "eval_runtime": 71.0586, + "eval_samples_per_second": 7.248, + "eval_steps_per_second": 0.915, + "step": 1776 + }, + { + "epoch": 6.013513513513513, + "grad_norm": 0.9404756426811218, + "learning_rate": 0.0002, + "loss": 1.1342, + "step": 1780 + }, + { + "epoch": 6.047297297297297, + "grad_norm": 1.3757696151733398, + "learning_rate": 0.0002, + "loss": 0.9892, + "step": 1790 + }, + { + "epoch": 6.081081081081081, + "grad_norm": 1.5798641443252563, + "learning_rate": 0.0002, + "loss": 1.0826, + "step": 1800 + }, + { + "epoch": 6.114864864864865, + "grad_norm": 1.3777581453323364, + "learning_rate": 0.0002, + "loss": 0.9929, + "step": 1810 + }, + { + "epoch": 6.148648648648648, + "grad_norm": 1.136362910270691, + "learning_rate": 0.0002, + "loss": 1.0257, + "step": 1820 + }, + { + "epoch": 6.1824324324324325, + "grad_norm": 1.3719290494918823, + "learning_rate": 0.0002, + "loss": 1.0623, + "step": 1830 + }, + { + "epoch": 6.216216216216216, + "grad_norm": 1.375697374343872, + "learning_rate": 0.0002, + "loss": 1.0144, + "step": 1840 + }, + { + "epoch": 6.25, + "grad_norm": 1.3208998441696167, + "learning_rate": 0.0002, + "loss": 1.0307, + "step": 1850 + }, + { + "epoch": 6.283783783783784, + "grad_norm": 1.3176994323730469, + "learning_rate": 0.0002, + "loss": 1.0298, + "step": 1860 + }, + { + "epoch": 6.3175675675675675, + "grad_norm": 1.3333075046539307, + "learning_rate": 0.0002, + "loss": 1.0809, + "step": 1870 + }, + { + "epoch": 6.351351351351352, + "grad_norm": 1.6315182447433472, + "learning_rate": 0.0002, + "loss": 1.1059, + "step": 1880 + }, + { + "epoch": 6.385135135135135, + "grad_norm": 1.1802350282669067, + "learning_rate": 0.0002, + "loss": 1.0139, + "step": 1890 + }, + { + "epoch": 6.418918918918919, + "grad_norm": 1.0628817081451416, + "learning_rate": 0.0002, + "loss": 1.07, + "step": 1900 + }, + { + "epoch": 6.452702702702703, + "grad_norm": 1.3136482238769531, + "learning_rate": 0.0002, + "loss": 1.0916, + "step": 1910 + }, + { + "epoch": 6.486486486486487, + "grad_norm": 1.4804624319076538, + "learning_rate": 0.0002, + "loss": 1.0453, + "step": 1920 + }, + { + "epoch": 6.52027027027027, + "grad_norm": 1.1129399538040161, + "learning_rate": 0.0002, + "loss": 1.1146, + "step": 1930 + }, + { + "epoch": 6.554054054054054, + "grad_norm": 1.324576497077942, + "learning_rate": 0.0002, + "loss": 1.0645, + "step": 1940 + }, + { + "epoch": 6.587837837837838, + "grad_norm": 1.3321561813354492, + "learning_rate": 0.0002, + "loss": 1.0534, + "step": 1950 + }, + { + "epoch": 6.621621621621622, + "grad_norm": 1.2377620935440063, + "learning_rate": 0.0002, + "loss": 1.027, + "step": 1960 + }, + { + "epoch": 6.655405405405405, + "grad_norm": 1.1174288988113403, + "learning_rate": 0.0002, + "loss": 1.0144, + "step": 1970 + }, + { + "epoch": 6.6891891891891895, + "grad_norm": 1.2291412353515625, + "learning_rate": 0.0002, + "loss": 1.1074, + "step": 1980 + }, + { + "epoch": 6.722972972972973, + "grad_norm": 1.2079328298568726, + "learning_rate": 0.0002, + "loss": 1.101, + "step": 1990 + }, + { + "epoch": 6.756756756756757, + "grad_norm": 1.125183343887329, + "learning_rate": 0.0002, + "loss": 1.1603, + "step": 2000 + }, + { + "epoch": 6.79054054054054, + "grad_norm": 1.1737638711929321, + "learning_rate": 0.0002, + "loss": 1.008, + "step": 2010 + }, + { + "epoch": 6.824324324324325, + "grad_norm": 1.3917324542999268, + "learning_rate": 0.0002, + "loss": 1.1211, + "step": 2020 + }, + { + "epoch": 6.858108108108108, + "grad_norm": 1.1063282489776611, + "learning_rate": 0.0002, + "loss": 1.1436, + "step": 2030 + }, + { + "epoch": 6.891891891891892, + "grad_norm": 1.2951769828796387, + "learning_rate": 0.0002, + "loss": 1.0888, + "step": 2040 + }, + { + "epoch": 6.925675675675675, + "grad_norm": 1.2272734642028809, + "learning_rate": 0.0002, + "loss": 1.153, + "step": 2050 + }, + { + "epoch": 6.95945945945946, + "grad_norm": 1.5298433303833008, + "learning_rate": 0.0002, + "loss": 1.1156, + "step": 2060 + }, + { + "epoch": 6.993243243243243, + "grad_norm": 1.1478265523910522, + "learning_rate": 0.0002, + "loss": 1.1214, + "step": 2070 + }, + { + "epoch": 7.0, + "eval_loss": 2.238886594772339, + "eval_runtime": 62.8821, + "eval_samples_per_second": 8.19, + "eval_steps_per_second": 1.034, + "step": 2072 + }, + { + "epoch": 7.027027027027027, + "grad_norm": 1.6612180471420288, + "learning_rate": 0.0002, + "loss": 0.9044, + "step": 2080 + }, + { + "epoch": 7.0608108108108105, + "grad_norm": 1.266597867012024, + "learning_rate": 0.0002, + "loss": 0.932, + "step": 2090 + }, + { + "epoch": 7.094594594594595, + "grad_norm": 1.4388158321380615, + "learning_rate": 0.0002, + "loss": 0.9108, + "step": 2100 + }, + { + "epoch": 7.128378378378378, + "grad_norm": 1.5639206171035767, + "learning_rate": 0.0002, + "loss": 0.8743, + "step": 2110 + }, + { + "epoch": 7.162162162162162, + "grad_norm": 1.4063223600387573, + "learning_rate": 0.0002, + "loss": 0.8907, + "step": 2120 + }, + { + "epoch": 7.195945945945946, + "grad_norm": 1.7724202871322632, + "learning_rate": 0.0002, + "loss": 0.9383, + "step": 2130 + }, + { + "epoch": 7.22972972972973, + "grad_norm": 1.628645658493042, + "learning_rate": 0.0002, + "loss": 0.944, + "step": 2140 + }, + { + "epoch": 7.263513513513513, + "grad_norm": 1.5467971563339233, + "learning_rate": 0.0002, + "loss": 0.9197, + "step": 2150 + }, + { + "epoch": 7.297297297297297, + "grad_norm": 1.3064892292022705, + "learning_rate": 0.0002, + "loss": 0.8928, + "step": 2160 + }, + { + "epoch": 7.331081081081081, + "grad_norm": 1.1528593301773071, + "learning_rate": 0.0002, + "loss": 0.8599, + "step": 2170 + }, + { + "epoch": 7.364864864864865, + "grad_norm": 1.82744562625885, + "learning_rate": 0.0002, + "loss": 0.8757, + "step": 2180 + }, + { + "epoch": 7.398648648648648, + "grad_norm": 1.581808090209961, + "learning_rate": 0.0002, + "loss": 0.8856, + "step": 2190 + }, + { + "epoch": 7.4324324324324325, + "grad_norm": 1.7797787189483643, + "learning_rate": 0.0002, + "loss": 0.9736, + "step": 2200 + }, + { + "epoch": 7.466216216216216, + "grad_norm": 2.161501169204712, + "learning_rate": 0.0002, + "loss": 0.8861, + "step": 2210 + }, + { + "epoch": 7.5, + "grad_norm": 1.4904208183288574, + "learning_rate": 0.0002, + "loss": 0.8976, + "step": 2220 + }, + { + "epoch": 7.533783783783784, + "grad_norm": 1.76048743724823, + "learning_rate": 0.0002, + "loss": 0.9143, + "step": 2230 + }, + { + "epoch": 7.5675675675675675, + "grad_norm": 1.39728844165802, + "learning_rate": 0.0002, + "loss": 0.9403, + "step": 2240 + }, + { + "epoch": 7.601351351351351, + "grad_norm": 1.4059574604034424, + "learning_rate": 0.0002, + "loss": 0.9236, + "step": 2250 + }, + { + "epoch": 7.635135135135135, + "grad_norm": 1.5134271383285522, + "learning_rate": 0.0002, + "loss": 0.9192, + "step": 2260 + }, + { + "epoch": 7.668918918918919, + "grad_norm": 1.384108066558838, + "learning_rate": 0.0002, + "loss": 0.9459, + "step": 2270 + }, + { + "epoch": 7.702702702702703, + "grad_norm": 1.4390848875045776, + "learning_rate": 0.0002, + "loss": 0.9808, + "step": 2280 + }, + { + "epoch": 7.736486486486487, + "grad_norm": 1.6258286237716675, + "learning_rate": 0.0002, + "loss": 0.8559, + "step": 2290 + }, + { + "epoch": 7.77027027027027, + "grad_norm": 1.5682430267333984, + "learning_rate": 0.0002, + "loss": 0.966, + "step": 2300 + }, + { + "epoch": 7.804054054054054, + "grad_norm": 1.3329198360443115, + "learning_rate": 0.0002, + "loss": 0.8859, + "step": 2310 + }, + { + "epoch": 7.837837837837838, + "grad_norm": 1.3879269361495972, + "learning_rate": 0.0002, + "loss": 0.8762, + "step": 2320 + }, + { + "epoch": 7.871621621621622, + "grad_norm": 1.6853514909744263, + "learning_rate": 0.0002, + "loss": 0.9348, + "step": 2330 + }, + { + "epoch": 7.905405405405405, + "grad_norm": 1.5088176727294922, + "learning_rate": 0.0002, + "loss": 0.9542, + "step": 2340 + }, + { + "epoch": 7.9391891891891895, + "grad_norm": 1.4418280124664307, + "learning_rate": 0.0002, + "loss": 0.8656, + "step": 2350 + }, + { + "epoch": 7.972972972972973, + "grad_norm": 1.4557723999023438, + "learning_rate": 0.0002, + "loss": 0.9609, + "step": 2360 + }, + { + "epoch": 8.0, + "eval_loss": 2.413877487182617, + "eval_runtime": 71.4821, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.909, + "step": 2368 + } + ], + "logging_steps": 10, + "max_steps": 2368, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.0958573783325082e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353b8c3d36532f1ad17da6f41538722c26cdcddf --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-2368/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:385e5da1817ab8ad8dd9d82e0f198663245ef77a2a0bf6cf06d1105171be7411 +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db7a6b205ffbfa0a6ae7bb77f1bfdcb2c9627672 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c6023c8110f553bc4d00a2b5ef506df01b5fb9c203b4b71f09c2db97897394a +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6150aea7c37694592aa91a2238434c8923420de1 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:007ffe432a5871edec2e104cd254ece542294c79d064dcc34a501947fac01079 +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d9a6cf55d81db67c268e84be1380f6cbdd6033b1 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5604b28fc6f3aa24a8a3578f5777d877cc45762868594e99beff805860e54dd0 +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d79f8db9628ac1a6bcb862a9ee5c99264c18aac --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52a166c29a4b6dc9b9a85c14eb31bedb4fa45c2a50d6c524db7d109b4c7a3fd3 +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..789f2575a483250cc51198772ec6d2c823ee36be --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/trainer_state.json @@ -0,0 +1,244 @@ +{ + "best_metric": 1.8428829908370972, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296", + "epoch": 1.0, + "eval_steps": 10, + "global_step": 296, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.033783783783783786, + "grad_norm": 0.5820087194442749, + "learning_rate": 0.0002, + "loss": 2.6467, + "step": 10 + }, + { + "epoch": 0.06756756756756757, + "grad_norm": 0.4625075161457062, + "learning_rate": 0.0002, + "loss": 2.2808, + "step": 20 + }, + { + "epoch": 0.10135135135135136, + "grad_norm": 0.46946242451667786, + "learning_rate": 0.0002, + "loss": 2.0116, + "step": 30 + }, + { + "epoch": 0.13513513513513514, + "grad_norm": 0.5181305408477783, + "learning_rate": 0.0002, + "loss": 1.9089, + "step": 40 + }, + { + "epoch": 0.16891891891891891, + "grad_norm": 0.7439630627632141, + "learning_rate": 0.0002, + "loss": 1.9232, + "step": 50 + }, + { + "epoch": 0.20270270270270271, + "grad_norm": 0.5144319534301758, + "learning_rate": 0.0002, + "loss": 1.9646, + "step": 60 + }, + { + "epoch": 0.23648648648648649, + "grad_norm": 0.46696192026138306, + "learning_rate": 0.0002, + "loss": 1.921, + "step": 70 + }, + { + "epoch": 0.2702702702702703, + "grad_norm": 0.4330582022666931, + "learning_rate": 0.0002, + "loss": 1.8794, + "step": 80 + }, + { + "epoch": 0.30405405405405406, + "grad_norm": 0.502414882183075, + "learning_rate": 0.0002, + "loss": 1.8897, + "step": 90 + }, + { + "epoch": 0.33783783783783783, + "grad_norm": 0.4174366295337677, + "learning_rate": 0.0002, + "loss": 1.8166, + "step": 100 + }, + { + "epoch": 0.3716216216216216, + "grad_norm": 0.4296933710575104, + "learning_rate": 0.0002, + "loss": 1.8264, + "step": 110 + }, + { + "epoch": 0.40540540540540543, + "grad_norm": 0.4299834668636322, + "learning_rate": 0.0002, + "loss": 1.9223, + "step": 120 + }, + { + "epoch": 0.4391891891891892, + "grad_norm": 0.5583795309066772, + "learning_rate": 0.0002, + "loss": 1.8708, + "step": 130 + }, + { + "epoch": 0.47297297297297297, + "grad_norm": 0.5205192565917969, + "learning_rate": 0.0002, + "loss": 1.7786, + "step": 140 + }, + { + "epoch": 0.5067567567567568, + "grad_norm": 0.4683739244937897, + "learning_rate": 0.0002, + "loss": 1.8293, + "step": 150 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 0.497546523809433, + "learning_rate": 0.0002, + "loss": 1.9102, + "step": 160 + }, + { + "epoch": 0.5743243243243243, + "grad_norm": 0.40443721413612366, + "learning_rate": 0.0002, + "loss": 1.8077, + "step": 170 + }, + { + "epoch": 0.6081081081081081, + "grad_norm": 0.39056605100631714, + "learning_rate": 0.0002, + "loss": 1.8446, + "step": 180 + }, + { + "epoch": 0.6418918918918919, + "grad_norm": 0.42397141456604004, + "learning_rate": 0.0002, + "loss": 1.8747, + "step": 190 + }, + { + "epoch": 0.6756756756756757, + "grad_norm": 0.4679499566555023, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 200 + }, + { + "epoch": 0.7094594594594594, + "grad_norm": 0.39300158619880676, + "learning_rate": 0.0002, + "loss": 1.8401, + "step": 210 + }, + { + "epoch": 0.7432432432432432, + "grad_norm": 0.4001515805721283, + "learning_rate": 0.0002, + "loss": 1.8345, + "step": 220 + }, + { + "epoch": 0.777027027027027, + "grad_norm": 0.4094320833683014, + "learning_rate": 0.0002, + "loss": 1.7571, + "step": 230 + }, + { + "epoch": 0.8108108108108109, + "grad_norm": 0.37315094470977783, + "learning_rate": 0.0002, + "loss": 1.8972, + "step": 240 + }, + { + "epoch": 0.8445945945945946, + "grad_norm": 0.4331067204475403, + "learning_rate": 0.0002, + "loss": 1.8337, + "step": 250 + }, + { + "epoch": 0.8783783783783784, + "grad_norm": 0.39758574962615967, + "learning_rate": 0.0002, + "loss": 1.8555, + "step": 260 + }, + { + "epoch": 0.9121621621621622, + "grad_norm": 0.38240519165992737, + "learning_rate": 0.0002, + "loss": 1.8624, + "step": 270 + }, + { + "epoch": 0.9459459459459459, + "grad_norm": 0.40907856822013855, + "learning_rate": 0.0002, + "loss": 1.7531, + "step": 280 + }, + { + "epoch": 0.9797297297297297, + "grad_norm": 0.34108003973960876, + "learning_rate": 0.0002, + "loss": 1.8435, + "step": 290 + }, + { + "epoch": 1.0, + "eval_loss": 1.8428829908370972, + "eval_runtime": 62.3963, + "eval_samples_per_second": 8.254, + "eval_steps_per_second": 1.042, + "step": 296 + } + ], + "logging_steps": 10, + "max_steps": 2368, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3698217229156352e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353b8c3d36532f1ad17da6f41538722c26cdcddf --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:385e5da1817ab8ad8dd9d82e0f198663245ef77a2a0bf6cf06d1105171be7411 +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd07b136186b6cab7a95c56cee29b819ea8abc60 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd12938c2921e88ddbf8cf072ef9cf5ff10438c3aa24d2d488c7e0194b13eaf2 +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..52fe5b8ba7c514db2d68cd01f34898d5ae74f346 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52ab9f4ffdeab647985cc4cb51e37c263796ed3c9353e2a49d8f3605b5a0ccdf +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..793753afc974dea1a500fc70d617ddeeedb320cd --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3428d6a350092bbebba87c3e88e6e8e719a6be58b7b2b69e0a9ca374defb928c +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5176883534513c0a31d37cbf867408f480f8dad1 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e6cb10b7615baa5f7486a65a8c0d74c8cdd53bdcb980f10e2b96c30076598a6 +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c2fa6a245dc4695f22d9a7c99f8fd9614c34185e --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/trainer_state.json @@ -0,0 +1,462 @@ +{ + "best_metric": 1.83539617061615, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592", + "epoch": 2.0, + "eval_steps": 10, + "global_step": 592, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.033783783783783786, + "grad_norm": 0.5820087194442749, + "learning_rate": 0.0002, + "loss": 2.6467, + "step": 10 + }, + { + "epoch": 0.06756756756756757, + "grad_norm": 0.4625075161457062, + "learning_rate": 0.0002, + "loss": 2.2808, + "step": 20 + }, + { + "epoch": 0.10135135135135136, + "grad_norm": 0.46946242451667786, + "learning_rate": 0.0002, + "loss": 2.0116, + "step": 30 + }, + { + "epoch": 0.13513513513513514, + "grad_norm": 0.5181305408477783, + "learning_rate": 0.0002, + "loss": 1.9089, + "step": 40 + }, + { + "epoch": 0.16891891891891891, + "grad_norm": 0.7439630627632141, + "learning_rate": 0.0002, + "loss": 1.9232, + "step": 50 + }, + { + "epoch": 0.20270270270270271, + "grad_norm": 0.5144319534301758, + "learning_rate": 0.0002, + "loss": 1.9646, + "step": 60 + }, + { + "epoch": 0.23648648648648649, + "grad_norm": 0.46696192026138306, + "learning_rate": 0.0002, + "loss": 1.921, + "step": 70 + }, + { + "epoch": 0.2702702702702703, + "grad_norm": 0.4330582022666931, + "learning_rate": 0.0002, + "loss": 1.8794, + "step": 80 + }, + { + "epoch": 0.30405405405405406, + "grad_norm": 0.502414882183075, + "learning_rate": 0.0002, + "loss": 1.8897, + "step": 90 + }, + { + "epoch": 0.33783783783783783, + "grad_norm": 0.4174366295337677, + "learning_rate": 0.0002, + "loss": 1.8166, + "step": 100 + }, + { + "epoch": 0.3716216216216216, + "grad_norm": 0.4296933710575104, + "learning_rate": 0.0002, + "loss": 1.8264, + "step": 110 + }, + { + "epoch": 0.40540540540540543, + "grad_norm": 0.4299834668636322, + "learning_rate": 0.0002, + "loss": 1.9223, + "step": 120 + }, + { + "epoch": 0.4391891891891892, + "grad_norm": 0.5583795309066772, + "learning_rate": 0.0002, + "loss": 1.8708, + "step": 130 + }, + { + "epoch": 0.47297297297297297, + "grad_norm": 0.5205192565917969, + "learning_rate": 0.0002, + "loss": 1.7786, + "step": 140 + }, + { + "epoch": 0.5067567567567568, + "grad_norm": 0.4683739244937897, + "learning_rate": 0.0002, + "loss": 1.8293, + "step": 150 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 0.497546523809433, + "learning_rate": 0.0002, + "loss": 1.9102, + "step": 160 + }, + { + "epoch": 0.5743243243243243, + "grad_norm": 0.40443721413612366, + "learning_rate": 0.0002, + "loss": 1.8077, + "step": 170 + }, + { + "epoch": 0.6081081081081081, + "grad_norm": 0.39056605100631714, + "learning_rate": 0.0002, + "loss": 1.8446, + "step": 180 + }, + { + "epoch": 0.6418918918918919, + "grad_norm": 0.42397141456604004, + "learning_rate": 0.0002, + "loss": 1.8747, + "step": 190 + }, + { + "epoch": 0.6756756756756757, + "grad_norm": 0.4679499566555023, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 200 + }, + { + "epoch": 0.7094594594594594, + "grad_norm": 0.39300158619880676, + "learning_rate": 0.0002, + "loss": 1.8401, + "step": 210 + }, + { + "epoch": 0.7432432432432432, + "grad_norm": 0.4001515805721283, + "learning_rate": 0.0002, + "loss": 1.8345, + "step": 220 + }, + { + "epoch": 0.777027027027027, + "grad_norm": 0.4094320833683014, + "learning_rate": 0.0002, + "loss": 1.7571, + "step": 230 + }, + { + "epoch": 0.8108108108108109, + "grad_norm": 0.37315094470977783, + "learning_rate": 0.0002, + "loss": 1.8972, + "step": 240 + }, + { + "epoch": 0.8445945945945946, + "grad_norm": 0.4331067204475403, + "learning_rate": 0.0002, + "loss": 1.8337, + "step": 250 + }, + { + "epoch": 0.8783783783783784, + "grad_norm": 0.39758574962615967, + "learning_rate": 0.0002, + "loss": 1.8555, + "step": 260 + }, + { + "epoch": 0.9121621621621622, + "grad_norm": 0.38240519165992737, + "learning_rate": 0.0002, + "loss": 1.8624, + "step": 270 + }, + { + "epoch": 0.9459459459459459, + "grad_norm": 0.40907856822013855, + "learning_rate": 0.0002, + "loss": 1.7531, + "step": 280 + }, + { + "epoch": 0.9797297297297297, + "grad_norm": 0.34108003973960876, + "learning_rate": 0.0002, + "loss": 1.8435, + "step": 290 + }, + { + "epoch": 1.0, + "eval_loss": 1.8428829908370972, + "eval_runtime": 62.3963, + "eval_samples_per_second": 8.254, + "eval_steps_per_second": 1.042, + "step": 296 + }, + { + "epoch": 1.0135135135135136, + "grad_norm": 0.3993101716041565, + "learning_rate": 0.0002, + "loss": 1.7254, + "step": 300 + }, + { + "epoch": 1.0472972972972974, + "grad_norm": 0.45567989349365234, + "learning_rate": 0.0002, + "loss": 1.7985, + "step": 310 + }, + { + "epoch": 1.0810810810810811, + "grad_norm": 0.3767794072628021, + "learning_rate": 0.0002, + "loss": 1.757, + "step": 320 + }, + { + "epoch": 1.114864864864865, + "grad_norm": 0.5181908011436462, + "learning_rate": 0.0002, + "loss": 1.7917, + "step": 330 + }, + { + "epoch": 1.1486486486486487, + "grad_norm": 0.4213193356990814, + "learning_rate": 0.0002, + "loss": 1.7723, + "step": 340 + }, + { + "epoch": 1.1824324324324325, + "grad_norm": 0.45519495010375977, + "learning_rate": 0.0002, + "loss": 1.8203, + "step": 350 + }, + { + "epoch": 1.2162162162162162, + "grad_norm": 0.35332638025283813, + "learning_rate": 0.0002, + "loss": 1.6946, + "step": 360 + }, + { + "epoch": 1.25, + "grad_norm": 0.3675481677055359, + "learning_rate": 0.0002, + "loss": 1.7541, + "step": 370 + }, + { + "epoch": 1.2837837837837838, + "grad_norm": 0.4569270610809326, + "learning_rate": 0.0002, + "loss": 1.7458, + "step": 380 + }, + { + "epoch": 1.3175675675675675, + "grad_norm": 0.37950295209884644, + "learning_rate": 0.0002, + "loss": 1.7988, + "step": 390 + }, + { + "epoch": 1.3513513513513513, + "grad_norm": 0.5744572877883911, + "learning_rate": 0.0002, + "loss": 1.7032, + "step": 400 + }, + { + "epoch": 1.385135135135135, + "grad_norm": 0.44380778074264526, + "learning_rate": 0.0002, + "loss": 1.7437, + "step": 410 + }, + { + "epoch": 1.4189189189189189, + "grad_norm": 0.43328171968460083, + "learning_rate": 0.0002, + "loss": 1.7454, + "step": 420 + }, + { + "epoch": 1.4527027027027026, + "grad_norm": 0.41290056705474854, + "learning_rate": 0.0002, + "loss": 1.7636, + "step": 430 + }, + { + "epoch": 1.4864864864864864, + "grad_norm": 0.3771473169326782, + "learning_rate": 0.0002, + "loss": 1.7332, + "step": 440 + }, + { + "epoch": 1.5202702702702702, + "grad_norm": 0.42537811398506165, + "learning_rate": 0.0002, + "loss": 1.7618, + "step": 450 + }, + { + "epoch": 1.554054054054054, + "grad_norm": 0.39705610275268555, + "learning_rate": 0.0002, + "loss": 1.8523, + "step": 460 + }, + { + "epoch": 1.5878378378378377, + "grad_norm": 0.4178248643875122, + "learning_rate": 0.0002, + "loss": 1.7673, + "step": 470 + }, + { + "epoch": 1.6216216216216215, + "grad_norm": 0.39107105135917664, + "learning_rate": 0.0002, + "loss": 1.742, + "step": 480 + }, + { + "epoch": 1.6554054054054053, + "grad_norm": 0.38505619764328003, + "learning_rate": 0.0002, + "loss": 1.6984, + "step": 490 + }, + { + "epoch": 1.689189189189189, + "grad_norm": 0.43590813875198364, + "learning_rate": 0.0002, + "loss": 1.7382, + "step": 500 + }, + { + "epoch": 1.722972972972973, + "grad_norm": 0.42785948514938354, + "learning_rate": 0.0002, + "loss": 1.7139, + "step": 510 + }, + { + "epoch": 1.7567567567567568, + "grad_norm": 0.3829004168510437, + "learning_rate": 0.0002, + "loss": 1.7551, + "step": 520 + }, + { + "epoch": 1.7905405405405406, + "grad_norm": 0.35287904739379883, + "learning_rate": 0.0002, + "loss": 1.7744, + "step": 530 + }, + { + "epoch": 1.8243243243243243, + "grad_norm": 0.38657888770103455, + "learning_rate": 0.0002, + "loss": 1.7714, + "step": 540 + }, + { + "epoch": 1.8581081081081081, + "grad_norm": 0.41452157497406006, + "learning_rate": 0.0002, + "loss": 1.7535, + "step": 550 + }, + { + "epoch": 1.8918918918918919, + "grad_norm": 0.3898279070854187, + "learning_rate": 0.0002, + "loss": 1.7627, + "step": 560 + }, + { + "epoch": 1.9256756756756757, + "grad_norm": 0.4147624373435974, + "learning_rate": 0.0002, + "loss": 1.7494, + "step": 570 + }, + { + "epoch": 1.9594594594594594, + "grad_norm": 0.4374851584434509, + "learning_rate": 0.0002, + "loss": 1.7693, + "step": 580 + }, + { + "epoch": 1.9932432432432432, + "grad_norm": 0.48530328273773193, + "learning_rate": 0.0002, + "loss": 1.7796, + "step": 590 + }, + { + "epoch": 2.0, + "eval_loss": 1.83539617061615, + "eval_runtime": 70.9865, + "eval_samples_per_second": 7.255, + "eval_steps_per_second": 0.916, + "step": 592 + } + ], + "logging_steps": 10, + "max_steps": 2368, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.7396434458312704e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353b8c3d36532f1ad17da6f41538722c26cdcddf --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:385e5da1817ab8ad8dd9d82e0f198663245ef77a2a0bf6cf06d1105171be7411 +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb69bc065824b7e91991eaaa6861a93d3c25a36d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:815a11aec635edbafb93a82ddcc335598bf1f5b79f7dc3043ab990aec4fe78d8 +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..28ff83aeaab6e2eb76b18735db8164a1a7668eab --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e22ae9afe10630e38c4d23255225b008ce93b9584cb88b905402421f9509dd9 +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fb4ea0e607365fe420ff9917cefae6dd3037f86e --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e531e8d869e87066173c61749a606667f3694f8269a04b141f81a70a24e395 +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..85ee74cacb943c234171d6f70b7e674c0e07b5aa --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec26a7650c2cad775229be4cd28bf4c8b46e2d91baadbcc7e3643edd2ab7bb32 +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1acf14d994bbf1ae04f41c481dbe708a77eced7f --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/trainer_state.json @@ -0,0 +1,673 @@ +{ + "best_metric": 1.83539617061615, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592", + "epoch": 3.0, + "eval_steps": 10, + "global_step": 888, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.033783783783783786, + "grad_norm": 0.5820087194442749, + "learning_rate": 0.0002, + "loss": 2.6467, + "step": 10 + }, + { + "epoch": 0.06756756756756757, + "grad_norm": 0.4625075161457062, + "learning_rate": 0.0002, + "loss": 2.2808, + "step": 20 + }, + { + "epoch": 0.10135135135135136, + "grad_norm": 0.46946242451667786, + "learning_rate": 0.0002, + "loss": 2.0116, + "step": 30 + }, + { + "epoch": 0.13513513513513514, + "grad_norm": 0.5181305408477783, + "learning_rate": 0.0002, + "loss": 1.9089, + "step": 40 + }, + { + "epoch": 0.16891891891891891, + "grad_norm": 0.7439630627632141, + "learning_rate": 0.0002, + "loss": 1.9232, + "step": 50 + }, + { + "epoch": 0.20270270270270271, + "grad_norm": 0.5144319534301758, + "learning_rate": 0.0002, + "loss": 1.9646, + "step": 60 + }, + { + "epoch": 0.23648648648648649, + "grad_norm": 0.46696192026138306, + "learning_rate": 0.0002, + "loss": 1.921, + "step": 70 + }, + { + "epoch": 0.2702702702702703, + "grad_norm": 0.4330582022666931, + "learning_rate": 0.0002, + "loss": 1.8794, + "step": 80 + }, + { + "epoch": 0.30405405405405406, + "grad_norm": 0.502414882183075, + "learning_rate": 0.0002, + "loss": 1.8897, + "step": 90 + }, + { + "epoch": 0.33783783783783783, + "grad_norm": 0.4174366295337677, + "learning_rate": 0.0002, + "loss": 1.8166, + "step": 100 + }, + { + "epoch": 0.3716216216216216, + "grad_norm": 0.4296933710575104, + "learning_rate": 0.0002, + "loss": 1.8264, + "step": 110 + }, + { + "epoch": 0.40540540540540543, + "grad_norm": 0.4299834668636322, + "learning_rate": 0.0002, + "loss": 1.9223, + "step": 120 + }, + { + "epoch": 0.4391891891891892, + "grad_norm": 0.5583795309066772, + "learning_rate": 0.0002, + "loss": 1.8708, + "step": 130 + }, + { + "epoch": 0.47297297297297297, + "grad_norm": 0.5205192565917969, + "learning_rate": 0.0002, + "loss": 1.7786, + "step": 140 + }, + { + "epoch": 0.5067567567567568, + "grad_norm": 0.4683739244937897, + "learning_rate": 0.0002, + "loss": 1.8293, + "step": 150 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 0.497546523809433, + "learning_rate": 0.0002, + "loss": 1.9102, + "step": 160 + }, + { + "epoch": 0.5743243243243243, + "grad_norm": 0.40443721413612366, + "learning_rate": 0.0002, + "loss": 1.8077, + "step": 170 + }, + { + "epoch": 0.6081081081081081, + "grad_norm": 0.39056605100631714, + "learning_rate": 0.0002, + "loss": 1.8446, + "step": 180 + }, + { + "epoch": 0.6418918918918919, + "grad_norm": 0.42397141456604004, + "learning_rate": 0.0002, + "loss": 1.8747, + "step": 190 + }, + { + "epoch": 0.6756756756756757, + "grad_norm": 0.4679499566555023, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 200 + }, + { + "epoch": 0.7094594594594594, + "grad_norm": 0.39300158619880676, + "learning_rate": 0.0002, + "loss": 1.8401, + "step": 210 + }, + { + "epoch": 0.7432432432432432, + "grad_norm": 0.4001515805721283, + "learning_rate": 0.0002, + "loss": 1.8345, + "step": 220 + }, + { + "epoch": 0.777027027027027, + "grad_norm": 0.4094320833683014, + "learning_rate": 0.0002, + "loss": 1.7571, + "step": 230 + }, + { + "epoch": 0.8108108108108109, + "grad_norm": 0.37315094470977783, + "learning_rate": 0.0002, + "loss": 1.8972, + "step": 240 + }, + { + "epoch": 0.8445945945945946, + "grad_norm": 0.4331067204475403, + "learning_rate": 0.0002, + "loss": 1.8337, + "step": 250 + }, + { + "epoch": 0.8783783783783784, + "grad_norm": 0.39758574962615967, + "learning_rate": 0.0002, + "loss": 1.8555, + "step": 260 + }, + { + "epoch": 0.9121621621621622, + "grad_norm": 0.38240519165992737, + "learning_rate": 0.0002, + "loss": 1.8624, + "step": 270 + }, + { + "epoch": 0.9459459459459459, + "grad_norm": 0.40907856822013855, + "learning_rate": 0.0002, + "loss": 1.7531, + "step": 280 + }, + { + "epoch": 0.9797297297297297, + "grad_norm": 0.34108003973960876, + "learning_rate": 0.0002, + "loss": 1.8435, + "step": 290 + }, + { + "epoch": 1.0, + "eval_loss": 1.8428829908370972, + "eval_runtime": 62.3963, + "eval_samples_per_second": 8.254, + "eval_steps_per_second": 1.042, + "step": 296 + }, + { + "epoch": 1.0135135135135136, + "grad_norm": 0.3993101716041565, + "learning_rate": 0.0002, + "loss": 1.7254, + "step": 300 + }, + { + "epoch": 1.0472972972972974, + "grad_norm": 0.45567989349365234, + "learning_rate": 0.0002, + "loss": 1.7985, + "step": 310 + }, + { + "epoch": 1.0810810810810811, + "grad_norm": 0.3767794072628021, + "learning_rate": 0.0002, + "loss": 1.757, + "step": 320 + }, + { + "epoch": 1.114864864864865, + "grad_norm": 0.5181908011436462, + "learning_rate": 0.0002, + "loss": 1.7917, + "step": 330 + }, + { + "epoch": 1.1486486486486487, + "grad_norm": 0.4213193356990814, + "learning_rate": 0.0002, + "loss": 1.7723, + "step": 340 + }, + { + "epoch": 1.1824324324324325, + "grad_norm": 0.45519495010375977, + "learning_rate": 0.0002, + "loss": 1.8203, + "step": 350 + }, + { + "epoch": 1.2162162162162162, + "grad_norm": 0.35332638025283813, + "learning_rate": 0.0002, + "loss": 1.6946, + "step": 360 + }, + { + "epoch": 1.25, + "grad_norm": 0.3675481677055359, + "learning_rate": 0.0002, + "loss": 1.7541, + "step": 370 + }, + { + "epoch": 1.2837837837837838, + "grad_norm": 0.4569270610809326, + "learning_rate": 0.0002, + "loss": 1.7458, + "step": 380 + }, + { + "epoch": 1.3175675675675675, + "grad_norm": 0.37950295209884644, + "learning_rate": 0.0002, + "loss": 1.7988, + "step": 390 + }, + { + "epoch": 1.3513513513513513, + "grad_norm": 0.5744572877883911, + "learning_rate": 0.0002, + "loss": 1.7032, + "step": 400 + }, + { + "epoch": 1.385135135135135, + "grad_norm": 0.44380778074264526, + "learning_rate": 0.0002, + "loss": 1.7437, + "step": 410 + }, + { + "epoch": 1.4189189189189189, + "grad_norm": 0.43328171968460083, + "learning_rate": 0.0002, + "loss": 1.7454, + "step": 420 + }, + { + "epoch": 1.4527027027027026, + "grad_norm": 0.41290056705474854, + "learning_rate": 0.0002, + "loss": 1.7636, + "step": 430 + }, + { + "epoch": 1.4864864864864864, + "grad_norm": 0.3771473169326782, + "learning_rate": 0.0002, + "loss": 1.7332, + "step": 440 + }, + { + "epoch": 1.5202702702702702, + "grad_norm": 0.42537811398506165, + "learning_rate": 0.0002, + "loss": 1.7618, + "step": 450 + }, + { + "epoch": 1.554054054054054, + "grad_norm": 0.39705610275268555, + "learning_rate": 0.0002, + "loss": 1.8523, + "step": 460 + }, + { + "epoch": 1.5878378378378377, + "grad_norm": 0.4178248643875122, + "learning_rate": 0.0002, + "loss": 1.7673, + "step": 470 + }, + { + "epoch": 1.6216216216216215, + "grad_norm": 0.39107105135917664, + "learning_rate": 0.0002, + "loss": 1.742, + "step": 480 + }, + { + "epoch": 1.6554054054054053, + "grad_norm": 0.38505619764328003, + "learning_rate": 0.0002, + "loss": 1.6984, + "step": 490 + }, + { + "epoch": 1.689189189189189, + "grad_norm": 0.43590813875198364, + "learning_rate": 0.0002, + "loss": 1.7382, + "step": 500 + }, + { + "epoch": 1.722972972972973, + "grad_norm": 0.42785948514938354, + "learning_rate": 0.0002, + "loss": 1.7139, + "step": 510 + }, + { + "epoch": 1.7567567567567568, + "grad_norm": 0.3829004168510437, + "learning_rate": 0.0002, + "loss": 1.7551, + "step": 520 + }, + { + "epoch": 1.7905405405405406, + "grad_norm": 0.35287904739379883, + "learning_rate": 0.0002, + "loss": 1.7744, + "step": 530 + }, + { + "epoch": 1.8243243243243243, + "grad_norm": 0.38657888770103455, + "learning_rate": 0.0002, + "loss": 1.7714, + "step": 540 + }, + { + "epoch": 1.8581081081081081, + "grad_norm": 0.41452157497406006, + "learning_rate": 0.0002, + "loss": 1.7535, + "step": 550 + }, + { + "epoch": 1.8918918918918919, + "grad_norm": 0.3898279070854187, + "learning_rate": 0.0002, + "loss": 1.7627, + "step": 560 + }, + { + "epoch": 1.9256756756756757, + "grad_norm": 0.4147624373435974, + "learning_rate": 0.0002, + "loss": 1.7494, + "step": 570 + }, + { + "epoch": 1.9594594594594594, + "grad_norm": 0.4374851584434509, + "learning_rate": 0.0002, + "loss": 1.7693, + "step": 580 + }, + { + "epoch": 1.9932432432432432, + "grad_norm": 0.48530328273773193, + "learning_rate": 0.0002, + "loss": 1.7796, + "step": 590 + }, + { + "epoch": 2.0, + "eval_loss": 1.83539617061615, + "eval_runtime": 70.9865, + "eval_samples_per_second": 7.255, + "eval_steps_per_second": 0.916, + "step": 592 + }, + { + "epoch": 2.027027027027027, + "grad_norm": 0.40344223380088806, + "learning_rate": 0.0002, + "loss": 1.7294, + "step": 600 + }, + { + "epoch": 2.060810810810811, + "grad_norm": 0.48268747329711914, + "learning_rate": 0.0002, + "loss": 1.6693, + "step": 610 + }, + { + "epoch": 2.0945945945945947, + "grad_norm": 0.4675706923007965, + "learning_rate": 0.0002, + "loss": 1.6315, + "step": 620 + }, + { + "epoch": 2.1283783783783785, + "grad_norm": 0.47494322061538696, + "learning_rate": 0.0002, + "loss": 1.6627, + "step": 630 + }, + { + "epoch": 2.1621621621621623, + "grad_norm": 0.4555308520793915, + "learning_rate": 0.0002, + "loss": 1.5668, + "step": 640 + }, + { + "epoch": 2.195945945945946, + "grad_norm": 0.43085595965385437, + "learning_rate": 0.0002, + "loss": 1.6537, + "step": 650 + }, + { + "epoch": 2.22972972972973, + "grad_norm": 0.4364128112792969, + "learning_rate": 0.0002, + "loss": 1.6316, + "step": 660 + }, + { + "epoch": 2.2635135135135136, + "grad_norm": 0.4711395800113678, + "learning_rate": 0.0002, + "loss": 1.669, + "step": 670 + }, + { + "epoch": 2.2972972972972974, + "grad_norm": 0.5109705328941345, + "learning_rate": 0.0002, + "loss": 1.5758, + "step": 680 + }, + { + "epoch": 2.331081081081081, + "grad_norm": 0.5185648798942566, + "learning_rate": 0.0002, + "loss": 1.5912, + "step": 690 + }, + { + "epoch": 2.364864864864865, + "grad_norm": 0.49192842841148376, + "learning_rate": 0.0002, + "loss": 1.6605, + "step": 700 + }, + { + "epoch": 2.3986486486486487, + "grad_norm": 0.5619909763336182, + "learning_rate": 0.0002, + "loss": 1.6688, + "step": 710 + }, + { + "epoch": 2.4324324324324325, + "grad_norm": 0.4932861328125, + "learning_rate": 0.0002, + "loss": 1.7836, + "step": 720 + }, + { + "epoch": 2.4662162162162162, + "grad_norm": 0.5211932063102722, + "learning_rate": 0.0002, + "loss": 1.6532, + "step": 730 + }, + { + "epoch": 2.5, + "grad_norm": 0.4138050377368927, + "learning_rate": 0.0002, + "loss": 1.667, + "step": 740 + }, + { + "epoch": 2.5337837837837838, + "grad_norm": 0.4644908010959625, + "learning_rate": 0.0002, + "loss": 1.658, + "step": 750 + }, + { + "epoch": 2.5675675675675675, + "grad_norm": 0.4513227641582489, + "learning_rate": 0.0002, + "loss": 1.6451, + "step": 760 + }, + { + "epoch": 2.6013513513513513, + "grad_norm": 0.4735109508037567, + "learning_rate": 0.0002, + "loss": 1.7071, + "step": 770 + }, + { + "epoch": 2.635135135135135, + "grad_norm": 0.5453559756278992, + "learning_rate": 0.0002, + "loss": 1.6659, + "step": 780 + }, + { + "epoch": 2.668918918918919, + "grad_norm": 0.5422565937042236, + "learning_rate": 0.0002, + "loss": 1.7211, + "step": 790 + }, + { + "epoch": 2.7027027027027026, + "grad_norm": 0.4288518726825714, + "learning_rate": 0.0002, + "loss": 1.6623, + "step": 800 + }, + { + "epoch": 2.7364864864864864, + "grad_norm": 0.4085204005241394, + "learning_rate": 0.0002, + "loss": 1.7197, + "step": 810 + }, + { + "epoch": 2.77027027027027, + "grad_norm": 0.49770182371139526, + "learning_rate": 0.0002, + "loss": 1.6376, + "step": 820 + }, + { + "epoch": 2.804054054054054, + "grad_norm": 0.5005106329917908, + "learning_rate": 0.0002, + "loss": 1.6332, + "step": 830 + }, + { + "epoch": 2.8378378378378377, + "grad_norm": 0.4763440489768982, + "learning_rate": 0.0002, + "loss": 1.6675, + "step": 840 + }, + { + "epoch": 2.8716216216216215, + "grad_norm": 0.44995108246803284, + "learning_rate": 0.0002, + "loss": 1.7149, + "step": 850 + }, + { + "epoch": 2.9054054054054053, + "grad_norm": 0.5299676656723022, + "learning_rate": 0.0002, + "loss": 1.6438, + "step": 860 + }, + { + "epoch": 2.939189189189189, + "grad_norm": 0.49627119302749634, + "learning_rate": 0.0002, + "loss": 1.6457, + "step": 870 + }, + { + "epoch": 2.972972972972973, + "grad_norm": 0.502545177936554, + "learning_rate": 0.0002, + "loss": 1.6517, + "step": 880 + }, + { + "epoch": 3.0, + "eval_loss": 1.8520468473434448, + "eval_runtime": 70.9917, + "eval_samples_per_second": 7.254, + "eval_steps_per_second": 0.916, + "step": 888 + } + ], + "logging_steps": 10, + "max_steps": 2368, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.109465168746906e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353b8c3d36532f1ad17da6f41538722c26cdcddf --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-888/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:385e5da1817ab8ad8dd9d82e0f198663245ef77a2a0bf6cf06d1105171be7411 +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353b8c3d36532f1ad17da6f41538722c26cdcddf --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:385e5da1817ab8ad8dd9d82e0f198663245ef77a2a0bf6cf06d1105171be7411 +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/training_log.jsonl b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/training_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..65c4dd2fcd01995a2acd9b4095f6d879e9219d74 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/training_log.jsonl @@ -0,0 +1,8 @@ +{"epoch": 1.0, "step": 296, "epoch_duration": 586.018669128418, "total_accumulated_duration": 586.018669128418, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7568.4541015625}, "peak_memory_usage": {"GPU_0": 9688.99365234375}, "avg_memory_reserved": {"GPU_0": 10406.0}, "peak_memory_reserved": {"GPU_0": 10406.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.6467, "grad_norm": 0.5820087194442749, "learning_rate": 0.0002, "epoch": 0.033783783783783786, "step": 10}, {"loss": 2.2808, "grad_norm": 0.4625075161457062, "learning_rate": 0.0002, "epoch": 0.06756756756756757, "step": 20}, {"loss": 2.0116, "grad_norm": 0.46946242451667786, "learning_rate": 0.0002, "epoch": 0.10135135135135136, "step": 30}, {"loss": 1.9089, "grad_norm": 0.5181305408477783, "learning_rate": 0.0002, "epoch": 0.13513513513513514, "step": 40}, {"loss": 1.9232, "grad_norm": 0.7439630627632141, "learning_rate": 0.0002, "epoch": 0.16891891891891891, "step": 50}, {"loss": 1.9646, "grad_norm": 0.5144319534301758, "learning_rate": 0.0002, "epoch": 0.20270270270270271, "step": 60}, {"loss": 1.921, "grad_norm": 0.46696192026138306, "learning_rate": 0.0002, "epoch": 0.23648648648648649, "step": 70}, {"loss": 1.8794, "grad_norm": 0.4330582022666931, "learning_rate": 0.0002, "epoch": 0.2702702702702703, "step": 80}, {"loss": 1.8897, "grad_norm": 0.502414882183075, "learning_rate": 0.0002, "epoch": 0.30405405405405406, "step": 90}, {"loss": 1.8166, "grad_norm": 0.4174366295337677, "learning_rate": 0.0002, "epoch": 0.33783783783783783, "step": 100}, {"loss": 1.8264, "grad_norm": 0.4296933710575104, "learning_rate": 0.0002, "epoch": 0.3716216216216216, "step": 110}, {"loss": 1.9223, "grad_norm": 0.4299834668636322, "learning_rate": 0.0002, "epoch": 0.40540540540540543, "step": 120}, {"loss": 1.8708, "grad_norm": 0.5583795309066772, "learning_rate": 0.0002, "epoch": 0.4391891891891892, "step": 130}, {"loss": 1.7786, "grad_norm": 0.5205192565917969, "learning_rate": 0.0002, "epoch": 0.47297297297297297, "step": 140}, {"loss": 1.8293, "grad_norm": 0.4683739244937897, "learning_rate": 0.0002, "epoch": 0.5067567567567568, "step": 150}, {"loss": 1.9102, "grad_norm": 0.497546523809433, "learning_rate": 0.0002, "epoch": 0.5405405405405406, "step": 160}, {"loss": 1.8077, "grad_norm": 0.40443721413612366, "learning_rate": 0.0002, "epoch": 0.5743243243243243, "step": 170}, {"loss": 1.8446, "grad_norm": 0.39056605100631714, "learning_rate": 0.0002, "epoch": 0.6081081081081081, "step": 180}, {"loss": 1.8747, "grad_norm": 0.42397141456604004, "learning_rate": 0.0002, "epoch": 0.6418918918918919, "step": 190}, {"loss": 1.8413, "grad_norm": 0.4679499566555023, "learning_rate": 0.0002, "epoch": 0.6756756756756757, "step": 200}, {"loss": 1.8401, "grad_norm": 0.39300158619880676, "learning_rate": 0.0002, "epoch": 0.7094594594594594, "step": 210}, {"loss": 1.8345, "grad_norm": 0.4001515805721283, "learning_rate": 0.0002, "epoch": 0.7432432432432432, "step": 220}, {"loss": 1.7571, "grad_norm": 0.4094320833683014, "learning_rate": 0.0002, "epoch": 0.777027027027027, "step": 230}, {"loss": 1.8972, "grad_norm": 0.37315094470977783, "learning_rate": 0.0002, "epoch": 0.8108108108108109, "step": 240}, {"loss": 1.8337, "grad_norm": 0.4331067204475403, "learning_rate": 0.0002, "epoch": 0.8445945945945946, "step": 250}, {"loss": 1.8555, "grad_norm": 0.39758574962615967, "learning_rate": 0.0002, "epoch": 0.8783783783783784, "step": 260}, {"loss": 1.8624, "grad_norm": 0.38240519165992737, "learning_rate": 0.0002, "epoch": 0.9121621621621622, "step": 270}, {"loss": 1.7531, "grad_norm": 0.40907856822013855, "learning_rate": 0.0002, "epoch": 0.9459459459459459, "step": 280}, {"loss": 1.8435, "grad_norm": 0.34108003973960876, "learning_rate": 0.0002, "epoch": 0.9797297297297297, "step": 290}]} +{"epoch": 2.0, "step": 592, "epoch_duration": 612.8172078132629, "total_accumulated_duration": 1198.835876941681, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7568.4541015625}, "peak_memory_usage": {"GPU_0": 13688.75439453125}, "avg_memory_reserved": {"GPU_0": 17416.0}, "peak_memory_reserved": {"GPU_0": 17416.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-296", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.6467, "grad_norm": 0.5820087194442749, "learning_rate": 0.0002, "epoch": 0.033783783783783786, "step": 10}, {"loss": 2.2808, "grad_norm": 0.4625075161457062, "learning_rate": 0.0002, "epoch": 0.06756756756756757, "step": 20}, {"loss": 2.0116, "grad_norm": 0.46946242451667786, "learning_rate": 0.0002, "epoch": 0.10135135135135136, "step": 30}, {"loss": 1.9089, "grad_norm": 0.5181305408477783, "learning_rate": 0.0002, "epoch": 0.13513513513513514, "step": 40}, {"loss": 1.9232, "grad_norm": 0.7439630627632141, "learning_rate": 0.0002, "epoch": 0.16891891891891891, "step": 50}, {"loss": 1.9646, "grad_norm": 0.5144319534301758, "learning_rate": 0.0002, "epoch": 0.20270270270270271, "step": 60}, {"loss": 1.921, "grad_norm": 0.46696192026138306, "learning_rate": 0.0002, "epoch": 0.23648648648648649, "step": 70}, {"loss": 1.8794, "grad_norm": 0.4330582022666931, "learning_rate": 0.0002, "epoch": 0.2702702702702703, "step": 80}, {"loss": 1.8897, "grad_norm": 0.502414882183075, "learning_rate": 0.0002, "epoch": 0.30405405405405406, "step": 90}, {"loss": 1.8166, "grad_norm": 0.4174366295337677, "learning_rate": 0.0002, "epoch": 0.33783783783783783, "step": 100}, {"loss": 1.8264, "grad_norm": 0.4296933710575104, "learning_rate": 0.0002, "epoch": 0.3716216216216216, "step": 110}, {"loss": 1.9223, "grad_norm": 0.4299834668636322, "learning_rate": 0.0002, "epoch": 0.40540540540540543, "step": 120}, {"loss": 1.8708, "grad_norm": 0.5583795309066772, "learning_rate": 0.0002, "epoch": 0.4391891891891892, "step": 130}, {"loss": 1.7786, "grad_norm": 0.5205192565917969, "learning_rate": 0.0002, "epoch": 0.47297297297297297, "step": 140}, {"loss": 1.8293, "grad_norm": 0.4683739244937897, "learning_rate": 0.0002, "epoch": 0.5067567567567568, "step": 150}, {"loss": 1.9102, "grad_norm": 0.497546523809433, "learning_rate": 0.0002, "epoch": 0.5405405405405406, "step": 160}, {"loss": 1.8077, "grad_norm": 0.40443721413612366, "learning_rate": 0.0002, "epoch": 0.5743243243243243, "step": 170}, {"loss": 1.8446, "grad_norm": 0.39056605100631714, "learning_rate": 0.0002, "epoch": 0.6081081081081081, "step": 180}, {"loss": 1.8747, "grad_norm": 0.42397141456604004, "learning_rate": 0.0002, "epoch": 0.6418918918918919, "step": 190}, {"loss": 1.8413, "grad_norm": 0.4679499566555023, "learning_rate": 0.0002, "epoch": 0.6756756756756757, "step": 200}, {"loss": 1.8401, "grad_norm": 0.39300158619880676, "learning_rate": 0.0002, "epoch": 0.7094594594594594, "step": 210}, {"loss": 1.8345, "grad_norm": 0.4001515805721283, "learning_rate": 0.0002, "epoch": 0.7432432432432432, "step": 220}, {"loss": 1.7571, "grad_norm": 0.4094320833683014, "learning_rate": 0.0002, "epoch": 0.777027027027027, "step": 230}, {"loss": 1.8972, "grad_norm": 0.37315094470977783, "learning_rate": 0.0002, "epoch": 0.8108108108108109, "step": 240}, {"loss": 1.8337, "grad_norm": 0.4331067204475403, "learning_rate": 0.0002, "epoch": 0.8445945945945946, "step": 250}, {"loss": 1.8555, "grad_norm": 0.39758574962615967, "learning_rate": 0.0002, "epoch": 0.8783783783783784, "step": 260}, {"loss": 1.8624, "grad_norm": 0.38240519165992737, "learning_rate": 0.0002, "epoch": 0.9121621621621622, "step": 270}, {"loss": 1.7531, "grad_norm": 0.40907856822013855, "learning_rate": 0.0002, "epoch": 0.9459459459459459, "step": 280}, {"loss": 1.8435, "grad_norm": 0.34108003973960876, "learning_rate": 0.0002, "epoch": 0.9797297297297297, "step": 290}, {"eval_loss": 1.8428829908370972, "eval_runtime": 62.3963, "eval_samples_per_second": 8.254, "eval_steps_per_second": 1.042, "epoch": 1.0, "step": 296}, {"loss": 1.7254, "grad_norm": 0.3993101716041565, "learning_rate": 0.0002, "epoch": 1.0135135135135136, "step": 300}, {"loss": 1.7985, "grad_norm": 0.45567989349365234, "learning_rate": 0.0002, "epoch": 1.0472972972972974, "step": 310}, {"loss": 1.757, "grad_norm": 0.3767794072628021, "learning_rate": 0.0002, "epoch": 1.0810810810810811, "step": 320}, {"loss": 1.7917, "grad_norm": 0.5181908011436462, "learning_rate": 0.0002, "epoch": 1.114864864864865, "step": 330}, {"loss": 1.7723, "grad_norm": 0.4213193356990814, "learning_rate": 0.0002, "epoch": 1.1486486486486487, "step": 340}, {"loss": 1.8203, "grad_norm": 0.45519495010375977, "learning_rate": 0.0002, "epoch": 1.1824324324324325, "step": 350}, {"loss": 1.6946, "grad_norm": 0.35332638025283813, "learning_rate": 0.0002, "epoch": 1.2162162162162162, "step": 360}, {"loss": 1.7541, "grad_norm": 0.3675481677055359, "learning_rate": 0.0002, "epoch": 1.25, "step": 370}, {"loss": 1.7458, "grad_norm": 0.4569270610809326, "learning_rate": 0.0002, "epoch": 1.2837837837837838, "step": 380}, {"loss": 1.7988, "grad_norm": 0.37950295209884644, "learning_rate": 0.0002, "epoch": 1.3175675675675675, "step": 390}, {"loss": 1.7032, "grad_norm": 0.5744572877883911, "learning_rate": 0.0002, "epoch": 1.3513513513513513, "step": 400}, {"loss": 1.7437, "grad_norm": 0.44380778074264526, "learning_rate": 0.0002, "epoch": 1.385135135135135, "step": 410}, {"loss": 1.7454, "grad_norm": 0.43328171968460083, "learning_rate": 0.0002, "epoch": 1.4189189189189189, "step": 420}, {"loss": 1.7636, "grad_norm": 0.41290056705474854, "learning_rate": 0.0002, "epoch": 1.4527027027027026, "step": 430}, {"loss": 1.7332, "grad_norm": 0.3771473169326782, "learning_rate": 0.0002, "epoch": 1.4864864864864864, "step": 440}, {"loss": 1.7618, "grad_norm": 0.42537811398506165, "learning_rate": 0.0002, "epoch": 1.5202702702702702, "step": 450}, {"loss": 1.8523, "grad_norm": 0.39705610275268555, "learning_rate": 0.0002, "epoch": 1.554054054054054, "step": 460}, {"loss": 1.7673, "grad_norm": 0.4178248643875122, "learning_rate": 0.0002, "epoch": 1.5878378378378377, "step": 470}, {"loss": 1.742, "grad_norm": 0.39107105135917664, "learning_rate": 0.0002, "epoch": 1.6216216216216215, "step": 480}, {"loss": 1.6984, "grad_norm": 0.38505619764328003, "learning_rate": 0.0002, "epoch": 1.6554054054054053, "step": 490}, {"loss": 1.7382, "grad_norm": 0.43590813875198364, "learning_rate": 0.0002, "epoch": 1.689189189189189, "step": 500}, {"loss": 1.7139, "grad_norm": 0.42785948514938354, "learning_rate": 0.0002, "epoch": 1.722972972972973, "step": 510}, {"loss": 1.7551, "grad_norm": 0.3829004168510437, "learning_rate": 0.0002, "epoch": 1.7567567567567568, "step": 520}, {"loss": 1.7744, "grad_norm": 0.35287904739379883, "learning_rate": 0.0002, "epoch": 1.7905405405405406, "step": 530}, {"loss": 1.7714, "grad_norm": 0.38657888770103455, "learning_rate": 0.0002, "epoch": 1.8243243243243243, "step": 540}, {"loss": 1.7535, "grad_norm": 0.41452157497406006, "learning_rate": 0.0002, "epoch": 1.8581081081081081, "step": 550}, {"loss": 1.7627, "grad_norm": 0.3898279070854187, "learning_rate": 0.0002, "epoch": 1.8918918918918919, "step": 560}, {"loss": 1.7494, "grad_norm": 0.4147624373435974, "learning_rate": 0.0002, "epoch": 1.9256756756756757, "step": 570}, {"loss": 1.7693, "grad_norm": 0.4374851584434509, "learning_rate": 0.0002, "epoch": 1.9594594594594594, "step": 580}, {"loss": 1.7796, "grad_norm": 0.48530328273773193, "learning_rate": 0.0002, "epoch": 1.9932432432432432, "step": 590}]} +{"epoch": 3.0, "step": 888, "epoch_duration": 612.5414674282074, "total_accumulated_duration": 1811.3773443698883, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7568.4541015625}, "peak_memory_usage": {"GPU_0": 13688.75439453125}, "avg_memory_reserved": {"GPU_0": 17416.0}, "peak_memory_reserved": {"GPU_0": 17416.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.6467, "grad_norm": 0.5820087194442749, "learning_rate": 0.0002, "epoch": 0.033783783783783786, "step": 10}, {"loss": 2.2808, "grad_norm": 0.4625075161457062, "learning_rate": 0.0002, "epoch": 0.06756756756756757, "step": 20}, {"loss": 2.0116, "grad_norm": 0.46946242451667786, "learning_rate": 0.0002, "epoch": 0.10135135135135136, "step": 30}, {"loss": 1.9089, "grad_norm": 0.5181305408477783, "learning_rate": 0.0002, "epoch": 0.13513513513513514, "step": 40}, {"loss": 1.9232, "grad_norm": 0.7439630627632141, "learning_rate": 0.0002, "epoch": 0.16891891891891891, "step": 50}, {"loss": 1.9646, "grad_norm": 0.5144319534301758, "learning_rate": 0.0002, "epoch": 0.20270270270270271, "step": 60}, {"loss": 1.921, "grad_norm": 0.46696192026138306, "learning_rate": 0.0002, "epoch": 0.23648648648648649, "step": 70}, {"loss": 1.8794, "grad_norm": 0.4330582022666931, "learning_rate": 0.0002, "epoch": 0.2702702702702703, "step": 80}, {"loss": 1.8897, "grad_norm": 0.502414882183075, "learning_rate": 0.0002, "epoch": 0.30405405405405406, "step": 90}, {"loss": 1.8166, "grad_norm": 0.4174366295337677, "learning_rate": 0.0002, "epoch": 0.33783783783783783, "step": 100}, {"loss": 1.8264, "grad_norm": 0.4296933710575104, "learning_rate": 0.0002, "epoch": 0.3716216216216216, "step": 110}, {"loss": 1.9223, "grad_norm": 0.4299834668636322, "learning_rate": 0.0002, "epoch": 0.40540540540540543, "step": 120}, {"loss": 1.8708, "grad_norm": 0.5583795309066772, "learning_rate": 0.0002, "epoch": 0.4391891891891892, "step": 130}, {"loss": 1.7786, "grad_norm": 0.5205192565917969, "learning_rate": 0.0002, "epoch": 0.47297297297297297, "step": 140}, {"loss": 1.8293, "grad_norm": 0.4683739244937897, "learning_rate": 0.0002, "epoch": 0.5067567567567568, "step": 150}, {"loss": 1.9102, "grad_norm": 0.497546523809433, "learning_rate": 0.0002, "epoch": 0.5405405405405406, "step": 160}, {"loss": 1.8077, "grad_norm": 0.40443721413612366, "learning_rate": 0.0002, "epoch": 0.5743243243243243, "step": 170}, {"loss": 1.8446, "grad_norm": 0.39056605100631714, "learning_rate": 0.0002, "epoch": 0.6081081081081081, "step": 180}, {"loss": 1.8747, "grad_norm": 0.42397141456604004, "learning_rate": 0.0002, "epoch": 0.6418918918918919, "step": 190}, {"loss": 1.8413, "grad_norm": 0.4679499566555023, "learning_rate": 0.0002, "epoch": 0.6756756756756757, "step": 200}, {"loss": 1.8401, "grad_norm": 0.39300158619880676, "learning_rate": 0.0002, "epoch": 0.7094594594594594, "step": 210}, {"loss": 1.8345, "grad_norm": 0.4001515805721283, "learning_rate": 0.0002, "epoch": 0.7432432432432432, "step": 220}, {"loss": 1.7571, "grad_norm": 0.4094320833683014, "learning_rate": 0.0002, "epoch": 0.777027027027027, "step": 230}, {"loss": 1.8972, "grad_norm": 0.37315094470977783, "learning_rate": 0.0002, "epoch": 0.8108108108108109, "step": 240}, {"loss": 1.8337, "grad_norm": 0.4331067204475403, "learning_rate": 0.0002, "epoch": 0.8445945945945946, "step": 250}, {"loss": 1.8555, "grad_norm": 0.39758574962615967, "learning_rate": 0.0002, "epoch": 0.8783783783783784, "step": 260}, {"loss": 1.8624, "grad_norm": 0.38240519165992737, "learning_rate": 0.0002, "epoch": 0.9121621621621622, "step": 270}, {"loss": 1.7531, "grad_norm": 0.40907856822013855, "learning_rate": 0.0002, "epoch": 0.9459459459459459, "step": 280}, {"loss": 1.8435, "grad_norm": 0.34108003973960876, "learning_rate": 0.0002, "epoch": 0.9797297297297297, "step": 290}, {"eval_loss": 1.8428829908370972, "eval_runtime": 62.3963, "eval_samples_per_second": 8.254, "eval_steps_per_second": 1.042, "epoch": 1.0, "step": 296}, {"loss": 1.7254, "grad_norm": 0.3993101716041565, "learning_rate": 0.0002, "epoch": 1.0135135135135136, "step": 300}, {"loss": 1.7985, "grad_norm": 0.45567989349365234, "learning_rate": 0.0002, "epoch": 1.0472972972972974, "step": 310}, {"loss": 1.757, "grad_norm": 0.3767794072628021, "learning_rate": 0.0002, "epoch": 1.0810810810810811, "step": 320}, {"loss": 1.7917, "grad_norm": 0.5181908011436462, "learning_rate": 0.0002, "epoch": 1.114864864864865, "step": 330}, {"loss": 1.7723, "grad_norm": 0.4213193356990814, "learning_rate": 0.0002, "epoch": 1.1486486486486487, "step": 340}, {"loss": 1.8203, "grad_norm": 0.45519495010375977, "learning_rate": 0.0002, "epoch": 1.1824324324324325, "step": 350}, {"loss": 1.6946, "grad_norm": 0.35332638025283813, "learning_rate": 0.0002, "epoch": 1.2162162162162162, "step": 360}, {"loss": 1.7541, "grad_norm": 0.3675481677055359, "learning_rate": 0.0002, "epoch": 1.25, "step": 370}, {"loss": 1.7458, "grad_norm": 0.4569270610809326, "learning_rate": 0.0002, "epoch": 1.2837837837837838, "step": 380}, {"loss": 1.7988, "grad_norm": 0.37950295209884644, "learning_rate": 0.0002, "epoch": 1.3175675675675675, "step": 390}, {"loss": 1.7032, "grad_norm": 0.5744572877883911, "learning_rate": 0.0002, "epoch": 1.3513513513513513, "step": 400}, {"loss": 1.7437, "grad_norm": 0.44380778074264526, "learning_rate": 0.0002, "epoch": 1.385135135135135, "step": 410}, {"loss": 1.7454, "grad_norm": 0.43328171968460083, "learning_rate": 0.0002, "epoch": 1.4189189189189189, "step": 420}, {"loss": 1.7636, "grad_norm": 0.41290056705474854, "learning_rate": 0.0002, "epoch": 1.4527027027027026, "step": 430}, {"loss": 1.7332, "grad_norm": 0.3771473169326782, "learning_rate": 0.0002, "epoch": 1.4864864864864864, "step": 440}, {"loss": 1.7618, "grad_norm": 0.42537811398506165, "learning_rate": 0.0002, "epoch": 1.5202702702702702, "step": 450}, {"loss": 1.8523, "grad_norm": 0.39705610275268555, "learning_rate": 0.0002, "epoch": 1.554054054054054, "step": 460}, {"loss": 1.7673, "grad_norm": 0.4178248643875122, "learning_rate": 0.0002, "epoch": 1.5878378378378377, "step": 470}, {"loss": 1.742, "grad_norm": 0.39107105135917664, "learning_rate": 0.0002, "epoch": 1.6216216216216215, "step": 480}, {"loss": 1.6984, "grad_norm": 0.38505619764328003, "learning_rate": 0.0002, "epoch": 1.6554054054054053, "step": 490}, {"loss": 1.7382, "grad_norm": 0.43590813875198364, "learning_rate": 0.0002, "epoch": 1.689189189189189, "step": 500}, {"loss": 1.7139, "grad_norm": 0.42785948514938354, "learning_rate": 0.0002, "epoch": 1.722972972972973, "step": 510}, {"loss": 1.7551, "grad_norm": 0.3829004168510437, "learning_rate": 0.0002, "epoch": 1.7567567567567568, "step": 520}, {"loss": 1.7744, "grad_norm": 0.35287904739379883, "learning_rate": 0.0002, "epoch": 1.7905405405405406, "step": 530}, {"loss": 1.7714, "grad_norm": 0.38657888770103455, "learning_rate": 0.0002, "epoch": 1.8243243243243243, "step": 540}, {"loss": 1.7535, "grad_norm": 0.41452157497406006, "learning_rate": 0.0002, "epoch": 1.8581081081081081, "step": 550}, {"loss": 1.7627, "grad_norm": 0.3898279070854187, "learning_rate": 0.0002, "epoch": 1.8918918918918919, "step": 560}, {"loss": 1.7494, "grad_norm": 0.4147624373435974, "learning_rate": 0.0002, "epoch": 1.9256756756756757, "step": 570}, {"loss": 1.7693, "grad_norm": 0.4374851584434509, "learning_rate": 0.0002, "epoch": 1.9594594594594594, "step": 580}, {"loss": 1.7796, "grad_norm": 0.48530328273773193, "learning_rate": 0.0002, "epoch": 1.9932432432432432, "step": 590}, {"eval_loss": 1.83539617061615, "eval_runtime": 70.9865, "eval_samples_per_second": 7.255, "eval_steps_per_second": 0.916, "epoch": 2.0, "step": 592}, {"loss": 1.7294, "grad_norm": 0.40344223380088806, "learning_rate": 0.0002, "epoch": 2.027027027027027, "step": 600}, {"loss": 1.6693, "grad_norm": 0.48268747329711914, "learning_rate": 0.0002, "epoch": 2.060810810810811, "step": 610}, {"loss": 1.6315, "grad_norm": 0.4675706923007965, "learning_rate": 0.0002, "epoch": 2.0945945945945947, "step": 620}, {"loss": 1.6627, "grad_norm": 0.47494322061538696, "learning_rate": 0.0002, "epoch": 2.1283783783783785, "step": 630}, {"loss": 1.5668, "grad_norm": 0.4555308520793915, "learning_rate": 0.0002, "epoch": 2.1621621621621623, "step": 640}, {"loss": 1.6537, "grad_norm": 0.43085595965385437, "learning_rate": 0.0002, "epoch": 2.195945945945946, "step": 650}, {"loss": 1.6316, "grad_norm": 0.4364128112792969, "learning_rate": 0.0002, "epoch": 2.22972972972973, "step": 660}, {"loss": 1.669, "grad_norm": 0.4711395800113678, "learning_rate": 0.0002, "epoch": 2.2635135135135136, "step": 670}, {"loss": 1.5758, "grad_norm": 0.5109705328941345, "learning_rate": 0.0002, "epoch": 2.2972972972972974, "step": 680}, {"loss": 1.5912, "grad_norm": 0.5185648798942566, "learning_rate": 0.0002, "epoch": 2.331081081081081, "step": 690}, {"loss": 1.6605, "grad_norm": 0.49192842841148376, "learning_rate": 0.0002, "epoch": 2.364864864864865, "step": 700}, {"loss": 1.6688, "grad_norm": 0.5619909763336182, "learning_rate": 0.0002, "epoch": 2.3986486486486487, "step": 710}, {"loss": 1.7836, "grad_norm": 0.4932861328125, "learning_rate": 0.0002, "epoch": 2.4324324324324325, "step": 720}, {"loss": 1.6532, "grad_norm": 0.5211932063102722, "learning_rate": 0.0002, "epoch": 2.4662162162162162, "step": 730}, {"loss": 1.667, "grad_norm": 0.4138050377368927, "learning_rate": 0.0002, "epoch": 2.5, "step": 740}, {"loss": 1.658, "grad_norm": 0.4644908010959625, "learning_rate": 0.0002, "epoch": 2.5337837837837838, "step": 750}, {"loss": 1.6451, "grad_norm": 0.4513227641582489, "learning_rate": 0.0002, "epoch": 2.5675675675675675, "step": 760}, {"loss": 1.7071, "grad_norm": 0.4735109508037567, "learning_rate": 0.0002, "epoch": 2.6013513513513513, "step": 770}, {"loss": 1.6659, "grad_norm": 0.5453559756278992, "learning_rate": 0.0002, "epoch": 2.635135135135135, "step": 780}, {"loss": 1.7211, "grad_norm": 0.5422565937042236, "learning_rate": 0.0002, "epoch": 2.668918918918919, "step": 790}, {"loss": 1.6623, "grad_norm": 0.4288518726825714, "learning_rate": 0.0002, "epoch": 2.7027027027027026, "step": 800}, {"loss": 1.7197, "grad_norm": 0.4085204005241394, "learning_rate": 0.0002, "epoch": 2.7364864864864864, "step": 810}, {"loss": 1.6376, "grad_norm": 0.49770182371139526, "learning_rate": 0.0002, "epoch": 2.77027027027027, "step": 820}, {"loss": 1.6332, "grad_norm": 0.5005106329917908, "learning_rate": 0.0002, "epoch": 2.804054054054054, "step": 830}, {"loss": 1.6675, "grad_norm": 0.4763440489768982, "learning_rate": 0.0002, "epoch": 2.8378378378378377, "step": 840}, {"loss": 1.7149, "grad_norm": 0.44995108246803284, "learning_rate": 0.0002, "epoch": 2.8716216216216215, "step": 850}, {"loss": 1.6438, "grad_norm": 0.5299676656723022, "learning_rate": 0.0002, "epoch": 2.9054054054054053, "step": 860}, {"loss": 1.6457, "grad_norm": 0.49627119302749634, "learning_rate": 0.0002, "epoch": 2.939189189189189, "step": 870}, {"loss": 1.6517, "grad_norm": 0.502545177936554, "learning_rate": 0.0002, "epoch": 2.972972972972973, "step": 880}]} +{"epoch": 4.0, "step": 1184, "epoch_duration": 599.5847628116608, "total_accumulated_duration": 2410.962107181549, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7568.4541015625}, "peak_memory_usage": {"GPU_0": 13688.75439453125}, "avg_memory_reserved": {"GPU_0": 17416.0}, "peak_memory_reserved": {"GPU_0": 17416.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.6467, "grad_norm": 0.5820087194442749, "learning_rate": 0.0002, "epoch": 0.033783783783783786, "step": 10}, {"loss": 2.2808, "grad_norm": 0.4625075161457062, "learning_rate": 0.0002, "epoch": 0.06756756756756757, "step": 20}, {"loss": 2.0116, "grad_norm": 0.46946242451667786, "learning_rate": 0.0002, "epoch": 0.10135135135135136, "step": 30}, {"loss": 1.9089, "grad_norm": 0.5181305408477783, "learning_rate": 0.0002, "epoch": 0.13513513513513514, "step": 40}, {"loss": 1.9232, "grad_norm": 0.7439630627632141, "learning_rate": 0.0002, "epoch": 0.16891891891891891, "step": 50}, {"loss": 1.9646, "grad_norm": 0.5144319534301758, "learning_rate": 0.0002, "epoch": 0.20270270270270271, "step": 60}, {"loss": 1.921, "grad_norm": 0.46696192026138306, "learning_rate": 0.0002, "epoch": 0.23648648648648649, "step": 70}, {"loss": 1.8794, "grad_norm": 0.4330582022666931, "learning_rate": 0.0002, "epoch": 0.2702702702702703, "step": 80}, {"loss": 1.8897, "grad_norm": 0.502414882183075, "learning_rate": 0.0002, "epoch": 0.30405405405405406, "step": 90}, {"loss": 1.8166, "grad_norm": 0.4174366295337677, "learning_rate": 0.0002, "epoch": 0.33783783783783783, "step": 100}, {"loss": 1.8264, "grad_norm": 0.4296933710575104, "learning_rate": 0.0002, "epoch": 0.3716216216216216, "step": 110}, {"loss": 1.9223, "grad_norm": 0.4299834668636322, "learning_rate": 0.0002, "epoch": 0.40540540540540543, "step": 120}, {"loss": 1.8708, "grad_norm": 0.5583795309066772, "learning_rate": 0.0002, "epoch": 0.4391891891891892, "step": 130}, {"loss": 1.7786, "grad_norm": 0.5205192565917969, "learning_rate": 0.0002, "epoch": 0.47297297297297297, "step": 140}, {"loss": 1.8293, "grad_norm": 0.4683739244937897, "learning_rate": 0.0002, "epoch": 0.5067567567567568, "step": 150}, {"loss": 1.9102, "grad_norm": 0.497546523809433, "learning_rate": 0.0002, "epoch": 0.5405405405405406, "step": 160}, {"loss": 1.8077, "grad_norm": 0.40443721413612366, "learning_rate": 0.0002, "epoch": 0.5743243243243243, "step": 170}, {"loss": 1.8446, "grad_norm": 0.39056605100631714, "learning_rate": 0.0002, "epoch": 0.6081081081081081, "step": 180}, {"loss": 1.8747, "grad_norm": 0.42397141456604004, "learning_rate": 0.0002, "epoch": 0.6418918918918919, "step": 190}, {"loss": 1.8413, "grad_norm": 0.4679499566555023, "learning_rate": 0.0002, "epoch": 0.6756756756756757, "step": 200}, {"loss": 1.8401, "grad_norm": 0.39300158619880676, "learning_rate": 0.0002, "epoch": 0.7094594594594594, "step": 210}, {"loss": 1.8345, "grad_norm": 0.4001515805721283, "learning_rate": 0.0002, "epoch": 0.7432432432432432, "step": 220}, {"loss": 1.7571, "grad_norm": 0.4094320833683014, "learning_rate": 0.0002, "epoch": 0.777027027027027, "step": 230}, {"loss": 1.8972, "grad_norm": 0.37315094470977783, "learning_rate": 0.0002, "epoch": 0.8108108108108109, "step": 240}, {"loss": 1.8337, "grad_norm": 0.4331067204475403, "learning_rate": 0.0002, "epoch": 0.8445945945945946, "step": 250}, {"loss": 1.8555, "grad_norm": 0.39758574962615967, "learning_rate": 0.0002, "epoch": 0.8783783783783784, "step": 260}, {"loss": 1.8624, "grad_norm": 0.38240519165992737, "learning_rate": 0.0002, "epoch": 0.9121621621621622, "step": 270}, {"loss": 1.7531, "grad_norm": 0.40907856822013855, "learning_rate": 0.0002, "epoch": 0.9459459459459459, "step": 280}, {"loss": 1.8435, "grad_norm": 0.34108003973960876, "learning_rate": 0.0002, "epoch": 0.9797297297297297, "step": 290}, {"eval_loss": 1.8428829908370972, "eval_runtime": 62.3963, "eval_samples_per_second": 8.254, "eval_steps_per_second": 1.042, "epoch": 1.0, "step": 296}, {"loss": 1.7254, "grad_norm": 0.3993101716041565, "learning_rate": 0.0002, "epoch": 1.0135135135135136, "step": 300}, {"loss": 1.7985, "grad_norm": 0.45567989349365234, "learning_rate": 0.0002, "epoch": 1.0472972972972974, "step": 310}, {"loss": 1.757, "grad_norm": 0.3767794072628021, "learning_rate": 0.0002, "epoch": 1.0810810810810811, "step": 320}, {"loss": 1.7917, "grad_norm": 0.5181908011436462, "learning_rate": 0.0002, "epoch": 1.114864864864865, "step": 330}, {"loss": 1.7723, "grad_norm": 0.4213193356990814, "learning_rate": 0.0002, "epoch": 1.1486486486486487, "step": 340}, {"loss": 1.8203, "grad_norm": 0.45519495010375977, "learning_rate": 0.0002, "epoch": 1.1824324324324325, "step": 350}, {"loss": 1.6946, "grad_norm": 0.35332638025283813, "learning_rate": 0.0002, "epoch": 1.2162162162162162, "step": 360}, {"loss": 1.7541, "grad_norm": 0.3675481677055359, "learning_rate": 0.0002, "epoch": 1.25, "step": 370}, {"loss": 1.7458, "grad_norm": 0.4569270610809326, "learning_rate": 0.0002, "epoch": 1.2837837837837838, "step": 380}, {"loss": 1.7988, "grad_norm": 0.37950295209884644, "learning_rate": 0.0002, "epoch": 1.3175675675675675, "step": 390}, {"loss": 1.7032, "grad_norm": 0.5744572877883911, "learning_rate": 0.0002, "epoch": 1.3513513513513513, "step": 400}, {"loss": 1.7437, "grad_norm": 0.44380778074264526, "learning_rate": 0.0002, "epoch": 1.385135135135135, "step": 410}, {"loss": 1.7454, "grad_norm": 0.43328171968460083, "learning_rate": 0.0002, "epoch": 1.4189189189189189, "step": 420}, {"loss": 1.7636, "grad_norm": 0.41290056705474854, "learning_rate": 0.0002, "epoch": 1.4527027027027026, "step": 430}, {"loss": 1.7332, "grad_norm": 0.3771473169326782, "learning_rate": 0.0002, "epoch": 1.4864864864864864, "step": 440}, {"loss": 1.7618, "grad_norm": 0.42537811398506165, "learning_rate": 0.0002, "epoch": 1.5202702702702702, "step": 450}, {"loss": 1.8523, "grad_norm": 0.39705610275268555, "learning_rate": 0.0002, "epoch": 1.554054054054054, "step": 460}, {"loss": 1.7673, "grad_norm": 0.4178248643875122, "learning_rate": 0.0002, "epoch": 1.5878378378378377, "step": 470}, {"loss": 1.742, "grad_norm": 0.39107105135917664, "learning_rate": 0.0002, "epoch": 1.6216216216216215, "step": 480}, {"loss": 1.6984, "grad_norm": 0.38505619764328003, "learning_rate": 0.0002, "epoch": 1.6554054054054053, "step": 490}, {"loss": 1.7382, "grad_norm": 0.43590813875198364, "learning_rate": 0.0002, "epoch": 1.689189189189189, "step": 500}, {"loss": 1.7139, "grad_norm": 0.42785948514938354, "learning_rate": 0.0002, "epoch": 1.722972972972973, "step": 510}, {"loss": 1.7551, "grad_norm": 0.3829004168510437, "learning_rate": 0.0002, "epoch": 1.7567567567567568, "step": 520}, {"loss": 1.7744, "grad_norm": 0.35287904739379883, "learning_rate": 0.0002, "epoch": 1.7905405405405406, "step": 530}, {"loss": 1.7714, "grad_norm": 0.38657888770103455, "learning_rate": 0.0002, "epoch": 1.8243243243243243, "step": 540}, {"loss": 1.7535, "grad_norm": 0.41452157497406006, "learning_rate": 0.0002, "epoch": 1.8581081081081081, "step": 550}, {"loss": 1.7627, "grad_norm": 0.3898279070854187, "learning_rate": 0.0002, "epoch": 1.8918918918918919, "step": 560}, {"loss": 1.7494, "grad_norm": 0.4147624373435974, "learning_rate": 0.0002, "epoch": 1.9256756756756757, "step": 570}, {"loss": 1.7693, "grad_norm": 0.4374851584434509, "learning_rate": 0.0002, "epoch": 1.9594594594594594, "step": 580}, {"loss": 1.7796, "grad_norm": 0.48530328273773193, "learning_rate": 0.0002, "epoch": 1.9932432432432432, "step": 590}, {"eval_loss": 1.83539617061615, "eval_runtime": 70.9865, "eval_samples_per_second": 7.255, "eval_steps_per_second": 0.916, "epoch": 2.0, "step": 592}, {"loss": 1.7294, "grad_norm": 0.40344223380088806, "learning_rate": 0.0002, "epoch": 2.027027027027027, "step": 600}, {"loss": 1.6693, "grad_norm": 0.48268747329711914, "learning_rate": 0.0002, "epoch": 2.060810810810811, "step": 610}, {"loss": 1.6315, "grad_norm": 0.4675706923007965, "learning_rate": 0.0002, "epoch": 2.0945945945945947, "step": 620}, {"loss": 1.6627, "grad_norm": 0.47494322061538696, "learning_rate": 0.0002, "epoch": 2.1283783783783785, "step": 630}, {"loss": 1.5668, "grad_norm": 0.4555308520793915, "learning_rate": 0.0002, "epoch": 2.1621621621621623, "step": 640}, {"loss": 1.6537, "grad_norm": 0.43085595965385437, "learning_rate": 0.0002, "epoch": 2.195945945945946, "step": 650}, {"loss": 1.6316, "grad_norm": 0.4364128112792969, "learning_rate": 0.0002, "epoch": 2.22972972972973, "step": 660}, {"loss": 1.669, "grad_norm": 0.4711395800113678, "learning_rate": 0.0002, "epoch": 2.2635135135135136, "step": 670}, {"loss": 1.5758, "grad_norm": 0.5109705328941345, "learning_rate": 0.0002, "epoch": 2.2972972972972974, "step": 680}, {"loss": 1.5912, "grad_norm": 0.5185648798942566, "learning_rate": 0.0002, "epoch": 2.331081081081081, "step": 690}, {"loss": 1.6605, "grad_norm": 0.49192842841148376, "learning_rate": 0.0002, "epoch": 2.364864864864865, "step": 700}, {"loss": 1.6688, "grad_norm": 0.5619909763336182, "learning_rate": 0.0002, "epoch": 2.3986486486486487, "step": 710}, {"loss": 1.7836, "grad_norm": 0.4932861328125, "learning_rate": 0.0002, "epoch": 2.4324324324324325, "step": 720}, {"loss": 1.6532, "grad_norm": 0.5211932063102722, "learning_rate": 0.0002, "epoch": 2.4662162162162162, "step": 730}, {"loss": 1.667, "grad_norm": 0.4138050377368927, "learning_rate": 0.0002, "epoch": 2.5, "step": 740}, {"loss": 1.658, "grad_norm": 0.4644908010959625, "learning_rate": 0.0002, "epoch": 2.5337837837837838, "step": 750}, {"loss": 1.6451, "grad_norm": 0.4513227641582489, "learning_rate": 0.0002, "epoch": 2.5675675675675675, "step": 760}, {"loss": 1.7071, "grad_norm": 0.4735109508037567, "learning_rate": 0.0002, "epoch": 2.6013513513513513, "step": 770}, {"loss": 1.6659, "grad_norm": 0.5453559756278992, "learning_rate": 0.0002, "epoch": 2.635135135135135, "step": 780}, {"loss": 1.7211, "grad_norm": 0.5422565937042236, "learning_rate": 0.0002, "epoch": 2.668918918918919, "step": 790}, {"loss": 1.6623, "grad_norm": 0.4288518726825714, "learning_rate": 0.0002, "epoch": 2.7027027027027026, "step": 800}, {"loss": 1.7197, "grad_norm": 0.4085204005241394, "learning_rate": 0.0002, "epoch": 2.7364864864864864, "step": 810}, {"loss": 1.6376, "grad_norm": 0.49770182371139526, "learning_rate": 0.0002, "epoch": 2.77027027027027, "step": 820}, {"loss": 1.6332, "grad_norm": 0.5005106329917908, "learning_rate": 0.0002, "epoch": 2.804054054054054, "step": 830}, {"loss": 1.6675, "grad_norm": 0.4763440489768982, "learning_rate": 0.0002, "epoch": 2.8378378378378377, "step": 840}, {"loss": 1.7149, "grad_norm": 0.44995108246803284, "learning_rate": 0.0002, "epoch": 2.8716216216216215, "step": 850}, {"loss": 1.6438, "grad_norm": 0.5299676656723022, "learning_rate": 0.0002, "epoch": 2.9054054054054053, "step": 860}, {"loss": 1.6457, "grad_norm": 0.49627119302749634, "learning_rate": 0.0002, "epoch": 2.939189189189189, "step": 870}, {"loss": 1.6517, "grad_norm": 0.502545177936554, "learning_rate": 0.0002, "epoch": 2.972972972972973, "step": 880}, {"eval_loss": 1.8520468473434448, "eval_runtime": 70.9917, "eval_samples_per_second": 7.254, "eval_steps_per_second": 0.916, "epoch": 3.0, "step": 888}, {"loss": 1.6271, "grad_norm": 0.4756380319595337, "learning_rate": 0.0002, "epoch": 3.0067567567567566, "step": 890}, {"loss": 1.563, "grad_norm": 0.5167421102523804, "learning_rate": 0.0002, "epoch": 3.0405405405405403, "step": 900}, {"loss": 1.48, "grad_norm": 0.5524939298629761, "learning_rate": 0.0002, "epoch": 3.074324324324324, "step": 910}, {"loss": 1.5297, "grad_norm": 0.7045221924781799, "learning_rate": 0.0002, "epoch": 3.108108108108108, "step": 920}, {"loss": 1.5548, "grad_norm": 0.5692355036735535, "learning_rate": 0.0002, "epoch": 3.141891891891892, "step": 930}, {"loss": 1.5297, "grad_norm": 0.5467017292976379, "learning_rate": 0.0002, "epoch": 3.175675675675676, "step": 940}, {"loss": 1.5559, "grad_norm": 0.6004040241241455, "learning_rate": 0.0002, "epoch": 3.2094594594594597, "step": 950}, {"loss": 1.5255, "grad_norm": 0.5713295936584473, "learning_rate": 0.0002, "epoch": 3.2432432432432434, "step": 960}, {"loss": 1.5412, "grad_norm": 0.6054869890213013, "learning_rate": 0.0002, "epoch": 3.277027027027027, "step": 970}, {"loss": 1.5167, "grad_norm": 0.6304576992988586, "learning_rate": 0.0002, "epoch": 3.310810810810811, "step": 980}, {"loss": 1.52, "grad_norm": 0.5347281694412231, "learning_rate": 0.0002, "epoch": 3.3445945945945947, "step": 990}, {"loss": 1.5707, "grad_norm": 0.598211407661438, "learning_rate": 0.0002, "epoch": 3.3783783783783785, "step": 1000}, {"loss": 1.5243, "grad_norm": 0.637312650680542, "learning_rate": 0.0002, "epoch": 3.4121621621621623, "step": 1010}, {"loss": 1.5356, "grad_norm": 0.6092430949211121, "learning_rate": 0.0002, "epoch": 3.445945945945946, "step": 1020}, {"loss": 1.5856, "grad_norm": 0.6421037912368774, "learning_rate": 0.0002, "epoch": 3.47972972972973, "step": 1030}, {"loss": 1.5553, "grad_norm": 0.6712167263031006, "learning_rate": 0.0002, "epoch": 3.5135135135135136, "step": 1040}, {"loss": 1.4708, "grad_norm": 0.6466017365455627, "learning_rate": 0.0002, "epoch": 3.5472972972972974, "step": 1050}, {"loss": 1.5159, "grad_norm": 1.418167233467102, "learning_rate": 0.0002, "epoch": 3.581081081081081, "step": 1060}, {"loss": 1.5264, "grad_norm": 0.6092377305030823, "learning_rate": 0.0002, "epoch": 3.614864864864865, "step": 1070}, {"loss": 1.5227, "grad_norm": 0.5632478594779968, "learning_rate": 0.0002, "epoch": 3.6486486486486487, "step": 1080}, {"loss": 1.5492, "grad_norm": 0.6007736921310425, "learning_rate": 0.0002, "epoch": 3.6824324324324325, "step": 1090}, {"loss": 1.5002, "grad_norm": 0.6031264066696167, "learning_rate": 0.0002, "epoch": 3.7162162162162162, "step": 1100}, {"loss": 1.4797, "grad_norm": 0.5440598726272583, "learning_rate": 0.0002, "epoch": 3.75, "step": 1110}, {"loss": 1.5743, "grad_norm": 0.6304370760917664, "learning_rate": 0.0002, "epoch": 3.7837837837837838, "step": 1120}, {"loss": 1.6429, "grad_norm": 0.6729280948638916, "learning_rate": 0.0002, "epoch": 3.8175675675675675, "step": 1130}, {"loss": 1.594, "grad_norm": 0.5881901979446411, "learning_rate": 0.0002, "epoch": 3.8513513513513513, "step": 1140}, {"loss": 1.5008, "grad_norm": 0.5508038997650146, "learning_rate": 0.0002, "epoch": 3.885135135135135, "step": 1150}, {"loss": 1.5045, "grad_norm": 0.5926295518875122, "learning_rate": 0.0002, "epoch": 3.918918918918919, "step": 1160}, {"loss": 1.5223, "grad_norm": 0.5882043838500977, "learning_rate": 0.0002, "epoch": 3.9527027027027026, "step": 1170}, {"loss": 1.5874, "grad_norm": 0.604119598865509, "learning_rate": 0.0002, "epoch": 3.9864864864864864, "step": 1180}]} +{"epoch": 5.0, "step": 1480, "epoch_duration": 603.6123406887054, "total_accumulated_duration": 3014.5744478702545, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7568.4541015625}, "peak_memory_usage": {"GPU_0": 13688.75439453125}, "avg_memory_reserved": {"GPU_0": 17416.0}, "peak_memory_reserved": {"GPU_0": 17416.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.6467, "grad_norm": 0.5820087194442749, "learning_rate": 0.0002, "epoch": 0.033783783783783786, "step": 10}, {"loss": 2.2808, "grad_norm": 0.4625075161457062, "learning_rate": 0.0002, "epoch": 0.06756756756756757, "step": 20}, {"loss": 2.0116, "grad_norm": 0.46946242451667786, "learning_rate": 0.0002, "epoch": 0.10135135135135136, "step": 30}, {"loss": 1.9089, "grad_norm": 0.5181305408477783, "learning_rate": 0.0002, "epoch": 0.13513513513513514, "step": 40}, {"loss": 1.9232, "grad_norm": 0.7439630627632141, "learning_rate": 0.0002, "epoch": 0.16891891891891891, "step": 50}, {"loss": 1.9646, "grad_norm": 0.5144319534301758, "learning_rate": 0.0002, "epoch": 0.20270270270270271, "step": 60}, {"loss": 1.921, "grad_norm": 0.46696192026138306, "learning_rate": 0.0002, "epoch": 0.23648648648648649, "step": 70}, {"loss": 1.8794, "grad_norm": 0.4330582022666931, "learning_rate": 0.0002, "epoch": 0.2702702702702703, "step": 80}, {"loss": 1.8897, "grad_norm": 0.502414882183075, "learning_rate": 0.0002, "epoch": 0.30405405405405406, "step": 90}, {"loss": 1.8166, "grad_norm": 0.4174366295337677, "learning_rate": 0.0002, "epoch": 0.33783783783783783, "step": 100}, {"loss": 1.8264, "grad_norm": 0.4296933710575104, "learning_rate": 0.0002, "epoch": 0.3716216216216216, "step": 110}, {"loss": 1.9223, "grad_norm": 0.4299834668636322, "learning_rate": 0.0002, "epoch": 0.40540540540540543, "step": 120}, {"loss": 1.8708, "grad_norm": 0.5583795309066772, "learning_rate": 0.0002, "epoch": 0.4391891891891892, "step": 130}, {"loss": 1.7786, "grad_norm": 0.5205192565917969, "learning_rate": 0.0002, "epoch": 0.47297297297297297, "step": 140}, {"loss": 1.8293, "grad_norm": 0.4683739244937897, "learning_rate": 0.0002, "epoch": 0.5067567567567568, "step": 150}, {"loss": 1.9102, "grad_norm": 0.497546523809433, "learning_rate": 0.0002, "epoch": 0.5405405405405406, "step": 160}, {"loss": 1.8077, "grad_norm": 0.40443721413612366, "learning_rate": 0.0002, "epoch": 0.5743243243243243, "step": 170}, {"loss": 1.8446, "grad_norm": 0.39056605100631714, "learning_rate": 0.0002, "epoch": 0.6081081081081081, "step": 180}, {"loss": 1.8747, "grad_norm": 0.42397141456604004, "learning_rate": 0.0002, "epoch": 0.6418918918918919, "step": 190}, {"loss": 1.8413, "grad_norm": 0.4679499566555023, "learning_rate": 0.0002, "epoch": 0.6756756756756757, "step": 200}, {"loss": 1.8401, "grad_norm": 0.39300158619880676, "learning_rate": 0.0002, "epoch": 0.7094594594594594, "step": 210}, {"loss": 1.8345, "grad_norm": 0.4001515805721283, "learning_rate": 0.0002, "epoch": 0.7432432432432432, "step": 220}, {"loss": 1.7571, "grad_norm": 0.4094320833683014, "learning_rate": 0.0002, "epoch": 0.777027027027027, "step": 230}, {"loss": 1.8972, "grad_norm": 0.37315094470977783, "learning_rate": 0.0002, "epoch": 0.8108108108108109, "step": 240}, {"loss": 1.8337, "grad_norm": 0.4331067204475403, "learning_rate": 0.0002, "epoch": 0.8445945945945946, "step": 250}, {"loss": 1.8555, "grad_norm": 0.39758574962615967, "learning_rate": 0.0002, "epoch": 0.8783783783783784, "step": 260}, {"loss": 1.8624, "grad_norm": 0.38240519165992737, "learning_rate": 0.0002, "epoch": 0.9121621621621622, "step": 270}, {"loss": 1.7531, "grad_norm": 0.40907856822013855, "learning_rate": 0.0002, "epoch": 0.9459459459459459, "step": 280}, {"loss": 1.8435, "grad_norm": 0.34108003973960876, "learning_rate": 0.0002, "epoch": 0.9797297297297297, "step": 290}, {"eval_loss": 1.8428829908370972, "eval_runtime": 62.3963, "eval_samples_per_second": 8.254, "eval_steps_per_second": 1.042, "epoch": 1.0, "step": 296}, {"loss": 1.7254, "grad_norm": 0.3993101716041565, "learning_rate": 0.0002, "epoch": 1.0135135135135136, "step": 300}, {"loss": 1.7985, "grad_norm": 0.45567989349365234, "learning_rate": 0.0002, "epoch": 1.0472972972972974, "step": 310}, {"loss": 1.757, "grad_norm": 0.3767794072628021, "learning_rate": 0.0002, "epoch": 1.0810810810810811, "step": 320}, {"loss": 1.7917, "grad_norm": 0.5181908011436462, "learning_rate": 0.0002, "epoch": 1.114864864864865, "step": 330}, {"loss": 1.7723, "grad_norm": 0.4213193356990814, "learning_rate": 0.0002, "epoch": 1.1486486486486487, "step": 340}, {"loss": 1.8203, "grad_norm": 0.45519495010375977, "learning_rate": 0.0002, "epoch": 1.1824324324324325, "step": 350}, {"loss": 1.6946, "grad_norm": 0.35332638025283813, "learning_rate": 0.0002, "epoch": 1.2162162162162162, "step": 360}, {"loss": 1.7541, "grad_norm": 0.3675481677055359, "learning_rate": 0.0002, "epoch": 1.25, "step": 370}, {"loss": 1.7458, "grad_norm": 0.4569270610809326, "learning_rate": 0.0002, "epoch": 1.2837837837837838, "step": 380}, {"loss": 1.7988, "grad_norm": 0.37950295209884644, "learning_rate": 0.0002, "epoch": 1.3175675675675675, "step": 390}, {"loss": 1.7032, "grad_norm": 0.5744572877883911, "learning_rate": 0.0002, "epoch": 1.3513513513513513, "step": 400}, {"loss": 1.7437, "grad_norm": 0.44380778074264526, "learning_rate": 0.0002, "epoch": 1.385135135135135, "step": 410}, {"loss": 1.7454, "grad_norm": 0.43328171968460083, "learning_rate": 0.0002, "epoch": 1.4189189189189189, "step": 420}, {"loss": 1.7636, "grad_norm": 0.41290056705474854, "learning_rate": 0.0002, "epoch": 1.4527027027027026, "step": 430}, {"loss": 1.7332, "grad_norm": 0.3771473169326782, "learning_rate": 0.0002, "epoch": 1.4864864864864864, "step": 440}, {"loss": 1.7618, "grad_norm": 0.42537811398506165, "learning_rate": 0.0002, "epoch": 1.5202702702702702, "step": 450}, {"loss": 1.8523, "grad_norm": 0.39705610275268555, "learning_rate": 0.0002, "epoch": 1.554054054054054, "step": 460}, {"loss": 1.7673, "grad_norm": 0.4178248643875122, "learning_rate": 0.0002, "epoch": 1.5878378378378377, "step": 470}, {"loss": 1.742, "grad_norm": 0.39107105135917664, "learning_rate": 0.0002, "epoch": 1.6216216216216215, "step": 480}, {"loss": 1.6984, "grad_norm": 0.38505619764328003, "learning_rate": 0.0002, "epoch": 1.6554054054054053, "step": 490}, {"loss": 1.7382, "grad_norm": 0.43590813875198364, "learning_rate": 0.0002, "epoch": 1.689189189189189, "step": 500}, {"loss": 1.7139, "grad_norm": 0.42785948514938354, "learning_rate": 0.0002, "epoch": 1.722972972972973, "step": 510}, {"loss": 1.7551, "grad_norm": 0.3829004168510437, "learning_rate": 0.0002, "epoch": 1.7567567567567568, "step": 520}, {"loss": 1.7744, "grad_norm": 0.35287904739379883, "learning_rate": 0.0002, "epoch": 1.7905405405405406, "step": 530}, {"loss": 1.7714, "grad_norm": 0.38657888770103455, "learning_rate": 0.0002, "epoch": 1.8243243243243243, "step": 540}, {"loss": 1.7535, "grad_norm": 0.41452157497406006, "learning_rate": 0.0002, "epoch": 1.8581081081081081, "step": 550}, {"loss": 1.7627, "grad_norm": 0.3898279070854187, "learning_rate": 0.0002, "epoch": 1.8918918918918919, "step": 560}, {"loss": 1.7494, "grad_norm": 0.4147624373435974, "learning_rate": 0.0002, "epoch": 1.9256756756756757, "step": 570}, {"loss": 1.7693, "grad_norm": 0.4374851584434509, "learning_rate": 0.0002, "epoch": 1.9594594594594594, "step": 580}, {"loss": 1.7796, "grad_norm": 0.48530328273773193, "learning_rate": 0.0002, "epoch": 1.9932432432432432, "step": 590}, {"eval_loss": 1.83539617061615, "eval_runtime": 70.9865, "eval_samples_per_second": 7.255, "eval_steps_per_second": 0.916, "epoch": 2.0, "step": 592}, {"loss": 1.7294, "grad_norm": 0.40344223380088806, "learning_rate": 0.0002, "epoch": 2.027027027027027, "step": 600}, {"loss": 1.6693, "grad_norm": 0.48268747329711914, "learning_rate": 0.0002, "epoch": 2.060810810810811, "step": 610}, {"loss": 1.6315, "grad_norm": 0.4675706923007965, "learning_rate": 0.0002, "epoch": 2.0945945945945947, "step": 620}, {"loss": 1.6627, "grad_norm": 0.47494322061538696, "learning_rate": 0.0002, "epoch": 2.1283783783783785, "step": 630}, {"loss": 1.5668, "grad_norm": 0.4555308520793915, "learning_rate": 0.0002, "epoch": 2.1621621621621623, "step": 640}, {"loss": 1.6537, "grad_norm": 0.43085595965385437, "learning_rate": 0.0002, "epoch": 2.195945945945946, "step": 650}, {"loss": 1.6316, "grad_norm": 0.4364128112792969, "learning_rate": 0.0002, "epoch": 2.22972972972973, "step": 660}, {"loss": 1.669, "grad_norm": 0.4711395800113678, "learning_rate": 0.0002, "epoch": 2.2635135135135136, "step": 670}, {"loss": 1.5758, "grad_norm": 0.5109705328941345, "learning_rate": 0.0002, "epoch": 2.2972972972972974, "step": 680}, {"loss": 1.5912, "grad_norm": 0.5185648798942566, "learning_rate": 0.0002, "epoch": 2.331081081081081, "step": 690}, {"loss": 1.6605, "grad_norm": 0.49192842841148376, "learning_rate": 0.0002, "epoch": 2.364864864864865, "step": 700}, {"loss": 1.6688, "grad_norm": 0.5619909763336182, "learning_rate": 0.0002, "epoch": 2.3986486486486487, "step": 710}, {"loss": 1.7836, "grad_norm": 0.4932861328125, "learning_rate": 0.0002, "epoch": 2.4324324324324325, "step": 720}, {"loss": 1.6532, "grad_norm": 0.5211932063102722, "learning_rate": 0.0002, "epoch": 2.4662162162162162, "step": 730}, {"loss": 1.667, "grad_norm": 0.4138050377368927, "learning_rate": 0.0002, "epoch": 2.5, "step": 740}, {"loss": 1.658, "grad_norm": 0.4644908010959625, "learning_rate": 0.0002, "epoch": 2.5337837837837838, "step": 750}, {"loss": 1.6451, "grad_norm": 0.4513227641582489, "learning_rate": 0.0002, "epoch": 2.5675675675675675, "step": 760}, {"loss": 1.7071, "grad_norm": 0.4735109508037567, "learning_rate": 0.0002, "epoch": 2.6013513513513513, "step": 770}, {"loss": 1.6659, "grad_norm": 0.5453559756278992, "learning_rate": 0.0002, "epoch": 2.635135135135135, "step": 780}, {"loss": 1.7211, "grad_norm": 0.5422565937042236, "learning_rate": 0.0002, "epoch": 2.668918918918919, "step": 790}, {"loss": 1.6623, "grad_norm": 0.4288518726825714, "learning_rate": 0.0002, "epoch": 2.7027027027027026, "step": 800}, {"loss": 1.7197, "grad_norm": 0.4085204005241394, "learning_rate": 0.0002, "epoch": 2.7364864864864864, "step": 810}, {"loss": 1.6376, "grad_norm": 0.49770182371139526, "learning_rate": 0.0002, "epoch": 2.77027027027027, "step": 820}, {"loss": 1.6332, "grad_norm": 0.5005106329917908, "learning_rate": 0.0002, "epoch": 2.804054054054054, "step": 830}, {"loss": 1.6675, "grad_norm": 0.4763440489768982, "learning_rate": 0.0002, "epoch": 2.8378378378378377, "step": 840}, {"loss": 1.7149, "grad_norm": 0.44995108246803284, "learning_rate": 0.0002, "epoch": 2.8716216216216215, "step": 850}, {"loss": 1.6438, "grad_norm": 0.5299676656723022, "learning_rate": 0.0002, "epoch": 2.9054054054054053, "step": 860}, {"loss": 1.6457, "grad_norm": 0.49627119302749634, "learning_rate": 0.0002, "epoch": 2.939189189189189, "step": 870}, {"loss": 1.6517, "grad_norm": 0.502545177936554, "learning_rate": 0.0002, "epoch": 2.972972972972973, "step": 880}, {"eval_loss": 1.8520468473434448, "eval_runtime": 70.9917, "eval_samples_per_second": 7.254, "eval_steps_per_second": 0.916, "epoch": 3.0, "step": 888}, {"loss": 1.6271, "grad_norm": 0.4756380319595337, "learning_rate": 0.0002, "epoch": 3.0067567567567566, "step": 890}, {"loss": 1.563, "grad_norm": 0.5167421102523804, "learning_rate": 0.0002, "epoch": 3.0405405405405403, "step": 900}, {"loss": 1.48, "grad_norm": 0.5524939298629761, "learning_rate": 0.0002, "epoch": 3.074324324324324, "step": 910}, {"loss": 1.5297, "grad_norm": 0.7045221924781799, "learning_rate": 0.0002, "epoch": 3.108108108108108, "step": 920}, {"loss": 1.5548, "grad_norm": 0.5692355036735535, "learning_rate": 0.0002, "epoch": 3.141891891891892, "step": 930}, {"loss": 1.5297, "grad_norm": 0.5467017292976379, "learning_rate": 0.0002, "epoch": 3.175675675675676, "step": 940}, {"loss": 1.5559, "grad_norm": 0.6004040241241455, "learning_rate": 0.0002, "epoch": 3.2094594594594597, "step": 950}, {"loss": 1.5255, "grad_norm": 0.5713295936584473, "learning_rate": 0.0002, "epoch": 3.2432432432432434, "step": 960}, {"loss": 1.5412, "grad_norm": 0.6054869890213013, "learning_rate": 0.0002, "epoch": 3.277027027027027, "step": 970}, {"loss": 1.5167, "grad_norm": 0.6304576992988586, "learning_rate": 0.0002, "epoch": 3.310810810810811, "step": 980}, {"loss": 1.52, "grad_norm": 0.5347281694412231, "learning_rate": 0.0002, "epoch": 3.3445945945945947, "step": 990}, {"loss": 1.5707, "grad_norm": 0.598211407661438, "learning_rate": 0.0002, "epoch": 3.3783783783783785, "step": 1000}, {"loss": 1.5243, "grad_norm": 0.637312650680542, "learning_rate": 0.0002, "epoch": 3.4121621621621623, "step": 1010}, {"loss": 1.5356, "grad_norm": 0.6092430949211121, "learning_rate": 0.0002, "epoch": 3.445945945945946, "step": 1020}, {"loss": 1.5856, "grad_norm": 0.6421037912368774, "learning_rate": 0.0002, "epoch": 3.47972972972973, "step": 1030}, {"loss": 1.5553, "grad_norm": 0.6712167263031006, "learning_rate": 0.0002, "epoch": 3.5135135135135136, "step": 1040}, {"loss": 1.4708, "grad_norm": 0.6466017365455627, "learning_rate": 0.0002, "epoch": 3.5472972972972974, "step": 1050}, {"loss": 1.5159, "grad_norm": 1.418167233467102, "learning_rate": 0.0002, "epoch": 3.581081081081081, "step": 1060}, {"loss": 1.5264, "grad_norm": 0.6092377305030823, "learning_rate": 0.0002, "epoch": 3.614864864864865, "step": 1070}, {"loss": 1.5227, "grad_norm": 0.5632478594779968, "learning_rate": 0.0002, "epoch": 3.6486486486486487, "step": 1080}, {"loss": 1.5492, "grad_norm": 0.6007736921310425, "learning_rate": 0.0002, "epoch": 3.6824324324324325, "step": 1090}, {"loss": 1.5002, "grad_norm": 0.6031264066696167, "learning_rate": 0.0002, "epoch": 3.7162162162162162, "step": 1100}, {"loss": 1.4797, "grad_norm": 0.5440598726272583, "learning_rate": 0.0002, "epoch": 3.75, "step": 1110}, {"loss": 1.5743, "grad_norm": 0.6304370760917664, "learning_rate": 0.0002, "epoch": 3.7837837837837838, "step": 1120}, {"loss": 1.6429, "grad_norm": 0.6729280948638916, "learning_rate": 0.0002, "epoch": 3.8175675675675675, "step": 1130}, {"loss": 1.594, "grad_norm": 0.5881901979446411, "learning_rate": 0.0002, "epoch": 3.8513513513513513, "step": 1140}, {"loss": 1.5008, "grad_norm": 0.5508038997650146, "learning_rate": 0.0002, "epoch": 3.885135135135135, "step": 1150}, {"loss": 1.5045, "grad_norm": 0.5926295518875122, "learning_rate": 0.0002, "epoch": 3.918918918918919, "step": 1160}, {"loss": 1.5223, "grad_norm": 0.5882043838500977, "learning_rate": 0.0002, "epoch": 3.9527027027027026, "step": 1170}, {"loss": 1.5874, "grad_norm": 0.604119598865509, "learning_rate": 0.0002, "epoch": 3.9864864864864864, "step": 1180}, {"eval_loss": 1.8923152685165405, "eval_runtime": 62.9577, "eval_samples_per_second": 8.18, "eval_steps_per_second": 1.032, "epoch": 4.0, "step": 1184}, {"loss": 1.4183, "grad_norm": 0.696061909198761, "learning_rate": 0.0002, "epoch": 4.02027027027027, "step": 1190}, {"loss": 1.3455, "grad_norm": 0.751200258731842, "learning_rate": 0.0002, "epoch": 4.054054054054054, "step": 1200}, {"loss": 1.35, "grad_norm": 0.9667422771453857, "learning_rate": 0.0002, "epoch": 4.087837837837838, "step": 1210}, {"loss": 1.4058, "grad_norm": 0.7374204397201538, "learning_rate": 0.0002, "epoch": 4.121621621621622, "step": 1220}, {"loss": 1.4454, "grad_norm": 0.8050723075866699, "learning_rate": 0.0002, "epoch": 4.155405405405405, "step": 1230}, {"loss": 1.4129, "grad_norm": 0.7360416054725647, "learning_rate": 0.0002, "epoch": 4.1891891891891895, "step": 1240}, {"loss": 1.3899, "grad_norm": 0.7947028279304504, "learning_rate": 0.0002, "epoch": 4.222972972972973, "step": 1250}, {"loss": 1.4264, "grad_norm": 0.7336545586585999, "learning_rate": 0.0002, "epoch": 4.256756756756757, "step": 1260}, {"loss": 1.4047, "grad_norm": 0.7051223516464233, "learning_rate": 0.0002, "epoch": 4.29054054054054, "step": 1270}, {"loss": 1.3507, "grad_norm": 0.7939404845237732, "learning_rate": 0.0002, "epoch": 4.324324324324325, "step": 1280}, {"loss": 1.387, "grad_norm": 0.7818657755851746, "learning_rate": 0.0002, "epoch": 4.358108108108108, "step": 1290}, {"loss": 1.3533, "grad_norm": 0.7490634918212891, "learning_rate": 0.0002, "epoch": 4.391891891891892, "step": 1300}, {"loss": 1.3912, "grad_norm": 0.9319770932197571, "learning_rate": 0.0002, "epoch": 4.425675675675675, "step": 1310}, {"loss": 1.439, "grad_norm": 0.7811282873153687, "learning_rate": 0.0002, "epoch": 4.45945945945946, "step": 1320}, {"loss": 1.3973, "grad_norm": 0.7785378694534302, "learning_rate": 0.0002, "epoch": 4.493243243243243, "step": 1330}, {"loss": 1.3931, "grad_norm": 0.8697562217712402, "learning_rate": 0.0002, "epoch": 4.527027027027027, "step": 1340}, {"loss": 1.3846, "grad_norm": 0.7927497625350952, "learning_rate": 0.0002, "epoch": 4.5608108108108105, "step": 1350}, {"loss": 1.3188, "grad_norm": 0.9746347665786743, "learning_rate": 0.0002, "epoch": 4.594594594594595, "step": 1360}, {"loss": 1.4611, "grad_norm": 0.7353375554084778, "learning_rate": 0.0002, "epoch": 4.628378378378378, "step": 1370}, {"loss": 1.4067, "grad_norm": 0.8139469027519226, "learning_rate": 0.0002, "epoch": 4.662162162162162, "step": 1380}, {"loss": 1.3727, "grad_norm": 1.728020429611206, "learning_rate": 0.0002, "epoch": 4.695945945945946, "step": 1390}, {"loss": 1.3971, "grad_norm": 0.8249040246009827, "learning_rate": 0.0002, "epoch": 4.72972972972973, "step": 1400}, {"loss": 1.4238, "grad_norm": 0.7916110157966614, "learning_rate": 0.0002, "epoch": 4.763513513513513, "step": 1410}, {"loss": 1.4064, "grad_norm": 0.7286198735237122, "learning_rate": 0.0002, "epoch": 4.797297297297297, "step": 1420}, {"loss": 1.305, "grad_norm": 0.7969672083854675, "learning_rate": 0.0002, "epoch": 4.831081081081081, "step": 1430}, {"loss": 1.4109, "grad_norm": 0.9593119621276855, "learning_rate": 0.0002, "epoch": 4.864864864864865, "step": 1440}, {"loss": 1.4112, "grad_norm": 0.8609084486961365, "learning_rate": 0.0002, "epoch": 4.898648648648649, "step": 1450}, {"loss": 1.3126, "grad_norm": 0.705203115940094, "learning_rate": 0.0002, "epoch": 4.9324324324324325, "step": 1460}, {"loss": 1.4226, "grad_norm": 0.9503173232078552, "learning_rate": 0.0002, "epoch": 4.966216216216216, "step": 1470}, {"loss": 1.4457, "grad_norm": 0.7174800038337708, "learning_rate": 0.0002, "epoch": 5.0, "step": 1480}]} +{"epoch": 6.0, "step": 1776, "epoch_duration": 611.6589844226837, "total_accumulated_duration": 3626.2334322929382, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7568.4541015625}, "peak_memory_usage": {"GPU_0": 13688.75439453125}, "avg_memory_reserved": {"GPU_0": 17416.0}, "peak_memory_reserved": {"GPU_0": 17416.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.6467, "grad_norm": 0.5820087194442749, "learning_rate": 0.0002, "epoch": 0.033783783783783786, "step": 10}, {"loss": 2.2808, "grad_norm": 0.4625075161457062, "learning_rate": 0.0002, "epoch": 0.06756756756756757, "step": 20}, {"loss": 2.0116, "grad_norm": 0.46946242451667786, "learning_rate": 0.0002, "epoch": 0.10135135135135136, "step": 30}, {"loss": 1.9089, "grad_norm": 0.5181305408477783, "learning_rate": 0.0002, "epoch": 0.13513513513513514, "step": 40}, {"loss": 1.9232, "grad_norm": 0.7439630627632141, "learning_rate": 0.0002, "epoch": 0.16891891891891891, "step": 50}, {"loss": 1.9646, "grad_norm": 0.5144319534301758, "learning_rate": 0.0002, "epoch": 0.20270270270270271, "step": 60}, {"loss": 1.921, "grad_norm": 0.46696192026138306, "learning_rate": 0.0002, "epoch": 0.23648648648648649, "step": 70}, {"loss": 1.8794, "grad_norm": 0.4330582022666931, "learning_rate": 0.0002, "epoch": 0.2702702702702703, "step": 80}, {"loss": 1.8897, "grad_norm": 0.502414882183075, "learning_rate": 0.0002, "epoch": 0.30405405405405406, "step": 90}, {"loss": 1.8166, "grad_norm": 0.4174366295337677, "learning_rate": 0.0002, "epoch": 0.33783783783783783, "step": 100}, {"loss": 1.8264, "grad_norm": 0.4296933710575104, "learning_rate": 0.0002, "epoch": 0.3716216216216216, "step": 110}, {"loss": 1.9223, "grad_norm": 0.4299834668636322, "learning_rate": 0.0002, "epoch": 0.40540540540540543, "step": 120}, {"loss": 1.8708, "grad_norm": 0.5583795309066772, "learning_rate": 0.0002, "epoch": 0.4391891891891892, "step": 130}, {"loss": 1.7786, "grad_norm": 0.5205192565917969, "learning_rate": 0.0002, "epoch": 0.47297297297297297, "step": 140}, {"loss": 1.8293, "grad_norm": 0.4683739244937897, "learning_rate": 0.0002, "epoch": 0.5067567567567568, "step": 150}, {"loss": 1.9102, "grad_norm": 0.497546523809433, "learning_rate": 0.0002, "epoch": 0.5405405405405406, "step": 160}, {"loss": 1.8077, "grad_norm": 0.40443721413612366, "learning_rate": 0.0002, "epoch": 0.5743243243243243, "step": 170}, {"loss": 1.8446, "grad_norm": 0.39056605100631714, "learning_rate": 0.0002, "epoch": 0.6081081081081081, "step": 180}, {"loss": 1.8747, "grad_norm": 0.42397141456604004, "learning_rate": 0.0002, "epoch": 0.6418918918918919, "step": 190}, {"loss": 1.8413, "grad_norm": 0.4679499566555023, "learning_rate": 0.0002, "epoch": 0.6756756756756757, "step": 200}, {"loss": 1.8401, "grad_norm": 0.39300158619880676, "learning_rate": 0.0002, "epoch": 0.7094594594594594, "step": 210}, {"loss": 1.8345, "grad_norm": 0.4001515805721283, "learning_rate": 0.0002, "epoch": 0.7432432432432432, "step": 220}, {"loss": 1.7571, "grad_norm": 0.4094320833683014, "learning_rate": 0.0002, "epoch": 0.777027027027027, "step": 230}, {"loss": 1.8972, "grad_norm": 0.37315094470977783, "learning_rate": 0.0002, "epoch": 0.8108108108108109, "step": 240}, {"loss": 1.8337, "grad_norm": 0.4331067204475403, "learning_rate": 0.0002, "epoch": 0.8445945945945946, "step": 250}, {"loss": 1.8555, "grad_norm": 0.39758574962615967, "learning_rate": 0.0002, "epoch": 0.8783783783783784, "step": 260}, {"loss": 1.8624, "grad_norm": 0.38240519165992737, "learning_rate": 0.0002, "epoch": 0.9121621621621622, "step": 270}, {"loss": 1.7531, "grad_norm": 0.40907856822013855, "learning_rate": 0.0002, "epoch": 0.9459459459459459, "step": 280}, {"loss": 1.8435, "grad_norm": 0.34108003973960876, "learning_rate": 0.0002, "epoch": 0.9797297297297297, "step": 290}, {"eval_loss": 1.8428829908370972, "eval_runtime": 62.3963, "eval_samples_per_second": 8.254, "eval_steps_per_second": 1.042, "epoch": 1.0, "step": 296}, {"loss": 1.7254, "grad_norm": 0.3993101716041565, "learning_rate": 0.0002, "epoch": 1.0135135135135136, "step": 300}, {"loss": 1.7985, "grad_norm": 0.45567989349365234, "learning_rate": 0.0002, "epoch": 1.0472972972972974, "step": 310}, {"loss": 1.757, "grad_norm": 0.3767794072628021, "learning_rate": 0.0002, "epoch": 1.0810810810810811, "step": 320}, {"loss": 1.7917, "grad_norm": 0.5181908011436462, "learning_rate": 0.0002, "epoch": 1.114864864864865, "step": 330}, {"loss": 1.7723, "grad_norm": 0.4213193356990814, "learning_rate": 0.0002, "epoch": 1.1486486486486487, "step": 340}, {"loss": 1.8203, "grad_norm": 0.45519495010375977, "learning_rate": 0.0002, "epoch": 1.1824324324324325, "step": 350}, {"loss": 1.6946, "grad_norm": 0.35332638025283813, "learning_rate": 0.0002, "epoch": 1.2162162162162162, "step": 360}, {"loss": 1.7541, "grad_norm": 0.3675481677055359, "learning_rate": 0.0002, "epoch": 1.25, "step": 370}, {"loss": 1.7458, "grad_norm": 0.4569270610809326, "learning_rate": 0.0002, "epoch": 1.2837837837837838, "step": 380}, {"loss": 1.7988, "grad_norm": 0.37950295209884644, "learning_rate": 0.0002, "epoch": 1.3175675675675675, "step": 390}, {"loss": 1.7032, "grad_norm": 0.5744572877883911, "learning_rate": 0.0002, "epoch": 1.3513513513513513, "step": 400}, {"loss": 1.7437, "grad_norm": 0.44380778074264526, "learning_rate": 0.0002, "epoch": 1.385135135135135, "step": 410}, {"loss": 1.7454, "grad_norm": 0.43328171968460083, "learning_rate": 0.0002, "epoch": 1.4189189189189189, "step": 420}, {"loss": 1.7636, "grad_norm": 0.41290056705474854, "learning_rate": 0.0002, "epoch": 1.4527027027027026, "step": 430}, {"loss": 1.7332, "grad_norm": 0.3771473169326782, "learning_rate": 0.0002, "epoch": 1.4864864864864864, "step": 440}, {"loss": 1.7618, "grad_norm": 0.42537811398506165, "learning_rate": 0.0002, "epoch": 1.5202702702702702, "step": 450}, {"loss": 1.8523, "grad_norm": 0.39705610275268555, "learning_rate": 0.0002, "epoch": 1.554054054054054, "step": 460}, {"loss": 1.7673, "grad_norm": 0.4178248643875122, "learning_rate": 0.0002, "epoch": 1.5878378378378377, "step": 470}, {"loss": 1.742, "grad_norm": 0.39107105135917664, "learning_rate": 0.0002, "epoch": 1.6216216216216215, "step": 480}, {"loss": 1.6984, "grad_norm": 0.38505619764328003, "learning_rate": 0.0002, "epoch": 1.6554054054054053, "step": 490}, {"loss": 1.7382, "grad_norm": 0.43590813875198364, "learning_rate": 0.0002, "epoch": 1.689189189189189, "step": 500}, {"loss": 1.7139, "grad_norm": 0.42785948514938354, "learning_rate": 0.0002, "epoch": 1.722972972972973, "step": 510}, {"loss": 1.7551, "grad_norm": 0.3829004168510437, "learning_rate": 0.0002, "epoch": 1.7567567567567568, "step": 520}, {"loss": 1.7744, "grad_norm": 0.35287904739379883, "learning_rate": 0.0002, "epoch": 1.7905405405405406, "step": 530}, {"loss": 1.7714, "grad_norm": 0.38657888770103455, "learning_rate": 0.0002, "epoch": 1.8243243243243243, "step": 540}, {"loss": 1.7535, "grad_norm": 0.41452157497406006, "learning_rate": 0.0002, "epoch": 1.8581081081081081, "step": 550}, {"loss": 1.7627, "grad_norm": 0.3898279070854187, "learning_rate": 0.0002, "epoch": 1.8918918918918919, "step": 560}, {"loss": 1.7494, "grad_norm": 0.4147624373435974, "learning_rate": 0.0002, "epoch": 1.9256756756756757, "step": 570}, {"loss": 1.7693, "grad_norm": 0.4374851584434509, "learning_rate": 0.0002, "epoch": 1.9594594594594594, "step": 580}, {"loss": 1.7796, "grad_norm": 0.48530328273773193, "learning_rate": 0.0002, "epoch": 1.9932432432432432, "step": 590}, {"eval_loss": 1.83539617061615, "eval_runtime": 70.9865, "eval_samples_per_second": 7.255, "eval_steps_per_second": 0.916, "epoch": 2.0, "step": 592}, {"loss": 1.7294, "grad_norm": 0.40344223380088806, "learning_rate": 0.0002, "epoch": 2.027027027027027, "step": 600}, {"loss": 1.6693, "grad_norm": 0.48268747329711914, "learning_rate": 0.0002, "epoch": 2.060810810810811, "step": 610}, {"loss": 1.6315, "grad_norm": 0.4675706923007965, "learning_rate": 0.0002, "epoch": 2.0945945945945947, "step": 620}, {"loss": 1.6627, "grad_norm": 0.47494322061538696, "learning_rate": 0.0002, "epoch": 2.1283783783783785, "step": 630}, {"loss": 1.5668, "grad_norm": 0.4555308520793915, "learning_rate": 0.0002, "epoch": 2.1621621621621623, "step": 640}, {"loss": 1.6537, "grad_norm": 0.43085595965385437, "learning_rate": 0.0002, "epoch": 2.195945945945946, "step": 650}, {"loss": 1.6316, "grad_norm": 0.4364128112792969, "learning_rate": 0.0002, "epoch": 2.22972972972973, "step": 660}, {"loss": 1.669, "grad_norm": 0.4711395800113678, "learning_rate": 0.0002, "epoch": 2.2635135135135136, "step": 670}, {"loss": 1.5758, "grad_norm": 0.5109705328941345, "learning_rate": 0.0002, "epoch": 2.2972972972972974, "step": 680}, {"loss": 1.5912, "grad_norm": 0.5185648798942566, "learning_rate": 0.0002, "epoch": 2.331081081081081, "step": 690}, {"loss": 1.6605, "grad_norm": 0.49192842841148376, "learning_rate": 0.0002, "epoch": 2.364864864864865, "step": 700}, {"loss": 1.6688, "grad_norm": 0.5619909763336182, "learning_rate": 0.0002, "epoch": 2.3986486486486487, "step": 710}, {"loss": 1.7836, "grad_norm": 0.4932861328125, "learning_rate": 0.0002, "epoch": 2.4324324324324325, "step": 720}, {"loss": 1.6532, "grad_norm": 0.5211932063102722, "learning_rate": 0.0002, "epoch": 2.4662162162162162, "step": 730}, {"loss": 1.667, "grad_norm": 0.4138050377368927, "learning_rate": 0.0002, "epoch": 2.5, "step": 740}, {"loss": 1.658, "grad_norm": 0.4644908010959625, "learning_rate": 0.0002, "epoch": 2.5337837837837838, "step": 750}, {"loss": 1.6451, "grad_norm": 0.4513227641582489, "learning_rate": 0.0002, "epoch": 2.5675675675675675, "step": 760}, {"loss": 1.7071, "grad_norm": 0.4735109508037567, "learning_rate": 0.0002, "epoch": 2.6013513513513513, "step": 770}, {"loss": 1.6659, "grad_norm": 0.5453559756278992, "learning_rate": 0.0002, "epoch": 2.635135135135135, "step": 780}, {"loss": 1.7211, "grad_norm": 0.5422565937042236, "learning_rate": 0.0002, "epoch": 2.668918918918919, "step": 790}, {"loss": 1.6623, "grad_norm": 0.4288518726825714, "learning_rate": 0.0002, "epoch": 2.7027027027027026, "step": 800}, {"loss": 1.7197, "grad_norm": 0.4085204005241394, "learning_rate": 0.0002, "epoch": 2.7364864864864864, "step": 810}, {"loss": 1.6376, "grad_norm": 0.49770182371139526, "learning_rate": 0.0002, "epoch": 2.77027027027027, "step": 820}, {"loss": 1.6332, "grad_norm": 0.5005106329917908, "learning_rate": 0.0002, "epoch": 2.804054054054054, "step": 830}, {"loss": 1.6675, "grad_norm": 0.4763440489768982, "learning_rate": 0.0002, "epoch": 2.8378378378378377, "step": 840}, {"loss": 1.7149, "grad_norm": 0.44995108246803284, "learning_rate": 0.0002, "epoch": 2.8716216216216215, "step": 850}, {"loss": 1.6438, "grad_norm": 0.5299676656723022, "learning_rate": 0.0002, "epoch": 2.9054054054054053, "step": 860}, {"loss": 1.6457, "grad_norm": 0.49627119302749634, "learning_rate": 0.0002, "epoch": 2.939189189189189, "step": 870}, {"loss": 1.6517, "grad_norm": 0.502545177936554, "learning_rate": 0.0002, "epoch": 2.972972972972973, "step": 880}, {"eval_loss": 1.8520468473434448, "eval_runtime": 70.9917, "eval_samples_per_second": 7.254, "eval_steps_per_second": 0.916, "epoch": 3.0, "step": 888}, {"loss": 1.6271, "grad_norm": 0.4756380319595337, "learning_rate": 0.0002, "epoch": 3.0067567567567566, "step": 890}, {"loss": 1.563, "grad_norm": 0.5167421102523804, "learning_rate": 0.0002, "epoch": 3.0405405405405403, "step": 900}, {"loss": 1.48, "grad_norm": 0.5524939298629761, "learning_rate": 0.0002, "epoch": 3.074324324324324, "step": 910}, {"loss": 1.5297, "grad_norm": 0.7045221924781799, "learning_rate": 0.0002, "epoch": 3.108108108108108, "step": 920}, {"loss": 1.5548, "grad_norm": 0.5692355036735535, "learning_rate": 0.0002, "epoch": 3.141891891891892, "step": 930}, {"loss": 1.5297, "grad_norm": 0.5467017292976379, "learning_rate": 0.0002, "epoch": 3.175675675675676, "step": 940}, {"loss": 1.5559, "grad_norm": 0.6004040241241455, "learning_rate": 0.0002, "epoch": 3.2094594594594597, "step": 950}, {"loss": 1.5255, "grad_norm": 0.5713295936584473, "learning_rate": 0.0002, "epoch": 3.2432432432432434, "step": 960}, {"loss": 1.5412, "grad_norm": 0.6054869890213013, "learning_rate": 0.0002, "epoch": 3.277027027027027, "step": 970}, {"loss": 1.5167, "grad_norm": 0.6304576992988586, "learning_rate": 0.0002, "epoch": 3.310810810810811, "step": 980}, {"loss": 1.52, "grad_norm": 0.5347281694412231, "learning_rate": 0.0002, "epoch": 3.3445945945945947, "step": 990}, {"loss": 1.5707, "grad_norm": 0.598211407661438, "learning_rate": 0.0002, "epoch": 3.3783783783783785, "step": 1000}, {"loss": 1.5243, "grad_norm": 0.637312650680542, "learning_rate": 0.0002, "epoch": 3.4121621621621623, "step": 1010}, {"loss": 1.5356, "grad_norm": 0.6092430949211121, "learning_rate": 0.0002, "epoch": 3.445945945945946, "step": 1020}, {"loss": 1.5856, "grad_norm": 0.6421037912368774, "learning_rate": 0.0002, "epoch": 3.47972972972973, "step": 1030}, {"loss": 1.5553, "grad_norm": 0.6712167263031006, "learning_rate": 0.0002, "epoch": 3.5135135135135136, "step": 1040}, {"loss": 1.4708, "grad_norm": 0.6466017365455627, "learning_rate": 0.0002, "epoch": 3.5472972972972974, "step": 1050}, {"loss": 1.5159, "grad_norm": 1.418167233467102, "learning_rate": 0.0002, "epoch": 3.581081081081081, "step": 1060}, {"loss": 1.5264, "grad_norm": 0.6092377305030823, "learning_rate": 0.0002, "epoch": 3.614864864864865, "step": 1070}, {"loss": 1.5227, "grad_norm": 0.5632478594779968, "learning_rate": 0.0002, "epoch": 3.6486486486486487, "step": 1080}, {"loss": 1.5492, "grad_norm": 0.6007736921310425, "learning_rate": 0.0002, "epoch": 3.6824324324324325, "step": 1090}, {"loss": 1.5002, "grad_norm": 0.6031264066696167, "learning_rate": 0.0002, "epoch": 3.7162162162162162, "step": 1100}, {"loss": 1.4797, "grad_norm": 0.5440598726272583, "learning_rate": 0.0002, "epoch": 3.75, "step": 1110}, {"loss": 1.5743, "grad_norm": 0.6304370760917664, "learning_rate": 0.0002, "epoch": 3.7837837837837838, "step": 1120}, {"loss": 1.6429, "grad_norm": 0.6729280948638916, "learning_rate": 0.0002, "epoch": 3.8175675675675675, "step": 1130}, {"loss": 1.594, "grad_norm": 0.5881901979446411, "learning_rate": 0.0002, "epoch": 3.8513513513513513, "step": 1140}, {"loss": 1.5008, "grad_norm": 0.5508038997650146, "learning_rate": 0.0002, "epoch": 3.885135135135135, "step": 1150}, {"loss": 1.5045, "grad_norm": 0.5926295518875122, "learning_rate": 0.0002, "epoch": 3.918918918918919, "step": 1160}, {"loss": 1.5223, "grad_norm": 0.5882043838500977, "learning_rate": 0.0002, "epoch": 3.9527027027027026, "step": 1170}, {"loss": 1.5874, "grad_norm": 0.604119598865509, "learning_rate": 0.0002, "epoch": 3.9864864864864864, "step": 1180}, {"eval_loss": 1.8923152685165405, "eval_runtime": 62.9577, "eval_samples_per_second": 8.18, "eval_steps_per_second": 1.032, "epoch": 4.0, "step": 1184}, {"loss": 1.4183, "grad_norm": 0.696061909198761, "learning_rate": 0.0002, "epoch": 4.02027027027027, "step": 1190}, {"loss": 1.3455, "grad_norm": 0.751200258731842, "learning_rate": 0.0002, "epoch": 4.054054054054054, "step": 1200}, {"loss": 1.35, "grad_norm": 0.9667422771453857, "learning_rate": 0.0002, "epoch": 4.087837837837838, "step": 1210}, {"loss": 1.4058, "grad_norm": 0.7374204397201538, "learning_rate": 0.0002, "epoch": 4.121621621621622, "step": 1220}, {"loss": 1.4454, "grad_norm": 0.8050723075866699, "learning_rate": 0.0002, "epoch": 4.155405405405405, "step": 1230}, {"loss": 1.4129, "grad_norm": 0.7360416054725647, "learning_rate": 0.0002, "epoch": 4.1891891891891895, "step": 1240}, {"loss": 1.3899, "grad_norm": 0.7947028279304504, "learning_rate": 0.0002, "epoch": 4.222972972972973, "step": 1250}, {"loss": 1.4264, "grad_norm": 0.7336545586585999, "learning_rate": 0.0002, "epoch": 4.256756756756757, "step": 1260}, {"loss": 1.4047, "grad_norm": 0.7051223516464233, "learning_rate": 0.0002, "epoch": 4.29054054054054, "step": 1270}, {"loss": 1.3507, "grad_norm": 0.7939404845237732, "learning_rate": 0.0002, "epoch": 4.324324324324325, "step": 1280}, {"loss": 1.387, "grad_norm": 0.7818657755851746, "learning_rate": 0.0002, "epoch": 4.358108108108108, "step": 1290}, {"loss": 1.3533, "grad_norm": 0.7490634918212891, "learning_rate": 0.0002, "epoch": 4.391891891891892, "step": 1300}, {"loss": 1.3912, "grad_norm": 0.9319770932197571, "learning_rate": 0.0002, "epoch": 4.425675675675675, "step": 1310}, {"loss": 1.439, "grad_norm": 0.7811282873153687, "learning_rate": 0.0002, "epoch": 4.45945945945946, "step": 1320}, {"loss": 1.3973, "grad_norm": 0.7785378694534302, "learning_rate": 0.0002, "epoch": 4.493243243243243, "step": 1330}, {"loss": 1.3931, "grad_norm": 0.8697562217712402, "learning_rate": 0.0002, "epoch": 4.527027027027027, "step": 1340}, {"loss": 1.3846, "grad_norm": 0.7927497625350952, "learning_rate": 0.0002, "epoch": 4.5608108108108105, "step": 1350}, {"loss": 1.3188, "grad_norm": 0.9746347665786743, "learning_rate": 0.0002, "epoch": 4.594594594594595, "step": 1360}, {"loss": 1.4611, "grad_norm": 0.7353375554084778, "learning_rate": 0.0002, "epoch": 4.628378378378378, "step": 1370}, {"loss": 1.4067, "grad_norm": 0.8139469027519226, "learning_rate": 0.0002, "epoch": 4.662162162162162, "step": 1380}, {"loss": 1.3727, "grad_norm": 1.728020429611206, "learning_rate": 0.0002, "epoch": 4.695945945945946, "step": 1390}, {"loss": 1.3971, "grad_norm": 0.8249040246009827, "learning_rate": 0.0002, "epoch": 4.72972972972973, "step": 1400}, {"loss": 1.4238, "grad_norm": 0.7916110157966614, "learning_rate": 0.0002, "epoch": 4.763513513513513, "step": 1410}, {"loss": 1.4064, "grad_norm": 0.7286198735237122, "learning_rate": 0.0002, "epoch": 4.797297297297297, "step": 1420}, {"loss": 1.305, "grad_norm": 0.7969672083854675, "learning_rate": 0.0002, "epoch": 4.831081081081081, "step": 1430}, {"loss": 1.4109, "grad_norm": 0.9593119621276855, "learning_rate": 0.0002, "epoch": 4.864864864864865, "step": 1440}, {"loss": 1.4112, "grad_norm": 0.8609084486961365, "learning_rate": 0.0002, "epoch": 4.898648648648649, "step": 1450}, {"loss": 1.3126, "grad_norm": 0.705203115940094, "learning_rate": 0.0002, "epoch": 4.9324324324324325, "step": 1460}, {"loss": 1.4226, "grad_norm": 0.9503173232078552, "learning_rate": 0.0002, "epoch": 4.966216216216216, "step": 1470}, {"loss": 1.4457, "grad_norm": 0.7174800038337708, "learning_rate": 0.0002, "epoch": 5.0, "step": 1480}, {"eval_loss": 1.9753261804580688, "eval_runtime": 70.4622, "eval_samples_per_second": 7.309, "eval_steps_per_second": 0.922, "epoch": 5.0, "step": 1480}, {"loss": 1.2986, "grad_norm": 1.450723648071289, "learning_rate": 0.0002, "epoch": 5.033783783783784, "step": 1490}, {"loss": 1.2184, "grad_norm": 0.9207791686058044, "learning_rate": 0.0002, "epoch": 5.0675675675675675, "step": 1500}, {"loss": 1.1628, "grad_norm": 1.0742532014846802, "learning_rate": 0.0002, "epoch": 5.101351351351352, "step": 1510}, {"loss": 1.2221, "grad_norm": 1.1070902347564697, "learning_rate": 0.0002, "epoch": 5.135135135135135, "step": 1520}, {"loss": 1.1737, "grad_norm": 0.9838612079620361, "learning_rate": 0.0002, "epoch": 5.168918918918919, "step": 1530}, {"loss": 1.2095, "grad_norm": 0.9286013245582581, "learning_rate": 0.0002, "epoch": 5.202702702702703, "step": 1540}, {"loss": 1.2243, "grad_norm": 0.9755229949951172, "learning_rate": 0.0002, "epoch": 5.236486486486487, "step": 1550}, {"loss": 1.1729, "grad_norm": 0.9734522104263306, "learning_rate": 0.0002, "epoch": 5.27027027027027, "step": 1560}, {"loss": 1.1528, "grad_norm": 1.1838241815567017, "learning_rate": 0.0002, "epoch": 5.304054054054054, "step": 1570}, {"loss": 1.2135, "grad_norm": 1.1389052867889404, "learning_rate": 0.0002, "epoch": 5.337837837837838, "step": 1580}, {"loss": 1.2486, "grad_norm": 1.2093408107757568, "learning_rate": 0.0002, "epoch": 5.371621621621622, "step": 1590}, {"loss": 1.2017, "grad_norm": 0.9418244361877441, "learning_rate": 0.0002, "epoch": 5.405405405405405, "step": 1600}, {"loss": 1.2554, "grad_norm": 0.9843172430992126, "learning_rate": 0.0002, "epoch": 5.4391891891891895, "step": 1610}, {"loss": 1.2967, "grad_norm": 1.0316557884216309, "learning_rate": 0.0002, "epoch": 5.472972972972973, "step": 1620}, {"loss": 1.2509, "grad_norm": 1.0008920431137085, "learning_rate": 0.0002, "epoch": 5.506756756756757, "step": 1630}, {"loss": 1.2908, "grad_norm": 1.1854851245880127, "learning_rate": 0.0002, "epoch": 5.54054054054054, "step": 1640}, {"loss": 1.2679, "grad_norm": 0.9324101209640503, "learning_rate": 0.0002, "epoch": 5.574324324324325, "step": 1650}, {"loss": 1.202, "grad_norm": 0.993882954120636, "learning_rate": 0.0002, "epoch": 5.608108108108108, "step": 1660}, {"loss": 1.2498, "grad_norm": 0.8795919418334961, "learning_rate": 0.0002, "epoch": 5.641891891891892, "step": 1670}, {"loss": 1.2244, "grad_norm": 1.203471064567566, "learning_rate": 0.0002, "epoch": 5.675675675675675, "step": 1680}, {"loss": 1.2452, "grad_norm": 0.916689932346344, "learning_rate": 0.0002, "epoch": 5.70945945945946, "step": 1690}, {"loss": 1.1774, "grad_norm": 0.8567600846290588, "learning_rate": 0.0002, "epoch": 5.743243243243243, "step": 1700}, {"loss": 1.2585, "grad_norm": 0.9426271319389343, "learning_rate": 0.0002, "epoch": 5.777027027027027, "step": 1710}, {"loss": 1.239, "grad_norm": 1.0812019109725952, "learning_rate": 0.0002, "epoch": 5.8108108108108105, "step": 1720}, {"loss": 1.1527, "grad_norm": 1.0045292377471924, "learning_rate": 0.0002, "epoch": 5.844594594594595, "step": 1730}, {"loss": 1.2949, "grad_norm": 1.0750256776809692, "learning_rate": 0.0002, "epoch": 5.878378378378378, "step": 1740}, {"loss": 1.3052, "grad_norm": 1.0471885204315186, "learning_rate": 0.0002, "epoch": 5.912162162162162, "step": 1750}, {"loss": 1.2318, "grad_norm": 0.9119327664375305, "learning_rate": 0.0002, "epoch": 5.945945945945946, "step": 1760}, {"loss": 1.2652, "grad_norm": 1.0975338220596313, "learning_rate": 0.0002, "epoch": 5.97972972972973, "step": 1770}]} +{"epoch": 7.0, "step": 2072, "epoch_duration": 611.8136160373688, "total_accumulated_duration": 4238.047048330307, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7568.4541015625}, "peak_memory_usage": {"GPU_0": 13688.75439453125}, "avg_memory_reserved": {"GPU_0": 17416.0}, "peak_memory_reserved": {"GPU_0": 17416.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.6467, "grad_norm": 0.5820087194442749, "learning_rate": 0.0002, "epoch": 0.033783783783783786, "step": 10}, {"loss": 2.2808, "grad_norm": 0.4625075161457062, "learning_rate": 0.0002, "epoch": 0.06756756756756757, "step": 20}, {"loss": 2.0116, "grad_norm": 0.46946242451667786, "learning_rate": 0.0002, "epoch": 0.10135135135135136, "step": 30}, {"loss": 1.9089, "grad_norm": 0.5181305408477783, "learning_rate": 0.0002, "epoch": 0.13513513513513514, "step": 40}, {"loss": 1.9232, "grad_norm": 0.7439630627632141, "learning_rate": 0.0002, "epoch": 0.16891891891891891, "step": 50}, {"loss": 1.9646, "grad_norm": 0.5144319534301758, "learning_rate": 0.0002, "epoch": 0.20270270270270271, "step": 60}, {"loss": 1.921, "grad_norm": 0.46696192026138306, "learning_rate": 0.0002, "epoch": 0.23648648648648649, "step": 70}, {"loss": 1.8794, "grad_norm": 0.4330582022666931, "learning_rate": 0.0002, "epoch": 0.2702702702702703, "step": 80}, {"loss": 1.8897, "grad_norm": 0.502414882183075, "learning_rate": 0.0002, "epoch": 0.30405405405405406, "step": 90}, {"loss": 1.8166, "grad_norm": 0.4174366295337677, "learning_rate": 0.0002, "epoch": 0.33783783783783783, "step": 100}, {"loss": 1.8264, "grad_norm": 0.4296933710575104, "learning_rate": 0.0002, "epoch": 0.3716216216216216, "step": 110}, {"loss": 1.9223, "grad_norm": 0.4299834668636322, "learning_rate": 0.0002, "epoch": 0.40540540540540543, "step": 120}, {"loss": 1.8708, "grad_norm": 0.5583795309066772, "learning_rate": 0.0002, "epoch": 0.4391891891891892, "step": 130}, {"loss": 1.7786, "grad_norm": 0.5205192565917969, "learning_rate": 0.0002, "epoch": 0.47297297297297297, "step": 140}, {"loss": 1.8293, "grad_norm": 0.4683739244937897, "learning_rate": 0.0002, "epoch": 0.5067567567567568, "step": 150}, {"loss": 1.9102, "grad_norm": 0.497546523809433, "learning_rate": 0.0002, "epoch": 0.5405405405405406, "step": 160}, {"loss": 1.8077, "grad_norm": 0.40443721413612366, "learning_rate": 0.0002, "epoch": 0.5743243243243243, "step": 170}, {"loss": 1.8446, "grad_norm": 0.39056605100631714, "learning_rate": 0.0002, "epoch": 0.6081081081081081, "step": 180}, {"loss": 1.8747, "grad_norm": 0.42397141456604004, "learning_rate": 0.0002, "epoch": 0.6418918918918919, "step": 190}, {"loss": 1.8413, "grad_norm": 0.4679499566555023, "learning_rate": 0.0002, "epoch": 0.6756756756756757, "step": 200}, {"loss": 1.8401, "grad_norm": 0.39300158619880676, "learning_rate": 0.0002, "epoch": 0.7094594594594594, "step": 210}, {"loss": 1.8345, "grad_norm": 0.4001515805721283, "learning_rate": 0.0002, "epoch": 0.7432432432432432, "step": 220}, {"loss": 1.7571, "grad_norm": 0.4094320833683014, "learning_rate": 0.0002, "epoch": 0.777027027027027, "step": 230}, {"loss": 1.8972, "grad_norm": 0.37315094470977783, "learning_rate": 0.0002, "epoch": 0.8108108108108109, "step": 240}, {"loss": 1.8337, "grad_norm": 0.4331067204475403, "learning_rate": 0.0002, "epoch": 0.8445945945945946, "step": 250}, {"loss": 1.8555, "grad_norm": 0.39758574962615967, "learning_rate": 0.0002, "epoch": 0.8783783783783784, "step": 260}, {"loss": 1.8624, "grad_norm": 0.38240519165992737, "learning_rate": 0.0002, "epoch": 0.9121621621621622, "step": 270}, {"loss": 1.7531, "grad_norm": 0.40907856822013855, "learning_rate": 0.0002, "epoch": 0.9459459459459459, "step": 280}, {"loss": 1.8435, "grad_norm": 0.34108003973960876, "learning_rate": 0.0002, "epoch": 0.9797297297297297, "step": 290}, {"eval_loss": 1.8428829908370972, "eval_runtime": 62.3963, "eval_samples_per_second": 8.254, "eval_steps_per_second": 1.042, "epoch": 1.0, "step": 296}, {"loss": 1.7254, "grad_norm": 0.3993101716041565, "learning_rate": 0.0002, "epoch": 1.0135135135135136, "step": 300}, {"loss": 1.7985, "grad_norm": 0.45567989349365234, "learning_rate": 0.0002, "epoch": 1.0472972972972974, "step": 310}, {"loss": 1.757, "grad_norm": 0.3767794072628021, "learning_rate": 0.0002, "epoch": 1.0810810810810811, "step": 320}, {"loss": 1.7917, "grad_norm": 0.5181908011436462, "learning_rate": 0.0002, "epoch": 1.114864864864865, "step": 330}, {"loss": 1.7723, "grad_norm": 0.4213193356990814, "learning_rate": 0.0002, "epoch": 1.1486486486486487, "step": 340}, {"loss": 1.8203, "grad_norm": 0.45519495010375977, "learning_rate": 0.0002, "epoch": 1.1824324324324325, "step": 350}, {"loss": 1.6946, "grad_norm": 0.35332638025283813, "learning_rate": 0.0002, "epoch": 1.2162162162162162, "step": 360}, {"loss": 1.7541, "grad_norm": 0.3675481677055359, "learning_rate": 0.0002, "epoch": 1.25, "step": 370}, {"loss": 1.7458, "grad_norm": 0.4569270610809326, "learning_rate": 0.0002, "epoch": 1.2837837837837838, "step": 380}, {"loss": 1.7988, "grad_norm": 0.37950295209884644, "learning_rate": 0.0002, "epoch": 1.3175675675675675, "step": 390}, {"loss": 1.7032, "grad_norm": 0.5744572877883911, "learning_rate": 0.0002, "epoch": 1.3513513513513513, "step": 400}, {"loss": 1.7437, "grad_norm": 0.44380778074264526, "learning_rate": 0.0002, "epoch": 1.385135135135135, "step": 410}, {"loss": 1.7454, "grad_norm": 0.43328171968460083, "learning_rate": 0.0002, "epoch": 1.4189189189189189, "step": 420}, {"loss": 1.7636, "grad_norm": 0.41290056705474854, "learning_rate": 0.0002, "epoch": 1.4527027027027026, "step": 430}, {"loss": 1.7332, "grad_norm": 0.3771473169326782, "learning_rate": 0.0002, "epoch": 1.4864864864864864, "step": 440}, {"loss": 1.7618, "grad_norm": 0.42537811398506165, "learning_rate": 0.0002, "epoch": 1.5202702702702702, "step": 450}, {"loss": 1.8523, "grad_norm": 0.39705610275268555, "learning_rate": 0.0002, "epoch": 1.554054054054054, "step": 460}, {"loss": 1.7673, "grad_norm": 0.4178248643875122, "learning_rate": 0.0002, "epoch": 1.5878378378378377, "step": 470}, {"loss": 1.742, "grad_norm": 0.39107105135917664, "learning_rate": 0.0002, "epoch": 1.6216216216216215, "step": 480}, {"loss": 1.6984, "grad_norm": 0.38505619764328003, "learning_rate": 0.0002, "epoch": 1.6554054054054053, "step": 490}, {"loss": 1.7382, "grad_norm": 0.43590813875198364, "learning_rate": 0.0002, "epoch": 1.689189189189189, "step": 500}, {"loss": 1.7139, "grad_norm": 0.42785948514938354, "learning_rate": 0.0002, "epoch": 1.722972972972973, "step": 510}, {"loss": 1.7551, "grad_norm": 0.3829004168510437, "learning_rate": 0.0002, "epoch": 1.7567567567567568, "step": 520}, {"loss": 1.7744, "grad_norm": 0.35287904739379883, "learning_rate": 0.0002, "epoch": 1.7905405405405406, "step": 530}, {"loss": 1.7714, "grad_norm": 0.38657888770103455, "learning_rate": 0.0002, "epoch": 1.8243243243243243, "step": 540}, {"loss": 1.7535, "grad_norm": 0.41452157497406006, "learning_rate": 0.0002, "epoch": 1.8581081081081081, "step": 550}, {"loss": 1.7627, "grad_norm": 0.3898279070854187, "learning_rate": 0.0002, "epoch": 1.8918918918918919, "step": 560}, {"loss": 1.7494, "grad_norm": 0.4147624373435974, "learning_rate": 0.0002, "epoch": 1.9256756756756757, "step": 570}, {"loss": 1.7693, "grad_norm": 0.4374851584434509, "learning_rate": 0.0002, "epoch": 1.9594594594594594, "step": 580}, {"loss": 1.7796, "grad_norm": 0.48530328273773193, "learning_rate": 0.0002, "epoch": 1.9932432432432432, "step": 590}, {"eval_loss": 1.83539617061615, "eval_runtime": 70.9865, "eval_samples_per_second": 7.255, "eval_steps_per_second": 0.916, "epoch": 2.0, "step": 592}, {"loss": 1.7294, "grad_norm": 0.40344223380088806, "learning_rate": 0.0002, "epoch": 2.027027027027027, "step": 600}, {"loss": 1.6693, "grad_norm": 0.48268747329711914, "learning_rate": 0.0002, "epoch": 2.060810810810811, "step": 610}, {"loss": 1.6315, "grad_norm": 0.4675706923007965, "learning_rate": 0.0002, "epoch": 2.0945945945945947, "step": 620}, {"loss": 1.6627, "grad_norm": 0.47494322061538696, "learning_rate": 0.0002, "epoch": 2.1283783783783785, "step": 630}, {"loss": 1.5668, "grad_norm": 0.4555308520793915, "learning_rate": 0.0002, "epoch": 2.1621621621621623, "step": 640}, {"loss": 1.6537, "grad_norm": 0.43085595965385437, "learning_rate": 0.0002, "epoch": 2.195945945945946, "step": 650}, {"loss": 1.6316, "grad_norm": 0.4364128112792969, "learning_rate": 0.0002, "epoch": 2.22972972972973, "step": 660}, {"loss": 1.669, "grad_norm": 0.4711395800113678, "learning_rate": 0.0002, "epoch": 2.2635135135135136, "step": 670}, {"loss": 1.5758, "grad_norm": 0.5109705328941345, "learning_rate": 0.0002, "epoch": 2.2972972972972974, "step": 680}, {"loss": 1.5912, "grad_norm": 0.5185648798942566, "learning_rate": 0.0002, "epoch": 2.331081081081081, "step": 690}, {"loss": 1.6605, "grad_norm": 0.49192842841148376, "learning_rate": 0.0002, "epoch": 2.364864864864865, "step": 700}, {"loss": 1.6688, "grad_norm": 0.5619909763336182, "learning_rate": 0.0002, "epoch": 2.3986486486486487, "step": 710}, {"loss": 1.7836, "grad_norm": 0.4932861328125, "learning_rate": 0.0002, "epoch": 2.4324324324324325, "step": 720}, {"loss": 1.6532, "grad_norm": 0.5211932063102722, "learning_rate": 0.0002, "epoch": 2.4662162162162162, "step": 730}, {"loss": 1.667, "grad_norm": 0.4138050377368927, "learning_rate": 0.0002, "epoch": 2.5, "step": 740}, {"loss": 1.658, "grad_norm": 0.4644908010959625, "learning_rate": 0.0002, "epoch": 2.5337837837837838, "step": 750}, {"loss": 1.6451, "grad_norm": 0.4513227641582489, "learning_rate": 0.0002, "epoch": 2.5675675675675675, "step": 760}, {"loss": 1.7071, "grad_norm": 0.4735109508037567, "learning_rate": 0.0002, "epoch": 2.6013513513513513, "step": 770}, {"loss": 1.6659, "grad_norm": 0.5453559756278992, "learning_rate": 0.0002, "epoch": 2.635135135135135, "step": 780}, {"loss": 1.7211, "grad_norm": 0.5422565937042236, "learning_rate": 0.0002, "epoch": 2.668918918918919, "step": 790}, {"loss": 1.6623, "grad_norm": 0.4288518726825714, "learning_rate": 0.0002, "epoch": 2.7027027027027026, "step": 800}, {"loss": 1.7197, "grad_norm": 0.4085204005241394, "learning_rate": 0.0002, "epoch": 2.7364864864864864, "step": 810}, {"loss": 1.6376, "grad_norm": 0.49770182371139526, "learning_rate": 0.0002, "epoch": 2.77027027027027, "step": 820}, {"loss": 1.6332, "grad_norm": 0.5005106329917908, "learning_rate": 0.0002, "epoch": 2.804054054054054, "step": 830}, {"loss": 1.6675, "grad_norm": 0.4763440489768982, "learning_rate": 0.0002, "epoch": 2.8378378378378377, "step": 840}, {"loss": 1.7149, "grad_norm": 0.44995108246803284, "learning_rate": 0.0002, "epoch": 2.8716216216216215, "step": 850}, {"loss": 1.6438, "grad_norm": 0.5299676656723022, "learning_rate": 0.0002, "epoch": 2.9054054054054053, "step": 860}, {"loss": 1.6457, "grad_norm": 0.49627119302749634, "learning_rate": 0.0002, "epoch": 2.939189189189189, "step": 870}, {"loss": 1.6517, "grad_norm": 0.502545177936554, "learning_rate": 0.0002, "epoch": 2.972972972972973, "step": 880}, {"eval_loss": 1.8520468473434448, "eval_runtime": 70.9917, "eval_samples_per_second": 7.254, "eval_steps_per_second": 0.916, "epoch": 3.0, "step": 888}, {"loss": 1.6271, "grad_norm": 0.4756380319595337, "learning_rate": 0.0002, "epoch": 3.0067567567567566, "step": 890}, {"loss": 1.563, "grad_norm": 0.5167421102523804, "learning_rate": 0.0002, "epoch": 3.0405405405405403, "step": 900}, {"loss": 1.48, "grad_norm": 0.5524939298629761, "learning_rate": 0.0002, "epoch": 3.074324324324324, "step": 910}, {"loss": 1.5297, "grad_norm": 0.7045221924781799, "learning_rate": 0.0002, "epoch": 3.108108108108108, "step": 920}, {"loss": 1.5548, "grad_norm": 0.5692355036735535, "learning_rate": 0.0002, "epoch": 3.141891891891892, "step": 930}, {"loss": 1.5297, "grad_norm": 0.5467017292976379, "learning_rate": 0.0002, "epoch": 3.175675675675676, "step": 940}, {"loss": 1.5559, "grad_norm": 0.6004040241241455, "learning_rate": 0.0002, "epoch": 3.2094594594594597, "step": 950}, {"loss": 1.5255, "grad_norm": 0.5713295936584473, "learning_rate": 0.0002, "epoch": 3.2432432432432434, "step": 960}, {"loss": 1.5412, "grad_norm": 0.6054869890213013, "learning_rate": 0.0002, "epoch": 3.277027027027027, "step": 970}, {"loss": 1.5167, "grad_norm": 0.6304576992988586, "learning_rate": 0.0002, "epoch": 3.310810810810811, "step": 980}, {"loss": 1.52, "grad_norm": 0.5347281694412231, "learning_rate": 0.0002, "epoch": 3.3445945945945947, "step": 990}, {"loss": 1.5707, "grad_norm": 0.598211407661438, "learning_rate": 0.0002, "epoch": 3.3783783783783785, "step": 1000}, {"loss": 1.5243, "grad_norm": 0.637312650680542, "learning_rate": 0.0002, "epoch": 3.4121621621621623, "step": 1010}, {"loss": 1.5356, "grad_norm": 0.6092430949211121, "learning_rate": 0.0002, "epoch": 3.445945945945946, "step": 1020}, {"loss": 1.5856, "grad_norm": 0.6421037912368774, "learning_rate": 0.0002, "epoch": 3.47972972972973, "step": 1030}, {"loss": 1.5553, "grad_norm": 0.6712167263031006, "learning_rate": 0.0002, "epoch": 3.5135135135135136, "step": 1040}, {"loss": 1.4708, "grad_norm": 0.6466017365455627, "learning_rate": 0.0002, "epoch": 3.5472972972972974, "step": 1050}, {"loss": 1.5159, "grad_norm": 1.418167233467102, "learning_rate": 0.0002, "epoch": 3.581081081081081, "step": 1060}, {"loss": 1.5264, "grad_norm": 0.6092377305030823, "learning_rate": 0.0002, "epoch": 3.614864864864865, "step": 1070}, {"loss": 1.5227, "grad_norm": 0.5632478594779968, "learning_rate": 0.0002, "epoch": 3.6486486486486487, "step": 1080}, {"loss": 1.5492, "grad_norm": 0.6007736921310425, "learning_rate": 0.0002, "epoch": 3.6824324324324325, "step": 1090}, {"loss": 1.5002, "grad_norm": 0.6031264066696167, "learning_rate": 0.0002, "epoch": 3.7162162162162162, "step": 1100}, {"loss": 1.4797, "grad_norm": 0.5440598726272583, "learning_rate": 0.0002, "epoch": 3.75, "step": 1110}, {"loss": 1.5743, "grad_norm": 0.6304370760917664, "learning_rate": 0.0002, "epoch": 3.7837837837837838, "step": 1120}, {"loss": 1.6429, "grad_norm": 0.6729280948638916, "learning_rate": 0.0002, "epoch": 3.8175675675675675, "step": 1130}, {"loss": 1.594, "grad_norm": 0.5881901979446411, "learning_rate": 0.0002, "epoch": 3.8513513513513513, "step": 1140}, {"loss": 1.5008, "grad_norm": 0.5508038997650146, "learning_rate": 0.0002, "epoch": 3.885135135135135, "step": 1150}, {"loss": 1.5045, "grad_norm": 0.5926295518875122, "learning_rate": 0.0002, "epoch": 3.918918918918919, "step": 1160}, {"loss": 1.5223, "grad_norm": 0.5882043838500977, "learning_rate": 0.0002, "epoch": 3.9527027027027026, "step": 1170}, {"loss": 1.5874, "grad_norm": 0.604119598865509, "learning_rate": 0.0002, "epoch": 3.9864864864864864, "step": 1180}, {"eval_loss": 1.8923152685165405, "eval_runtime": 62.9577, "eval_samples_per_second": 8.18, "eval_steps_per_second": 1.032, "epoch": 4.0, "step": 1184}, {"loss": 1.4183, "grad_norm": 0.696061909198761, "learning_rate": 0.0002, "epoch": 4.02027027027027, "step": 1190}, {"loss": 1.3455, "grad_norm": 0.751200258731842, "learning_rate": 0.0002, "epoch": 4.054054054054054, "step": 1200}, {"loss": 1.35, "grad_norm": 0.9667422771453857, "learning_rate": 0.0002, "epoch": 4.087837837837838, "step": 1210}, {"loss": 1.4058, "grad_norm": 0.7374204397201538, "learning_rate": 0.0002, "epoch": 4.121621621621622, "step": 1220}, {"loss": 1.4454, "grad_norm": 0.8050723075866699, "learning_rate": 0.0002, "epoch": 4.155405405405405, "step": 1230}, {"loss": 1.4129, "grad_norm": 0.7360416054725647, "learning_rate": 0.0002, "epoch": 4.1891891891891895, "step": 1240}, {"loss": 1.3899, "grad_norm": 0.7947028279304504, "learning_rate": 0.0002, "epoch": 4.222972972972973, "step": 1250}, {"loss": 1.4264, "grad_norm": 0.7336545586585999, "learning_rate": 0.0002, "epoch": 4.256756756756757, "step": 1260}, {"loss": 1.4047, "grad_norm": 0.7051223516464233, "learning_rate": 0.0002, "epoch": 4.29054054054054, "step": 1270}, {"loss": 1.3507, "grad_norm": 0.7939404845237732, "learning_rate": 0.0002, "epoch": 4.324324324324325, "step": 1280}, {"loss": 1.387, "grad_norm": 0.7818657755851746, "learning_rate": 0.0002, "epoch": 4.358108108108108, "step": 1290}, {"loss": 1.3533, "grad_norm": 0.7490634918212891, "learning_rate": 0.0002, "epoch": 4.391891891891892, "step": 1300}, {"loss": 1.3912, "grad_norm": 0.9319770932197571, "learning_rate": 0.0002, "epoch": 4.425675675675675, "step": 1310}, {"loss": 1.439, "grad_norm": 0.7811282873153687, "learning_rate": 0.0002, "epoch": 4.45945945945946, "step": 1320}, {"loss": 1.3973, "grad_norm": 0.7785378694534302, "learning_rate": 0.0002, "epoch": 4.493243243243243, "step": 1330}, {"loss": 1.3931, "grad_norm": 0.8697562217712402, "learning_rate": 0.0002, "epoch": 4.527027027027027, "step": 1340}, {"loss": 1.3846, "grad_norm": 0.7927497625350952, "learning_rate": 0.0002, "epoch": 4.5608108108108105, "step": 1350}, {"loss": 1.3188, "grad_norm": 0.9746347665786743, "learning_rate": 0.0002, "epoch": 4.594594594594595, "step": 1360}, {"loss": 1.4611, "grad_norm": 0.7353375554084778, "learning_rate": 0.0002, "epoch": 4.628378378378378, "step": 1370}, {"loss": 1.4067, "grad_norm": 0.8139469027519226, "learning_rate": 0.0002, "epoch": 4.662162162162162, "step": 1380}, {"loss": 1.3727, "grad_norm": 1.728020429611206, "learning_rate": 0.0002, "epoch": 4.695945945945946, "step": 1390}, {"loss": 1.3971, "grad_norm": 0.8249040246009827, "learning_rate": 0.0002, "epoch": 4.72972972972973, "step": 1400}, {"loss": 1.4238, "grad_norm": 0.7916110157966614, "learning_rate": 0.0002, "epoch": 4.763513513513513, "step": 1410}, {"loss": 1.4064, "grad_norm": 0.7286198735237122, "learning_rate": 0.0002, "epoch": 4.797297297297297, "step": 1420}, {"loss": 1.305, "grad_norm": 0.7969672083854675, "learning_rate": 0.0002, "epoch": 4.831081081081081, "step": 1430}, {"loss": 1.4109, "grad_norm": 0.9593119621276855, "learning_rate": 0.0002, "epoch": 4.864864864864865, "step": 1440}, {"loss": 1.4112, "grad_norm": 0.8609084486961365, "learning_rate": 0.0002, "epoch": 4.898648648648649, "step": 1450}, {"loss": 1.3126, "grad_norm": 0.705203115940094, "learning_rate": 0.0002, "epoch": 4.9324324324324325, "step": 1460}, {"loss": 1.4226, "grad_norm": 0.9503173232078552, "learning_rate": 0.0002, "epoch": 4.966216216216216, "step": 1470}, {"loss": 1.4457, "grad_norm": 0.7174800038337708, "learning_rate": 0.0002, "epoch": 5.0, "step": 1480}, {"eval_loss": 1.9753261804580688, "eval_runtime": 70.4622, "eval_samples_per_second": 7.309, "eval_steps_per_second": 0.922, "epoch": 5.0, "step": 1480}, {"loss": 1.2986, "grad_norm": 1.450723648071289, "learning_rate": 0.0002, "epoch": 5.033783783783784, "step": 1490}, {"loss": 1.2184, "grad_norm": 0.9207791686058044, "learning_rate": 0.0002, "epoch": 5.0675675675675675, "step": 1500}, {"loss": 1.1628, "grad_norm": 1.0742532014846802, "learning_rate": 0.0002, "epoch": 5.101351351351352, "step": 1510}, {"loss": 1.2221, "grad_norm": 1.1070902347564697, "learning_rate": 0.0002, "epoch": 5.135135135135135, "step": 1520}, {"loss": 1.1737, "grad_norm": 0.9838612079620361, "learning_rate": 0.0002, "epoch": 5.168918918918919, "step": 1530}, {"loss": 1.2095, "grad_norm": 0.9286013245582581, "learning_rate": 0.0002, "epoch": 5.202702702702703, "step": 1540}, {"loss": 1.2243, "grad_norm": 0.9755229949951172, "learning_rate": 0.0002, "epoch": 5.236486486486487, "step": 1550}, {"loss": 1.1729, "grad_norm": 0.9734522104263306, "learning_rate": 0.0002, "epoch": 5.27027027027027, "step": 1560}, {"loss": 1.1528, "grad_norm": 1.1838241815567017, "learning_rate": 0.0002, "epoch": 5.304054054054054, "step": 1570}, {"loss": 1.2135, "grad_norm": 1.1389052867889404, "learning_rate": 0.0002, "epoch": 5.337837837837838, "step": 1580}, {"loss": 1.2486, "grad_norm": 1.2093408107757568, "learning_rate": 0.0002, "epoch": 5.371621621621622, "step": 1590}, {"loss": 1.2017, "grad_norm": 0.9418244361877441, "learning_rate": 0.0002, "epoch": 5.405405405405405, "step": 1600}, {"loss": 1.2554, "grad_norm": 0.9843172430992126, "learning_rate": 0.0002, "epoch": 5.4391891891891895, "step": 1610}, {"loss": 1.2967, "grad_norm": 1.0316557884216309, "learning_rate": 0.0002, "epoch": 5.472972972972973, "step": 1620}, {"loss": 1.2509, "grad_norm": 1.0008920431137085, "learning_rate": 0.0002, "epoch": 5.506756756756757, "step": 1630}, {"loss": 1.2908, "grad_norm": 1.1854851245880127, "learning_rate": 0.0002, "epoch": 5.54054054054054, "step": 1640}, {"loss": 1.2679, "grad_norm": 0.9324101209640503, "learning_rate": 0.0002, "epoch": 5.574324324324325, "step": 1650}, {"loss": 1.202, "grad_norm": 0.993882954120636, "learning_rate": 0.0002, "epoch": 5.608108108108108, "step": 1660}, {"loss": 1.2498, "grad_norm": 0.8795919418334961, "learning_rate": 0.0002, "epoch": 5.641891891891892, "step": 1670}, {"loss": 1.2244, "grad_norm": 1.203471064567566, "learning_rate": 0.0002, "epoch": 5.675675675675675, "step": 1680}, {"loss": 1.2452, "grad_norm": 0.916689932346344, "learning_rate": 0.0002, "epoch": 5.70945945945946, "step": 1690}, {"loss": 1.1774, "grad_norm": 0.8567600846290588, "learning_rate": 0.0002, "epoch": 5.743243243243243, "step": 1700}, {"loss": 1.2585, "grad_norm": 0.9426271319389343, "learning_rate": 0.0002, "epoch": 5.777027027027027, "step": 1710}, {"loss": 1.239, "grad_norm": 1.0812019109725952, "learning_rate": 0.0002, "epoch": 5.8108108108108105, "step": 1720}, {"loss": 1.1527, "grad_norm": 1.0045292377471924, "learning_rate": 0.0002, "epoch": 5.844594594594595, "step": 1730}, {"loss": 1.2949, "grad_norm": 1.0750256776809692, "learning_rate": 0.0002, "epoch": 5.878378378378378, "step": 1740}, {"loss": 1.3052, "grad_norm": 1.0471885204315186, "learning_rate": 0.0002, "epoch": 5.912162162162162, "step": 1750}, {"loss": 1.2318, "grad_norm": 0.9119327664375305, "learning_rate": 0.0002, "epoch": 5.945945945945946, "step": 1760}, {"loss": 1.2652, "grad_norm": 1.0975338220596313, "learning_rate": 0.0002, "epoch": 5.97972972972973, "step": 1770}, {"eval_loss": 2.104356527328491, "eval_runtime": 71.0586, "eval_samples_per_second": 7.248, "eval_steps_per_second": 0.915, "epoch": 6.0, "step": 1776}, {"loss": 1.1342, "grad_norm": 0.9404756426811218, "learning_rate": 0.0002, "epoch": 6.013513513513513, "step": 1780}, {"loss": 0.9892, "grad_norm": 1.3757696151733398, "learning_rate": 0.0002, "epoch": 6.047297297297297, "step": 1790}, {"loss": 1.0826, "grad_norm": 1.5798641443252563, "learning_rate": 0.0002, "epoch": 6.081081081081081, "step": 1800}, {"loss": 0.9929, "grad_norm": 1.3777581453323364, "learning_rate": 0.0002, "epoch": 6.114864864864865, "step": 1810}, {"loss": 1.0257, "grad_norm": 1.136362910270691, "learning_rate": 0.0002, "epoch": 6.148648648648648, "step": 1820}, {"loss": 1.0623, "grad_norm": 1.3719290494918823, "learning_rate": 0.0002, "epoch": 6.1824324324324325, "step": 1830}, {"loss": 1.0144, "grad_norm": 1.375697374343872, "learning_rate": 0.0002, "epoch": 6.216216216216216, "step": 1840}, {"loss": 1.0307, "grad_norm": 1.3208998441696167, "learning_rate": 0.0002, "epoch": 6.25, "step": 1850}, {"loss": 1.0298, "grad_norm": 1.3176994323730469, "learning_rate": 0.0002, "epoch": 6.283783783783784, "step": 1860}, {"loss": 1.0809, "grad_norm": 1.3333075046539307, "learning_rate": 0.0002, "epoch": 6.3175675675675675, "step": 1870}, {"loss": 1.1059, "grad_norm": 1.6315182447433472, "learning_rate": 0.0002, "epoch": 6.351351351351352, "step": 1880}, {"loss": 1.0139, "grad_norm": 1.1802350282669067, "learning_rate": 0.0002, "epoch": 6.385135135135135, "step": 1890}, {"loss": 1.07, "grad_norm": 1.0628817081451416, "learning_rate": 0.0002, "epoch": 6.418918918918919, "step": 1900}, {"loss": 1.0916, "grad_norm": 1.3136482238769531, "learning_rate": 0.0002, "epoch": 6.452702702702703, "step": 1910}, {"loss": 1.0453, "grad_norm": 1.4804624319076538, "learning_rate": 0.0002, "epoch": 6.486486486486487, "step": 1920}, {"loss": 1.1146, "grad_norm": 1.1129399538040161, "learning_rate": 0.0002, "epoch": 6.52027027027027, "step": 1930}, {"loss": 1.0645, "grad_norm": 1.324576497077942, "learning_rate": 0.0002, "epoch": 6.554054054054054, "step": 1940}, {"loss": 1.0534, "grad_norm": 1.3321561813354492, "learning_rate": 0.0002, "epoch": 6.587837837837838, "step": 1950}, {"loss": 1.027, "grad_norm": 1.2377620935440063, "learning_rate": 0.0002, "epoch": 6.621621621621622, "step": 1960}, {"loss": 1.0144, "grad_norm": 1.1174288988113403, "learning_rate": 0.0002, "epoch": 6.655405405405405, "step": 1970}, {"loss": 1.1074, "grad_norm": 1.2291412353515625, "learning_rate": 0.0002, "epoch": 6.6891891891891895, "step": 1980}, {"loss": 1.101, "grad_norm": 1.2079328298568726, "learning_rate": 0.0002, "epoch": 6.722972972972973, "step": 1990}, {"loss": 1.1603, "grad_norm": 1.125183343887329, "learning_rate": 0.0002, "epoch": 6.756756756756757, "step": 2000}, {"loss": 1.008, "grad_norm": 1.1737638711929321, "learning_rate": 0.0002, "epoch": 6.79054054054054, "step": 2010}, {"loss": 1.1211, "grad_norm": 1.3917324542999268, "learning_rate": 0.0002, "epoch": 6.824324324324325, "step": 2020}, {"loss": 1.1436, "grad_norm": 1.1063282489776611, "learning_rate": 0.0002, "epoch": 6.858108108108108, "step": 2030}, {"loss": 1.0888, "grad_norm": 1.2951769828796387, "learning_rate": 0.0002, "epoch": 6.891891891891892, "step": 2040}, {"loss": 1.153, "grad_norm": 1.2272734642028809, "learning_rate": 0.0002, "epoch": 6.925675675675675, "step": 2050}, {"loss": 1.1156, "grad_norm": 1.5298433303833008, "learning_rate": 0.0002, "epoch": 6.95945945945946, "step": 2060}, {"loss": 1.1214, "grad_norm": 1.1478265523910522, "learning_rate": 0.0002, "epoch": 6.993243243243243, "step": 2070}]} +{"epoch": 8.0, "step": 2368, "epoch_duration": 598.151061296463, "total_accumulated_duration": 4836.19810962677, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7568.4541015625}, "peak_memory_usage": {"GPU_0": 13688.75439453125}, "avg_memory_reserved": {"GPU_0": 17416.0}, "peak_memory_reserved": {"GPU_0": 17416.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-4/checkpoint-592", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.6467, "grad_norm": 0.5820087194442749, "learning_rate": 0.0002, "epoch": 0.033783783783783786, "step": 10}, {"loss": 2.2808, "grad_norm": 0.4625075161457062, "learning_rate": 0.0002, "epoch": 0.06756756756756757, "step": 20}, {"loss": 2.0116, "grad_norm": 0.46946242451667786, "learning_rate": 0.0002, "epoch": 0.10135135135135136, "step": 30}, {"loss": 1.9089, "grad_norm": 0.5181305408477783, "learning_rate": 0.0002, "epoch": 0.13513513513513514, "step": 40}, {"loss": 1.9232, "grad_norm": 0.7439630627632141, "learning_rate": 0.0002, "epoch": 0.16891891891891891, "step": 50}, {"loss": 1.9646, "grad_norm": 0.5144319534301758, "learning_rate": 0.0002, "epoch": 0.20270270270270271, "step": 60}, {"loss": 1.921, "grad_norm": 0.46696192026138306, "learning_rate": 0.0002, "epoch": 0.23648648648648649, "step": 70}, {"loss": 1.8794, "grad_norm": 0.4330582022666931, "learning_rate": 0.0002, "epoch": 0.2702702702702703, "step": 80}, {"loss": 1.8897, "grad_norm": 0.502414882183075, "learning_rate": 0.0002, "epoch": 0.30405405405405406, "step": 90}, {"loss": 1.8166, "grad_norm": 0.4174366295337677, "learning_rate": 0.0002, "epoch": 0.33783783783783783, "step": 100}, {"loss": 1.8264, "grad_norm": 0.4296933710575104, "learning_rate": 0.0002, "epoch": 0.3716216216216216, "step": 110}, {"loss": 1.9223, "grad_norm": 0.4299834668636322, "learning_rate": 0.0002, "epoch": 0.40540540540540543, "step": 120}, {"loss": 1.8708, "grad_norm": 0.5583795309066772, "learning_rate": 0.0002, "epoch": 0.4391891891891892, "step": 130}, {"loss": 1.7786, "grad_norm": 0.5205192565917969, "learning_rate": 0.0002, "epoch": 0.47297297297297297, "step": 140}, {"loss": 1.8293, "grad_norm": 0.4683739244937897, "learning_rate": 0.0002, "epoch": 0.5067567567567568, "step": 150}, {"loss": 1.9102, "grad_norm": 0.497546523809433, "learning_rate": 0.0002, "epoch": 0.5405405405405406, "step": 160}, {"loss": 1.8077, "grad_norm": 0.40443721413612366, "learning_rate": 0.0002, "epoch": 0.5743243243243243, "step": 170}, {"loss": 1.8446, "grad_norm": 0.39056605100631714, "learning_rate": 0.0002, "epoch": 0.6081081081081081, "step": 180}, {"loss": 1.8747, "grad_norm": 0.42397141456604004, "learning_rate": 0.0002, "epoch": 0.6418918918918919, "step": 190}, {"loss": 1.8413, "grad_norm": 0.4679499566555023, "learning_rate": 0.0002, "epoch": 0.6756756756756757, "step": 200}, {"loss": 1.8401, "grad_norm": 0.39300158619880676, "learning_rate": 0.0002, "epoch": 0.7094594594594594, "step": 210}, {"loss": 1.8345, "grad_norm": 0.4001515805721283, "learning_rate": 0.0002, "epoch": 0.7432432432432432, "step": 220}, {"loss": 1.7571, "grad_norm": 0.4094320833683014, "learning_rate": 0.0002, "epoch": 0.777027027027027, "step": 230}, {"loss": 1.8972, "grad_norm": 0.37315094470977783, "learning_rate": 0.0002, "epoch": 0.8108108108108109, "step": 240}, {"loss": 1.8337, "grad_norm": 0.4331067204475403, "learning_rate": 0.0002, "epoch": 0.8445945945945946, "step": 250}, {"loss": 1.8555, "grad_norm": 0.39758574962615967, "learning_rate": 0.0002, "epoch": 0.8783783783783784, "step": 260}, {"loss": 1.8624, "grad_norm": 0.38240519165992737, "learning_rate": 0.0002, "epoch": 0.9121621621621622, "step": 270}, {"loss": 1.7531, "grad_norm": 0.40907856822013855, "learning_rate": 0.0002, "epoch": 0.9459459459459459, "step": 280}, {"loss": 1.8435, "grad_norm": 0.34108003973960876, "learning_rate": 0.0002, "epoch": 0.9797297297297297, "step": 290}, {"eval_loss": 1.8428829908370972, "eval_runtime": 62.3963, "eval_samples_per_second": 8.254, "eval_steps_per_second": 1.042, "epoch": 1.0, "step": 296}, {"loss": 1.7254, "grad_norm": 0.3993101716041565, "learning_rate": 0.0002, "epoch": 1.0135135135135136, "step": 300}, {"loss": 1.7985, "grad_norm": 0.45567989349365234, "learning_rate": 0.0002, "epoch": 1.0472972972972974, "step": 310}, {"loss": 1.757, "grad_norm": 0.3767794072628021, "learning_rate": 0.0002, "epoch": 1.0810810810810811, "step": 320}, {"loss": 1.7917, "grad_norm": 0.5181908011436462, "learning_rate": 0.0002, "epoch": 1.114864864864865, "step": 330}, {"loss": 1.7723, "grad_norm": 0.4213193356990814, "learning_rate": 0.0002, "epoch": 1.1486486486486487, "step": 340}, {"loss": 1.8203, "grad_norm": 0.45519495010375977, "learning_rate": 0.0002, "epoch": 1.1824324324324325, "step": 350}, {"loss": 1.6946, "grad_norm": 0.35332638025283813, "learning_rate": 0.0002, "epoch": 1.2162162162162162, "step": 360}, {"loss": 1.7541, "grad_norm": 0.3675481677055359, "learning_rate": 0.0002, "epoch": 1.25, "step": 370}, {"loss": 1.7458, "grad_norm": 0.4569270610809326, "learning_rate": 0.0002, "epoch": 1.2837837837837838, "step": 380}, {"loss": 1.7988, "grad_norm": 0.37950295209884644, "learning_rate": 0.0002, "epoch": 1.3175675675675675, "step": 390}, {"loss": 1.7032, "grad_norm": 0.5744572877883911, "learning_rate": 0.0002, "epoch": 1.3513513513513513, "step": 400}, {"loss": 1.7437, "grad_norm": 0.44380778074264526, "learning_rate": 0.0002, "epoch": 1.385135135135135, "step": 410}, {"loss": 1.7454, "grad_norm": 0.43328171968460083, "learning_rate": 0.0002, "epoch": 1.4189189189189189, "step": 420}, {"loss": 1.7636, "grad_norm": 0.41290056705474854, "learning_rate": 0.0002, "epoch": 1.4527027027027026, "step": 430}, {"loss": 1.7332, "grad_norm": 0.3771473169326782, "learning_rate": 0.0002, "epoch": 1.4864864864864864, "step": 440}, {"loss": 1.7618, "grad_norm": 0.42537811398506165, "learning_rate": 0.0002, "epoch": 1.5202702702702702, "step": 450}, {"loss": 1.8523, "grad_norm": 0.39705610275268555, "learning_rate": 0.0002, "epoch": 1.554054054054054, "step": 460}, {"loss": 1.7673, "grad_norm": 0.4178248643875122, "learning_rate": 0.0002, "epoch": 1.5878378378378377, "step": 470}, {"loss": 1.742, "grad_norm": 0.39107105135917664, "learning_rate": 0.0002, "epoch": 1.6216216216216215, "step": 480}, {"loss": 1.6984, "grad_norm": 0.38505619764328003, "learning_rate": 0.0002, "epoch": 1.6554054054054053, "step": 490}, {"loss": 1.7382, "grad_norm": 0.43590813875198364, "learning_rate": 0.0002, "epoch": 1.689189189189189, "step": 500}, {"loss": 1.7139, "grad_norm": 0.42785948514938354, "learning_rate": 0.0002, "epoch": 1.722972972972973, "step": 510}, {"loss": 1.7551, "grad_norm": 0.3829004168510437, "learning_rate": 0.0002, "epoch": 1.7567567567567568, "step": 520}, {"loss": 1.7744, "grad_norm": 0.35287904739379883, "learning_rate": 0.0002, "epoch": 1.7905405405405406, "step": 530}, {"loss": 1.7714, "grad_norm": 0.38657888770103455, "learning_rate": 0.0002, "epoch": 1.8243243243243243, "step": 540}, {"loss": 1.7535, "grad_norm": 0.41452157497406006, "learning_rate": 0.0002, "epoch": 1.8581081081081081, "step": 550}, {"loss": 1.7627, "grad_norm": 0.3898279070854187, "learning_rate": 0.0002, "epoch": 1.8918918918918919, "step": 560}, {"loss": 1.7494, "grad_norm": 0.4147624373435974, "learning_rate": 0.0002, "epoch": 1.9256756756756757, "step": 570}, {"loss": 1.7693, "grad_norm": 0.4374851584434509, "learning_rate": 0.0002, "epoch": 1.9594594594594594, "step": 580}, {"loss": 1.7796, "grad_norm": 0.48530328273773193, "learning_rate": 0.0002, "epoch": 1.9932432432432432, "step": 590}, {"eval_loss": 1.83539617061615, "eval_runtime": 70.9865, "eval_samples_per_second": 7.255, "eval_steps_per_second": 0.916, "epoch": 2.0, "step": 592}, {"loss": 1.7294, "grad_norm": 0.40344223380088806, "learning_rate": 0.0002, "epoch": 2.027027027027027, "step": 600}, {"loss": 1.6693, "grad_norm": 0.48268747329711914, "learning_rate": 0.0002, "epoch": 2.060810810810811, "step": 610}, {"loss": 1.6315, "grad_norm": 0.4675706923007965, "learning_rate": 0.0002, "epoch": 2.0945945945945947, "step": 620}, {"loss": 1.6627, "grad_norm": 0.47494322061538696, "learning_rate": 0.0002, "epoch": 2.1283783783783785, "step": 630}, {"loss": 1.5668, "grad_norm": 0.4555308520793915, "learning_rate": 0.0002, "epoch": 2.1621621621621623, "step": 640}, {"loss": 1.6537, "grad_norm": 0.43085595965385437, "learning_rate": 0.0002, "epoch": 2.195945945945946, "step": 650}, {"loss": 1.6316, "grad_norm": 0.4364128112792969, "learning_rate": 0.0002, "epoch": 2.22972972972973, "step": 660}, {"loss": 1.669, "grad_norm": 0.4711395800113678, "learning_rate": 0.0002, "epoch": 2.2635135135135136, "step": 670}, {"loss": 1.5758, "grad_norm": 0.5109705328941345, "learning_rate": 0.0002, "epoch": 2.2972972972972974, "step": 680}, {"loss": 1.5912, "grad_norm": 0.5185648798942566, "learning_rate": 0.0002, "epoch": 2.331081081081081, "step": 690}, {"loss": 1.6605, "grad_norm": 0.49192842841148376, "learning_rate": 0.0002, "epoch": 2.364864864864865, "step": 700}, {"loss": 1.6688, "grad_norm": 0.5619909763336182, "learning_rate": 0.0002, "epoch": 2.3986486486486487, "step": 710}, {"loss": 1.7836, "grad_norm": 0.4932861328125, "learning_rate": 0.0002, "epoch": 2.4324324324324325, "step": 720}, {"loss": 1.6532, "grad_norm": 0.5211932063102722, "learning_rate": 0.0002, "epoch": 2.4662162162162162, "step": 730}, {"loss": 1.667, "grad_norm": 0.4138050377368927, "learning_rate": 0.0002, "epoch": 2.5, "step": 740}, {"loss": 1.658, "grad_norm": 0.4644908010959625, "learning_rate": 0.0002, "epoch": 2.5337837837837838, "step": 750}, {"loss": 1.6451, "grad_norm": 0.4513227641582489, "learning_rate": 0.0002, "epoch": 2.5675675675675675, "step": 760}, {"loss": 1.7071, "grad_norm": 0.4735109508037567, "learning_rate": 0.0002, "epoch": 2.6013513513513513, "step": 770}, {"loss": 1.6659, "grad_norm": 0.5453559756278992, "learning_rate": 0.0002, "epoch": 2.635135135135135, "step": 780}, {"loss": 1.7211, "grad_norm": 0.5422565937042236, "learning_rate": 0.0002, "epoch": 2.668918918918919, "step": 790}, {"loss": 1.6623, "grad_norm": 0.4288518726825714, "learning_rate": 0.0002, "epoch": 2.7027027027027026, "step": 800}, {"loss": 1.7197, "grad_norm": 0.4085204005241394, "learning_rate": 0.0002, "epoch": 2.7364864864864864, "step": 810}, {"loss": 1.6376, "grad_norm": 0.49770182371139526, "learning_rate": 0.0002, "epoch": 2.77027027027027, "step": 820}, {"loss": 1.6332, "grad_norm": 0.5005106329917908, "learning_rate": 0.0002, "epoch": 2.804054054054054, "step": 830}, {"loss": 1.6675, "grad_norm": 0.4763440489768982, "learning_rate": 0.0002, "epoch": 2.8378378378378377, "step": 840}, {"loss": 1.7149, "grad_norm": 0.44995108246803284, "learning_rate": 0.0002, "epoch": 2.8716216216216215, "step": 850}, {"loss": 1.6438, "grad_norm": 0.5299676656723022, "learning_rate": 0.0002, "epoch": 2.9054054054054053, "step": 860}, {"loss": 1.6457, "grad_norm": 0.49627119302749634, "learning_rate": 0.0002, "epoch": 2.939189189189189, "step": 870}, {"loss": 1.6517, "grad_norm": 0.502545177936554, "learning_rate": 0.0002, "epoch": 2.972972972972973, "step": 880}, {"eval_loss": 1.8520468473434448, "eval_runtime": 70.9917, "eval_samples_per_second": 7.254, "eval_steps_per_second": 0.916, "epoch": 3.0, "step": 888}, {"loss": 1.6271, "grad_norm": 0.4756380319595337, "learning_rate": 0.0002, "epoch": 3.0067567567567566, "step": 890}, {"loss": 1.563, "grad_norm": 0.5167421102523804, "learning_rate": 0.0002, "epoch": 3.0405405405405403, "step": 900}, {"loss": 1.48, "grad_norm": 0.5524939298629761, "learning_rate": 0.0002, "epoch": 3.074324324324324, "step": 910}, {"loss": 1.5297, "grad_norm": 0.7045221924781799, "learning_rate": 0.0002, "epoch": 3.108108108108108, "step": 920}, {"loss": 1.5548, "grad_norm": 0.5692355036735535, "learning_rate": 0.0002, "epoch": 3.141891891891892, "step": 930}, {"loss": 1.5297, "grad_norm": 0.5467017292976379, "learning_rate": 0.0002, "epoch": 3.175675675675676, "step": 940}, {"loss": 1.5559, "grad_norm": 0.6004040241241455, "learning_rate": 0.0002, "epoch": 3.2094594594594597, "step": 950}, {"loss": 1.5255, "grad_norm": 0.5713295936584473, "learning_rate": 0.0002, "epoch": 3.2432432432432434, "step": 960}, {"loss": 1.5412, "grad_norm": 0.6054869890213013, "learning_rate": 0.0002, "epoch": 3.277027027027027, "step": 970}, {"loss": 1.5167, "grad_norm": 0.6304576992988586, "learning_rate": 0.0002, "epoch": 3.310810810810811, "step": 980}, {"loss": 1.52, "grad_norm": 0.5347281694412231, "learning_rate": 0.0002, "epoch": 3.3445945945945947, "step": 990}, {"loss": 1.5707, "grad_norm": 0.598211407661438, "learning_rate": 0.0002, "epoch": 3.3783783783783785, "step": 1000}, {"loss": 1.5243, "grad_norm": 0.637312650680542, "learning_rate": 0.0002, "epoch": 3.4121621621621623, "step": 1010}, {"loss": 1.5356, "grad_norm": 0.6092430949211121, "learning_rate": 0.0002, "epoch": 3.445945945945946, "step": 1020}, {"loss": 1.5856, "grad_norm": 0.6421037912368774, "learning_rate": 0.0002, "epoch": 3.47972972972973, "step": 1030}, {"loss": 1.5553, "grad_norm": 0.6712167263031006, "learning_rate": 0.0002, "epoch": 3.5135135135135136, "step": 1040}, {"loss": 1.4708, "grad_norm": 0.6466017365455627, "learning_rate": 0.0002, "epoch": 3.5472972972972974, "step": 1050}, {"loss": 1.5159, "grad_norm": 1.418167233467102, "learning_rate": 0.0002, "epoch": 3.581081081081081, "step": 1060}, {"loss": 1.5264, "grad_norm": 0.6092377305030823, "learning_rate": 0.0002, "epoch": 3.614864864864865, "step": 1070}, {"loss": 1.5227, "grad_norm": 0.5632478594779968, "learning_rate": 0.0002, "epoch": 3.6486486486486487, "step": 1080}, {"loss": 1.5492, "grad_norm": 0.6007736921310425, "learning_rate": 0.0002, "epoch": 3.6824324324324325, "step": 1090}, {"loss": 1.5002, "grad_norm": 0.6031264066696167, "learning_rate": 0.0002, "epoch": 3.7162162162162162, "step": 1100}, {"loss": 1.4797, "grad_norm": 0.5440598726272583, "learning_rate": 0.0002, "epoch": 3.75, "step": 1110}, {"loss": 1.5743, "grad_norm": 0.6304370760917664, "learning_rate": 0.0002, "epoch": 3.7837837837837838, "step": 1120}, {"loss": 1.6429, "grad_norm": 0.6729280948638916, "learning_rate": 0.0002, "epoch": 3.8175675675675675, "step": 1130}, {"loss": 1.594, "grad_norm": 0.5881901979446411, "learning_rate": 0.0002, "epoch": 3.8513513513513513, "step": 1140}, {"loss": 1.5008, "grad_norm": 0.5508038997650146, "learning_rate": 0.0002, "epoch": 3.885135135135135, "step": 1150}, {"loss": 1.5045, "grad_norm": 0.5926295518875122, "learning_rate": 0.0002, "epoch": 3.918918918918919, "step": 1160}, {"loss": 1.5223, "grad_norm": 0.5882043838500977, "learning_rate": 0.0002, "epoch": 3.9527027027027026, "step": 1170}, {"loss": 1.5874, "grad_norm": 0.604119598865509, "learning_rate": 0.0002, "epoch": 3.9864864864864864, "step": 1180}, {"eval_loss": 1.8923152685165405, "eval_runtime": 62.9577, "eval_samples_per_second": 8.18, "eval_steps_per_second": 1.032, "epoch": 4.0, "step": 1184}, {"loss": 1.4183, "grad_norm": 0.696061909198761, "learning_rate": 0.0002, "epoch": 4.02027027027027, "step": 1190}, {"loss": 1.3455, "grad_norm": 0.751200258731842, "learning_rate": 0.0002, "epoch": 4.054054054054054, "step": 1200}, {"loss": 1.35, "grad_norm": 0.9667422771453857, "learning_rate": 0.0002, "epoch": 4.087837837837838, "step": 1210}, {"loss": 1.4058, "grad_norm": 0.7374204397201538, "learning_rate": 0.0002, "epoch": 4.121621621621622, "step": 1220}, {"loss": 1.4454, "grad_norm": 0.8050723075866699, "learning_rate": 0.0002, "epoch": 4.155405405405405, "step": 1230}, {"loss": 1.4129, "grad_norm": 0.7360416054725647, "learning_rate": 0.0002, "epoch": 4.1891891891891895, "step": 1240}, {"loss": 1.3899, "grad_norm": 0.7947028279304504, "learning_rate": 0.0002, "epoch": 4.222972972972973, "step": 1250}, {"loss": 1.4264, "grad_norm": 0.7336545586585999, "learning_rate": 0.0002, "epoch": 4.256756756756757, "step": 1260}, {"loss": 1.4047, "grad_norm": 0.7051223516464233, "learning_rate": 0.0002, "epoch": 4.29054054054054, "step": 1270}, {"loss": 1.3507, "grad_norm": 0.7939404845237732, "learning_rate": 0.0002, "epoch": 4.324324324324325, "step": 1280}, {"loss": 1.387, "grad_norm": 0.7818657755851746, "learning_rate": 0.0002, "epoch": 4.358108108108108, "step": 1290}, {"loss": 1.3533, "grad_norm": 0.7490634918212891, "learning_rate": 0.0002, "epoch": 4.391891891891892, "step": 1300}, {"loss": 1.3912, "grad_norm": 0.9319770932197571, "learning_rate": 0.0002, "epoch": 4.425675675675675, "step": 1310}, {"loss": 1.439, "grad_norm": 0.7811282873153687, "learning_rate": 0.0002, "epoch": 4.45945945945946, "step": 1320}, {"loss": 1.3973, "grad_norm": 0.7785378694534302, "learning_rate": 0.0002, "epoch": 4.493243243243243, "step": 1330}, {"loss": 1.3931, "grad_norm": 0.8697562217712402, "learning_rate": 0.0002, "epoch": 4.527027027027027, "step": 1340}, {"loss": 1.3846, "grad_norm": 0.7927497625350952, "learning_rate": 0.0002, "epoch": 4.5608108108108105, "step": 1350}, {"loss": 1.3188, "grad_norm": 0.9746347665786743, "learning_rate": 0.0002, "epoch": 4.594594594594595, "step": 1360}, {"loss": 1.4611, "grad_norm": 0.7353375554084778, "learning_rate": 0.0002, "epoch": 4.628378378378378, "step": 1370}, {"loss": 1.4067, "grad_norm": 0.8139469027519226, "learning_rate": 0.0002, "epoch": 4.662162162162162, "step": 1380}, {"loss": 1.3727, "grad_norm": 1.728020429611206, "learning_rate": 0.0002, "epoch": 4.695945945945946, "step": 1390}, {"loss": 1.3971, "grad_norm": 0.8249040246009827, "learning_rate": 0.0002, "epoch": 4.72972972972973, "step": 1400}, {"loss": 1.4238, "grad_norm": 0.7916110157966614, "learning_rate": 0.0002, "epoch": 4.763513513513513, "step": 1410}, {"loss": 1.4064, "grad_norm": 0.7286198735237122, "learning_rate": 0.0002, "epoch": 4.797297297297297, "step": 1420}, {"loss": 1.305, "grad_norm": 0.7969672083854675, "learning_rate": 0.0002, "epoch": 4.831081081081081, "step": 1430}, {"loss": 1.4109, "grad_norm": 0.9593119621276855, "learning_rate": 0.0002, "epoch": 4.864864864864865, "step": 1440}, {"loss": 1.4112, "grad_norm": 0.8609084486961365, "learning_rate": 0.0002, "epoch": 4.898648648648649, "step": 1450}, {"loss": 1.3126, "grad_norm": 0.705203115940094, "learning_rate": 0.0002, "epoch": 4.9324324324324325, "step": 1460}, {"loss": 1.4226, "grad_norm": 0.9503173232078552, "learning_rate": 0.0002, "epoch": 4.966216216216216, "step": 1470}, {"loss": 1.4457, "grad_norm": 0.7174800038337708, "learning_rate": 0.0002, "epoch": 5.0, "step": 1480}, {"eval_loss": 1.9753261804580688, "eval_runtime": 70.4622, "eval_samples_per_second": 7.309, "eval_steps_per_second": 0.922, "epoch": 5.0, "step": 1480}, {"loss": 1.2986, "grad_norm": 1.450723648071289, "learning_rate": 0.0002, "epoch": 5.033783783783784, "step": 1490}, {"loss": 1.2184, "grad_norm": 0.9207791686058044, "learning_rate": 0.0002, "epoch": 5.0675675675675675, "step": 1500}, {"loss": 1.1628, "grad_norm": 1.0742532014846802, "learning_rate": 0.0002, "epoch": 5.101351351351352, "step": 1510}, {"loss": 1.2221, "grad_norm": 1.1070902347564697, "learning_rate": 0.0002, "epoch": 5.135135135135135, "step": 1520}, {"loss": 1.1737, "grad_norm": 0.9838612079620361, "learning_rate": 0.0002, "epoch": 5.168918918918919, "step": 1530}, {"loss": 1.2095, "grad_norm": 0.9286013245582581, "learning_rate": 0.0002, "epoch": 5.202702702702703, "step": 1540}, {"loss": 1.2243, "grad_norm": 0.9755229949951172, "learning_rate": 0.0002, "epoch": 5.236486486486487, "step": 1550}, {"loss": 1.1729, "grad_norm": 0.9734522104263306, "learning_rate": 0.0002, "epoch": 5.27027027027027, "step": 1560}, {"loss": 1.1528, "grad_norm": 1.1838241815567017, "learning_rate": 0.0002, "epoch": 5.304054054054054, "step": 1570}, {"loss": 1.2135, "grad_norm": 1.1389052867889404, "learning_rate": 0.0002, "epoch": 5.337837837837838, "step": 1580}, {"loss": 1.2486, "grad_norm": 1.2093408107757568, "learning_rate": 0.0002, "epoch": 5.371621621621622, "step": 1590}, {"loss": 1.2017, "grad_norm": 0.9418244361877441, "learning_rate": 0.0002, "epoch": 5.405405405405405, "step": 1600}, {"loss": 1.2554, "grad_norm": 0.9843172430992126, "learning_rate": 0.0002, "epoch": 5.4391891891891895, "step": 1610}, {"loss": 1.2967, "grad_norm": 1.0316557884216309, "learning_rate": 0.0002, "epoch": 5.472972972972973, "step": 1620}, {"loss": 1.2509, "grad_norm": 1.0008920431137085, "learning_rate": 0.0002, "epoch": 5.506756756756757, "step": 1630}, {"loss": 1.2908, "grad_norm": 1.1854851245880127, "learning_rate": 0.0002, "epoch": 5.54054054054054, "step": 1640}, {"loss": 1.2679, "grad_norm": 0.9324101209640503, "learning_rate": 0.0002, "epoch": 5.574324324324325, "step": 1650}, {"loss": 1.202, "grad_norm": 0.993882954120636, "learning_rate": 0.0002, "epoch": 5.608108108108108, "step": 1660}, {"loss": 1.2498, "grad_norm": 0.8795919418334961, "learning_rate": 0.0002, "epoch": 5.641891891891892, "step": 1670}, {"loss": 1.2244, "grad_norm": 1.203471064567566, "learning_rate": 0.0002, "epoch": 5.675675675675675, "step": 1680}, {"loss": 1.2452, "grad_norm": 0.916689932346344, "learning_rate": 0.0002, "epoch": 5.70945945945946, "step": 1690}, {"loss": 1.1774, "grad_norm": 0.8567600846290588, "learning_rate": 0.0002, "epoch": 5.743243243243243, "step": 1700}, {"loss": 1.2585, "grad_norm": 0.9426271319389343, "learning_rate": 0.0002, "epoch": 5.777027027027027, "step": 1710}, {"loss": 1.239, "grad_norm": 1.0812019109725952, "learning_rate": 0.0002, "epoch": 5.8108108108108105, "step": 1720}, {"loss": 1.1527, "grad_norm": 1.0045292377471924, "learning_rate": 0.0002, "epoch": 5.844594594594595, "step": 1730}, {"loss": 1.2949, "grad_norm": 1.0750256776809692, "learning_rate": 0.0002, "epoch": 5.878378378378378, "step": 1740}, {"loss": 1.3052, "grad_norm": 1.0471885204315186, "learning_rate": 0.0002, "epoch": 5.912162162162162, "step": 1750}, {"loss": 1.2318, "grad_norm": 0.9119327664375305, "learning_rate": 0.0002, "epoch": 5.945945945945946, "step": 1760}, {"loss": 1.2652, "grad_norm": 1.0975338220596313, "learning_rate": 0.0002, "epoch": 5.97972972972973, "step": 1770}, {"eval_loss": 2.104356527328491, "eval_runtime": 71.0586, "eval_samples_per_second": 7.248, "eval_steps_per_second": 0.915, "epoch": 6.0, "step": 1776}, {"loss": 1.1342, "grad_norm": 0.9404756426811218, "learning_rate": 0.0002, "epoch": 6.013513513513513, "step": 1780}, {"loss": 0.9892, "grad_norm": 1.3757696151733398, "learning_rate": 0.0002, "epoch": 6.047297297297297, "step": 1790}, {"loss": 1.0826, "grad_norm": 1.5798641443252563, "learning_rate": 0.0002, "epoch": 6.081081081081081, "step": 1800}, {"loss": 0.9929, "grad_norm": 1.3777581453323364, "learning_rate": 0.0002, "epoch": 6.114864864864865, "step": 1810}, {"loss": 1.0257, "grad_norm": 1.136362910270691, "learning_rate": 0.0002, "epoch": 6.148648648648648, "step": 1820}, {"loss": 1.0623, "grad_norm": 1.3719290494918823, "learning_rate": 0.0002, "epoch": 6.1824324324324325, "step": 1830}, {"loss": 1.0144, "grad_norm": 1.375697374343872, "learning_rate": 0.0002, "epoch": 6.216216216216216, "step": 1840}, {"loss": 1.0307, "grad_norm": 1.3208998441696167, "learning_rate": 0.0002, "epoch": 6.25, "step": 1850}, {"loss": 1.0298, "grad_norm": 1.3176994323730469, "learning_rate": 0.0002, "epoch": 6.283783783783784, "step": 1860}, {"loss": 1.0809, "grad_norm": 1.3333075046539307, "learning_rate": 0.0002, "epoch": 6.3175675675675675, "step": 1870}, {"loss": 1.1059, "grad_norm": 1.6315182447433472, "learning_rate": 0.0002, "epoch": 6.351351351351352, "step": 1880}, {"loss": 1.0139, "grad_norm": 1.1802350282669067, "learning_rate": 0.0002, "epoch": 6.385135135135135, "step": 1890}, {"loss": 1.07, "grad_norm": 1.0628817081451416, "learning_rate": 0.0002, "epoch": 6.418918918918919, "step": 1900}, {"loss": 1.0916, "grad_norm": 1.3136482238769531, "learning_rate": 0.0002, "epoch": 6.452702702702703, "step": 1910}, {"loss": 1.0453, "grad_norm": 1.4804624319076538, "learning_rate": 0.0002, "epoch": 6.486486486486487, "step": 1920}, {"loss": 1.1146, "grad_norm": 1.1129399538040161, "learning_rate": 0.0002, "epoch": 6.52027027027027, "step": 1930}, {"loss": 1.0645, "grad_norm": 1.324576497077942, "learning_rate": 0.0002, "epoch": 6.554054054054054, "step": 1940}, {"loss": 1.0534, "grad_norm": 1.3321561813354492, "learning_rate": 0.0002, "epoch": 6.587837837837838, "step": 1950}, {"loss": 1.027, "grad_norm": 1.2377620935440063, "learning_rate": 0.0002, "epoch": 6.621621621621622, "step": 1960}, {"loss": 1.0144, "grad_norm": 1.1174288988113403, "learning_rate": 0.0002, "epoch": 6.655405405405405, "step": 1970}, {"loss": 1.1074, "grad_norm": 1.2291412353515625, "learning_rate": 0.0002, "epoch": 6.6891891891891895, "step": 1980}, {"loss": 1.101, "grad_norm": 1.2079328298568726, "learning_rate": 0.0002, "epoch": 6.722972972972973, "step": 1990}, {"loss": 1.1603, "grad_norm": 1.125183343887329, "learning_rate": 0.0002, "epoch": 6.756756756756757, "step": 2000}, {"loss": 1.008, "grad_norm": 1.1737638711929321, "learning_rate": 0.0002, "epoch": 6.79054054054054, "step": 2010}, {"loss": 1.1211, "grad_norm": 1.3917324542999268, "learning_rate": 0.0002, "epoch": 6.824324324324325, "step": 2020}, {"loss": 1.1436, "grad_norm": 1.1063282489776611, "learning_rate": 0.0002, "epoch": 6.858108108108108, "step": 2030}, {"loss": 1.0888, "grad_norm": 1.2951769828796387, "learning_rate": 0.0002, "epoch": 6.891891891891892, "step": 2040}, {"loss": 1.153, "grad_norm": 1.2272734642028809, "learning_rate": 0.0002, "epoch": 6.925675675675675, "step": 2050}, {"loss": 1.1156, "grad_norm": 1.5298433303833008, "learning_rate": 0.0002, "epoch": 6.95945945945946, "step": 2060}, {"loss": 1.1214, "grad_norm": 1.1478265523910522, "learning_rate": 0.0002, "epoch": 6.993243243243243, "step": 2070}, {"eval_loss": 2.238886594772339, "eval_runtime": 62.8821, "eval_samples_per_second": 8.19, "eval_steps_per_second": 1.034, "epoch": 7.0, "step": 2072}, {"loss": 0.9044, "grad_norm": 1.6612180471420288, "learning_rate": 0.0002, "epoch": 7.027027027027027, "step": 2080}, {"loss": 0.932, "grad_norm": 1.266597867012024, "learning_rate": 0.0002, "epoch": 7.0608108108108105, "step": 2090}, {"loss": 0.9108, "grad_norm": 1.4388158321380615, "learning_rate": 0.0002, "epoch": 7.094594594594595, "step": 2100}, {"loss": 0.8743, "grad_norm": 1.5639206171035767, "learning_rate": 0.0002, "epoch": 7.128378378378378, "step": 2110}, {"loss": 0.8907, "grad_norm": 1.4063223600387573, "learning_rate": 0.0002, "epoch": 7.162162162162162, "step": 2120}, {"loss": 0.9383, "grad_norm": 1.7724202871322632, "learning_rate": 0.0002, "epoch": 7.195945945945946, "step": 2130}, {"loss": 0.944, "grad_norm": 1.628645658493042, "learning_rate": 0.0002, "epoch": 7.22972972972973, "step": 2140}, {"loss": 0.9197, "grad_norm": 1.5467971563339233, "learning_rate": 0.0002, "epoch": 7.263513513513513, "step": 2150}, {"loss": 0.8928, "grad_norm": 1.3064892292022705, "learning_rate": 0.0002, "epoch": 7.297297297297297, "step": 2160}, {"loss": 0.8599, "grad_norm": 1.1528593301773071, "learning_rate": 0.0002, "epoch": 7.331081081081081, "step": 2170}, {"loss": 0.8757, "grad_norm": 1.82744562625885, "learning_rate": 0.0002, "epoch": 7.364864864864865, "step": 2180}, {"loss": 0.8856, "grad_norm": 1.581808090209961, "learning_rate": 0.0002, "epoch": 7.398648648648648, "step": 2190}, {"loss": 0.9736, "grad_norm": 1.7797787189483643, "learning_rate": 0.0002, "epoch": 7.4324324324324325, "step": 2200}, {"loss": 0.8861, "grad_norm": 2.161501169204712, "learning_rate": 0.0002, "epoch": 7.466216216216216, "step": 2210}, {"loss": 0.8976, "grad_norm": 1.4904208183288574, "learning_rate": 0.0002, "epoch": 7.5, "step": 2220}, {"loss": 0.9143, "grad_norm": 1.76048743724823, "learning_rate": 0.0002, "epoch": 7.533783783783784, "step": 2230}, {"loss": 0.9403, "grad_norm": 1.39728844165802, "learning_rate": 0.0002, "epoch": 7.5675675675675675, "step": 2240}, {"loss": 0.9236, "grad_norm": 1.4059574604034424, "learning_rate": 0.0002, "epoch": 7.601351351351351, "step": 2250}, {"loss": 0.9192, "grad_norm": 1.5134271383285522, "learning_rate": 0.0002, "epoch": 7.635135135135135, "step": 2260}, {"loss": 0.9459, "grad_norm": 1.384108066558838, "learning_rate": 0.0002, "epoch": 7.668918918918919, "step": 2270}, {"loss": 0.9808, "grad_norm": 1.4390848875045776, "learning_rate": 0.0002, "epoch": 7.702702702702703, "step": 2280}, {"loss": 0.8559, "grad_norm": 1.6258286237716675, "learning_rate": 0.0002, "epoch": 7.736486486486487, "step": 2290}, {"loss": 0.966, "grad_norm": 1.5682430267333984, "learning_rate": 0.0002, "epoch": 7.77027027027027, "step": 2300}, {"loss": 0.8859, "grad_norm": 1.3329198360443115, "learning_rate": 0.0002, "epoch": 7.804054054054054, "step": 2310}, {"loss": 0.8762, "grad_norm": 1.3879269361495972, "learning_rate": 0.0002, "epoch": 7.837837837837838, "step": 2320}, {"loss": 0.9348, "grad_norm": 1.6853514909744263, "learning_rate": 0.0002, "epoch": 7.871621621621622, "step": 2330}, {"loss": 0.9542, "grad_norm": 1.5088176727294922, "learning_rate": 0.0002, "epoch": 7.905405405405405, "step": 2340}, {"loss": 0.8656, "grad_norm": 1.4418280124664307, "learning_rate": 0.0002, "epoch": 7.9391891891891895, "step": 2350}, {"loss": 0.9609, "grad_norm": 1.4557723999023438, "learning_rate": 0.0002, "epoch": 7.972972972972973, "step": 2360}]}