diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8e85dbd03a2c0c2046d445893c654cf9f84d077 --- /dev/null +++ b/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c69809f0040c8f92d4a238f6493d26dccf499247ceda24ca5edf1163b49e962e +size 85100592 diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0aa1918d272179690c64c47582a456cc64e74a28 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f73148243e73d765345c5789209c42faa666c876b06a6ceb5d4442ec1d88a3b +size 43126684 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..72e984eb6de313ff39290030e36a714f607ee783 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c01ab4c0f45976dd0b37a94c24d44ab3264195b7231e616864a83fc30f1669a +size 14244 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8bf030fcd0d8b1fbecf02fa180b2dd61513b0226 
--- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f46dc04db0a603406c597c113e229228b08858bb09b49bfebd3512f1a8f3306 +size 1064 diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d0316b27771935e6157db8820ce20949b54434ed --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,48 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.15673981191222572, + "eval_steps": 50, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.75274075357184e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-1000/README.md b/checkpoint-1000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-1000/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
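+A minimal loading sketch, not an official recipe from this repo: it mirrors the 4-bit NF4 quantization settings listed under "Training procedure" below and attaches this checkpoint's LoRA adapter to the `mistralai/Mistral-7B-v0.1` base with PEFT. The adapter path `checkpoint-1000` is this checkpoint's directory; adjust it to wherever the files live locally.
+
+```python
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from peft import PeftModel
+
+# Mirror the bitsandbytes quantization config recorded in this card
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_compute_dtype=torch.bfloat16,
+)
+
+base = AutoModelForCausalLM.from_pretrained(
+    "mistralai/Mistral-7B-v0.1",
+    quantization_config=bnb_config,
+    device_map="auto",
+)
+tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
+
+# Load the LoRA adapter weights from this checkpoint directory
+model = PeftModel.from_pretrained(base, "checkpoint-1000")
+model.eval()
+
+inputs = tokenizer("Hello, world:", return_tensors="pt").to(model.device)
+print(tokenizer.decode(model.generate(**inputs, max_new_tokens=20)[0]))
+```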
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-1000/adapter_config.json b/checkpoint-1000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-1000/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1000/adapter_model.safetensors b/checkpoint-1000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74c4792d5367ccb23d618e969819e21745b3d197 --- /dev/null +++ b/checkpoint-1000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99bd78db9d9a54986e2c11ffced397ff7188be95a72fb1d58e4dbfc9a5b10756 +size 85100592 diff 
--git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc6e70a336c2de2f981ae84138562ce6218aad73 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da86fb12bf85497d7d598e5053e8ac13fce7c88d2a2b25f9c6b8c2d69ef6e926 +size 43127132 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c09fe3b6ceaf654555dc8d39e5e3fad61f6c13fd --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1575b6cd4b082a5f2959edf357f5bf17e65f7756a963eead9feaa93dfcf50805 +size 14244 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb079ac777435f69b38299460f32248e366aa526 --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4d6d865d6518a82dd54bb09f8f02628ebe31ca8be097a65ef5c8faff7622969 +size 1064 diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..839095abcd126112a13ca66cb83edcb93f759fce --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,300 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.567398119122257, + "eval_steps": 50, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 0.17311859130859375, + "eval_runtime": 136.3058, + "eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.169, + "step": 300 + }, + { + "epoch": 0.47, + "eval_loss": 0.16897280514240265, + "eval_runtime": 136.923, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 300 + }, + { + "epoch": 0.55, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.166, + "step": 350 + }, + { + "epoch": 0.55, + "eval_loss": 0.1663457602262497, + "eval_runtime": 136.6033, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 
0.659, + "step": 350 + }, + { + "epoch": 0.63, + "learning_rate": 1.507537688442211e-05, + "loss": 0.1682, + "step": 400 + }, + { + "epoch": 0.63, + "eval_loss": 0.16482460498809814, + "eval_runtime": 136.5801, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 400 + }, + { + "epoch": 0.71, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.1576, + "step": 450 + }, + { + "epoch": 0.71, + "eval_loss": 0.16245244443416595, + "eval_runtime": 136.7662, + "eval_samples_per_second": 5.221, + "eval_steps_per_second": 0.658, + "step": 450 + }, + { + "epoch": 0.78, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.165, + "step": 500 + }, + { + "epoch": 0.78, + "eval_loss": 0.16068558394908905, + "eval_runtime": 136.6019, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 500 + }, + { + "epoch": 0.86, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.152, + "step": 550 + }, + { + "epoch": 0.86, + "eval_loss": 0.15984833240509033, + "eval_runtime": 136.8975, + "eval_samples_per_second": 5.216, + "eval_steps_per_second": 0.657, + "step": 550 + }, + { + "epoch": 0.94, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.1563, + "step": 600 + }, + { + "epoch": 0.94, + "eval_loss": 0.15865428745746613, + "eval_runtime": 136.9521, + "eval_samples_per_second": 5.214, + "eval_steps_per_second": 0.657, + "step": 600 + }, + { + "epoch": 1.02, + "learning_rate": 8.793969849246232e-06, + "loss": 0.1477, + "step": 650 + }, + { + "epoch": 1.02, + "eval_loss": 0.1577940434217453, + "eval_runtime": 136.5669, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 650 + }, + { + "epoch": 1.1, + "learning_rate": 7.537688442211055e-06, + "loss": 0.1491, + "step": 700 + }, + { + "epoch": 1.1, + "eval_loss": 0.157754048705101, + "eval_runtime": 136.107, + "eval_samples_per_second": 5.246, + "eval_steps_per_second": 0.661, + "step": 700 + }, + { + "epoch": 1.18, + "learning_rate": 6.2814070351758795e-06, + "loss": 0.1466, + "step": 750 + }, + { + "epoch": 1.18, + "eval_loss": 0.1569654941558838, + "eval_runtime": 137.1916, + "eval_samples_per_second": 5.204, + "eval_steps_per_second": 0.656, + "step": 750 + }, + { + "epoch": 1.25, + "learning_rate": 5.025125628140704e-06, + "loss": 0.1383, + "step": 800 + }, + { + "epoch": 1.25, + "eval_loss": 0.15617845952510834, + "eval_runtime": 136.7366, + "eval_samples_per_second": 5.222, + "eval_steps_per_second": 0.658, + "step": 800 + }, + { + "epoch": 1.33, + "learning_rate": 3.7688442211055276e-06, + "loss": 0.1417, + "step": 850 + }, + { + "epoch": 1.33, + "eval_loss": 0.15615858137607574, + "eval_runtime": 136.2828, + "eval_samples_per_second": 5.239, + "eval_steps_per_second": 0.66, + "step": 850 + }, + { + "epoch": 1.41, + "learning_rate": 2.512562814070352e-06, + "loss": 0.1374, + "step": 900 + }, + { + "epoch": 1.41, + "eval_loss": 0.155540332198143, + "eval_runtime": 137.0904, + "eval_samples_per_second": 5.208, + "eval_steps_per_second": 0.657, + "step": 900 + }, + { + "epoch": 1.49, + "learning_rate": 1.256281407035176e-06, + "loss": 0.147, + "step": 950 + }, + { + "epoch": 1.49, + "eval_loss": 0.15468443930149078, + "eval_runtime": 136.9218, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 950 + }, + { + "epoch": 1.57, + "learning_rate": 0.0, + "loss": 0.1448, + "step": 1000 + }, + { + "epoch": 1.57, + "eval_loss": 0.15455935895442963, + "eval_runtime": 136.7415, + "eval_samples_per_second": 5.222, + "eval_steps_per_second": 0.658, + "step": 
1000 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.7525216609776435e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-150/README.md b/checkpoint-150/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-150/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-150/adapter_config.json b/checkpoint-150/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-150/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-150/adapter_model.safetensors b/checkpoint-150/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..86f50c353d679533903910339482e20aace451b6 --- /dev/null +++ b/checkpoint-150/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62a12141bb43e830a8718d52bd0d32f4b487ea502c4972da2acf46e2ab4a1aff +size 85100592 diff --git a/checkpoint-150/optimizer.pt b/checkpoint-150/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e5f2fba4a5e662ff456737bdf2589fb7b940651 --- /dev/null +++ b/checkpoint-150/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13935a4d9f371ff3035fd3bf86cc3322a69a0f9c739f5dbef207611edaa9c922 +size 43126684 diff --git a/checkpoint-150/rng_state.pth b/checkpoint-150/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..becf59bce686607334e624dff79f8259cf3e9807 --- /dev/null +++ b/checkpoint-150/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0962aa698e0e188a79f51f32c71fcc3e315e7f273b4ba096ed39831a26a8f47b +size 14244 diff --git a/checkpoint-150/scheduler.pt b/checkpoint-150/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..49d58f979ce05f436a6917857895bda3fa2d6188 
--- /dev/null +++ b/checkpoint-150/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87a0a7460dd8b31647fa0542d6e8cdd02c31293f0704d27ec57a49b4c476aa1c +size 1064 diff --git a/checkpoint-150/trainer_state.json b/checkpoint-150/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..77578844c3333abdb578fe03765d52afe44ca7c4 --- /dev/null +++ b/checkpoint-150/trainer_state.json @@ -0,0 +1,62 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.23510971786833856, + "eval_steps": 50, + "global_step": 150, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 2.62911113035776e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-150/training_args.bin b/checkpoint-150/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-150/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-200/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. 
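+Each `checkpoint-*/trainer_state.json` alongside this card logs training and eval loss every 50 steps. A small sketch, assuming only the standard 🤗 Trainer state layout shown in those files, to pull out the eval-loss curve:
+
+```python
+import json
+
+# Any checkpoint directory in this repo works here
+with open("checkpoint-200/trainer_state.json") as f:
+    state = json.load(f)
+
+# log_history interleaves train entries ("loss") and eval entries ("eval_loss")
+evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]
+for step, eval_loss in evals:
+    print(f"step {step:>4}: eval_loss = {eval_loss:.4f}")
+```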
+ +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-200/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-200/adapter_model.safetensors b/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2dc138fb1673ed7b34daf1c00191fb30e7e66e65 --- /dev/null +++ b/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:727ef48f57d38fda1a97e3cc9c25f9341f961bd8a996adc089592cc9835622bc +size 85100592 diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..710c6ab94a4902e8dc7a725285874e0838090d13 --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bff1bcaec0babeb8e55e682d9da623230c8e0c9aea5651775ad7240718d3d9c9 +size 43126684 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b2b8f033039888d0dcaf065d069d2f3190f1daa --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f457ed62b714b4aba8d1b2432fdfc3a63a834912752b668d75a7da2e195a1587 +size 14244 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..72d84b5e537a910294a56f642241016921e9f87a --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1b477d3bb44d9bf70633240462f7ac6e455d50eefacf5b2433c62e0cc9e80d +size 1064 diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e75d191a34e797da9d21afdb31ad889dad6a7033 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,76 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.31347962382445144, + "eval_steps": 50, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 3.50548150714368e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-250/README.md b/checkpoint-250/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-250/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-250/adapter_config.json b/checkpoint-250/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-250/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-250/adapter_model.safetensors b/checkpoint-250/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7dc3828e7263908602f9a0e0a33b6b6141540ace --- /dev/null +++ b/checkpoint-250/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed08f577f810bfea2a625ae11c709cdfa654427fcce0a09e85e6fec516f73f5 +size 85100592 diff --git a/checkpoint-250/optimizer.pt b/checkpoint-250/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ecd052fdebf4e3afd482ce139296ec28dc65439 --- /dev/null +++ b/checkpoint-250/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3bc2666ba548997464a7639f9dc6ecfd18172c99544643445cc9830bd28aa48 +size 43126684 diff --git a/checkpoint-250/rng_state.pth b/checkpoint-250/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..07a9983172b421a8047dc8a156081b8252b07c0a --- /dev/null +++ b/checkpoint-250/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e90eec24f22ad8e38976f35fa28211eae70ff1aac715343277c0bc4b2839fa3 +size 14244 diff --git a/checkpoint-250/scheduler.pt b/checkpoint-250/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..95fe513e9fe8f72dc7b9ffa5f782cbf0f7422fc6 
--- /dev/null +++ b/checkpoint-250/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09471b95cb193b326e2ae9278591cdf878ced8cb70ac85a4cb6b83f68d62fc51 +size 1064 diff --git a/checkpoint-250/trainer_state.json b/checkpoint-250/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a370c2091cfa2aed77010264c3b3bc8c5bc0538c --- /dev/null +++ b/checkpoint-250/trainer_state.json @@ -0,0 +1,90 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.39184952978056425, + "eval_steps": 50, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 0.17311859130859375, + "eval_runtime": 136.3058, + "eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 4.3818518839296e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-250/training_args.bin b/checkpoint-250/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-250/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-300/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** 
[More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-300/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + 
"target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-300/adapter_model.safetensors b/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c4cd2113196a54821f12bebba043126e42f964a --- /dev/null +++ b/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9dcad4faf20f41404b8cfead079476e1b9e12179561ce60578ab234a8eebc2d +size 85100592 diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5665a16737180818c6ba655eebbf4b8369a73a18 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d36d7e08c0b28b1bf2a8b6580de32ebb04c5aa47ad21e5dc169f5b965a4ae42 +size 43127132 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e7d18234e64040f8cf959a85886a2d9137390310 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b609533938f675544d701f32c5dfd0943480eeae212bb01e28566ca924db586f +size 14244 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bdb954848e9822cd7c1e29d3b40bf45fa6b86357 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25d0ec4220fe093365424ee63188b9cc5436640be7c2cb84202c87d53f32aeaf +size 1064 diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..41481c37aad37525b47aa84df8ed7b1b066e142e --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,104 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4702194357366771, + "eval_steps": 50, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 0.17311859130859375, + "eval_runtime": 136.3058, + 
"eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.169, + "step": 300 + }, + { + "epoch": 0.47, + "eval_loss": 0.16897280514240265, + "eval_runtime": 136.923, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 300 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 5.25822226071552e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-350/README.md b/checkpoint-350/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-350/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-350/adapter_config.json b/checkpoint-350/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-350/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-350/adapter_model.safetensors b/checkpoint-350/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a3e823f00788fe5fd4f3507ef5b325c8b12c5c3 --- /dev/null +++ b/checkpoint-350/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d2f3b6e00fc579a23ffa509d46e76aad1f545cc588b1830255c6f2af71e368f +size 85100592 diff --git a/checkpoint-350/optimizer.pt b/checkpoint-350/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d7f3e8b0f14fba5780897b0ef77e7c90635ac77 --- /dev/null +++ b/checkpoint-350/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5991fa258a4369f8e039ad8330658311b9d18e00db1f2fa16322362c9b47a3e +size 43127132 diff --git a/checkpoint-350/rng_state.pth b/checkpoint-350/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a234902a72982c10c1ac2b3c274da55a4b573d14 --- /dev/null +++ b/checkpoint-350/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:951be2d5916602a2d21667bb40dea68576fab0a7f2c0f920dd0f986db118c99e +size 14244 diff --git a/checkpoint-350/scheduler.pt b/checkpoint-350/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8ae24c11986cf24c39cadca76e59270bbeb6f76 
--- /dev/null +++ b/checkpoint-350/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb9f9d20eafb507e007e0db2ee7fa0cc38244f7a2c2b2a604378cdb9e12dadb4 +size 1064 diff --git a/checkpoint-350/trainer_state.json b/checkpoint-350/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4212da2799908976582ad96dd8921618ab13bd45 --- /dev/null +++ b/checkpoint-350/trainer_state.json @@ -0,0 +1,118 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.54858934169279, + "eval_steps": 50, + "global_step": 350, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 0.17311859130859375, + "eval_runtime": 136.3058, + "eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.169, + "step": 300 + }, + { + "epoch": 0.47, + "eval_loss": 0.16897280514240265, + "eval_runtime": 136.923, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 300 + }, + { + "epoch": 0.55, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.166, + "step": 350 + }, + { + "epoch": 0.55, + "eval_loss": 0.1663457602262497, + "eval_runtime": 136.6033, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 350 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 6.13459263750144e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-350/training_args.bin b/checkpoint-350/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-350/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-400/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for 
Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-400/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-400/adapter_model.safetensors b/checkpoint-400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea138ac858af7abb466340b9cb63d779a9870abd --- /dev/null +++ b/checkpoint-400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f62919b2ae0be849f7a474d67d3b7175b7c2282865532bbfa0b61862d2e153fd +size 85100592 diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0733e69080960d6cb85ee754a88c7691b2eeb8d7 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:742a0fa18b967ffca8e9768c23ca2bd34a05de2e6106b8c5548cb78e6c79b9dd +size 43127132 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..607eabb84394049180c372893fc5d3bb0b64334f --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2e4f910f3e5e824f0aff0ee0b42c04a769fa7b424ca38472744271022ad41da +size 14244 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..52939dd56953607040022110b17355a120077961 
--- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7181c684d8bc6db848c39932fb3b82e6b80240c695625aac4584086e7663877b +size 1064 diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d51fbb3a726a51e48b19b941b67b405e87266ca4 --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,132 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6269592476489029, + "eval_steps": 50, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 0.17311859130859375, + "eval_runtime": 136.3058, + "eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.169, + "step": 300 + }, + { + "epoch": 0.47, + "eval_loss": 0.16897280514240265, + "eval_runtime": 136.923, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 300 + }, + { + "epoch": 0.55, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.166, + "step": 350 + }, + { + "epoch": 0.55, + "eval_loss": 0.1663457602262497, + "eval_runtime": 136.6033, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 350 + }, + { + "epoch": 0.63, + "learning_rate": 1.507537688442211e-05, + "loss": 0.1682, + "step": 400 + }, + { + "epoch": 0.63, + "eval_loss": 0.16482460498809814, + "eval_runtime": 136.5801, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 400 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 7.01096301428736e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-450/README.md 
b/checkpoint-450/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-450/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-450/adapter_config.json b/checkpoint-450/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-450/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-450/adapter_model.safetensors b/checkpoint-450/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f761fe05de99cdc1ae862e26058ebec82038557c --- /dev/null +++ b/checkpoint-450/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe364b7bb2eb7210c0e3c506b3581aba8e5148758e10232a73d04f67b8f5bc19 +size 85100592 diff --git a/checkpoint-450/optimizer.pt b/checkpoint-450/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..38a5ca1b6d20b4d4b14e788eced39e87d8d042be --- /dev/null +++ b/checkpoint-450/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13c8ce5ad0db902a76daaf4203e783b39685ff4f31ca9ba9d6ea121e3c27d511 +size 43127132 diff --git a/checkpoint-450/rng_state.pth b/checkpoint-450/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b2b8bad435ee546a9129f786466e748455905281 --- /dev/null +++ b/checkpoint-450/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b5dc0dc7e95dbe44f12f8e84c5a27f875649b4c99a29a3989d3ce218d8c46f5 +size 14244 diff --git a/checkpoint-450/scheduler.pt b/checkpoint-450/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbc3bf76d5260c2c30bfc67361e92a8589cadbe7 
--- /dev/null +++ b/checkpoint-450/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1067d941ad3656559599b88573da32c1132c71afd7464f5844b679059050ac89 +size 1064 diff --git a/checkpoint-450/trainer_state.json b/checkpoint-450/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..dbdcd16807b840aec8d42673e1e808aaf8a23a4f --- /dev/null +++ b/checkpoint-450/trainer_state.json @@ -0,0 +1,146 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7053291536050157, + "eval_steps": 50, + "global_step": 450, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 0.17311859130859375, + "eval_runtime": 136.3058, + "eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.169, + "step": 300 + }, + { + "epoch": 0.47, + "eval_loss": 0.16897280514240265, + "eval_runtime": 136.923, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 300 + }, + { + "epoch": 0.55, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.166, + "step": 350 + }, + { + "epoch": 0.55, + "eval_loss": 0.1663457602262497, + "eval_runtime": 136.6033, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 350 + }, + { + "epoch": 0.63, + "learning_rate": 1.507537688442211e-05, + "loss": 0.1682, + "step": 400 + }, + { + "epoch": 0.63, + "eval_loss": 0.16482460498809814, + "eval_runtime": 136.5801, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 400 + }, + { + "epoch": 0.71, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.1576, + "step": 450 + }, + { + "epoch": 0.71, + "eval_loss": 0.16245244443416595, + "eval_runtime": 136.7662, + "eval_samples_per_second": 5.221, + "eval_steps_per_second": 0.658, + "step": 450 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 7.88733339107328e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-450/training_args.bin b/checkpoint-450/training_args.bin new file mode 100644 index 
0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-450/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-50/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-50/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-50/adapter_model.safetensors b/checkpoint-50/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..959044ed33c2144170726ac6f1cc11af9661ea12 --- /dev/null +++ b/checkpoint-50/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0872eaa7b51542290927775c1951e429c448100d76da8e7f265734196aea05ef +size 85100592 diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb274d8111da85ab494f46c6f5a4361f07952e79 --- /dev/null +++ b/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df09d2ee646edc6d66bdc6da074bc0688ce1513d757b16c94b8c3110f3ebbc8b +size 43126684 diff --git a/checkpoint-50/rng_state.pth b/checkpoint-50/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..33812a7af95da8b386e8257947be925afac7850a --- /dev/null +++ b/checkpoint-50/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:592682c1150abfb50b69e678e29176ab6292e66f4f95f4501146e1803b37b9da +size 14244 diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..29ddae8f47fa44816202135e154475d713372793 --- /dev/null 
+++ b/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b44bde04014e0643297b0f127d92402c51731c3d038fc538c9a302aac901fb6 +size 1064 diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5ae71bb00c347e04accacc1e44adae506d3cef28 --- /dev/null +++ b/checkpoint-50/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.07836990595611286, + "eval_steps": 50, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 8763703767859200.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-500/README.md b/checkpoint-500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-500/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
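A minimal getting-started sketch, assuming the `transformers`, `peft`, `bitsandbytes`, and `accelerate` packages are installed; it loads the base model with the same 4-bit NF4 settings listed under Training procedure below and attaches the LoRA adapter from this checkpoint directory:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# 4-bit NF4 quantization, mirroring the bitsandbytes config used during training
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

# Attach the LoRA adapter weights stored in this checkpoint directory
model = PeftModel.from_pretrained(base, "checkpoint-500")
model.eval()

# Placeholder prompt: the card does not document the training data or task
inputs = tokenizer("Hello, world:", return_tensors="pt").to(model.device)
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```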
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-500/adapter_config.json b/checkpoint-500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-500/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-500/adapter_model.safetensors b/checkpoint-500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0bc1c699d0d6b07ee33855dc446822e57b750b2 --- /dev/null +++ b/checkpoint-500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d378a781bd39b07e9717fc93589246fb3cae5802ba51052561f36f86a1c9b235 +size 85100592 diff --git 
a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..01e7d7b10afab2430f6ef12146d0cd0e3135fe45 --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c76e8b501ae0e46b30c3875f61bf540b3c82409c72cd6065a4d2917e9ea23203 +size 43127132 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..28322b46d4244c492597715b07912794338fff2a --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6952c12f0719bd2254db75a798e38905ff9073d16053c7659dc4d2aa47149d4a +size 14244 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..89463731d1b5f59fe378f306cca2f74b47a04dc3 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08914fd90e4abb043440f2510884e4614f7e83091c72cc4c2d2f0ff220357a6e +size 1064 diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d222dad1d175cbbdf474aa67bc954cd157e6cd25 --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,160 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7836990595611285, + "eval_steps": 50, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 0.17311859130859375, + "eval_runtime": 136.3058, + "eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.169, + "step": 300 + }, + { + "epoch": 0.47, + "eval_loss": 0.16897280514240265, + "eval_runtime": 136.923, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 300 + }, + { + "epoch": 0.55, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.166, + "step": 350 + }, + { + "epoch": 0.55, + "eval_loss": 0.1663457602262497, + "eval_runtime": 136.6033, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 350 
+ }, + { + "epoch": 0.63, + "learning_rate": 1.507537688442211e-05, + "loss": 0.1682, + "step": 400 + }, + { + "epoch": 0.63, + "eval_loss": 0.16482460498809814, + "eval_runtime": 136.5801, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 400 + }, + { + "epoch": 0.71, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.1576, + "step": 450 + }, + { + "epoch": 0.71, + "eval_loss": 0.16245244443416595, + "eval_runtime": 136.7662, + "eval_samples_per_second": 5.221, + "eval_steps_per_second": 0.658, + "step": 450 + }, + { + "epoch": 0.78, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.165, + "step": 500 + }, + { + "epoch": 0.78, + "eval_loss": 0.16068558394908905, + "eval_runtime": 136.6019, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 500 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 8.7637037678592e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-550/README.md b/checkpoint-550/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-550/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
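The checkpoint directory also ships its `trainer_state.json`, so the logged train/eval losses can be inspected with the standard library alone before loading any weights (a sketch, assuming the file layout shown in this repository):

```python
import json
from pathlib import Path

state = json.loads(Path("checkpoint-550/trainer_state.json").read_text())

# log_history interleaves training records ("loss") and evaluation records ("eval_loss")
for record in state["log_history"]:
    if "loss" in record:
        print(f"step {record['step']:4d}  train loss {record['loss']:.4f}")
    if "eval_loss" in record:
        print(f"step {record['step']:4d}  eval loss  {record['eval_loss']:.4f}")
```

At step 550 this prints an eval loss of 0.1598, consistent with the curve still improving slowly past step 400.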
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-550/adapter_config.json b/checkpoint-550/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-550/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-550/adapter_model.safetensors b/checkpoint-550/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32d67806f2fd3da9250772d107f197154f94d18e --- /dev/null +++ b/checkpoint-550/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bf5a9702b5e3a9aea6b62032de751a80d9158cee10e82d42185cd66305e1ab3 +size 85100592 diff --git 
a/checkpoint-550/optimizer.pt b/checkpoint-550/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4180a355a9e1324b0ba81b11f068ab1fd41168b0 --- /dev/null +++ b/checkpoint-550/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9780a45ab93697f76b56e182bc63f130235c7be725d1146e4740190f96af63fa +size 43127132 diff --git a/checkpoint-550/rng_state.pth b/checkpoint-550/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7b42a5e70e3538b3ea59c46762aa1af0be7b22e8 --- /dev/null +++ b/checkpoint-550/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f5eb7c348ca2a5d5a477ac5a307887ef2d0ba14107624e5247e6f4d48deddaa +size 14244 diff --git a/checkpoint-550/scheduler.pt b/checkpoint-550/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a651c534e0d965f44cae8d5bf2dd3c658e41d3c8 --- /dev/null +++ b/checkpoint-550/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6090bcf779d364236641cfe14c77a35431b5bfafcd25a884b8a5b97a4fc6fe12 +size 1064 diff --git a/checkpoint-550/trainer_state.json b/checkpoint-550/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5e62038832febb7af10c09b5fa820e94e98b3d87 --- /dev/null +++ b/checkpoint-550/trainer_state.json @@ -0,0 +1,174 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8620689655172413, + "eval_steps": 50, + "global_step": 550, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 0.17311859130859375, + "eval_runtime": 136.3058, + "eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.169, + "step": 300 + }, + { + "epoch": 0.47, + "eval_loss": 0.16897280514240265, + "eval_runtime": 136.923, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 300 + }, + { + "epoch": 0.55, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.166, + "step": 350 + }, + { + "epoch": 0.55, + "eval_loss": 0.1663457602262497, + "eval_runtime": 136.6033, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 350 
+ }, + { + "epoch": 0.63, + "learning_rate": 1.507537688442211e-05, + "loss": 0.1682, + "step": 400 + }, + { + "epoch": 0.63, + "eval_loss": 0.16482460498809814, + "eval_runtime": 136.5801, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 400 + }, + { + "epoch": 0.71, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.1576, + "step": 450 + }, + { + "epoch": 0.71, + "eval_loss": 0.16245244443416595, + "eval_runtime": 136.7662, + "eval_samples_per_second": 5.221, + "eval_steps_per_second": 0.658, + "step": 450 + }, + { + "epoch": 0.78, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.165, + "step": 500 + }, + { + "epoch": 0.78, + "eval_loss": 0.16068558394908905, + "eval_runtime": 136.6019, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 500 + }, + { + "epoch": 0.86, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.152, + "step": 550 + }, + { + "epoch": 0.86, + "eval_loss": 0.15984833240509033, + "eval_runtime": 136.8975, + "eval_samples_per_second": 5.216, + "eval_steps_per_second": 0.657, + "step": 550 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 9.64007414464512e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-550/training_args.bin b/checkpoint-550/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-550/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-600/README.md b/checkpoint-600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-600/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
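For inference without the `peft` runtime, the adapter can instead be folded into the base weights; a sketch using `merge_and_unload`, assuming enough memory to hold the base model unquantized (the `mistral-7b-600-merged` output path is illustrative, not part of this repository):

```python
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Merging requires the base weights in a regular dtype, not 4-bit
base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model = PeftModel.from_pretrained(base, "checkpoint-600")

# Fold the LoRA deltas into the base weights and drop the adapter wrappers
merged = model.merge_and_unload()
merged.save_pretrained("mistral-7b-600-merged")  # illustrative output path
```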
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-600/adapter_config.json b/checkpoint-600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-600/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-600/adapter_model.safetensors b/checkpoint-600/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b9b567c720fd3abb063722ed5d575dd37a86be1 --- /dev/null +++ b/checkpoint-600/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe509fe3d7d62f31fe78c0e4269f49f3310902cb89f6e6bb839bf27d943a1294 +size 85100592 diff --git 
a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a273d2e6a4be9cc2a2374f11768f6dec874d791 --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa8c2588af8e00b4206238fdcd6b357bad795228769cb732bd6059dca21202e0 +size 43127132 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..04f2edbdd77cedde70ff5e9c8fe4b4ba6fc17c3f --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32861990c37c74ab39313315667e60f6ec1b669ba6c730729e5453b5854541bd +size 14244 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5dfe1f9e4f86adab5523d67985799f485c78132 --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9ce7ff6d6acf4945e377262122c7e37fa2f686114026e8b558ebf0faa33a8fb +size 1064 diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a781090206eb89e4506b9748097b47cacb590e7f --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,188 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9404388714733543, + "eval_steps": 50, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 0.17311859130859375, + "eval_runtime": 136.3058, + "eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.169, + "step": 300 + }, + { + "epoch": 0.47, + "eval_loss": 0.16897280514240265, + "eval_runtime": 136.923, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 300 + }, + { + "epoch": 0.55, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.166, + "step": 350 + }, + { + "epoch": 0.55, + "eval_loss": 0.1663457602262497, + "eval_runtime": 136.6033, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 350 
+ }, + { + "epoch": 0.63, + "learning_rate": 1.507537688442211e-05, + "loss": 0.1682, + "step": 400 + }, + { + "epoch": 0.63, + "eval_loss": 0.16482460498809814, + "eval_runtime": 136.5801, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 400 + }, + { + "epoch": 0.71, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.1576, + "step": 450 + }, + { + "epoch": 0.71, + "eval_loss": 0.16245244443416595, + "eval_runtime": 136.7662, + "eval_samples_per_second": 5.221, + "eval_steps_per_second": 0.658, + "step": 450 + }, + { + "epoch": 0.78, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.165, + "step": 500 + }, + { + "epoch": 0.78, + "eval_loss": 0.16068558394908905, + "eval_runtime": 136.6019, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 500 + }, + { + "epoch": 0.86, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.152, + "step": 550 + }, + { + "epoch": 0.86, + "eval_loss": 0.15984833240509033, + "eval_runtime": 136.8975, + "eval_samples_per_second": 5.216, + "eval_steps_per_second": 0.657, + "step": 550 + }, + { + "epoch": 0.94, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.1563, + "step": 600 + }, + { + "epoch": 0.94, + "eval_loss": 0.15865428745746613, + "eval_runtime": 136.9521, + "eval_samples_per_second": 5.214, + "eval_steps_per_second": 0.657, + "step": 600 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.051644452143104e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-650/README.md b/checkpoint-650/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-650/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
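
The same placeholder recurs in every per-checkpoint copy of this card. Rather than repeat the loading sketch, here is a hedged reconstruction of the committed `adapter_config.json` as a `peft.LoraConfig`, e.g. as a starting point for reproducing the fine-tune. The values are copied from the JSON in this diff; everything else about the training setup remains unknown.

```python
from peft import LoraConfig

# Values copied from checkpoint-*/adapter_config.json in this diff.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # feed-forward projections
        "lm_head",                               # output head, as configured here
    ],
)
```

Note that `lm_head` in `target_modules` comes from the committed config itself; adapting the output head alongside the usual projection layers is a less common choice, not an addition made here.
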
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-650/adapter_config.json b/checkpoint-650/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-650/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-650/adapter_model.safetensors b/checkpoint-650/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd857f7843a0ac2192617e5f0c0641459a5564b7 --- /dev/null +++ b/checkpoint-650/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7848c8029c1cc287045fe5b46a64b349390d7d51c4d010ff2599e76190c4cf6 +size 85100592 diff --git 
a/checkpoint-650/optimizer.pt b/checkpoint-650/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..736264d278c29260d3cd33b65a862242410ea162 --- /dev/null +++ b/checkpoint-650/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc2ef27137ee2de8d0508d650dae9846d9a6f80463ef3877a5850c621550d936 +size 43127132 diff --git a/checkpoint-650/rng_state.pth b/checkpoint-650/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..05f41b6c0ab2ab26ab2b17a0517c584c2494d91c --- /dev/null +++ b/checkpoint-650/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e1f335e5477520281d97d47a270a1afd749b4a6a9c728c5eca7265428aadee +size 14244 diff --git a/checkpoint-650/scheduler.pt b/checkpoint-650/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cdd260fbee15f41b499155a2c0891ed4c0aaafe4 --- /dev/null +++ b/checkpoint-650/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64cb7a43229696b470cfa762fb458086df2a2667853db1b1fbc8be13f2ebd755 +size 1064 diff --git a/checkpoint-650/trainer_state.json b/checkpoint-650/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b5f0e0fa877d1494f323088c8e53acdaafcaa787 --- /dev/null +++ b/checkpoint-650/trainer_state.json @@ -0,0 +1,202 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0188087774294672, + "eval_steps": 50, + "global_step": 650, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 0.17311859130859375, + "eval_runtime": 136.3058, + "eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.169, + "step": 300 + }, + { + "epoch": 0.47, + "eval_loss": 0.16897280514240265, + "eval_runtime": 136.923, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 300 + }, + { + "epoch": 0.55, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.166, + "step": 350 + }, + { + "epoch": 0.55, + "eval_loss": 0.1663457602262497, + "eval_runtime": 136.6033, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 350 
+ }, + { + "epoch": 0.63, + "learning_rate": 1.507537688442211e-05, + "loss": 0.1682, + "step": 400 + }, + { + "epoch": 0.63, + "eval_loss": 0.16482460498809814, + "eval_runtime": 136.5801, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 400 + }, + { + "epoch": 0.71, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.1576, + "step": 450 + }, + { + "epoch": 0.71, + "eval_loss": 0.16245244443416595, + "eval_runtime": 136.7662, + "eval_samples_per_second": 5.221, + "eval_steps_per_second": 0.658, + "step": 450 + }, + { + "epoch": 0.78, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.165, + "step": 500 + }, + { + "epoch": 0.78, + "eval_loss": 0.16068558394908905, + "eval_runtime": 136.6019, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 500 + }, + { + "epoch": 0.86, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.152, + "step": 550 + }, + { + "epoch": 0.86, + "eval_loss": 0.15984833240509033, + "eval_runtime": 136.8975, + "eval_samples_per_second": 5.216, + "eval_steps_per_second": 0.657, + "step": 550 + }, + { + "epoch": 0.94, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.1563, + "step": 600 + }, + { + "epoch": 0.94, + "eval_loss": 0.15865428745746613, + "eval_runtime": 136.9521, + "eval_samples_per_second": 5.214, + "eval_steps_per_second": 0.657, + "step": 600 + }, + { + "epoch": 1.02, + "learning_rate": 8.793969849246232e-06, + "loss": 0.1477, + "step": 650 + }, + { + "epoch": 1.02, + "eval_loss": 0.1577940434217453, + "eval_runtime": 136.5669, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 650 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.1390623972274995e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-650/training_args.bin b/checkpoint-650/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-650/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-700/README.md b/checkpoint-700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-700/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, 
biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-700/adapter_config.json b/checkpoint-700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-700/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-700/adapter_model.safetensors b/checkpoint-700/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..641fe796fab6ed47d9aee5057c6483477205825c --- /dev/null +++ 
b/checkpoint-700/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:947e14adee98d368ed5392e05291a0b78b0062c25329895def073e75691389cd +size 85100592 diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..74e8643a49e3b0f4e30253458ce0155bc15c0fac --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2391114055d0b26f63fc9d16d1925667bf4c2ac852eb7245444492f2d10d9e1 +size 43127132 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..89018757dfc943475820aae840a900e1a27ea0b4 --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67d7b94b93ef4ac511de7c2b6f131ed6b332575b40fb9743f72a3437e59953c7 +size 14244 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..02cbb3376c0ab4a41059d1471bfcd017761e9cbc --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8641b104236537da719992bf5637d9a29f245da450fd6a0c1ade6f59294e3067 +size 1064 diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..10d177730008b36f0c0d6e8f133f828718346bb0 --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,216 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.09717868338558, + "eval_steps": 50, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 0.17311859130859375, + "eval_runtime": 136.3058, + "eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.169, + "step": 300 + }, + { + "epoch": 0.47, + "eval_loss": 0.16897280514240265, + "eval_runtime": 136.923, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 300 + }, + { + "epoch": 0.55, + "learning_rate": 1.6331658291457288e-05, 
+ "loss": 0.166, + "step": 350 + }, + { + "epoch": 0.55, + "eval_loss": 0.1663457602262497, + "eval_runtime": 136.6033, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 350 + }, + { + "epoch": 0.63, + "learning_rate": 1.507537688442211e-05, + "loss": 0.1682, + "step": 400 + }, + { + "epoch": 0.63, + "eval_loss": 0.16482460498809814, + "eval_runtime": 136.5801, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 400 + }, + { + "epoch": 0.71, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.1576, + "step": 450 + }, + { + "epoch": 0.71, + "eval_loss": 0.16245244443416595, + "eval_runtime": 136.7662, + "eval_samples_per_second": 5.221, + "eval_steps_per_second": 0.658, + "step": 450 + }, + { + "epoch": 0.78, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.165, + "step": 500 + }, + { + "epoch": 0.78, + "eval_loss": 0.16068558394908905, + "eval_runtime": 136.6019, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 500 + }, + { + "epoch": 0.86, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.152, + "step": 550 + }, + { + "epoch": 0.86, + "eval_loss": 0.15984833240509033, + "eval_runtime": 136.8975, + "eval_samples_per_second": 5.216, + "eval_steps_per_second": 0.657, + "step": 550 + }, + { + "epoch": 0.94, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.1563, + "step": 600 + }, + { + "epoch": 0.94, + "eval_loss": 0.15865428745746613, + "eval_runtime": 136.9521, + "eval_samples_per_second": 5.214, + "eval_steps_per_second": 0.657, + "step": 600 + }, + { + "epoch": 1.02, + "learning_rate": 8.793969849246232e-06, + "loss": 0.1477, + "step": 650 + }, + { + "epoch": 1.02, + "eval_loss": 0.1577940434217453, + "eval_runtime": 136.5669, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 650 + }, + { + "epoch": 1.1, + "learning_rate": 7.537688442211055e-06, + "loss": 0.1491, + "step": 700 + }, + { + "epoch": 1.1, + "eval_loss": 0.157754048705101, + "eval_runtime": 136.107, + "eval_samples_per_second": 5.246, + "eval_steps_per_second": 0.661, + "step": 700 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.2266994349060915e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-750/README.md b/checkpoint-750/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-750/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More 
Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-750/adapter_config.json b/checkpoint-750/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-750/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + 
"rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-750/adapter_model.safetensors b/checkpoint-750/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1bf0186e8499999fee0cf896cc24d951bf07d4d6 --- /dev/null +++ b/checkpoint-750/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7624baf8bb5eb0b5d0426c8e0877a3a8a5e806771c07b947822134d8832c1131 +size 85100592 diff --git a/checkpoint-750/optimizer.pt b/checkpoint-750/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b2038a9a251818d5aa999427da36724fee0873b --- /dev/null +++ b/checkpoint-750/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97b86205f11e68f69e821ff73150c5b7b0d3f8f91f58d150b63cda101e7cff47 +size 43127132 diff --git a/checkpoint-750/rng_state.pth b/checkpoint-750/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ac4e19e94bae199e21a04e90fb5d7c55edb5ef79 --- /dev/null +++ b/checkpoint-750/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da5552af9f265ab9527dd6e554cb71364641857f9f1e512eb0310ace769bfe75 +size 14244 diff --git a/checkpoint-750/scheduler.pt b/checkpoint-750/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..34bee97f97b51a6d92ac9228d40e56002fad4dcc --- /dev/null +++ b/checkpoint-750/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01ff28eb4ca89c9920d5e03ccaa7e35ffa8cc6579bf5ebfee1c7132e0d1e6636 +size 1064 diff --git a/checkpoint-750/trainer_state.json b/checkpoint-750/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b35b2d50868eb55cb41d9990af35831a3a73f03e --- /dev/null +++ b/checkpoint-750/trainer_state.json @@ -0,0 +1,230 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.1755485893416928, + "eval_steps": 50, + "global_step": 750, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 
0.17311859130859375, + "eval_runtime": 136.3058, + "eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.169, + "step": 300 + }, + { + "epoch": 0.47, + "eval_loss": 0.16897280514240265, + "eval_runtime": 136.923, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 300 + }, + { + "epoch": 0.55, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.166, + "step": 350 + }, + { + "epoch": 0.55, + "eval_loss": 0.1663457602262497, + "eval_runtime": 136.6033, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 350 + }, + { + "epoch": 0.63, + "learning_rate": 1.507537688442211e-05, + "loss": 0.1682, + "step": 400 + }, + { + "epoch": 0.63, + "eval_loss": 0.16482460498809814, + "eval_runtime": 136.5801, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 400 + }, + { + "epoch": 0.71, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.1576, + "step": 450 + }, + { + "epoch": 0.71, + "eval_loss": 0.16245244443416595, + "eval_runtime": 136.7662, + "eval_samples_per_second": 5.221, + "eval_steps_per_second": 0.658, + "step": 450 + }, + { + "epoch": 0.78, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.165, + "step": 500 + }, + { + "epoch": 0.78, + "eval_loss": 0.16068558394908905, + "eval_runtime": 136.6019, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 500 + }, + { + "epoch": 0.86, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.152, + "step": 550 + }, + { + "epoch": 0.86, + "eval_loss": 0.15984833240509033, + "eval_runtime": 136.8975, + "eval_samples_per_second": 5.216, + "eval_steps_per_second": 0.657, + "step": 550 + }, + { + "epoch": 0.94, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.1563, + "step": 600 + }, + { + "epoch": 0.94, + "eval_loss": 0.15865428745746613, + "eval_runtime": 136.9521, + "eval_samples_per_second": 5.214, + "eval_steps_per_second": 0.657, + "step": 600 + }, + { + "epoch": 1.02, + "learning_rate": 8.793969849246232e-06, + "loss": 0.1477, + "step": 650 + }, + { + "epoch": 1.02, + "eval_loss": 0.1577940434217453, + "eval_runtime": 136.5669, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 650 + }, + { + "epoch": 1.1, + "learning_rate": 7.537688442211055e-06, + "loss": 0.1491, + "step": 700 + }, + { + "epoch": 1.1, + "eval_loss": 0.157754048705101, + "eval_runtime": 136.107, + "eval_samples_per_second": 5.246, + "eval_steps_per_second": 0.661, + "step": 700 + }, + { + "epoch": 1.18, + "learning_rate": 6.2814070351758795e-06, + "loss": 0.1466, + "step": 750 + }, + { + "epoch": 1.18, + "eval_loss": 0.1569654941558838, + "eval_runtime": 137.1916, + "eval_samples_per_second": 5.204, + "eval_steps_per_second": 0.656, + "step": 750 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.3143364725846835e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-750/training_args.bin b/checkpoint-750/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-750/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-800/README.md b/checkpoint-800/README.md new file mode 
100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-800/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-800/adapter_config.json b/checkpoint-800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-800/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-800/adapter_model.safetensors b/checkpoint-800/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2f21ac72b1b41a3c714fb23a3ac7eb7bb8ffbf3 --- /dev/null +++ b/checkpoint-800/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0311fdf5faf62c5f475ccfac9ed031cebd6a5d9b98adc9a83352055a5f67fe8 +size 85100592 diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e4394382b4e817382da5bb90311ff7a75bbbd83 --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b16a79b1679389f768486e62aa33e9124537cd53dae3313f8c200e7265f0735 +size 43127132 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6763ef8d1357d6d8c64888637f0a1459f8099b51 --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a4395fc50488a59c62366c632fe720564fb71447ce49d3af8c768c207fd1524 +size 14244 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..91a1e379ee7f89bed4ffaba66c0bd5e6ae7b1665 
--- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f2387e2d26114664cc6a4cf5bd742a874dbf876f2332a74cdad212bf210513c +size 1064 diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..470bdda6dfad857f7364d4fa737c47b688a3c6fd --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,244 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2539184952978055, + "eval_steps": 50, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 0.17311859130859375, + "eval_runtime": 136.3058, + "eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.169, + "step": 300 + }, + { + "epoch": 0.47, + "eval_loss": 0.16897280514240265, + "eval_runtime": 136.923, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 300 + }, + { + "epoch": 0.55, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.166, + "step": 350 + }, + { + "epoch": 0.55, + "eval_loss": 0.1663457602262497, + "eval_runtime": 136.6033, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 350 + }, + { + "epoch": 0.63, + "learning_rate": 1.507537688442211e-05, + "loss": 0.1682, + "step": 400 + }, + { + "epoch": 0.63, + "eval_loss": 0.16482460498809814, + "eval_runtime": 136.5801, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 400 + }, + { + "epoch": 0.71, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.1576, + "step": 450 + }, + { + "epoch": 0.71, + "eval_loss": 0.16245244443416595, + "eval_runtime": 136.7662, + "eval_samples_per_second": 5.221, + "eval_steps_per_second": 0.658, + "step": 450 + }, + { + "epoch": 0.78, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.165, + "step": 500 + }, + { + "epoch": 0.78, + "eval_loss": 0.16068558394908905, + "eval_runtime": 136.6019, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 500 + }, + { + "epoch": 0.86, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.152, + "step": 550 + }, + { 
+ "epoch": 0.86, + "eval_loss": 0.15984833240509033, + "eval_runtime": 136.8975, + "eval_samples_per_second": 5.216, + "eval_steps_per_second": 0.657, + "step": 550 + }, + { + "epoch": 0.94, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.1563, + "step": 600 + }, + { + "epoch": 0.94, + "eval_loss": 0.15865428745746613, + "eval_runtime": 136.9521, + "eval_samples_per_second": 5.214, + "eval_steps_per_second": 0.657, + "step": 600 + }, + { + "epoch": 1.02, + "learning_rate": 8.793969849246232e-06, + "loss": 0.1477, + "step": 650 + }, + { + "epoch": 1.02, + "eval_loss": 0.1577940434217453, + "eval_runtime": 136.5669, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 650 + }, + { + "epoch": 1.1, + "learning_rate": 7.537688442211055e-06, + "loss": 0.1491, + "step": 700 + }, + { + "epoch": 1.1, + "eval_loss": 0.157754048705101, + "eval_runtime": 136.107, + "eval_samples_per_second": 5.246, + "eval_steps_per_second": 0.661, + "step": 700 + }, + { + "epoch": 1.18, + "learning_rate": 6.2814070351758795e-06, + "loss": 0.1466, + "step": 750 + }, + { + "epoch": 1.18, + "eval_loss": 0.1569654941558838, + "eval_runtime": 137.1916, + "eval_samples_per_second": 5.204, + "eval_steps_per_second": 0.656, + "step": 750 + }, + { + "epoch": 1.25, + "learning_rate": 5.025125628140704e-06, + "loss": 0.1383, + "step": 800 + }, + { + "epoch": 1.25, + "eval_loss": 0.15617845952510834, + "eval_runtime": 136.7366, + "eval_samples_per_second": 5.222, + "eval_steps_per_second": 0.658, + "step": 800 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.4019735102632755e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-850/README.md b/checkpoint-850/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-850/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. 
+ +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-850/adapter_config.json b/checkpoint-850/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-850/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-850/adapter_model.safetensors b/checkpoint-850/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..65dc13166c070e0efaf4a3f720199d169f16d61e --- /dev/null +++ b/checkpoint-850/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:cf4513a0ae0b9af5ba7945b3a86d31e4bcdd6033c06e7cf2bb0dca62eece264c +size 85100592 diff --git a/checkpoint-850/optimizer.pt b/checkpoint-850/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d30e2fa79f0d01f5e755878702b47e5aea1c36c --- /dev/null +++ b/checkpoint-850/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dce1c4c36ab2f661d194800d8d5cbba551fceb88b0ce7a822d2d398f02bcb42b +size 43127132 diff --git a/checkpoint-850/rng_state.pth b/checkpoint-850/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a2ba2c537cd7f8eae701ad3a2775ff7e952a17c3 --- /dev/null +++ b/checkpoint-850/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe7ff3498876023cba2480f20218eefbf0bcd31c40a3259ad008bdcef9fd6b35 +size 14244 diff --git a/checkpoint-850/scheduler.pt b/checkpoint-850/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..15211d9e9f90f890ca6da2f4b0cf1eb2a50685f4 --- /dev/null +++ b/checkpoint-850/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a80d05a9e60673a998a3e598998f59153efa52025094d8426638be58e9ddf148 +size 1064 diff --git a/checkpoint-850/trainer_state.json b/checkpoint-850/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..86d0d2f60bc7e4742a848f29bca8c47e285bcc2a --- /dev/null +++ b/checkpoint-850/trainer_state.json @@ -0,0 +1,258 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.3322884012539185, + "eval_steps": 50, + "global_step": 850, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 0.17311859130859375, + "eval_runtime": 136.3058, + "eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.169, + "step": 300 + }, + { + "epoch": 0.47, + "eval_loss": 0.16897280514240265, + "eval_runtime": 136.923, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 300 + }, + { + "epoch": 0.55, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.166, + "step": 350 + }, + { + "epoch": 0.55, + "eval_loss": 0.1663457602262497, + 
"eval_runtime": 136.6033, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 350 + }, + { + "epoch": 0.63, + "learning_rate": 1.507537688442211e-05, + "loss": 0.1682, + "step": 400 + }, + { + "epoch": 0.63, + "eval_loss": 0.16482460498809814, + "eval_runtime": 136.5801, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 400 + }, + { + "epoch": 0.71, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.1576, + "step": 450 + }, + { + "epoch": 0.71, + "eval_loss": 0.16245244443416595, + "eval_runtime": 136.7662, + "eval_samples_per_second": 5.221, + "eval_steps_per_second": 0.658, + "step": 450 + }, + { + "epoch": 0.78, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.165, + "step": 500 + }, + { + "epoch": 0.78, + "eval_loss": 0.16068558394908905, + "eval_runtime": 136.6019, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 500 + }, + { + "epoch": 0.86, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.152, + "step": 550 + }, + { + "epoch": 0.86, + "eval_loss": 0.15984833240509033, + "eval_runtime": 136.8975, + "eval_samples_per_second": 5.216, + "eval_steps_per_second": 0.657, + "step": 550 + }, + { + "epoch": 0.94, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.1563, + "step": 600 + }, + { + "epoch": 0.94, + "eval_loss": 0.15865428745746613, + "eval_runtime": 136.9521, + "eval_samples_per_second": 5.214, + "eval_steps_per_second": 0.657, + "step": 600 + }, + { + "epoch": 1.02, + "learning_rate": 8.793969849246232e-06, + "loss": 0.1477, + "step": 650 + }, + { + "epoch": 1.02, + "eval_loss": 0.1577940434217453, + "eval_runtime": 136.5669, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 650 + }, + { + "epoch": 1.1, + "learning_rate": 7.537688442211055e-06, + "loss": 0.1491, + "step": 700 + }, + { + "epoch": 1.1, + "eval_loss": 0.157754048705101, + "eval_runtime": 136.107, + "eval_samples_per_second": 5.246, + "eval_steps_per_second": 0.661, + "step": 700 + }, + { + "epoch": 1.18, + "learning_rate": 6.2814070351758795e-06, + "loss": 0.1466, + "step": 750 + }, + { + "epoch": 1.18, + "eval_loss": 0.1569654941558838, + "eval_runtime": 137.1916, + "eval_samples_per_second": 5.204, + "eval_steps_per_second": 0.656, + "step": 750 + }, + { + "epoch": 1.25, + "learning_rate": 5.025125628140704e-06, + "loss": 0.1383, + "step": 800 + }, + { + "epoch": 1.25, + "eval_loss": 0.15617845952510834, + "eval_runtime": 136.7366, + "eval_samples_per_second": 5.222, + "eval_steps_per_second": 0.658, + "step": 800 + }, + { + "epoch": 1.33, + "learning_rate": 3.7688442211055276e-06, + "loss": 0.1417, + "step": 850 + }, + { + "epoch": 1.33, + "eval_loss": 0.15615858137607574, + "eval_runtime": 136.2828, + "eval_samples_per_second": 5.239, + "eval_steps_per_second": 0.66, + "step": 850 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.4896105479418675e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-850/training_args.bin b/checkpoint-850/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-850/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-900/README.md b/checkpoint-900/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-900/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-900/adapter_config.json b/checkpoint-900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-900/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-900/adapter_model.safetensors b/checkpoint-900/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f5713f2a8f24d1480c3e6cbdf0fce02a473a9d93 --- /dev/null +++ b/checkpoint-900/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd08dcfdc9a33f6fbbf727aeffca9043ce082db0f17a5bc0f8b8685c70b1d656 +size 85100592 diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..abcbb7cca97044207c9120eba6d6cadea77ee6c1 --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23ff6e82ca84834b29d2c15af0a9df34c1ac305316f987156ad97a392c8c711e +size 43127132 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9dfea154b608fdcdeac7808454f3ada59f692414 --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85bc79c4d38af35767d2b63d02e3d02cb00892ac242bf5cbdb40a9f05f7e4256 +size 14244 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..25662b4210c7666ed19ecf1aa7bff50a794ef7d7 
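The model card's "How to Get Started with the Model" section is still marked [More Information Needed]. Until it is filled in, here is a minimal, hedged loading sketch: the quantization settings mirror the `bitsandbytes` config listed above and the base model comes from the card's front matter, but the adapter path is a placeholder and the generation call is illustrative only.

```python
# Sketch: load the 4-bit base model and attach this LoRA adapter.
# Quantization values mirror the bitsandbytes config above; "checkpoint-900"
# is a placeholder for wherever the adapter weights live locally.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
base_model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
model = PeftModel.from_pretrained(base_model, "checkpoint-900")  # placeholder path

inputs = tokenizer("Hello, world:", return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```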
--- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22aa127cbf65f517d6cc6dcd8205454e61950c25d1a17d361d7e6a24d922b3a6 +size 1064 diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..82fed0e07b303e869629590637e27522110378fd --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,272 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.4106583072100314, + "eval_steps": 50, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 0.17311859130859375, + "eval_runtime": 136.3058, + "eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.169, + "step": 300 + }, + { + "epoch": 0.47, + "eval_loss": 0.16897280514240265, + "eval_runtime": 136.923, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 300 + }, + { + "epoch": 0.55, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.166, + "step": 350 + }, + { + "epoch": 0.55, + "eval_loss": 0.1663457602262497, + "eval_runtime": 136.6033, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 350 + }, + { + "epoch": 0.63, + "learning_rate": 1.507537688442211e-05, + "loss": 0.1682, + "step": 400 + }, + { + "epoch": 0.63, + "eval_loss": 0.16482460498809814, + "eval_runtime": 136.5801, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 400 + }, + { + "epoch": 0.71, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.1576, + "step": 450 + }, + { + "epoch": 0.71, + "eval_loss": 0.16245244443416595, + "eval_runtime": 136.7662, + "eval_samples_per_second": 5.221, + "eval_steps_per_second": 0.658, + "step": 450 + }, + { + "epoch": 0.78, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.165, + "step": 500 + }, + { + "epoch": 0.78, + "eval_loss": 0.16068558394908905, + "eval_runtime": 136.6019, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 500 + }, + { + "epoch": 0.86, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.152, + "step": 550 + }, + { 
+ "epoch": 0.86, + "eval_loss": 0.15984833240509033, + "eval_runtime": 136.8975, + "eval_samples_per_second": 5.216, + "eval_steps_per_second": 0.657, + "step": 550 + }, + { + "epoch": 0.94, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.1563, + "step": 600 + }, + { + "epoch": 0.94, + "eval_loss": 0.15865428745746613, + "eval_runtime": 136.9521, + "eval_samples_per_second": 5.214, + "eval_steps_per_second": 0.657, + "step": 600 + }, + { + "epoch": 1.02, + "learning_rate": 8.793969849246232e-06, + "loss": 0.1477, + "step": 650 + }, + { + "epoch": 1.02, + "eval_loss": 0.1577940434217453, + "eval_runtime": 136.5669, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 650 + }, + { + "epoch": 1.1, + "learning_rate": 7.537688442211055e-06, + "loss": 0.1491, + "step": 700 + }, + { + "epoch": 1.1, + "eval_loss": 0.157754048705101, + "eval_runtime": 136.107, + "eval_samples_per_second": 5.246, + "eval_steps_per_second": 0.661, + "step": 700 + }, + { + "epoch": 1.18, + "learning_rate": 6.2814070351758795e-06, + "loss": 0.1466, + "step": 750 + }, + { + "epoch": 1.18, + "eval_loss": 0.1569654941558838, + "eval_runtime": 137.1916, + "eval_samples_per_second": 5.204, + "eval_steps_per_second": 0.656, + "step": 750 + }, + { + "epoch": 1.25, + "learning_rate": 5.025125628140704e-06, + "loss": 0.1383, + "step": 800 + }, + { + "epoch": 1.25, + "eval_loss": 0.15617845952510834, + "eval_runtime": 136.7366, + "eval_samples_per_second": 5.222, + "eval_steps_per_second": 0.658, + "step": 800 + }, + { + "epoch": 1.33, + "learning_rate": 3.7688442211055276e-06, + "loss": 0.1417, + "step": 850 + }, + { + "epoch": 1.33, + "eval_loss": 0.15615858137607574, + "eval_runtime": 136.2828, + "eval_samples_per_second": 5.239, + "eval_steps_per_second": 0.66, + "step": 850 + }, + { + "epoch": 1.41, + "learning_rate": 2.512562814070352e-06, + "loss": 0.1374, + "step": 900 + }, + { + "epoch": 1.41, + "eval_loss": 0.155540332198143, + "eval_runtime": 137.0904, + "eval_samples_per_second": 5.208, + "eval_steps_per_second": 0.657, + "step": 900 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.5772475856204595e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600 diff --git a/checkpoint-950/README.md b/checkpoint-950/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5aebe228437c05609806d320d9742e9440821ea9 --- /dev/null +++ b/checkpoint-950/README.md @@ -0,0 +1,220 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** 
[More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + + +- PEFT 0.6.3.dev0 diff --git a/checkpoint-950/adapter_config.json b/checkpoint-950/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1666479dbaa072c6d89f7f7e0c7e1206aea6cdf --- /dev/null +++ b/checkpoint-950/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + 
"target_modules": [ + "k_proj", + "gate_proj", + "lm_head", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-950/adapter_model.safetensors b/checkpoint-950/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59d09e6a6ca491971dde6252600002b5ffc338f0 --- /dev/null +++ b/checkpoint-950/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea7c000427f71c20b5f25db845d468c72f8fe6daaf2f6243489c48cfb16f861 +size 85100592 diff --git a/checkpoint-950/optimizer.pt b/checkpoint-950/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..48400777a0c357879902f7ad91d249ed651c4a87 --- /dev/null +++ b/checkpoint-950/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da7802227cd4b7def9664ad02f69c8b0b4132e67a8ec1fbd08d18dd3a5b19fe +size 43127132 diff --git a/checkpoint-950/rng_state.pth b/checkpoint-950/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..04484dc2042e632c663472d42cb0bdeb6f4f0dd5 --- /dev/null +++ b/checkpoint-950/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58aa4a1f4ee74c1f429b17faed518bd9ab16ef6f1ae91406f0b966380c2da8ef +size 14244 diff --git a/checkpoint-950/scheduler.pt b/checkpoint-950/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..40b35000bae4f8304ea298a207fc940a46776af5 --- /dev/null +++ b/checkpoint-950/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8702e895a6b0d1b6a0fc15d813777f3f0bcf1b2cf280a0cb1a49beb133097a3 +size 1064 diff --git a/checkpoint-950/trainer_state.json b/checkpoint-950/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9b733b821b6af816efd1eb880760a9bb8e683cf4 --- /dev/null +++ b/checkpoint-950/trainer_state.json @@ -0,0 +1,286 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.489028213166144, + "eval_steps": 50, + "global_step": 950, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 0.7797, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.2723180055618286, + "eval_runtime": 135.6616, + "eval_samples_per_second": 5.263, + "eval_steps_per_second": 0.663, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.2457, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.22004182636737823, + "eval_runtime": 136.2348, + "eval_samples_per_second": 5.241, + "eval_steps_per_second": 0.661, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.2088, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.19266444444656372, + "eval_runtime": 136.6465, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.659, + "step": 150 + }, + { + "epoch": 0.31, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.1832, + "step": 200 + }, + { + "epoch": 0.31, + "eval_loss": 0.17922177910804749, + "eval_runtime": 136.7121, + "eval_samples_per_second": 5.223, + "eval_steps_per_second": 0.658, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 1.884422110552764e-05, + "loss": 0.1754, + "step": 250 + }, + { + "epoch": 0.39, + "eval_loss": 0.17311859130859375, + "eval_runtime": 136.3058, + 
"eval_samples_per_second": 5.238, + "eval_steps_per_second": 0.66, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.169, + "step": 300 + }, + { + "epoch": 0.47, + "eval_loss": 0.16897280514240265, + "eval_runtime": 136.923, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 300 + }, + { + "epoch": 0.55, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.166, + "step": 350 + }, + { + "epoch": 0.55, + "eval_loss": 0.1663457602262497, + "eval_runtime": 136.6033, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 350 + }, + { + "epoch": 0.63, + "learning_rate": 1.507537688442211e-05, + "loss": 0.1682, + "step": 400 + }, + { + "epoch": 0.63, + "eval_loss": 0.16482460498809814, + "eval_runtime": 136.5801, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 400 + }, + { + "epoch": 0.71, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.1576, + "step": 450 + }, + { + "epoch": 0.71, + "eval_loss": 0.16245244443416595, + "eval_runtime": 136.7662, + "eval_samples_per_second": 5.221, + "eval_steps_per_second": 0.658, + "step": 450 + }, + { + "epoch": 0.78, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.165, + "step": 500 + }, + { + "epoch": 0.78, + "eval_loss": 0.16068558394908905, + "eval_runtime": 136.6019, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 0.659, + "step": 500 + }, + { + "epoch": 0.86, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.152, + "step": 550 + }, + { + "epoch": 0.86, + "eval_loss": 0.15984833240509033, + "eval_runtime": 136.8975, + "eval_samples_per_second": 5.216, + "eval_steps_per_second": 0.657, + "step": 550 + }, + { + "epoch": 0.94, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.1563, + "step": 600 + }, + { + "epoch": 0.94, + "eval_loss": 0.15865428745746613, + "eval_runtime": 136.9521, + "eval_samples_per_second": 5.214, + "eval_steps_per_second": 0.657, + "step": 600 + }, + { + "epoch": 1.02, + "learning_rate": 8.793969849246232e-06, + "loss": 0.1477, + "step": 650 + }, + { + "epoch": 1.02, + "eval_loss": 0.1577940434217453, + "eval_runtime": 136.5669, + "eval_samples_per_second": 5.228, + "eval_steps_per_second": 0.659, + "step": 650 + }, + { + "epoch": 1.1, + "learning_rate": 7.537688442211055e-06, + "loss": 0.1491, + "step": 700 + }, + { + "epoch": 1.1, + "eval_loss": 0.157754048705101, + "eval_runtime": 136.107, + "eval_samples_per_second": 5.246, + "eval_steps_per_second": 0.661, + "step": 700 + }, + { + "epoch": 1.18, + "learning_rate": 6.2814070351758795e-06, + "loss": 0.1466, + "step": 750 + }, + { + "epoch": 1.18, + "eval_loss": 0.1569654941558838, + "eval_runtime": 137.1916, + "eval_samples_per_second": 5.204, + "eval_steps_per_second": 0.656, + "step": 750 + }, + { + "epoch": 1.25, + "learning_rate": 5.025125628140704e-06, + "loss": 0.1383, + "step": 800 + }, + { + "epoch": 1.25, + "eval_loss": 0.15617845952510834, + "eval_runtime": 136.7366, + "eval_samples_per_second": 5.222, + "eval_steps_per_second": 0.658, + "step": 800 + }, + { + "epoch": 1.33, + "learning_rate": 3.7688442211055276e-06, + "loss": 0.1417, + "step": 850 + }, + { + "epoch": 1.33, + "eval_loss": 0.15615858137607574, + "eval_runtime": 136.2828, + "eval_samples_per_second": 5.239, + "eval_steps_per_second": 0.66, + "step": 850 + }, + { + "epoch": 1.41, + "learning_rate": 2.512562814070352e-06, + "loss": 0.1374, + "step": 900 + }, + { + "epoch": 1.41, + "eval_loss": 0.155540332198143, + "eval_runtime": 137.0904, + 
"eval_samples_per_second": 5.208, + "eval_steps_per_second": 0.657, + "step": 900 + }, + { + "epoch": 1.49, + "learning_rate": 1.256281407035176e-06, + "loss": 0.147, + "step": 950 + }, + { + "epoch": 1.49, + "eval_loss": 0.15468443930149078, + "eval_runtime": 136.9218, + "eval_samples_per_second": 5.215, + "eval_steps_per_second": 0.657, + "step": 950 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.6648846232990515e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-950/training_args.bin b/checkpoint-950/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6ef0510bf0e93897291ec1b6d0026dbc86e8a --- /dev/null +++ b/checkpoint-950/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f208e3c6bbc0ff595dc52e32a7309c9e57d7d78823b465b2b38edcf101eb89a +size 4600