diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d66fa05047349ecbcaccc3146d1b4a332158878 --- /dev/null +++ b/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a99864a548560a398945f587eea1503839b2450902d2b417b71e2f8b0ad4db4 +size 609389712 diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..355ab6613770728eaa592065509bd2f4e6aa4a86 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23be3dd51c5413dcd449a047cde520fbf4e02c09d3a658d183ad7c7bd08bc336 +size 43126684 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3d5efeff0f311d36b93b974fbd9869bf3bcde4b8 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70f5a59a14d46de7f0d0af48c5b2eeb8a470600eb778fece520dd91926fb5399 +size 14244 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8bf030fcd0d8b1fbecf02fa180b2dd61513b0226 --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f46dc04db0a603406c597c113e229228b08858bb09b49bfebd3512f1a8f3306 +size 1064 diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c2228bf1dcd653b56a6e91b0025804ef0d654604 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,49 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.16, + "eval_steps": 50, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.75274075357184e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-1000/README.md b/checkpoint-1000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-1000/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-1000/adapter_config.json b/checkpoint-1000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-1000/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1000/adapter_model.safetensors b/checkpoint-1000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39f8e1d4ed391f56f25a1c1f0ff4b2e7b6fd5df9 --- /dev/null +++ b/checkpoint-1000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efa4fac3f59611bb4a9c6157eb62479ccf376c48de016c379070c61193239953 +size 609389712 diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a59766b99e81248b4a50b7ac68a465eccf788ae --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d1fbbcbf3edb83fadf8d05c6f22141b0b4148f540d619d08274309f758afca3 +size 43127132 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c20859deec23376e4fb18225631744395d5ed8cb --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:083d1fab437bfe07458908c484b3955a52f3a1daf94849802b42e40af820f9d4 +size 14244 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb079ac777435f69b38299460f32248e366aa526 --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4d6d865d6518a82dd54bb09f8f02628ebe31ca8be097a65ef5c8faff7622969 +size 1064 diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..86012ca7a62aa3f0198a3ed22ee2e63a3f0afe89 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,301 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6, + "eval_steps": 50, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.8272, + "step": 300 + }, + { + "epoch": 0.48, + "eval_loss": 0.807165801525116, + "eval_runtime": 405.5942, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 300 + }, + { + "epoch": 0.56, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.8132, + "step": 350 + }, + { + "epoch": 0.56, + "eval_loss": 0.7971442341804504, + "eval_runtime": 405.5887, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 350 + }, + { + "epoch": 0.64, + "learning_rate": 1.507537688442211e-05, + "loss": 0.8124, + "step": 400 + }, + { + "epoch": 0.64, + "eval_loss": 0.7893713116645813, + "eval_runtime": 405.5093, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 400 + }, + { + "epoch": 0.72, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.7823, + "step": 450 + }, + { + "epoch": 0.72, + "eval_loss": 0.7817508578300476, + "eval_runtime": 405.5623, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 450 + }, + { + "epoch": 0.8, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.772, + "step": 500 + }, + { + "epoch": 0.8, + "eval_loss": 0.7751882672309875, + "eval_runtime": 405.5365, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 500 + }, + { + "epoch": 0.88, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.7668, + "step": 550 + }, + { + "epoch": 0.88, + "eval_loss": 0.7704442143440247, + "eval_runtime": 405.5551, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 550 + }, + { + "epoch": 0.96, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.7827, + "step": 600 + }, + { + "epoch": 0.96, + "eval_loss": 0.7652014493942261, + "eval_runtime": 405.6401, + "eval_samples_per_second": 12.326, + "eval_steps_per_second": 1.541, + "step": 600 + }, + { + "epoch": 1.04, + "learning_rate": 8.793969849246232e-06, + "loss": 0.7248, + "step": 650 + }, + { + "epoch": 1.04, + "eval_loss": 0.7625133991241455, + "eval_runtime": 405.4991, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 650 + }, + { + "epoch": 1.12, + "learning_rate": 7.537688442211055e-06, + "loss": 0.7107, + "step": 700 + }, + { + "epoch": 1.12, + "eval_loss": 0.7591288089752197, + "eval_runtime": 405.5072, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 700 + }, + { + "epoch": 1.2, + "learning_rate": 6.2814070351758795e-06, + "loss": 0.7094, + "step": 750 + }, + { + "epoch": 1.2, + "eval_loss": 0.7564280033111572, + "eval_runtime": 405.5407, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 750 + }, + { + "epoch": 1.28, + "learning_rate": 5.025125628140704e-06, + "loss": 0.7203, + "step": 800 + }, + { + "epoch": 1.28, + "eval_loss": 0.7533515095710754, + "eval_runtime": 405.633, + "eval_samples_per_second": 12.326, + "eval_steps_per_second": 1.541, + "step": 800 + }, + { + "epoch": 1.36, + "learning_rate": 3.7688442211055276e-06, + "loss": 0.6954, + "step": 850 + }, + { + "epoch": 1.36, + "eval_loss": 0.7509064674377441, + "eval_runtime": 405.5277, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 850 + }, + { + "epoch": 1.44, + "learning_rate": 2.512562814070352e-06, + "loss": 0.705, + "step": 900 + }, + { + "epoch": 1.44, + "eval_loss": 0.7496302723884583, + "eval_runtime": 405.4847, + "eval_samples_per_second": 12.331, + "eval_steps_per_second": 1.541, + "step": 900 + }, + { + "epoch": 1.52, + "learning_rate": 1.256281407035176e-06, + "loss": 0.7173, + "step": 950 + }, + { + "epoch": 1.52, + "eval_loss": 0.7481338381767273, + "eval_runtime": 405.49, + "eval_samples_per_second": 12.331, + "eval_steps_per_second": 1.541, + "step": 950 + }, + { + "epoch": 1.6, + "learning_rate": 0.0, + "loss": 0.696, + "step": 1000 + }, + { + "epoch": 1.6, + "eval_loss": 0.7476922869682312, + "eval_runtime": 405.5359, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 1000 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.75274075357184e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-150/README.md b/checkpoint-150/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-150/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-150/adapter_config.json b/checkpoint-150/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-150/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-150/adapter_model.safetensors b/checkpoint-150/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c854fb0365101963a75d2cd8af0596308e2fc06f --- /dev/null +++ b/checkpoint-150/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:169738fc4d8af44ef693ef9435d0a01693036c537375ce01bbc4d9487d76ed51 +size 609389712 diff --git a/checkpoint-150/optimizer.pt b/checkpoint-150/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f0af6e25bcd513f972088c5bb34249a59f1b298 --- /dev/null +++ b/checkpoint-150/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b020b24cdb28ab539944706ba7060d5f1c9eefd873eaee771a1c4d0870b775e8 +size 43126684 diff --git a/checkpoint-150/rng_state.pth b/checkpoint-150/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..76c345ff282fc4cadf88e511b2d8aa15e1a6cf6f --- /dev/null +++ b/checkpoint-150/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03c0053a7fcb9ca154abb520c2f6bce83e88fb3b95860f9cfec37a406f29da17 +size 14244 diff --git a/checkpoint-150/scheduler.pt b/checkpoint-150/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..49d58f979ce05f436a6917857895bda3fa2d6188 --- /dev/null +++ b/checkpoint-150/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87a0a7460dd8b31647fa0542d6e8cdd02c31293f0704d27ec57a49b4c476aa1c +size 1064 diff --git a/checkpoint-150/trainer_state.json b/checkpoint-150/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f20f0eff8a60a8f6aa1f2c41d14c503f4c51ba4c --- /dev/null +++ b/checkpoint-150/trainer_state.json @@ -0,0 +1,63 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.24, + "eval_steps": 50, + "global_step": 150, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 2.62911113035776e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-150/training_args.bin b/checkpoint-150/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-150/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-200/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-200/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-200/adapter_model.safetensors b/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..144123e84e9603d342d93ac41c9b1825c62ee24a --- /dev/null +++ b/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07a087caeab1b8377f1841f6a481e25190cc8562593d63ceaf5637ca62c06e70 +size 609389712 diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0af5ba44b904780c5a81ddaef201e30192c07378 --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eed8632771b997aa743faf7e217a32800a025dfb62546b5be603731e3063aee +size 43126684 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ea60a493e18326030402447fea87e8cd7168ba82 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7775948c4249583aa53279096045195942843fd474c8cd9ee4590b8a17e724e7 +size 14244 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..72d84b5e537a910294a56f642241016921e9f87a --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1b477d3bb44d9bf70633240462f7ac6e455d50eefacf5b2433c62e0cc9e80d +size 1064 diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a217c78ba386a6b11836c7e075bda7490104a9e3 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,77 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.32, + "eval_steps": 50, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 3.50548150714368e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-250/README.md b/checkpoint-250/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-250/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-250/adapter_config.json b/checkpoint-250/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-250/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-250/adapter_model.safetensors b/checkpoint-250/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a713f682009d35c346718bb01915206d5a5303f --- /dev/null +++ b/checkpoint-250/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4e6a4e2b7009665a2d08d8ab7f983982770bfee208f4a0a5c606abd0a4b63a1 +size 609389712 diff --git a/checkpoint-250/optimizer.pt b/checkpoint-250/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..859a3507e00bbeb2c452b2763a4bd0aec282ca10 --- /dev/null +++ b/checkpoint-250/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb05fea125c57a8ef83000219df69bffb6979627a74fbfc41660f53b9e94728c +size 43126684 diff --git a/checkpoint-250/rng_state.pth b/checkpoint-250/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..61e5e5e28ea7a75401b64e8a5b7e53e6c0eb7f1a --- /dev/null +++ b/checkpoint-250/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5a4d1ace6cff644daeafee2ed3c659c8ad6d7329984fac4814598148287f45 +size 14244 diff --git a/checkpoint-250/scheduler.pt b/checkpoint-250/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..95fe513e9fe8f72dc7b9ffa5f782cbf0f7422fc6 --- /dev/null +++ b/checkpoint-250/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09471b95cb193b326e2ae9278591cdf878ced8cb70ac85a4cb6b83f68d62fc51 +size 1064 diff --git a/checkpoint-250/trainer_state.json b/checkpoint-250/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cb6d1d60d4d6b5bb17f68298255fe14aa80704a9 --- /dev/null +++ b/checkpoint-250/trainer_state.json @@ -0,0 +1,91 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4, + "eval_steps": 50, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 4.3818518839296e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-250/training_args.bin b/checkpoint-250/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-250/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-300/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-300/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-300/adapter_model.safetensors b/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f43349fe27a9a335ec61003332adf7c1d3848d53 --- /dev/null +++ b/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba36c6267bc7ed00bf4e47e3408e4caf16672d746808f3d2bca56b54245a1745 +size 609389712 diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..031bd0a3239885a9de5d4fad11423d7dfdb55051 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62163ab77d93bb009c21e46694dcc45375176a177ef306aa01c27c512a746a18 +size 43127132 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f1c184a0e2abcfffa55036562e911ad7dc437592 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae384141208d14371e9167d8c1fa551691aeae58cda37eb5ecefcd1d2d5aaba +size 14244 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bdb954848e9822cd7c1e29d3b40bf45fa6b86357 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25d0ec4220fe093365424ee63188b9cc5436640be7c2cb84202c87d53f32aeaf +size 1064 diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..76105b946aaf211e3266b5f23a8cc685fd004353 --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,105 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.48, + "eval_steps": 50, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.8272, + "step": 300 + }, + { + "epoch": 0.48, + "eval_loss": 0.807165801525116, + "eval_runtime": 405.5942, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 300 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 5.25822226071552e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-350/README.md b/checkpoint-350/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-350/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-350/adapter_config.json b/checkpoint-350/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-350/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-350/adapter_model.safetensors b/checkpoint-350/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..224ab7b833be9b5f1ba2f93ada92caf7ff0c6885 --- /dev/null +++ b/checkpoint-350/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee0751fee811a1f1b7684b6750b74ac4695ba47387e1e94b5070a1267974748e +size 609389712 diff --git a/checkpoint-350/optimizer.pt b/checkpoint-350/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bbb2343401bb96e7520cf26f4f05b53646610f38 --- /dev/null +++ b/checkpoint-350/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d37e77f608a4aeec57dc79233b0011fb1037e786480b496106efa62cbd0c1e5 +size 43127132 diff --git a/checkpoint-350/rng_state.pth b/checkpoint-350/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..360578bab774d6ad9c0ac4023168da3b3370e190 --- /dev/null +++ b/checkpoint-350/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2007e1ce0f14325986972ec49c69379e221e4e3a98af0e1add7f1d4189281592 +size 14244 diff --git a/checkpoint-350/scheduler.pt b/checkpoint-350/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8ae24c11986cf24c39cadca76e59270bbeb6f76 --- /dev/null +++ b/checkpoint-350/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb9f9d20eafb507e007e0db2ee7fa0cc38244f7a2c2b2a604378cdb9e12dadb4 +size 1064 diff --git a/checkpoint-350/trainer_state.json b/checkpoint-350/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f283a3038393fd55c2e04a7e3014b3801659f9a4 --- /dev/null +++ b/checkpoint-350/trainer_state.json @@ -0,0 +1,119 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.56, + "eval_steps": 50, + "global_step": 350, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.8272, + "step": 300 + }, + { + "epoch": 0.48, + "eval_loss": 0.807165801525116, + "eval_runtime": 405.5942, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 300 + }, + { + "epoch": 0.56, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.8132, + "step": 350 + }, + { + "epoch": 0.56, + "eval_loss": 0.7971442341804504, + "eval_runtime": 405.5887, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 350 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 6.13459263750144e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-350/training_args.bin b/checkpoint-350/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-350/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-400/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-400/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-400/adapter_model.safetensors b/checkpoint-400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60cc26f1d01ac7523b2ac34d94ea120834d6342c --- /dev/null +++ b/checkpoint-400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b8edcd9cfba22b681ea8a05f8e8a9f9edf5d0e9837842e001ba193f35a879fb +size 609389712 diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb3bb7e82d626b774797699536b56857670f1a33 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e883d5497327a1a9ddfd57608ad1e424812db799024cadb84e96a415b7cfe0f +size 43127132 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..25124c5e8e7502970e42c61b28b7a645d92c79b0 --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0f421bc95467030268a15e26d8df95bd839c0b07f5f09a5901c9d8ccab1966a +size 14244 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..52939dd56953607040022110b17355a120077961 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7181c684d8bc6db848c39932fb3b82e6b80240c695625aac4584086e7663877b +size 1064 diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9d44062c8d12b274430a9f877ce47042bf59e452 --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,133 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.64, + "eval_steps": 50, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.8272, + "step": 300 + }, + { + "epoch": 0.48, + "eval_loss": 0.807165801525116, + "eval_runtime": 405.5942, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 300 + }, + { + "epoch": 0.56, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.8132, + "step": 350 + }, + { + "epoch": 0.56, + "eval_loss": 0.7971442341804504, + "eval_runtime": 405.5887, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 350 + }, + { + "epoch": 0.64, + "learning_rate": 1.507537688442211e-05, + "loss": 0.8124, + "step": 400 + }, + { + "epoch": 0.64, + "eval_loss": 0.7893713116645813, + "eval_runtime": 405.5093, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 400 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 7.01096301428736e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-450/README.md b/checkpoint-450/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-450/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-450/adapter_config.json b/checkpoint-450/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-450/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-450/adapter_model.safetensors b/checkpoint-450/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5a0607e29a2cb2947c8eb69e8392a17af90edd8 --- /dev/null +++ b/checkpoint-450/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3577b434f638ee00fbbd10ed2a8d2560cb533f66d89c30ae27ea7b2df99edfec +size 609389712 diff --git a/checkpoint-450/optimizer.pt b/checkpoint-450/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..296da5b5ab41e56fa42dcfacdcbd30885b1f41cc --- /dev/null +++ b/checkpoint-450/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f84101a72c186c5efa1a36df67445e62ab819c7cbeee03e39522c0fe93bff6 +size 43127132 diff --git a/checkpoint-450/rng_state.pth b/checkpoint-450/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c8be50e0d17faa9ae4ae52d3597e729f33c17d3 --- /dev/null +++ b/checkpoint-450/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d6127fdfcbb17a924f253b33b28cad75bb64bc35f5cbe475607aeb233685df0 +size 14244 diff --git a/checkpoint-450/scheduler.pt b/checkpoint-450/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbc3bf76d5260c2c30bfc67361e92a8589cadbe7 --- /dev/null +++ b/checkpoint-450/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1067d941ad3656559599b88573da32c1132c71afd7464f5844b679059050ac89 +size 1064 diff --git a/checkpoint-450/trainer_state.json b/checkpoint-450/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..97eec18602d0cc0b11b138e726f97240c1a1ca75 --- /dev/null +++ b/checkpoint-450/trainer_state.json @@ -0,0 +1,147 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.72, + "eval_steps": 50, + "global_step": 450, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.8272, + "step": 300 + }, + { + "epoch": 0.48, + "eval_loss": 0.807165801525116, + "eval_runtime": 405.5942, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 300 + }, + { + "epoch": 0.56, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.8132, + "step": 350 + }, + { + "epoch": 0.56, + "eval_loss": 0.7971442341804504, + "eval_runtime": 405.5887, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 350 + }, + { + "epoch": 0.64, + "learning_rate": 1.507537688442211e-05, + "loss": 0.8124, + "step": 400 + }, + { + "epoch": 0.64, + "eval_loss": 0.7893713116645813, + "eval_runtime": 405.5093, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 400 + }, + { + "epoch": 0.72, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.7823, + "step": 450 + }, + { + "epoch": 0.72, + "eval_loss": 0.7817508578300476, + "eval_runtime": 405.5623, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 450 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 7.88733339107328e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-450/training_args.bin b/checkpoint-450/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-450/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-50/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-50/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-50/adapter_model.safetensors b/checkpoint-50/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..612eeeb29d30c8c812fbf93e72e61c94225a303e --- /dev/null +++ b/checkpoint-50/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e46c84b3ea9990f3179703c46c784b514b30304359e5c05e72cb16964ba220c +size 609389712 diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc2153cd10360c7ecb819bc2a8d7c2589129f7dd --- /dev/null +++ b/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb96e382c84e28b9aea862fb4e6ff99da42b6fbf5f8d91ebd05d874078e99fc5 +size 43126684 diff --git a/checkpoint-50/rng_state.pth b/checkpoint-50/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a2cec1570de9c6177615af5fc2a3965c31ea6ed8 --- /dev/null +++ b/checkpoint-50/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0553051d10408eee59c3840a1a9bfb3b86f5cd48f5ff749de95ca6ef87ba0668 +size 14244 diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..29ddae8f47fa44816202135e154475d713372793 --- /dev/null +++ b/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b44bde04014e0643297b0f127d92402c51731c3d038fc538c9a302aac901fb6 +size 1064 diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..89fc4c1158688523cf7335692b0191aa08cd242f --- /dev/null +++ b/checkpoint-50/trainer_state.json @@ -0,0 +1,35 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.08, + "eval_steps": 50, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 8763703767859200.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-500/README.md b/checkpoint-500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-500/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-500/adapter_config.json b/checkpoint-500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-500/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-500/adapter_model.safetensors b/checkpoint-500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba3bf5dcaa148fed89b31b7171b92f2f05032862 --- /dev/null +++ b/checkpoint-500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b90f82ca3d7783ecb42e61455e3721817b0e3ebe536d80db80b75ca3d1409297 +size 609389712 diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ad53f80026600867d06e6f0085dd616358ba6de --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26aed64c6bcd6cff9a393a2a84ac6d3739eb22a149805090fbd8a23fb7445ced +size 43127132 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0fa4cfa6f06617d3a4f61e85a19cf862e8a74f3b --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c81ec963b08ff90f15851ff1fe201dacb6fdf773d811710421103d3c2e0c052d +size 14244 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..89463731d1b5f59fe378f306cca2f74b47a04dc3 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08914fd90e4abb043440f2510884e4614f7e83091c72cc4c2d2f0ff220357a6e +size 1064 diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..125ff391c073ef9cdc0e4992df7a84c2750cfecd --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,161 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8, + "eval_steps": 50, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.8272, + "step": 300 + }, + { + "epoch": 0.48, + "eval_loss": 0.807165801525116, + "eval_runtime": 405.5942, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 300 + }, + { + "epoch": 0.56, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.8132, + "step": 350 + }, + { + "epoch": 0.56, + "eval_loss": 0.7971442341804504, + "eval_runtime": 405.5887, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 350 + }, + { + "epoch": 0.64, + "learning_rate": 1.507537688442211e-05, + "loss": 0.8124, + "step": 400 + }, + { + "epoch": 0.64, + "eval_loss": 0.7893713116645813, + "eval_runtime": 405.5093, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 400 + }, + { + "epoch": 0.72, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.7823, + "step": 450 + }, + { + "epoch": 0.72, + "eval_loss": 0.7817508578300476, + "eval_runtime": 405.5623, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 450 + }, + { + "epoch": 0.8, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.772, + "step": 500 + }, + { + "epoch": 0.8, + "eval_loss": 0.7751882672309875, + "eval_runtime": 405.5365, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 500 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 8.7637037678592e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-550/README.md b/checkpoint-550/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-550/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-550/adapter_config.json b/checkpoint-550/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-550/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-550/adapter_model.safetensors b/checkpoint-550/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..027a9e8a545b1ad0e185b84152e040a6f81c4480 --- /dev/null +++ b/checkpoint-550/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e647c2b3d4c6928f20b36b99db338b2d6fcc10920fa16948fcd4ab7e17616211 +size 609389712 diff --git a/checkpoint-550/optimizer.pt b/checkpoint-550/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf0300066c9ca40037d6eb1c365b3c0d3d16698d --- /dev/null +++ b/checkpoint-550/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb6edeffebaba5153ea4aeea3bebd1b998c3393ccfc480ee3b66f1563095c23c +size 43127132 diff --git a/checkpoint-550/rng_state.pth b/checkpoint-550/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..adc20e0c648ab717f3bac7c444900932be927e09 --- /dev/null +++ b/checkpoint-550/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:646461297fb11e289425e945df4cbe38623366f09a858475fabba9e599f96747 +size 14244 diff --git a/checkpoint-550/scheduler.pt b/checkpoint-550/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a651c534e0d965f44cae8d5bf2dd3c658e41d3c8 --- /dev/null +++ b/checkpoint-550/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6090bcf779d364236641cfe14c77a35431b5bfafcd25a884b8a5b97a4fc6fe12 +size 1064 diff --git a/checkpoint-550/trainer_state.json b/checkpoint-550/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0bc0e455d726a4857ef135945219261fa31ccd25 --- /dev/null +++ b/checkpoint-550/trainer_state.json @@ -0,0 +1,175 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.88, + "eval_steps": 50, + "global_step": 550, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.8272, + "step": 300 + }, + { + "epoch": 0.48, + "eval_loss": 0.807165801525116, + "eval_runtime": 405.5942, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 300 + }, + { + "epoch": 0.56, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.8132, + "step": 350 + }, + { + "epoch": 0.56, + "eval_loss": 0.7971442341804504, + "eval_runtime": 405.5887, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 350 + }, + { + "epoch": 0.64, + "learning_rate": 1.507537688442211e-05, + "loss": 0.8124, + "step": 400 + }, + { + "epoch": 0.64, + "eval_loss": 0.7893713116645813, + "eval_runtime": 405.5093, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 400 + }, + { + "epoch": 0.72, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.7823, + "step": 450 + }, + { + "epoch": 0.72, + "eval_loss": 0.7817508578300476, + "eval_runtime": 405.5623, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 450 + }, + { + "epoch": 0.8, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.772, + "step": 500 + }, + { + "epoch": 0.8, + "eval_loss": 0.7751882672309875, + "eval_runtime": 405.5365, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 500 + }, + { + "epoch": 0.88, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.7668, + "step": 550 + }, + { + "epoch": 0.88, + "eval_loss": 0.7704442143440247, + "eval_runtime": 405.5551, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 550 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 9.64007414464512e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-550/training_args.bin b/checkpoint-550/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-550/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-600/README.md b/checkpoint-600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-600/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-600/adapter_config.json b/checkpoint-600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-600/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-600/adapter_model.safetensors b/checkpoint-600/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02f4498b11d102678f03a698e5879450e113d298 --- /dev/null +++ b/checkpoint-600/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ef59a13d55da988cc77ed5e9485e2ad6e110a3f1c894d236cfa3fdc8b06650f +size 609389712 diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..72b24c7b8d3d0af6961a705bd02a375147242267 --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66626662e7d09282c73c427db047b65bed71025ed2b00c2183e2391f05b9b257 +size 43127132 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a98bceaefb1e01880d4c8e6f65bd5111b194f0f8 --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87b43c01ca858f576b733efb66d155de633fb4f0175c92dd5c886c317999160b +size 14244 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5dfe1f9e4f86adab5523d67985799f485c78132 --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9ce7ff6d6acf4945e377262122c7e37fa2f686114026e8b558ebf0faa33a8fb +size 1064 diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e67ea81491eda3a8b578c2dedd1ef898226d816c --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,189 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.96, + "eval_steps": 50, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.8272, + "step": 300 + }, + { + "epoch": 0.48, + "eval_loss": 0.807165801525116, + "eval_runtime": 405.5942, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 300 + }, + { + "epoch": 0.56, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.8132, + "step": 350 + }, + { + "epoch": 0.56, + "eval_loss": 0.7971442341804504, + "eval_runtime": 405.5887, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 350 + }, + { + "epoch": 0.64, + "learning_rate": 1.507537688442211e-05, + "loss": 0.8124, + "step": 400 + }, + { + "epoch": 0.64, + "eval_loss": 0.7893713116645813, + "eval_runtime": 405.5093, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 400 + }, + { + "epoch": 0.72, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.7823, + "step": 450 + }, + { + "epoch": 0.72, + "eval_loss": 0.7817508578300476, + "eval_runtime": 405.5623, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 450 + }, + { + "epoch": 0.8, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.772, + "step": 500 + }, + { + "epoch": 0.8, + "eval_loss": 0.7751882672309875, + "eval_runtime": 405.5365, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 500 + }, + { + "epoch": 0.88, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.7668, + "step": 550 + }, + { + "epoch": 0.88, + "eval_loss": 0.7704442143440247, + "eval_runtime": 405.5551, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 550 + }, + { + "epoch": 0.96, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.7827, + "step": 600 + }, + { + "epoch": 0.96, + "eval_loss": 0.7652014493942261, + "eval_runtime": 405.6401, + "eval_samples_per_second": 12.326, + "eval_steps_per_second": 1.541, + "step": 600 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.051644452143104e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-650/README.md b/checkpoint-650/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-650/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-650/adapter_config.json b/checkpoint-650/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-650/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-650/adapter_model.safetensors b/checkpoint-650/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..acbd1b79683ff95085d764ee7b8f3126bec18dea --- /dev/null +++ b/checkpoint-650/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:270f53cf2783d912ae945b905f4ef9f25839ac9ff7a7e812459d8dba51d4c5f9 +size 609389712 diff --git a/checkpoint-650/optimizer.pt b/checkpoint-650/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..31e4532516757258feb53e6f8d6cca978a5b420f --- /dev/null +++ b/checkpoint-650/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d994da21e0cc750dd4b71c474688d98438efc77f86cc4319a91c9a7091201ace +size 43127132 diff --git a/checkpoint-650/rng_state.pth b/checkpoint-650/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e7700fefb603c5a51a2220cb818bd77eee0ad374 --- /dev/null +++ b/checkpoint-650/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dbd2aff071536c93dcca8875506dc2e45bce5d8e83d9b97985d299aff00969b +size 14244 diff --git a/checkpoint-650/scheduler.pt b/checkpoint-650/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cdd260fbee15f41b499155a2c0891ed4c0aaafe4 --- /dev/null +++ b/checkpoint-650/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64cb7a43229696b470cfa762fb458086df2a2667853db1b1fbc8be13f2ebd755 +size 1064 diff --git a/checkpoint-650/trainer_state.json b/checkpoint-650/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4bbc966e35672aef9cdce8f77cec631ed7db3872 --- /dev/null +++ b/checkpoint-650/trainer_state.json @@ -0,0 +1,203 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.04, + "eval_steps": 50, + "global_step": 650, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.8272, + "step": 300 + }, + { + "epoch": 0.48, + "eval_loss": 0.807165801525116, + "eval_runtime": 405.5942, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 300 + }, + { + "epoch": 0.56, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.8132, + "step": 350 + }, + { + "epoch": 0.56, + "eval_loss": 0.7971442341804504, + "eval_runtime": 405.5887, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 350 + }, + { + "epoch": 0.64, + "learning_rate": 1.507537688442211e-05, + "loss": 0.8124, + "step": 400 + }, + { + "epoch": 0.64, + "eval_loss": 0.7893713116645813, + "eval_runtime": 405.5093, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 400 + }, + { + "epoch": 0.72, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.7823, + "step": 450 + }, + { + "epoch": 0.72, + "eval_loss": 0.7817508578300476, + "eval_runtime": 405.5623, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 450 + }, + { + "epoch": 0.8, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.772, + "step": 500 + }, + { + "epoch": 0.8, + "eval_loss": 0.7751882672309875, + "eval_runtime": 405.5365, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 500 + }, + { + "epoch": 0.88, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.7668, + "step": 550 + }, + { + "epoch": 0.88, + "eval_loss": 0.7704442143440247, + "eval_runtime": 405.5551, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 550 + }, + { + "epoch": 0.96, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.7827, + "step": 600 + }, + { + "epoch": 0.96, + "eval_loss": 0.7652014493942261, + "eval_runtime": 405.6401, + "eval_samples_per_second": 12.326, + "eval_steps_per_second": 1.541, + "step": 600 + }, + { + "epoch": 1.04, + "learning_rate": 8.793969849246232e-06, + "loss": 0.7248, + "step": 650 + }, + { + "epoch": 1.04, + "eval_loss": 0.7625133991241455, + "eval_runtime": 405.4991, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 650 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.139281489821696e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-650/training_args.bin b/checkpoint-650/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-650/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-700/README.md b/checkpoint-700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-700/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-700/adapter_config.json b/checkpoint-700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-700/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-700/adapter_model.safetensors b/checkpoint-700/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4d5e054d415b2e956f15795517e17bcbb0de803 --- /dev/null +++ b/checkpoint-700/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55cb3a725e1ea420e1d1b4d154cb33285c146ec706f9fc14c6b6b0665097e1b7 +size 609389712 diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fabd61b3838daa200cbf2ba7e66b8a68cc6c4712 --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d10aff25f8a52cdaaff4457770e804da285ab38c3a5518f2de8a3c058d86f202 +size 43127132 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3e0702d9774c9b5a0cb703f7bf484485a1ac76c4 --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b96516f7624d7067fc3c86ead5767e446098a97396af88f7ef3d5a917eebaea2 +size 14244 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..02cbb3376c0ab4a41059d1471bfcd017761e9cbc --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8641b104236537da719992bf5637d9a29f245da450fd6a0c1ade6f59294e3067 +size 1064 diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..59a716e9c4cc5b84ab5b452b10a8ad22aef34ced --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.12, + "eval_steps": 50, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.8272, + "step": 300 + }, + { + "epoch": 0.48, + "eval_loss": 0.807165801525116, + "eval_runtime": 405.5942, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 300 + }, + { + "epoch": 0.56, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.8132, + "step": 350 + }, + { + "epoch": 0.56, + "eval_loss": 0.7971442341804504, + "eval_runtime": 405.5887, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 350 + }, + { + "epoch": 0.64, + "learning_rate": 1.507537688442211e-05, + "loss": 0.8124, + "step": 400 + }, + { + "epoch": 0.64, + "eval_loss": 0.7893713116645813, + "eval_runtime": 405.5093, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 400 + }, + { + "epoch": 0.72, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.7823, + "step": 450 + }, + { + "epoch": 0.72, + "eval_loss": 0.7817508578300476, + "eval_runtime": 405.5623, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 450 + }, + { + "epoch": 0.8, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.772, + "step": 500 + }, + { + "epoch": 0.8, + "eval_loss": 0.7751882672309875, + "eval_runtime": 405.5365, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 500 + }, + { + "epoch": 0.88, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.7668, + "step": 550 + }, + { + "epoch": 0.88, + "eval_loss": 0.7704442143440247, + "eval_runtime": 405.5551, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 550 + }, + { + "epoch": 0.96, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.7827, + "step": 600 + }, + { + "epoch": 0.96, + "eval_loss": 0.7652014493942261, + "eval_runtime": 405.6401, + "eval_samples_per_second": 12.326, + "eval_steps_per_second": 1.541, + "step": 600 + }, + { + "epoch": 1.04, + "learning_rate": 8.793969849246232e-06, + "loss": 0.7248, + "step": 650 + }, + { + "epoch": 1.04, + "eval_loss": 0.7625133991241455, + "eval_runtime": 405.4991, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 650 + }, + { + "epoch": 1.12, + "learning_rate": 7.537688442211055e-06, + "loss": 0.7107, + "step": 700 + }, + { + "epoch": 1.12, + "eval_loss": 0.7591288089752197, + "eval_runtime": 405.5072, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 700 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.226918527500288e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-750/README.md b/checkpoint-750/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-750/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-750/adapter_config.json b/checkpoint-750/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-750/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-750/adapter_model.safetensors b/checkpoint-750/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4defde1ee89676c56d7f103f57d6e33db30c42aa --- /dev/null +++ b/checkpoint-750/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff98d91a6b52d806ae0367254cf0e370f79343fbdf74303c757095882a2bde0 +size 609389712 diff --git a/checkpoint-750/optimizer.pt b/checkpoint-750/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8ec48c059cea261aa86594d7434ed22191be03a --- /dev/null +++ b/checkpoint-750/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51f553ba52866b6f70da0e4f0e32ab0cd785c99b3e54e44f890eb891e93cc98d +size 43127132 diff --git a/checkpoint-750/rng_state.pth b/checkpoint-750/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..02b45e920dd950af4754a64cf5d58f0adc902a26 --- /dev/null +++ b/checkpoint-750/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c335fd4d5447347a72b25eed2111ba23cf16dcf4c718cc1f4b4dcff2fb739a +size 14244 diff --git a/checkpoint-750/scheduler.pt b/checkpoint-750/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..34bee97f97b51a6d92ac9228d40e56002fad4dcc --- /dev/null +++ b/checkpoint-750/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01ff28eb4ca89c9920d5e03ccaa7e35ffa8cc6579bf5ebfee1c7132e0d1e6636 +size 1064 diff --git a/checkpoint-750/trainer_state.json b/checkpoint-750/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e9d7deb4df9592843dbb4ad94f118787037abc60 --- /dev/null +++ b/checkpoint-750/trainer_state.json @@ -0,0 +1,231 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2, + "eval_steps": 50, + "global_step": 750, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.8272, + "step": 300 + }, + { + "epoch": 0.48, + "eval_loss": 0.807165801525116, + "eval_runtime": 405.5942, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 300 + }, + { + "epoch": 0.56, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.8132, + "step": 350 + }, + { + "epoch": 0.56, + "eval_loss": 0.7971442341804504, + "eval_runtime": 405.5887, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 350 + }, + { + "epoch": 0.64, + "learning_rate": 1.507537688442211e-05, + "loss": 0.8124, + "step": 400 + }, + { + "epoch": 0.64, + "eval_loss": 0.7893713116645813, + "eval_runtime": 405.5093, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 400 + }, + { + "epoch": 0.72, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.7823, + "step": 450 + }, + { + "epoch": 0.72, + "eval_loss": 0.7817508578300476, + "eval_runtime": 405.5623, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 450 + }, + { + "epoch": 0.8, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.772, + "step": 500 + }, + { + "epoch": 0.8, + "eval_loss": 0.7751882672309875, + "eval_runtime": 405.5365, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 500 + }, + { + "epoch": 0.88, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.7668, + "step": 550 + }, + { + "epoch": 0.88, + "eval_loss": 0.7704442143440247, + "eval_runtime": 405.5551, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 550 + }, + { + "epoch": 0.96, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.7827, + "step": 600 + }, + { + "epoch": 0.96, + "eval_loss": 0.7652014493942261, + "eval_runtime": 405.6401, + "eval_samples_per_second": 12.326, + "eval_steps_per_second": 1.541, + "step": 600 + }, + { + "epoch": 1.04, + "learning_rate": 8.793969849246232e-06, + "loss": 0.7248, + "step": 650 + }, + { + "epoch": 1.04, + "eval_loss": 0.7625133991241455, + "eval_runtime": 405.4991, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 650 + }, + { + "epoch": 1.12, + "learning_rate": 7.537688442211055e-06, + "loss": 0.7107, + "step": 700 + }, + { + "epoch": 1.12, + "eval_loss": 0.7591288089752197, + "eval_runtime": 405.5072, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 700 + }, + { + "epoch": 1.2, + "learning_rate": 6.2814070351758795e-06, + "loss": 0.7094, + "step": 750 + }, + { + "epoch": 1.2, + "eval_loss": 0.7564280033111572, + "eval_runtime": 405.5407, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 750 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.31455556517888e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-750/training_args.bin b/checkpoint-750/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-750/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-800/README.md b/checkpoint-800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-800/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-800/adapter_config.json b/checkpoint-800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-800/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-800/adapter_model.safetensors b/checkpoint-800/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc12a4799ed65d03e09a7f2fd3c24f423da8cb5a --- /dev/null +++ b/checkpoint-800/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53664b23c228e0f3c017ce675f731ee126176c194161dc1645669c96dd42d762 +size 609389712 diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c841611df06526cf8daeb97ce22f41a97381cbc4 --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8429fcd1dfc8ce0960d0ac9d0bd2a5babf71b4c54a8d2ef27fbc620dd70ca1d +size 43127132 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b851d37ca2de16967fd0032ca7034692aa58ea04 --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36b59348f315476cbba572dc1e7e6ffb0fdeefd5ec98118edf11497efa662c78 +size 14244 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..91a1e379ee7f89bed4ffaba66c0bd5e6ae7b1665 --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f2387e2d26114664cc6a4cf5bd742a874dbf876f2332a74cdad212bf210513c +size 1064 diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c9f871c13ae8e8d27343cc42bc704e1d643bad31 --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,245 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.28, + "eval_steps": 50, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.8272, + "step": 300 + }, + { + "epoch": 0.48, + "eval_loss": 0.807165801525116, + "eval_runtime": 405.5942, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 300 + }, + { + "epoch": 0.56, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.8132, + "step": 350 + }, + { + "epoch": 0.56, + "eval_loss": 0.7971442341804504, + "eval_runtime": 405.5887, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 350 + }, + { + "epoch": 0.64, + "learning_rate": 1.507537688442211e-05, + "loss": 0.8124, + "step": 400 + }, + { + "epoch": 0.64, + "eval_loss": 0.7893713116645813, + "eval_runtime": 405.5093, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 400 + }, + { + "epoch": 0.72, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.7823, + "step": 450 + }, + { + "epoch": 0.72, + "eval_loss": 0.7817508578300476, + "eval_runtime": 405.5623, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 450 + }, + { + "epoch": 0.8, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.772, + "step": 500 + }, + { + "epoch": 0.8, + "eval_loss": 0.7751882672309875, + "eval_runtime": 405.5365, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 500 + }, + { + "epoch": 0.88, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.7668, + "step": 550 + }, + { + "epoch": 0.88, + "eval_loss": 0.7704442143440247, + "eval_runtime": 405.5551, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 550 + }, + { + "epoch": 0.96, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.7827, + "step": 600 + }, + { + "epoch": 0.96, + "eval_loss": 0.7652014493942261, + "eval_runtime": 405.6401, + "eval_samples_per_second": 12.326, + "eval_steps_per_second": 1.541, + "step": 600 + }, + { + "epoch": 1.04, + "learning_rate": 8.793969849246232e-06, + "loss": 0.7248, + "step": 650 + }, + { + "epoch": 1.04, + "eval_loss": 0.7625133991241455, + "eval_runtime": 405.4991, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 650 + }, + { + "epoch": 1.12, + "learning_rate": 7.537688442211055e-06, + "loss": 0.7107, + "step": 700 + }, + { + "epoch": 1.12, + "eval_loss": 0.7591288089752197, + "eval_runtime": 405.5072, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 700 + }, + { + "epoch": 1.2, + "learning_rate": 6.2814070351758795e-06, + "loss": 0.7094, + "step": 750 + }, + { + "epoch": 1.2, + "eval_loss": 0.7564280033111572, + "eval_runtime": 405.5407, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 750 + }, + { + "epoch": 1.28, + "learning_rate": 5.025125628140704e-06, + "loss": 0.7203, + "step": 800 + }, + { + "epoch": 1.28, + "eval_loss": 0.7533515095710754, + "eval_runtime": 405.633, + "eval_samples_per_second": 12.326, + "eval_steps_per_second": 1.541, + "step": 800 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.402192602857472e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-850/README.md b/checkpoint-850/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-850/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-850/adapter_config.json b/checkpoint-850/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-850/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-850/adapter_model.safetensors b/checkpoint-850/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d2814277a408e2797a12f34b1f3dbcc2eb1fbc7 --- /dev/null +++ b/checkpoint-850/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3df9c7d3490f7c7fb568331608176f475522aad2cb69e017870d885b00997e99 +size 609389712 diff --git a/checkpoint-850/optimizer.pt b/checkpoint-850/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b029218270b1fc967da422784ca9b6478c029bb1 --- /dev/null +++ b/checkpoint-850/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97f1669b56403e9d3fb99c22b1367ab89e6d91148d4f0374ce253d78987505f7 +size 43127132 diff --git a/checkpoint-850/rng_state.pth b/checkpoint-850/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fc5592300a150fc3cea5b55d24038a5ca41bc1e2 --- /dev/null +++ b/checkpoint-850/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76f0975fdc887bdd1eaa0520cd9276ec24f9d1b02fdf33f9ea02f159c8674322 +size 14244 diff --git a/checkpoint-850/scheduler.pt b/checkpoint-850/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..15211d9e9f90f890ca6da2f4b0cf1eb2a50685f4 --- /dev/null +++ b/checkpoint-850/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a80d05a9e60673a998a3e598998f59153efa52025094d8426638be58e9ddf148 +size 1064 diff --git a/checkpoint-850/trainer_state.json b/checkpoint-850/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c4fc76533b0175376586b86b3c9d4300fa4d7741 --- /dev/null +++ b/checkpoint-850/trainer_state.json @@ -0,0 +1,259 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.3599999999999999, + "eval_steps": 50, + "global_step": 850, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.8272, + "step": 300 + }, + { + "epoch": 0.48, + "eval_loss": 0.807165801525116, + "eval_runtime": 405.5942, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 300 + }, + { + "epoch": 0.56, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.8132, + "step": 350 + }, + { + "epoch": 0.56, + "eval_loss": 0.7971442341804504, + "eval_runtime": 405.5887, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 350 + }, + { + "epoch": 0.64, + "learning_rate": 1.507537688442211e-05, + "loss": 0.8124, + "step": 400 + }, + { + "epoch": 0.64, + "eval_loss": 0.7893713116645813, + "eval_runtime": 405.5093, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 400 + }, + { + "epoch": 0.72, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.7823, + "step": 450 + }, + { + "epoch": 0.72, + "eval_loss": 0.7817508578300476, + "eval_runtime": 405.5623, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 450 + }, + { + "epoch": 0.8, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.772, + "step": 500 + }, + { + "epoch": 0.8, + "eval_loss": 0.7751882672309875, + "eval_runtime": 405.5365, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 500 + }, + { + "epoch": 0.88, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.7668, + "step": 550 + }, + { + "epoch": 0.88, + "eval_loss": 0.7704442143440247, + "eval_runtime": 405.5551, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 550 + }, + { + "epoch": 0.96, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.7827, + "step": 600 + }, + { + "epoch": 0.96, + "eval_loss": 0.7652014493942261, + "eval_runtime": 405.6401, + "eval_samples_per_second": 12.326, + "eval_steps_per_second": 1.541, + "step": 600 + }, + { + "epoch": 1.04, + "learning_rate": 8.793969849246232e-06, + "loss": 0.7248, + "step": 650 + }, + { + "epoch": 1.04, + "eval_loss": 0.7625133991241455, + "eval_runtime": 405.4991, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 650 + }, + { + "epoch": 1.12, + "learning_rate": 7.537688442211055e-06, + "loss": 0.7107, + "step": 700 + }, + { + "epoch": 1.12, + "eval_loss": 0.7591288089752197, + "eval_runtime": 405.5072, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 700 + }, + { + "epoch": 1.2, + "learning_rate": 6.2814070351758795e-06, + "loss": 0.7094, + "step": 750 + }, + { + "epoch": 1.2, + "eval_loss": 0.7564280033111572, + "eval_runtime": 405.5407, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 750 + }, + { + "epoch": 1.28, + "learning_rate": 5.025125628140704e-06, + "loss": 0.7203, + "step": 800 + }, + { + "epoch": 1.28, + "eval_loss": 0.7533515095710754, + "eval_runtime": 405.633, + "eval_samples_per_second": 12.326, + "eval_steps_per_second": 1.541, + "step": 800 + }, + { + "epoch": 1.36, + "learning_rate": 3.7688442211055276e-06, + "loss": 0.6954, + "step": 850 + }, + { + "epoch": 1.36, + "eval_loss": 0.7509064674377441, + "eval_runtime": 405.5277, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 850 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.489829640536064e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-850/training_args.bin b/checkpoint-850/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-850/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-900/README.md b/checkpoint-900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-900/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-900/adapter_config.json b/checkpoint-900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-900/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-900/adapter_model.safetensors b/checkpoint-900/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..daa4fe2d4b7ed7a7e87db9ba705a4746bcd134dc --- /dev/null +++ b/checkpoint-900/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c59b23c91d79d8cc24b726231898c3a7ccc654f10da35599d3eaa8d7a398bb27 +size 609389712 diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a661c2e6bc00a28a4e29de6037b32dd8a4fb58a --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d590fd06b2e921ecf6f453d0619046e14bc9bd0c4f7574273da2ff9eed3227 +size 43127132 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2dd255d932fa636b422ad445876bece80fb90c75 --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bec3c15f40ff3a07601039109f7b64dcc036ebcc7c0d83016d1d7e3899f518e8 +size 14244 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..25662b4210c7666ed19ecf1aa7bff50a794ef7d7 --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22aa127cbf65f517d6cc6dcd8205454e61950c25d1a17d361d7e6a24d922b3a6 +size 1064 diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e5770173e337103bb2b0df826187995f91458754 --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,273 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.44, + "eval_steps": 50, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.8272, + "step": 300 + }, + { + "epoch": 0.48, + "eval_loss": 0.807165801525116, + "eval_runtime": 405.5942, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 300 + }, + { + "epoch": 0.56, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.8132, + "step": 350 + }, + { + "epoch": 0.56, + "eval_loss": 0.7971442341804504, + "eval_runtime": 405.5887, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 350 + }, + { + "epoch": 0.64, + "learning_rate": 1.507537688442211e-05, + "loss": 0.8124, + "step": 400 + }, + { + "epoch": 0.64, + "eval_loss": 0.7893713116645813, + "eval_runtime": 405.5093, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 400 + }, + { + "epoch": 0.72, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.7823, + "step": 450 + }, + { + "epoch": 0.72, + "eval_loss": 0.7817508578300476, + "eval_runtime": 405.5623, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 450 + }, + { + "epoch": 0.8, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.772, + "step": 500 + }, + { + "epoch": 0.8, + "eval_loss": 0.7751882672309875, + "eval_runtime": 405.5365, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 500 + }, + { + "epoch": 0.88, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.7668, + "step": 550 + }, + { + "epoch": 0.88, + "eval_loss": 0.7704442143440247, + "eval_runtime": 405.5551, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 550 + }, + { + "epoch": 0.96, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.7827, + "step": 600 + }, + { + "epoch": 0.96, + "eval_loss": 0.7652014493942261, + "eval_runtime": 405.6401, + "eval_samples_per_second": 12.326, + "eval_steps_per_second": 1.541, + "step": 600 + }, + { + "epoch": 1.04, + "learning_rate": 8.793969849246232e-06, + "loss": 0.7248, + "step": 650 + }, + { + "epoch": 1.04, + "eval_loss": 0.7625133991241455, + "eval_runtime": 405.4991, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 650 + }, + { + "epoch": 1.12, + "learning_rate": 7.537688442211055e-06, + "loss": 0.7107, + "step": 700 + }, + { + "epoch": 1.12, + "eval_loss": 0.7591288089752197, + "eval_runtime": 405.5072, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 700 + }, + { + "epoch": 1.2, + "learning_rate": 6.2814070351758795e-06, + "loss": 0.7094, + "step": 750 + }, + { + "epoch": 1.2, + "eval_loss": 0.7564280033111572, + "eval_runtime": 405.5407, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 750 + }, + { + "epoch": 1.28, + "learning_rate": 5.025125628140704e-06, + "loss": 0.7203, + "step": 800 + }, + { + "epoch": 1.28, + "eval_loss": 0.7533515095710754, + "eval_runtime": 405.633, + "eval_samples_per_second": 12.326, + "eval_steps_per_second": 1.541, + "step": 800 + }, + { + "epoch": 1.36, + "learning_rate": 3.7688442211055276e-06, + "loss": 0.6954, + "step": 850 + }, + { + "epoch": 1.36, + "eval_loss": 0.7509064674377441, + "eval_runtime": 405.5277, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 850 + }, + { + "epoch": 1.44, + "learning_rate": 2.512562814070352e-06, + "loss": 0.705, + "step": 900 + }, + { + "epoch": 1.44, + "eval_loss": 0.7496302723884583, + "eval_runtime": 405.4847, + "eval_samples_per_second": 12.331, + "eval_steps_per_second": 1.541, + "step": 900 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.577466678214656e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664 diff --git a/checkpoint-950/README.md b/checkpoint-950/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b --- /dev/null +++ b/checkpoint-950/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-950/adapter_config.json b/checkpoint-950/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b --- /dev/null +++ b/checkpoint-950/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "lm_head", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-950/adapter_model.safetensors b/checkpoint-950/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a48031974cef6823bf830123aad484b5091c218 --- /dev/null +++ b/checkpoint-950/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6593543e0e614d551fed5f1e5db08a5a32730b7b655ad4aea1130d7b4c99af9 +size 609389712 diff --git a/checkpoint-950/optimizer.pt b/checkpoint-950/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..83a6232dd411dbf25c3edf8eb02117240c7c25e6 --- /dev/null +++ b/checkpoint-950/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abbad7b48bae391e4faaf5eb85c943aa76ddf38e1c9eeccaaefb586eeb2b0c5a +size 43127132 diff --git a/checkpoint-950/rng_state.pth b/checkpoint-950/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4cc32671cbc030b428e267ae8a9eb43c7c0655ab --- /dev/null +++ b/checkpoint-950/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb87a3145033ec2ec27062e9ead87bea31cbd15584f4c477dc0b7a1e2df6f9cf +size 14244 diff --git a/checkpoint-950/scheduler.pt b/checkpoint-950/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..40b35000bae4f8304ea298a207fc940a46776af5 --- /dev/null +++ b/checkpoint-950/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8702e895a6b0d1b6a0fc15d813777f3f0bcf1b2cf280a0cb1a49beb133097a3 +size 1064 diff --git a/checkpoint-950/trainer_state.json b/checkpoint-950/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..658894b4c5927640e3c7ecb7bc4d7a20a8c15899 --- /dev/null +++ b/checkpoint-950/trainer_state.json @@ -0,0 +1,287 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.52, + "eval_steps": 50, + "global_step": 950, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2.3869346733668342e-05, + "loss": 1.2058, + "step": 50 + }, + { + "epoch": 0.08, + "eval_loss": 0.9411209225654602, + "eval_runtime": 405.7571, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 50 + }, + { + "epoch": 0.16, + "learning_rate": 2.2613065326633167e-05, + "loss": 0.9065, + "step": 100 + }, + { + "epoch": 0.16, + "eval_loss": 0.8835591673851013, + "eval_runtime": 405.6798, + "eval_samples_per_second": 12.325, + "eval_steps_per_second": 1.541, + "step": 100 + }, + { + "epoch": 0.24, + "learning_rate": 2.135678391959799e-05, + "loss": 0.8612, + "step": 150 + }, + { + "epoch": 0.24, + "eval_loss": 0.8560027480125427, + "eval_runtime": 405.7296, + "eval_samples_per_second": 12.323, + "eval_steps_per_second": 1.54, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.8536, + "step": 200 + }, + { + "epoch": 0.32, + "eval_loss": 0.8348749876022339, + "eval_runtime": 405.5958, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 200 + }, + { + "epoch": 0.4, + "learning_rate": 1.884422110552764e-05, + "loss": 0.8195, + "step": 250 + }, + { + "epoch": 0.4, + "eval_loss": 0.8201740384101868, + "eval_runtime": 405.604, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 1.541, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.8272, + "step": 300 + }, + { + "epoch": 0.48, + "eval_loss": 0.807165801525116, + "eval_runtime": 405.5942, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 300 + }, + { + "epoch": 0.56, + "learning_rate": 1.6331658291457288e-05, + "loss": 0.8132, + "step": 350 + }, + { + "epoch": 0.56, + "eval_loss": 0.7971442341804504, + "eval_runtime": 405.5887, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 1.541, + "step": 350 + }, + { + "epoch": 0.64, + "learning_rate": 1.507537688442211e-05, + "loss": 0.8124, + "step": 400 + }, + { + "epoch": 0.64, + "eval_loss": 0.7893713116645813, + "eval_runtime": 405.5093, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 400 + }, + { + "epoch": 0.72, + "learning_rate": 1.3819095477386935e-05, + "loss": 0.7823, + "step": 450 + }, + { + "epoch": 0.72, + "eval_loss": 0.7817508578300476, + "eval_runtime": 405.5623, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 450 + }, + { + "epoch": 0.8, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.772, + "step": 500 + }, + { + "epoch": 0.8, + "eval_loss": 0.7751882672309875, + "eval_runtime": 405.5365, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 500 + }, + { + "epoch": 0.88, + "learning_rate": 1.1306532663316583e-05, + "loss": 0.7668, + "step": 550 + }, + { + "epoch": 0.88, + "eval_loss": 0.7704442143440247, + "eval_runtime": 405.5551, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 550 + }, + { + "epoch": 0.96, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.7827, + "step": 600 + }, + { + "epoch": 0.96, + "eval_loss": 0.7652014493942261, + "eval_runtime": 405.6401, + "eval_samples_per_second": 12.326, + "eval_steps_per_second": 1.541, + "step": 600 + }, + { + "epoch": 1.04, + "learning_rate": 8.793969849246232e-06, + "loss": 0.7248, + "step": 650 + }, + { + "epoch": 1.04, + "eval_loss": 0.7625133991241455, + "eval_runtime": 405.4991, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 650 + }, + { + "epoch": 1.12, + "learning_rate": 7.537688442211055e-06, + "loss": 0.7107, + "step": 700 + }, + { + "epoch": 1.12, + "eval_loss": 0.7591288089752197, + "eval_runtime": 405.5072, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 700 + }, + { + "epoch": 1.2, + "learning_rate": 6.2814070351758795e-06, + "loss": 0.7094, + "step": 750 + }, + { + "epoch": 1.2, + "eval_loss": 0.7564280033111572, + "eval_runtime": 405.5407, + "eval_samples_per_second": 12.329, + "eval_steps_per_second": 1.541, + "step": 750 + }, + { + "epoch": 1.28, + "learning_rate": 5.025125628140704e-06, + "loss": 0.7203, + "step": 800 + }, + { + "epoch": 1.28, + "eval_loss": 0.7533515095710754, + "eval_runtime": 405.633, + "eval_samples_per_second": 12.326, + "eval_steps_per_second": 1.541, + "step": 800 + }, + { + "epoch": 1.36, + "learning_rate": 3.7688442211055276e-06, + "loss": 0.6954, + "step": 850 + }, + { + "epoch": 1.36, + "eval_loss": 0.7509064674377441, + "eval_runtime": 405.5277, + "eval_samples_per_second": 12.33, + "eval_steps_per_second": 1.541, + "step": 850 + }, + { + "epoch": 1.44, + "learning_rate": 2.512562814070352e-06, + "loss": 0.705, + "step": 900 + }, + { + "epoch": 1.44, + "eval_loss": 0.7496302723884583, + "eval_runtime": 405.4847, + "eval_samples_per_second": 12.331, + "eval_steps_per_second": 1.541, + "step": 900 + }, + { + "epoch": 1.52, + "learning_rate": 1.256281407035176e-06, + "loss": 0.7173, + "step": 950 + }, + { + "epoch": 1.52, + "eval_loss": 0.7481338381767273, + "eval_runtime": 405.49, + "eval_samples_per_second": 12.331, + "eval_steps_per_second": 1.541, + "step": 950 + } + ], + "logging_steps": 50, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.665103715893248e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-950/training_args.bin b/checkpoint-950/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c --- /dev/null +++ b/checkpoint-950/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1 +size 4664