diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-100/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** LoRA adapter (PEFT) for causal language modeling
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** mistralai/Mistral-7B-v0.1
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-100/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0d66fa05047349ecbcaccc3146d1b4a332158878
--- /dev/null
+++ b/checkpoint-100/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a99864a548560a398945f587eea1503839b2450902d2b417b71e2f8b0ad4db4
+size 609389712
diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..355ab6613770728eaa592065509bd2f4e6aa4a86
--- /dev/null
+++ b/checkpoint-100/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23be3dd51c5413dcd449a047cde520fbf4e02c09d3a658d183ad7c7bd08bc336
+size 43126684
diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3d5efeff0f311d36b93b974fbd9869bf3bcde4b8
--- /dev/null
+++ b/checkpoint-100/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70f5a59a14d46de7f0d0af48c5b2eeb8a470600eb778fece520dd91926fb5399
+size 14244
diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8bf030fcd0d8b1fbecf02fa180b2dd61513b0226
--- /dev/null
+++ b/checkpoint-100/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f46dc04db0a603406c597c113e229228b08858bb09b49bfebd3512f1a8f3306
+size 1064
diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c2228bf1dcd653b56a6e91b0025804ef0d654604
--- /dev/null
+++ b/checkpoint-100/trainer_state.json
@@ -0,0 +1,49 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.16,
+  "eval_steps": 50,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 1.75274075357184e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-100/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-1000/README.md b/checkpoint-1000/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-1000/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** LoRA adapter (PEFT) for causal language modeling
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** mistralai/Mistral-7B-v0.1
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-1000/adapter_config.json b/checkpoint-1000/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-1000/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-1000/adapter_model.safetensors b/checkpoint-1000/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..39f8e1d4ed391f56f25a1c1f0ff4b2e7b6fd5df9
--- /dev/null
+++ b/checkpoint-1000/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efa4fac3f59611bb4a9c6157eb62479ccf376c48de016c379070c61193239953
+size 609389712
diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0a59766b99e81248b4a50b7ac68a465eccf788ae
--- /dev/null
+++ b/checkpoint-1000/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d1fbbcbf3edb83fadf8d05c6f22141b0b4148f540d619d08274309f758afca3
+size 43127132
diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c20859deec23376e4fb18225631744395d5ed8cb
--- /dev/null
+++ b/checkpoint-1000/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:083d1fab437bfe07458908c484b3955a52f3a1daf94849802b42e40af820f9d4
+size 14244
diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cb079ac777435f69b38299460f32248e366aa526
--- /dev/null
+++ b/checkpoint-1000/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4d6d865d6518a82dd54bb09f8f02628ebe31ca8be097a65ef5c8faff7622969
+size 1064
diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..86012ca7a62aa3f0198a3ed22ee2e63a3f0afe89
--- /dev/null
+++ b/checkpoint-1000/trainer_state.json
@@ -0,0 +1,301 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.6,
+  "eval_steps": 50,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.7587939698492464e-05,
+      "loss": 0.8272,
+      "step": 300
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.807165801525116,
+      "eval_runtime": 405.5942,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 300
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1.6331658291457288e-05,
+      "loss": 0.8132,
+      "step": 350
+    },
+    {
+      "epoch": 0.56,
+      "eval_loss": 0.7971442341804504,
+      "eval_runtime": 405.5887,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 350
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 1.507537688442211e-05,
+      "loss": 0.8124,
+      "step": 400
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 0.7893713116645813,
+      "eval_runtime": 405.5093,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 400
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.3819095477386935e-05,
+      "loss": 0.7823,
+      "step": 450
+    },
+    {
+      "epoch": 0.72,
+      "eval_loss": 0.7817508578300476,
+      "eval_runtime": 405.5623,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 450
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.2562814070351759e-05,
+      "loss": 0.772,
+      "step": 500
+    },
+    {
+      "epoch": 0.8,
+      "eval_loss": 0.7751882672309875,
+      "eval_runtime": 405.5365,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 500
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 1.1306532663316583e-05,
+      "loss": 0.7668,
+      "step": 550
+    },
+    {
+      "epoch": 0.88,
+      "eval_loss": 0.7704442143440247,
+      "eval_runtime": 405.5551,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 550
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 1.0050251256281408e-05,
+      "loss": 0.7827,
+      "step": 600
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 0.7652014493942261,
+      "eval_runtime": 405.6401,
+      "eval_samples_per_second": 12.326,
+      "eval_steps_per_second": 1.541,
+      "step": 600
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 8.793969849246232e-06,
+      "loss": 0.7248,
+      "step": 650
+    },
+    {
+      "epoch": 1.04,
+      "eval_loss": 0.7625133991241455,
+      "eval_runtime": 405.4991,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 650
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 7.537688442211055e-06,
+      "loss": 0.7107,
+      "step": 700
+    },
+    {
+      "epoch": 1.12,
+      "eval_loss": 0.7591288089752197,
+      "eval_runtime": 405.5072,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 700
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 6.2814070351758795e-06,
+      "loss": 0.7094,
+      "step": 750
+    },
+    {
+      "epoch": 1.2,
+      "eval_loss": 0.7564280033111572,
+      "eval_runtime": 405.5407,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 750
+    },
+    {
+      "epoch": 1.28,
+      "learning_rate": 5.025125628140704e-06,
+      "loss": 0.7203,
+      "step": 800
+    },
+    {
+      "epoch": 1.28,
+      "eval_loss": 0.7533515095710754,
+      "eval_runtime": 405.633,
+      "eval_samples_per_second": 12.326,
+      "eval_steps_per_second": 1.541,
+      "step": 800
+    },
+    {
+      "epoch": 1.36,
+      "learning_rate": 3.7688442211055276e-06,
+      "loss": 0.6954,
+      "step": 850
+    },
+    {
+      "epoch": 1.36,
+      "eval_loss": 0.7509064674377441,
+      "eval_runtime": 405.5277,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 850
+    },
+    {
+      "epoch": 1.44,
+      "learning_rate": 2.512562814070352e-06,
+      "loss": 0.705,
+      "step": 900
+    },
+    {
+      "epoch": 1.44,
+      "eval_loss": 0.7496302723884583,
+      "eval_runtime": 405.4847,
+      "eval_samples_per_second": 12.331,
+      "eval_steps_per_second": 1.541,
+      "step": 900
+    },
+    {
+      "epoch": 1.52,
+      "learning_rate": 1.256281407035176e-06,
+      "loss": 0.7173,
+      "step": 950
+    },
+    {
+      "epoch": 1.52,
+      "eval_loss": 0.7481338381767273,
+      "eval_runtime": 405.49,
+      "eval_samples_per_second": 12.331,
+      "eval_steps_per_second": 1.541,
+      "step": 950
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 0.0,
+      "loss": 0.696,
+      "step": 1000
+    },
+    {
+      "epoch": 1.6,
+      "eval_loss": 0.7476922869682312,
+      "eval_runtime": 405.5359,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 1.75274075357184e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-1000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-150/README.md b/checkpoint-150/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-150/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** LoRA adapter (PEFT) for causal language modeling
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** mistralai/Mistral-7B-v0.1
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-150/adapter_config.json b/checkpoint-150/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-150/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-150/adapter_model.safetensors b/checkpoint-150/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c854fb0365101963a75d2cd8af0596308e2fc06f
--- /dev/null
+++ b/checkpoint-150/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:169738fc4d8af44ef693ef9435d0a01693036c537375ce01bbc4d9487d76ed51
+size 609389712
diff --git a/checkpoint-150/optimizer.pt b/checkpoint-150/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2f0af6e25bcd513f972088c5bb34249a59f1b298
--- /dev/null
+++ b/checkpoint-150/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b020b24cdb28ab539944706ba7060d5f1c9eefd873eaee771a1c4d0870b775e8
+size 43126684
diff --git a/checkpoint-150/rng_state.pth b/checkpoint-150/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..76c345ff282fc4cadf88e511b2d8aa15e1a6cf6f
--- /dev/null
+++ b/checkpoint-150/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03c0053a7fcb9ca154abb520c2f6bce83e88fb3b95860f9cfec37a406f29da17
+size 14244
diff --git a/checkpoint-150/scheduler.pt b/checkpoint-150/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..49d58f979ce05f436a6917857895bda3fa2d6188
--- /dev/null
+++ b/checkpoint-150/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a0a7460dd8b31647fa0542d6e8cdd02c31293f0704d27ec57a49b4c476aa1c
+size 1064
diff --git a/checkpoint-150/trainer_state.json b/checkpoint-150/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f20f0eff8a60a8f6aa1f2c41d14c503f4c51ba4c
--- /dev/null
+++ b/checkpoint-150/trainer_state.json
@@ -0,0 +1,63 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.24,
+  "eval_steps": 50,
+  "global_step": 150,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 2.62911113035776e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-150/training_args.bin b/checkpoint-150/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-150/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-200/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-200/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-200/adapter_model.safetensors b/checkpoint-200/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..144123e84e9603d342d93ac41c9b1825c62ee24a
--- /dev/null
+++ b/checkpoint-200/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07a087caeab1b8377f1841f6a481e25190cc8562593d63ceaf5637ca62c06e70
+size 609389712
diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0af5ba44b904780c5a81ddaef201e30192c07378
--- /dev/null
+++ b/checkpoint-200/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9eed8632771b997aa743faf7e217a32800a025dfb62546b5be603731e3063aee
+size 43126684
diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ea60a493e18326030402447fea87e8cd7168ba82
--- /dev/null
+++ b/checkpoint-200/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7775948c4249583aa53279096045195942843fd474c8cd9ee4590b8a17e724e7
+size 14244
diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..72d84b5e537a910294a56f642241016921e9f87a
--- /dev/null
+++ b/checkpoint-200/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f1b477d3bb44d9bf70633240462f7ac6e455d50eefacf5b2433c62e0cc9e80d
+size 1064
diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..a217c78ba386a6b11836c7e075bda7490104a9e3
--- /dev/null
+++ b/checkpoint-200/trainer_state.json
@@ -0,0 +1,77 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.32,
+  "eval_steps": 50,
+  "global_step": 200,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 3.50548150714368e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-200/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-250/README.md b/checkpoint-250/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-250/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-250/adapter_config.json b/checkpoint-250/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-250/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-250/adapter_model.safetensors b/checkpoint-250/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7a713f682009d35c346718bb01915206d5a5303f
--- /dev/null
+++ b/checkpoint-250/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4e6a4e2b7009665a2d08d8ab7f983982770bfee208f4a0a5c606abd0a4b63a1
+size 609389712
diff --git a/checkpoint-250/optimizer.pt b/checkpoint-250/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..859a3507e00bbeb2c452b2763a4bd0aec282ca10
--- /dev/null
+++ b/checkpoint-250/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb05fea125c57a8ef83000219df69bffb6979627a74fbfc41660f53b9e94728c
+size 43126684
diff --git a/checkpoint-250/rng_state.pth b/checkpoint-250/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..61e5e5e28ea7a75401b64e8a5b7e53e6c0eb7f1a
--- /dev/null
+++ b/checkpoint-250/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd5a4d1ace6cff644daeafee2ed3c659c8ad6d7329984fac4814598148287f45
+size 14244
diff --git a/checkpoint-250/scheduler.pt b/checkpoint-250/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..95fe513e9fe8f72dc7b9ffa5f782cbf0f7422fc6
--- /dev/null
+++ b/checkpoint-250/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09471b95cb193b326e2ae9278591cdf878ced8cb70ac85a4cb6b83f68d62fc51
+size 1064
diff --git a/checkpoint-250/trainer_state.json b/checkpoint-250/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..cb6d1d60d4d6b5bb17f68298255fe14aa80704a9
--- /dev/null
+++ b/checkpoint-250/trainer_state.json
@@ -0,0 +1,91 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.4,
+  "eval_steps": 50,
+  "global_step": 250,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 4.3818518839296e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-250/training_args.bin b/checkpoint-250/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-250/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-300/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-300/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-300/adapter_model.safetensors b/checkpoint-300/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f43349fe27a9a335ec61003332adf7c1d3848d53
--- /dev/null
+++ b/checkpoint-300/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba36c6267bc7ed00bf4e47e3408e4caf16672d746808f3d2bca56b54245a1745
+size 609389712
diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..031bd0a3239885a9de5d4fad11423d7dfdb55051
--- /dev/null
+++ b/checkpoint-300/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62163ab77d93bb009c21e46694dcc45375176a177ef306aa01c27c512a746a18
+size 43127132
diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f1c184a0e2abcfffa55036562e911ad7dc437592
--- /dev/null
+++ b/checkpoint-300/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ae384141208d14371e9167d8c1fa551691aeae58cda37eb5ecefcd1d2d5aaba
+size 14244
diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bdb954848e9822cd7c1e29d3b40bf45fa6b86357
--- /dev/null
+++ b/checkpoint-300/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25d0ec4220fe093365424ee63188b9cc5436640be7c2cb84202c87d53f32aeaf
+size 1064
diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..76105b946aaf211e3266b5f23a8cc685fd004353
--- /dev/null
+++ b/checkpoint-300/trainer_state.json
@@ -0,0 +1,105 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.48,
+  "eval_steps": 50,
+  "global_step": 300,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.7587939698492464e-05,
+      "loss": 0.8272,
+      "step": 300
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.807165801525116,
+      "eval_runtime": 405.5942,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 300
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 5.25822226071552e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-300/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-350/README.md b/checkpoint-350/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-350/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-350/adapter_config.json b/checkpoint-350/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-350/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-350/adapter_model.safetensors b/checkpoint-350/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..224ab7b833be9b5f1ba2f93ada92caf7ff0c6885
--- /dev/null
+++ b/checkpoint-350/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee0751fee811a1f1b7684b6750b74ac4695ba47387e1e94b5070a1267974748e
+size 609389712
diff --git a/checkpoint-350/optimizer.pt b/checkpoint-350/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bbb2343401bb96e7520cf26f4f05b53646610f38
--- /dev/null
+++ b/checkpoint-350/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d37e77f608a4aeec57dc79233b0011fb1037e786480b496106efa62cbd0c1e5
+size 43127132
diff --git a/checkpoint-350/rng_state.pth b/checkpoint-350/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..360578bab774d6ad9c0ac4023168da3b3370e190
--- /dev/null
+++ b/checkpoint-350/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2007e1ce0f14325986972ec49c69379e221e4e3a98af0e1add7f1d4189281592
+size 14244
diff --git a/checkpoint-350/scheduler.pt b/checkpoint-350/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e8ae24c11986cf24c39cadca76e59270bbeb6f76
--- /dev/null
+++ b/checkpoint-350/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb9f9d20eafb507e007e0db2ee7fa0cc38244f7a2c2b2a604378cdb9e12dadb4
+size 1064
diff --git a/checkpoint-350/trainer_state.json b/checkpoint-350/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f283a3038393fd55c2e04a7e3014b3801659f9a4
--- /dev/null
+++ b/checkpoint-350/trainer_state.json
@@ -0,0 +1,119 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.56,
+  "eval_steps": 50,
+  "global_step": 350,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.7587939698492464e-05,
+      "loss": 0.8272,
+      "step": 300
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.807165801525116,
+      "eval_runtime": 405.5942,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 300
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1.6331658291457288e-05,
+      "loss": 0.8132,
+      "step": 350
+    },
+    {
+      "epoch": 0.56,
+      "eval_loss": 0.7971442341804504,
+      "eval_runtime": 405.5887,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 350
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 6.13459263750144e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-350/training_args.bin b/checkpoint-350/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-350/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-400/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-400/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-400/adapter_model.safetensors b/checkpoint-400/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..60cc26f1d01ac7523b2ac34d94ea120834d6342c
--- /dev/null
+++ b/checkpoint-400/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b8edcd9cfba22b681ea8a05f8e8a9f9edf5d0e9837842e001ba193f35a879fb
+size 609389712
diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb3bb7e82d626b774797699536b56857670f1a33
--- /dev/null
+++ b/checkpoint-400/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e883d5497327a1a9ddfd57608ad1e424812db799024cadb84e96a415b7cfe0f
+size 43127132
diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..25124c5e8e7502970e42c61b28b7a645d92c79b0
--- /dev/null
+++ b/checkpoint-400/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0f421bc95467030268a15e26d8df95bd839c0b07f5f09a5901c9d8ccab1966a
+size 14244
diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..52939dd56953607040022110b17355a120077961
--- /dev/null
+++ b/checkpoint-400/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7181c684d8bc6db848c39932fb3b82e6b80240c695625aac4584086e7663877b
+size 1064
diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d44062c8d12b274430a9f877ce47042bf59e452
--- /dev/null
+++ b/checkpoint-400/trainer_state.json
@@ -0,0 +1,133 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.64,
+  "eval_steps": 50,
+  "global_step": 400,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.7587939698492464e-05,
+      "loss": 0.8272,
+      "step": 300
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.807165801525116,
+      "eval_runtime": 405.5942,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 300
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1.6331658291457288e-05,
+      "loss": 0.8132,
+      "step": 350
+    },
+    {
+      "epoch": 0.56,
+      "eval_loss": 0.7971442341804504,
+      "eval_runtime": 405.5887,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 350
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 1.507537688442211e-05,
+      "loss": 0.8124,
+      "step": 400
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 0.7893713116645813,
+      "eval_runtime": 405.5093,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 400
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 7.01096301428736e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-400/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-450/README.md b/checkpoint-450/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-450/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-450/adapter_config.json b/checkpoint-450/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-450/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-450/adapter_model.safetensors b/checkpoint-450/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b5a0607e29a2cb2947c8eb69e8392a17af90edd8
--- /dev/null
+++ b/checkpoint-450/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3577b434f638ee00fbbd10ed2a8d2560cb533f66d89c30ae27ea7b2df99edfec
+size 609389712
diff --git a/checkpoint-450/optimizer.pt b/checkpoint-450/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..296da5b5ab41e56fa42dcfacdcbd30885b1f41cc
--- /dev/null
+++ b/checkpoint-450/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3f84101a72c186c5efa1a36df67445e62ab819c7cbeee03e39522c0fe93bff6
+size 43127132
diff --git a/checkpoint-450/rng_state.pth b/checkpoint-450/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0c8be50e0d17faa9ae4ae52d3597e729f33c17d3
--- /dev/null
+++ b/checkpoint-450/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d6127fdfcbb17a924f253b33b28cad75bb64bc35f5cbe475607aeb233685df0
+size 14244
diff --git a/checkpoint-450/scheduler.pt b/checkpoint-450/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dbc3bf76d5260c2c30bfc67361e92a8589cadbe7
--- /dev/null
+++ b/checkpoint-450/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1067d941ad3656559599b88573da32c1132c71afd7464f5844b679059050ac89
+size 1064
diff --git a/checkpoint-450/trainer_state.json b/checkpoint-450/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..97eec18602d0cc0b11b138e726f97240c1a1ca75
--- /dev/null
+++ b/checkpoint-450/trainer_state.json
@@ -0,0 +1,147 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.72,
+  "eval_steps": 50,
+  "global_step": 450,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.7587939698492464e-05,
+      "loss": 0.8272,
+      "step": 300
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.807165801525116,
+      "eval_runtime": 405.5942,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 300
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1.6331658291457288e-05,
+      "loss": 0.8132,
+      "step": 350
+    },
+    {
+      "epoch": 0.56,
+      "eval_loss": 0.7971442341804504,
+      "eval_runtime": 405.5887,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 350
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 1.507537688442211e-05,
+      "loss": 0.8124,
+      "step": 400
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 0.7893713116645813,
+      "eval_runtime": 405.5093,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 400
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.3819095477386935e-05,
+      "loss": 0.7823,
+      "step": 450
+    },
+    {
+      "epoch": 0.72,
+      "eval_loss": 0.7817508578300476,
+      "eval_runtime": 405.5623,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 450
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 7.88733339107328e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-450/training_args.bin b/checkpoint-450/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-450/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-50/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-50/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-50/adapter_model.safetensors b/checkpoint-50/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..612eeeb29d30c8c812fbf93e72e61c94225a303e
--- /dev/null
+++ b/checkpoint-50/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e46c84b3ea9990f3179703c46c784b514b30304359e5c05e72cb16964ba220c
+size 609389712
diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc2153cd10360c7ecb819bc2a8d7c2589129f7dd
--- /dev/null
+++ b/checkpoint-50/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb96e382c84e28b9aea862fb4e6ff99da42b6fbf5f8d91ebd05d874078e99fc5
+size 43126684
diff --git a/checkpoint-50/rng_state.pth b/checkpoint-50/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a2cec1570de9c6177615af5fc2a3965c31ea6ed8
--- /dev/null
+++ b/checkpoint-50/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0553051d10408eee59c3840a1a9bfb3b86f5cd48f5ff749de95ca6ef87ba0668
+size 14244
diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..29ddae8f47fa44816202135e154475d713372793
--- /dev/null
+++ b/checkpoint-50/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b44bde04014e0643297b0f127d92402c51731c3d038fc538c9a302aac901fb6
+size 1064
diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..89fc4c1158688523cf7335692b0191aa08cd242f
--- /dev/null
+++ b/checkpoint-50/trainer_state.json
@@ -0,0 +1,35 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.08,
+  "eval_steps": 50,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 8763703767859200.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-50/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-500/README.md b/checkpoint-500/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-500/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-500/adapter_config.json b/checkpoint-500/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-500/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-500/adapter_model.safetensors b/checkpoint-500/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ba3bf5dcaa148fed89b31b7171b92f2f05032862
--- /dev/null
+++ b/checkpoint-500/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b90f82ca3d7783ecb42e61455e3721817b0e3ebe536d80db80b75ca3d1409297
+size 609389712
diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2ad53f80026600867d06e6f0085dd616358ba6de
--- /dev/null
+++ b/checkpoint-500/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26aed64c6bcd6cff9a393a2a84ac6d3739eb22a149805090fbd8a23fb7445ced
+size 43127132
diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0fa4cfa6f06617d3a4f61e85a19cf862e8a74f3b
--- /dev/null
+++ b/checkpoint-500/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c81ec963b08ff90f15851ff1fe201dacb6fdf773d811710421103d3c2e0c052d
+size 14244
diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..89463731d1b5f59fe378f306cca2f74b47a04dc3
--- /dev/null
+++ b/checkpoint-500/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08914fd90e4abb043440f2510884e4614f7e83091c72cc4c2d2f0ff220357a6e
+size 1064
diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..125ff391c073ef9cdc0e4992df7a84c2750cfecd
--- /dev/null
+++ b/checkpoint-500/trainer_state.json
@@ -0,0 +1,161 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.8,
+  "eval_steps": 50,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.7587939698492464e-05,
+      "loss": 0.8272,
+      "step": 300
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.807165801525116,
+      "eval_runtime": 405.5942,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 300
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1.6331658291457288e-05,
+      "loss": 0.8132,
+      "step": 350
+    },
+    {
+      "epoch": 0.56,
+      "eval_loss": 0.7971442341804504,
+      "eval_runtime": 405.5887,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 350
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 1.507537688442211e-05,
+      "loss": 0.8124,
+      "step": 400
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 0.7893713116645813,
+      "eval_runtime": 405.5093,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 400
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.3819095477386935e-05,
+      "loss": 0.7823,
+      "step": 450
+    },
+    {
+      "epoch": 0.72,
+      "eval_loss": 0.7817508578300476,
+      "eval_runtime": 405.5623,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 450
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.2562814070351759e-05,
+      "loss": 0.772,
+      "step": 500
+    },
+    {
+      "epoch": 0.8,
+      "eval_loss": 0.7751882672309875,
+      "eval_runtime": 405.5365,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 500
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 8.7637037678592e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-500/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-550/README.md b/checkpoint-550/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-550/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed before further recommendations can be made.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-550/adapter_config.json b/checkpoint-550/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-550/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-550/adapter_model.safetensors b/checkpoint-550/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..027a9e8a545b1ad0e185b84152e040a6f81c4480
--- /dev/null
+++ b/checkpoint-550/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e647c2b3d4c6928f20b36b99db338b2d6fcc10920fa16948fcd4ab7e17616211
+size 609389712
diff --git a/checkpoint-550/optimizer.pt b/checkpoint-550/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bf0300066c9ca40037d6eb1c365b3c0d3d16698d
--- /dev/null
+++ b/checkpoint-550/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb6edeffebaba5153ea4aeea3bebd1b998c3393ccfc480ee3b66f1563095c23c
+size 43127132
diff --git a/checkpoint-550/rng_state.pth b/checkpoint-550/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..adc20e0c648ab717f3bac7c444900932be927e09
--- /dev/null
+++ b/checkpoint-550/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:646461297fb11e289425e945df4cbe38623366f09a858475fabba9e599f96747
+size 14244
diff --git a/checkpoint-550/scheduler.pt b/checkpoint-550/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a651c534e0d965f44cae8d5bf2dd3c658e41d3c8
--- /dev/null
+++ b/checkpoint-550/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6090bcf779d364236641cfe14c77a35431b5bfafcd25a884b8a5b97a4fc6fe12
+size 1064
diff --git a/checkpoint-550/trainer_state.json b/checkpoint-550/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..0bc0e455d726a4857ef135945219261fa31ccd25
--- /dev/null
+++ b/checkpoint-550/trainer_state.json
@@ -0,0 +1,175 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.88,
+  "eval_steps": 50,
+  "global_step": 550,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.7587939698492464e-05,
+      "loss": 0.8272,
+      "step": 300
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.807165801525116,
+      "eval_runtime": 405.5942,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 300
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1.6331658291457288e-05,
+      "loss": 0.8132,
+      "step": 350
+    },
+    {
+      "epoch": 0.56,
+      "eval_loss": 0.7971442341804504,
+      "eval_runtime": 405.5887,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 350
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 1.507537688442211e-05,
+      "loss": 0.8124,
+      "step": 400
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 0.7893713116645813,
+      "eval_runtime": 405.5093,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 400
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.3819095477386935e-05,
+      "loss": 0.7823,
+      "step": 450
+    },
+    {
+      "epoch": 0.72,
+      "eval_loss": 0.7817508578300476,
+      "eval_runtime": 405.5623,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 450
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.2562814070351759e-05,
+      "loss": 0.772,
+      "step": 500
+    },
+    {
+      "epoch": 0.8,
+      "eval_loss": 0.7751882672309875,
+      "eval_runtime": 405.5365,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 500
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 1.1306532663316583e-05,
+      "loss": 0.7668,
+      "step": 550
+    },
+    {
+      "epoch": 0.88,
+      "eval_loss": 0.7704442143440247,
+      "eval_runtime": 405.5551,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 550
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 9.64007414464512e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-550/training_args.bin b/checkpoint-550/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-550/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-600/README.md b/checkpoint-600/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-600/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed before further recommendations can be made.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-600/adapter_config.json b/checkpoint-600/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-600/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-600/adapter_model.safetensors b/checkpoint-600/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..02f4498b11d102678f03a698e5879450e113d298
--- /dev/null
+++ b/checkpoint-600/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ef59a13d55da988cc77ed5e9485e2ad6e110a3f1c894d236cfa3fdc8b06650f
+size 609389712
diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..72b24c7b8d3d0af6961a705bd02a375147242267
--- /dev/null
+++ b/checkpoint-600/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66626662e7d09282c73c427db047b65bed71025ed2b00c2183e2391f05b9b257
+size 43127132
diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a98bceaefb1e01880d4c8e6f65bd5111b194f0f8
--- /dev/null
+++ b/checkpoint-600/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87b43c01ca858f576b733efb66d155de633fb4f0175c92dd5c886c317999160b
+size 14244
diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d5dfe1f9e4f86adab5523d67985799f485c78132
--- /dev/null
+++ b/checkpoint-600/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9ce7ff6d6acf4945e377262122c7e37fa2f686114026e8b558ebf0faa33a8fb
+size 1064
diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..e67ea81491eda3a8b578c2dedd1ef898226d816c
--- /dev/null
+++ b/checkpoint-600/trainer_state.json
@@ -0,0 +1,189 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.96,
+  "eval_steps": 50,
+  "global_step": 600,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.7587939698492464e-05,
+      "loss": 0.8272,
+      "step": 300
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.807165801525116,
+      "eval_runtime": 405.5942,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 300
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1.6331658291457288e-05,
+      "loss": 0.8132,
+      "step": 350
+    },
+    {
+      "epoch": 0.56,
+      "eval_loss": 0.7971442341804504,
+      "eval_runtime": 405.5887,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 350
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 1.507537688442211e-05,
+      "loss": 0.8124,
+      "step": 400
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 0.7893713116645813,
+      "eval_runtime": 405.5093,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 400
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.3819095477386935e-05,
+      "loss": 0.7823,
+      "step": 450
+    },
+    {
+      "epoch": 0.72,
+      "eval_loss": 0.7817508578300476,
+      "eval_runtime": 405.5623,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 450
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.2562814070351759e-05,
+      "loss": 0.772,
+      "step": 500
+    },
+    {
+      "epoch": 0.8,
+      "eval_loss": 0.7751882672309875,
+      "eval_runtime": 405.5365,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 500
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 1.1306532663316583e-05,
+      "loss": 0.7668,
+      "step": 550
+    },
+    {
+      "epoch": 0.88,
+      "eval_loss": 0.7704442143440247,
+      "eval_runtime": 405.5551,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 550
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 1.0050251256281408e-05,
+      "loss": 0.7827,
+      "step": 600
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 0.7652014493942261,
+      "eval_runtime": 405.6401,
+      "eval_samples_per_second": 12.326,
+      "eval_steps_per_second": 1.541,
+      "step": 600
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 1.051644452143104e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-600/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-650/README.md b/checkpoint-650/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-650/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-650/adapter_config.json b/checkpoint-650/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-650/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-650/adapter_model.safetensors b/checkpoint-650/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..acbd1b79683ff95085d764ee7b8f3126bec18dea
--- /dev/null
+++ b/checkpoint-650/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:270f53cf2783d912ae945b905f4ef9f25839ac9ff7a7e812459d8dba51d4c5f9
+size 609389712
diff --git a/checkpoint-650/optimizer.pt b/checkpoint-650/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..31e4532516757258feb53e6f8d6cca978a5b420f
--- /dev/null
+++ b/checkpoint-650/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d994da21e0cc750dd4b71c474688d98438efc77f86cc4319a91c9a7091201ace
+size 43127132
diff --git a/checkpoint-650/rng_state.pth b/checkpoint-650/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e7700fefb603c5a51a2220cb818bd77eee0ad374
--- /dev/null
+++ b/checkpoint-650/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9dbd2aff071536c93dcca8875506dc2e45bce5d8e83d9b97985d299aff00969b
+size 14244
diff --git a/checkpoint-650/scheduler.pt b/checkpoint-650/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cdd260fbee15f41b499155a2c0891ed4c0aaafe4
--- /dev/null
+++ b/checkpoint-650/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64cb7a43229696b470cfa762fb458086df2a2667853db1b1fbc8be13f2ebd755
+size 1064
diff --git a/checkpoint-650/trainer_state.json b/checkpoint-650/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..4bbc966e35672aef9cdce8f77cec631ed7db3872
--- /dev/null
+++ b/checkpoint-650/trainer_state.json
@@ -0,0 +1,203 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.04,
+  "eval_steps": 50,
+  "global_step": 650,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.7587939698492464e-05,
+      "loss": 0.8272,
+      "step": 300
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.807165801525116,
+      "eval_runtime": 405.5942,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 300
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1.6331658291457288e-05,
+      "loss": 0.8132,
+      "step": 350
+    },
+    {
+      "epoch": 0.56,
+      "eval_loss": 0.7971442341804504,
+      "eval_runtime": 405.5887,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 350
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 1.507537688442211e-05,
+      "loss": 0.8124,
+      "step": 400
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 0.7893713116645813,
+      "eval_runtime": 405.5093,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 400
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.3819095477386935e-05,
+      "loss": 0.7823,
+      "step": 450
+    },
+    {
+      "epoch": 0.72,
+      "eval_loss": 0.7817508578300476,
+      "eval_runtime": 405.5623,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 450
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.2562814070351759e-05,
+      "loss": 0.772,
+      "step": 500
+    },
+    {
+      "epoch": 0.8,
+      "eval_loss": 0.7751882672309875,
+      "eval_runtime": 405.5365,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 500
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 1.1306532663316583e-05,
+      "loss": 0.7668,
+      "step": 550
+    },
+    {
+      "epoch": 0.88,
+      "eval_loss": 0.7704442143440247,
+      "eval_runtime": 405.5551,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 550
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 1.0050251256281408e-05,
+      "loss": 0.7827,
+      "step": 600
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 0.7652014493942261,
+      "eval_runtime": 405.6401,
+      "eval_samples_per_second": 12.326,
+      "eval_steps_per_second": 1.541,
+      "step": 600
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 8.793969849246232e-06,
+      "loss": 0.7248,
+      "step": 650
+    },
+    {
+      "epoch": 1.04,
+      "eval_loss": 0.7625133991241455,
+      "eval_runtime": 405.4991,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 650
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 1.139281489821696e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-650/training_args.bin b/checkpoint-650/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-650/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-700/README.md b/checkpoint-700/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-700/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-700/adapter_config.json b/checkpoint-700/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-700/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-700/adapter_model.safetensors b/checkpoint-700/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e4d5e054d415b2e956f15795517e17bcbb0de803
--- /dev/null
+++ b/checkpoint-700/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55cb3a725e1ea420e1d1b4d154cb33285c146ec706f9fc14c6b6b0665097e1b7
+size 609389712
diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fabd61b3838daa200cbf2ba7e66b8a68cc6c4712
--- /dev/null
+++ b/checkpoint-700/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d10aff25f8a52cdaaff4457770e804da285ab38c3a5518f2de8a3c058d86f202
+size 43127132
diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3e0702d9774c9b5a0cb703f7bf484485a1ac76c4
--- /dev/null
+++ b/checkpoint-700/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b96516f7624d7067fc3c86ead5767e446098a97396af88f7ef3d5a917eebaea2
+size 14244
diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..02cbb3376c0ab4a41059d1471bfcd017761e9cbc
--- /dev/null
+++ b/checkpoint-700/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8641b104236537da719992bf5637d9a29f245da450fd6a0c1ade6f59294e3067
+size 1064
diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..59a716e9c4cc5b84ab5b452b10a8ad22aef34ced
--- /dev/null
+++ b/checkpoint-700/trainer_state.json
@@ -0,0 +1,217 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.12,
+  "eval_steps": 50,
+  "global_step": 700,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.7587939698492464e-05,
+      "loss": 0.8272,
+      "step": 300
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.807165801525116,
+      "eval_runtime": 405.5942,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 300
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1.6331658291457288e-05,
+      "loss": 0.8132,
+      "step": 350
+    },
+    {
+      "epoch": 0.56,
+      "eval_loss": 0.7971442341804504,
+      "eval_runtime": 405.5887,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 350
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 1.507537688442211e-05,
+      "loss": 0.8124,
+      "step": 400
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 0.7893713116645813,
+      "eval_runtime": 405.5093,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 400
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.3819095477386935e-05,
+      "loss": 0.7823,
+      "step": 450
+    },
+    {
+      "epoch": 0.72,
+      "eval_loss": 0.7817508578300476,
+      "eval_runtime": 405.5623,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 450
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.2562814070351759e-05,
+      "loss": 0.772,
+      "step": 500
+    },
+    {
+      "epoch": 0.8,
+      "eval_loss": 0.7751882672309875,
+      "eval_runtime": 405.5365,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 500
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 1.1306532663316583e-05,
+      "loss": 0.7668,
+      "step": 550
+    },
+    {
+      "epoch": 0.88,
+      "eval_loss": 0.7704442143440247,
+      "eval_runtime": 405.5551,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 550
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 1.0050251256281408e-05,
+      "loss": 0.7827,
+      "step": 600
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 0.7652014493942261,
+      "eval_runtime": 405.6401,
+      "eval_samples_per_second": 12.326,
+      "eval_steps_per_second": 1.541,
+      "step": 600
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 8.793969849246232e-06,
+      "loss": 0.7248,
+      "step": 650
+    },
+    {
+      "epoch": 1.04,
+      "eval_loss": 0.7625133991241455,
+      "eval_runtime": 405.4991,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 650
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 7.537688442211055e-06,
+      "loss": 0.7107,
+      "step": 700
+    },
+    {
+      "epoch": 1.12,
+      "eval_loss": 0.7591288089752197,
+      "eval_runtime": 405.5072,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 700
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 1.226918527500288e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-700/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-750/README.md b/checkpoint-750/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-750/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-750/adapter_config.json b/checkpoint-750/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-750/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-750/adapter_model.safetensors b/checkpoint-750/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4defde1ee89676c56d7f103f57d6e33db30c42aa
--- /dev/null
+++ b/checkpoint-750/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ff98d91a6b52d806ae0367254cf0e370f79343fbdf74303c757095882a2bde0
+size 609389712
diff --git a/checkpoint-750/optimizer.pt b/checkpoint-750/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8ec48c059cea261aa86594d7434ed22191be03a
--- /dev/null
+++ b/checkpoint-750/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51f553ba52866b6f70da0e4f0e32ab0cd785c99b3e54e44f890eb891e93cc98d
+size 43127132
diff --git a/checkpoint-750/rng_state.pth b/checkpoint-750/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..02b45e920dd950af4754a64cf5d58f0adc902a26
--- /dev/null
+++ b/checkpoint-750/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2c335fd4d5447347a72b25eed2111ba23cf16dcf4c718cc1f4b4dcff2fb739a
+size 14244
diff --git a/checkpoint-750/scheduler.pt b/checkpoint-750/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..34bee97f97b51a6d92ac9228d40e56002fad4dcc
--- /dev/null
+++ b/checkpoint-750/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01ff28eb4ca89c9920d5e03ccaa7e35ffa8cc6579bf5ebfee1c7132e0d1e6636
+size 1064
diff --git a/checkpoint-750/trainer_state.json b/checkpoint-750/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..e9d7deb4df9592843dbb4ad94f118787037abc60
--- /dev/null
+++ b/checkpoint-750/trainer_state.json
@@ -0,0 +1,231 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.2,
+  "eval_steps": 50,
+  "global_step": 750,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.7587939698492464e-05,
+      "loss": 0.8272,
+      "step": 300
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.807165801525116,
+      "eval_runtime": 405.5942,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 300
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1.6331658291457288e-05,
+      "loss": 0.8132,
+      "step": 350
+    },
+    {
+      "epoch": 0.56,
+      "eval_loss": 0.7971442341804504,
+      "eval_runtime": 405.5887,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 350
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 1.507537688442211e-05,
+      "loss": 0.8124,
+      "step": 400
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 0.7893713116645813,
+      "eval_runtime": 405.5093,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 400
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.3819095477386935e-05,
+      "loss": 0.7823,
+      "step": 450
+    },
+    {
+      "epoch": 0.72,
+      "eval_loss": 0.7817508578300476,
+      "eval_runtime": 405.5623,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 450
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.2562814070351759e-05,
+      "loss": 0.772,
+      "step": 500
+    },
+    {
+      "epoch": 0.8,
+      "eval_loss": 0.7751882672309875,
+      "eval_runtime": 405.5365,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 500
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 1.1306532663316583e-05,
+      "loss": 0.7668,
+      "step": 550
+    },
+    {
+      "epoch": 0.88,
+      "eval_loss": 0.7704442143440247,
+      "eval_runtime": 405.5551,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 550
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 1.0050251256281408e-05,
+      "loss": 0.7827,
+      "step": 600
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 0.7652014493942261,
+      "eval_runtime": 405.6401,
+      "eval_samples_per_second": 12.326,
+      "eval_steps_per_second": 1.541,
+      "step": 600
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 8.793969849246232e-06,
+      "loss": 0.7248,
+      "step": 650
+    },
+    {
+      "epoch": 1.04,
+      "eval_loss": 0.7625133991241455,
+      "eval_runtime": 405.4991,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 650
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 7.537688442211055e-06,
+      "loss": 0.7107,
+      "step": 700
+    },
+    {
+      "epoch": 1.12,
+      "eval_loss": 0.7591288089752197,
+      "eval_runtime": 405.5072,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 700
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 6.2814070351758795e-06,
+      "loss": 0.7094,
+      "step": 750
+    },
+    {
+      "epoch": 1.2,
+      "eval_loss": 0.7564280033111572,
+      "eval_runtime": 405.5407,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 750
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 1.31455556517888e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-750/training_args.bin b/checkpoint-750/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-750/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-800/README.md b/checkpoint-800/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-800/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-800/adapter_config.json b/checkpoint-800/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-800/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-800/adapter_model.safetensors b/checkpoint-800/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dc12a4799ed65d03e09a7f2fd3c24f423da8cb5a
--- /dev/null
+++ b/checkpoint-800/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53664b23c228e0f3c017ce675f731ee126176c194161dc1645669c96dd42d762
+size 609389712
diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c841611df06526cf8daeb97ce22f41a97381cbc4
--- /dev/null
+++ b/checkpoint-800/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8429fcd1dfc8ce0960d0ac9d0bd2a5babf71b4c54a8d2ef27fbc620dd70ca1d
+size 43127132
diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b851d37ca2de16967fd0032ca7034692aa58ea04
--- /dev/null
+++ b/checkpoint-800/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36b59348f315476cbba572dc1e7e6ffb0fdeefd5ec98118edf11497efa662c78
+size 14244
diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..91a1e379ee7f89bed4ffaba66c0bd5e6ae7b1665
--- /dev/null
+++ b/checkpoint-800/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f2387e2d26114664cc6a4cf5bd742a874dbf876f2332a74cdad212bf210513c
+size 1064
diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c9f871c13ae8e8d27343cc42bc704e1d643bad31
--- /dev/null
+++ b/checkpoint-800/trainer_state.json
@@ -0,0 +1,245 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.28,
+  "eval_steps": 50,
+  "global_step": 800,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.7587939698492464e-05,
+      "loss": 0.8272,
+      "step": 300
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.807165801525116,
+      "eval_runtime": 405.5942,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 300
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1.6331658291457288e-05,
+      "loss": 0.8132,
+      "step": 350
+    },
+    {
+      "epoch": 0.56,
+      "eval_loss": 0.7971442341804504,
+      "eval_runtime": 405.5887,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 350
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 1.507537688442211e-05,
+      "loss": 0.8124,
+      "step": 400
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 0.7893713116645813,
+      "eval_runtime": 405.5093,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 400
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.3819095477386935e-05,
+      "loss": 0.7823,
+      "step": 450
+    },
+    {
+      "epoch": 0.72,
+      "eval_loss": 0.7817508578300476,
+      "eval_runtime": 405.5623,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 450
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.2562814070351759e-05,
+      "loss": 0.772,
+      "step": 500
+    },
+    {
+      "epoch": 0.8,
+      "eval_loss": 0.7751882672309875,
+      "eval_runtime": 405.5365,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 500
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 1.1306532663316583e-05,
+      "loss": 0.7668,
+      "step": 550
+    },
+    {
+      "epoch": 0.88,
+      "eval_loss": 0.7704442143440247,
+      "eval_runtime": 405.5551,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 550
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 1.0050251256281408e-05,
+      "loss": 0.7827,
+      "step": 600
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 0.7652014493942261,
+      "eval_runtime": 405.6401,
+      "eval_samples_per_second": 12.326,
+      "eval_steps_per_second": 1.541,
+      "step": 600
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 8.793969849246232e-06,
+      "loss": 0.7248,
+      "step": 650
+    },
+    {
+      "epoch": 1.04,
+      "eval_loss": 0.7625133991241455,
+      "eval_runtime": 405.4991,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 650
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 7.537688442211055e-06,
+      "loss": 0.7107,
+      "step": 700
+    },
+    {
+      "epoch": 1.12,
+      "eval_loss": 0.7591288089752197,
+      "eval_runtime": 405.5072,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 700
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 6.2814070351758795e-06,
+      "loss": 0.7094,
+      "step": 750
+    },
+    {
+      "epoch": 1.2,
+      "eval_loss": 0.7564280033111572,
+      "eval_runtime": 405.5407,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 750
+    },
+    {
+      "epoch": 1.28,
+      "learning_rate": 5.025125628140704e-06,
+      "loss": 0.7203,
+      "step": 800
+    },
+    {
+      "epoch": 1.28,
+      "eval_loss": 0.7533515095710754,
+      "eval_runtime": 405.633,
+      "eval_samples_per_second": 12.326,
+      "eval_steps_per_second": 1.541,
+      "step": 800
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 1.402192602857472e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-800/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-850/README.md b/checkpoint-850/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-850/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-850/adapter_config.json b/checkpoint-850/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-850/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-850/adapter_model.safetensors b/checkpoint-850/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7d2814277a408e2797a12f34b1f3dbcc2eb1fbc7
--- /dev/null
+++ b/checkpoint-850/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3df9c7d3490f7c7fb568331608176f475522aad2cb69e017870d885b00997e99
+size 609389712
diff --git a/checkpoint-850/optimizer.pt b/checkpoint-850/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b029218270b1fc967da422784ca9b6478c029bb1
--- /dev/null
+++ b/checkpoint-850/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97f1669b56403e9d3fb99c22b1367ab89e6d91148d4f0374ce253d78987505f7
+size 43127132
diff --git a/checkpoint-850/rng_state.pth b/checkpoint-850/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..fc5592300a150fc3cea5b55d24038a5ca41bc1e2
--- /dev/null
+++ b/checkpoint-850/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76f0975fdc887bdd1eaa0520cd9276ec24f9d1b02fdf33f9ea02f159c8674322
+size 14244
diff --git a/checkpoint-850/scheduler.pt b/checkpoint-850/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..15211d9e9f90f890ca6da2f4b0cf1eb2a50685f4
--- /dev/null
+++ b/checkpoint-850/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a80d05a9e60673a998a3e598998f59153efa52025094d8426638be58e9ddf148
+size 1064
diff --git a/checkpoint-850/trainer_state.json b/checkpoint-850/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c4fc76533b0175376586b86b3c9d4300fa4d7741
--- /dev/null
+++ b/checkpoint-850/trainer_state.json
@@ -0,0 +1,259 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.3599999999999999,
+  "eval_steps": 50,
+  "global_step": 850,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.7587939698492464e-05,
+      "loss": 0.8272,
+      "step": 300
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.807165801525116,
+      "eval_runtime": 405.5942,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 300
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1.6331658291457288e-05,
+      "loss": 0.8132,
+      "step": 350
+    },
+    {
+      "epoch": 0.56,
+      "eval_loss": 0.7971442341804504,
+      "eval_runtime": 405.5887,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 350
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 1.507537688442211e-05,
+      "loss": 0.8124,
+      "step": 400
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 0.7893713116645813,
+      "eval_runtime": 405.5093,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 400
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.3819095477386935e-05,
+      "loss": 0.7823,
+      "step": 450
+    },
+    {
+      "epoch": 0.72,
+      "eval_loss": 0.7817508578300476,
+      "eval_runtime": 405.5623,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 450
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.2562814070351759e-05,
+      "loss": 0.772,
+      "step": 500
+    },
+    {
+      "epoch": 0.8,
+      "eval_loss": 0.7751882672309875,
+      "eval_runtime": 405.5365,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 500
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 1.1306532663316583e-05,
+      "loss": 0.7668,
+      "step": 550
+    },
+    {
+      "epoch": 0.88,
+      "eval_loss": 0.7704442143440247,
+      "eval_runtime": 405.5551,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 550
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 1.0050251256281408e-05,
+      "loss": 0.7827,
+      "step": 600
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 0.7652014493942261,
+      "eval_runtime": 405.6401,
+      "eval_samples_per_second": 12.326,
+      "eval_steps_per_second": 1.541,
+      "step": 600
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 8.793969849246232e-06,
+      "loss": 0.7248,
+      "step": 650
+    },
+    {
+      "epoch": 1.04,
+      "eval_loss": 0.7625133991241455,
+      "eval_runtime": 405.4991,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 650
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 7.537688442211055e-06,
+      "loss": 0.7107,
+      "step": 700
+    },
+    {
+      "epoch": 1.12,
+      "eval_loss": 0.7591288089752197,
+      "eval_runtime": 405.5072,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 700
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 6.2814070351758795e-06,
+      "loss": 0.7094,
+      "step": 750
+    },
+    {
+      "epoch": 1.2,
+      "eval_loss": 0.7564280033111572,
+      "eval_runtime": 405.5407,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 750
+    },
+    {
+      "epoch": 1.28,
+      "learning_rate": 5.025125628140704e-06,
+      "loss": 0.7203,
+      "step": 800
+    },
+    {
+      "epoch": 1.28,
+      "eval_loss": 0.7533515095710754,
+      "eval_runtime": 405.633,
+      "eval_samples_per_second": 12.326,
+      "eval_steps_per_second": 1.541,
+      "step": 800
+    },
+    {
+      "epoch": 1.36,
+      "learning_rate": 3.7688442211055276e-06,
+      "loss": 0.6954,
+      "step": 850
+    },
+    {
+      "epoch": 1.36,
+      "eval_loss": 0.7509064674377441,
+      "eval_runtime": 405.5277,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 850
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 1.489829640536064e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-850/training_args.bin b/checkpoint-850/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-850/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-900/README.md b/checkpoint-900/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-900/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-900/adapter_config.json b/checkpoint-900/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-900/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-900/adapter_model.safetensors b/checkpoint-900/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..daa4fe2d4b7ed7a7e87db9ba705a4746bcd134dc
--- /dev/null
+++ b/checkpoint-900/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c59b23c91d79d8cc24b726231898c3a7ccc654f10da35599d3eaa8d7a398bb27
+size 609389712
diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0a661c2e6bc00a28a4e29de6037b32dd8a4fb58a
--- /dev/null
+++ b/checkpoint-900/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28d590fd06b2e921ecf6f453d0619046e14bc9bd0c4f7574273da2ff9eed3227
+size 43127132
diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2dd255d932fa636b422ad445876bece80fb90c75
--- /dev/null
+++ b/checkpoint-900/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bec3c15f40ff3a07601039109f7b64dcc036ebcc7c0d83016d1d7e3899f518e8
+size 14244
diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..25662b4210c7666ed19ecf1aa7bff50a794ef7d7
--- /dev/null
+++ b/checkpoint-900/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22aa127cbf65f517d6cc6dcd8205454e61950c25d1a17d361d7e6a24d922b3a6
+size 1064
diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..e5770173e337103bb2b0df826187995f91458754
--- /dev/null
+++ b/checkpoint-900/trainer_state.json
@@ -0,0 +1,273 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.44,
+  "eval_steps": 50,
+  "global_step": 900,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.7587939698492464e-05,
+      "loss": 0.8272,
+      "step": 300
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.807165801525116,
+      "eval_runtime": 405.5942,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 300
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1.6331658291457288e-05,
+      "loss": 0.8132,
+      "step": 350
+    },
+    {
+      "epoch": 0.56,
+      "eval_loss": 0.7971442341804504,
+      "eval_runtime": 405.5887,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 350
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 1.507537688442211e-05,
+      "loss": 0.8124,
+      "step": 400
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 0.7893713116645813,
+      "eval_runtime": 405.5093,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 400
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.3819095477386935e-05,
+      "loss": 0.7823,
+      "step": 450
+    },
+    {
+      "epoch": 0.72,
+      "eval_loss": 0.7817508578300476,
+      "eval_runtime": 405.5623,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 450
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.2562814070351759e-05,
+      "loss": 0.772,
+      "step": 500
+    },
+    {
+      "epoch": 0.8,
+      "eval_loss": 0.7751882672309875,
+      "eval_runtime": 405.5365,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 500
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 1.1306532663316583e-05,
+      "loss": 0.7668,
+      "step": 550
+    },
+    {
+      "epoch": 0.88,
+      "eval_loss": 0.7704442143440247,
+      "eval_runtime": 405.5551,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 550
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 1.0050251256281408e-05,
+      "loss": 0.7827,
+      "step": 600
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 0.7652014493942261,
+      "eval_runtime": 405.6401,
+      "eval_samples_per_second": 12.326,
+      "eval_steps_per_second": 1.541,
+      "step": 600
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 8.793969849246232e-06,
+      "loss": 0.7248,
+      "step": 650
+    },
+    {
+      "epoch": 1.04,
+      "eval_loss": 0.7625133991241455,
+      "eval_runtime": 405.4991,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 650
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 7.537688442211055e-06,
+      "loss": 0.7107,
+      "step": 700
+    },
+    {
+      "epoch": 1.12,
+      "eval_loss": 0.7591288089752197,
+      "eval_runtime": 405.5072,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 700
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 6.2814070351758795e-06,
+      "loss": 0.7094,
+      "step": 750
+    },
+    {
+      "epoch": 1.2,
+      "eval_loss": 0.7564280033111572,
+      "eval_runtime": 405.5407,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 750
+    },
+    {
+      "epoch": 1.28,
+      "learning_rate": 5.025125628140704e-06,
+      "loss": 0.7203,
+      "step": 800
+    },
+    {
+      "epoch": 1.28,
+      "eval_loss": 0.7533515095710754,
+      "eval_runtime": 405.633,
+      "eval_samples_per_second": 12.326,
+      "eval_steps_per_second": 1.541,
+      "step": 800
+    },
+    {
+      "epoch": 1.36,
+      "learning_rate": 3.7688442211055276e-06,
+      "loss": 0.6954,
+      "step": 850
+    },
+    {
+      "epoch": 1.36,
+      "eval_loss": 0.7509064674377441,
+      "eval_runtime": 405.5277,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 850
+    },
+    {
+      "epoch": 1.44,
+      "learning_rate": 2.512562814070352e-06,
+      "loss": 0.705,
+      "step": 900
+    },
+    {
+      "epoch": 1.44,
+      "eval_loss": 0.7496302723884583,
+      "eval_runtime": 405.4847,
+      "eval_samples_per_second": 12.331,
+      "eval_steps_per_second": 1.541,
+      "step": 900
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 1.577466678214656e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-900/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664
diff --git a/checkpoint-950/README.md b/checkpoint-950/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbc649d4e14f63f5dab7f3353b89128a180ff20b
--- /dev/null
+++ b/checkpoint-950/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.7.2.dev0
\ No newline at end of file
diff --git a/checkpoint-950/adapter_config.json b/checkpoint-950/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a12e4e3123e2df3580c38c97e60db0f668049a6b
--- /dev/null
+++ b/checkpoint-950/adapter_config.json
@@ -0,0 +1,33 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "lm_head",
+    "gate_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-950/adapter_model.safetensors b/checkpoint-950/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0a48031974cef6823bf830123aad484b5091c218
--- /dev/null
+++ b/checkpoint-950/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6593543e0e614d551fed5f1e5db08a5a32730b7b655ad4aea1130d7b4c99af9
+size 609389712
diff --git a/checkpoint-950/optimizer.pt b/checkpoint-950/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..83a6232dd411dbf25c3edf8eb02117240c7c25e6
--- /dev/null
+++ b/checkpoint-950/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abbad7b48bae391e4faaf5eb85c943aa76ddf38e1c9eeccaaefb586eeb2b0c5a
+size 43127132
diff --git a/checkpoint-950/rng_state.pth b/checkpoint-950/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4cc32671cbc030b428e267ae8a9eb43c7c0655ab
--- /dev/null
+++ b/checkpoint-950/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb87a3145033ec2ec27062e9ead87bea31cbd15584f4c477dc0b7a1e2df6f9cf
+size 14244
diff --git a/checkpoint-950/scheduler.pt b/checkpoint-950/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..40b35000bae4f8304ea298a207fc940a46776af5
--- /dev/null
+++ b/checkpoint-950/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8702e895a6b0d1b6a0fc15d813777f3f0bcf1b2cf280a0cb1a49beb133097a3
+size 1064
diff --git a/checkpoint-950/trainer_state.json b/checkpoint-950/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..658894b4c5927640e3c7ecb7bc4d7a20a8c15899
--- /dev/null
+++ b/checkpoint-950/trainer_state.json
@@ -0,0 +1,287 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.52,
+  "eval_steps": 50,
+  "global_step": 950,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3869346733668342e-05,
+      "loss": 1.2058,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.9411209225654602,
+      "eval_runtime": 405.7571,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 50
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 2.2613065326633167e-05,
+      "loss": 0.9065,
+      "step": 100
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.8835591673851013,
+      "eval_runtime": 405.6798,
+      "eval_samples_per_second": 12.325,
+      "eval_steps_per_second": 1.541,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 2.135678391959799e-05,
+      "loss": 0.8612,
+      "step": 150
+    },
+    {
+      "epoch": 0.24,
+      "eval_loss": 0.8560027480125427,
+      "eval_runtime": 405.7296,
+      "eval_samples_per_second": 12.323,
+      "eval_steps_per_second": 1.54,
+      "step": 150
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 2.0100502512562815e-05,
+      "loss": 0.8536,
+      "step": 200
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.8348749876022339,
+      "eval_runtime": 405.5958,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 200
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 1.884422110552764e-05,
+      "loss": 0.8195,
+      "step": 250
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.8201740384101868,
+      "eval_runtime": 405.604,
+      "eval_samples_per_second": 12.327,
+      "eval_steps_per_second": 1.541,
+      "step": 250
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.7587939698492464e-05,
+      "loss": 0.8272,
+      "step": 300
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.807165801525116,
+      "eval_runtime": 405.5942,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 300
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1.6331658291457288e-05,
+      "loss": 0.8132,
+      "step": 350
+    },
+    {
+      "epoch": 0.56,
+      "eval_loss": 0.7971442341804504,
+      "eval_runtime": 405.5887,
+      "eval_samples_per_second": 12.328,
+      "eval_steps_per_second": 1.541,
+      "step": 350
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 1.507537688442211e-05,
+      "loss": 0.8124,
+      "step": 400
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 0.7893713116645813,
+      "eval_runtime": 405.5093,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 400
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.3819095477386935e-05,
+      "loss": 0.7823,
+      "step": 450
+    },
+    {
+      "epoch": 0.72,
+      "eval_loss": 0.7817508578300476,
+      "eval_runtime": 405.5623,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 450
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.2562814070351759e-05,
+      "loss": 0.772,
+      "step": 500
+    },
+    {
+      "epoch": 0.8,
+      "eval_loss": 0.7751882672309875,
+      "eval_runtime": 405.5365,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 500
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 1.1306532663316583e-05,
+      "loss": 0.7668,
+      "step": 550
+    },
+    {
+      "epoch": 0.88,
+      "eval_loss": 0.7704442143440247,
+      "eval_runtime": 405.5551,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 550
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 1.0050251256281408e-05,
+      "loss": 0.7827,
+      "step": 600
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 0.7652014493942261,
+      "eval_runtime": 405.6401,
+      "eval_samples_per_second": 12.326,
+      "eval_steps_per_second": 1.541,
+      "step": 600
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 8.793969849246232e-06,
+      "loss": 0.7248,
+      "step": 650
+    },
+    {
+      "epoch": 1.04,
+      "eval_loss": 0.7625133991241455,
+      "eval_runtime": 405.4991,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 650
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 7.537688442211055e-06,
+      "loss": 0.7107,
+      "step": 700
+    },
+    {
+      "epoch": 1.12,
+      "eval_loss": 0.7591288089752197,
+      "eval_runtime": 405.5072,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 700
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 6.2814070351758795e-06,
+      "loss": 0.7094,
+      "step": 750
+    },
+    {
+      "epoch": 1.2,
+      "eval_loss": 0.7564280033111572,
+      "eval_runtime": 405.5407,
+      "eval_samples_per_second": 12.329,
+      "eval_steps_per_second": 1.541,
+      "step": 750
+    },
+    {
+      "epoch": 1.28,
+      "learning_rate": 5.025125628140704e-06,
+      "loss": 0.7203,
+      "step": 800
+    },
+    {
+      "epoch": 1.28,
+      "eval_loss": 0.7533515095710754,
+      "eval_runtime": 405.633,
+      "eval_samples_per_second": 12.326,
+      "eval_steps_per_second": 1.541,
+      "step": 800
+    },
+    {
+      "epoch": 1.36,
+      "learning_rate": 3.7688442211055276e-06,
+      "loss": 0.6954,
+      "step": 850
+    },
+    {
+      "epoch": 1.36,
+      "eval_loss": 0.7509064674377441,
+      "eval_runtime": 405.5277,
+      "eval_samples_per_second": 12.33,
+      "eval_steps_per_second": 1.541,
+      "step": 850
+    },
+    {
+      "epoch": 1.44,
+      "learning_rate": 2.512562814070352e-06,
+      "loss": 0.705,
+      "step": 900
+    },
+    {
+      "epoch": 1.44,
+      "eval_loss": 0.7496302723884583,
+      "eval_runtime": 405.4847,
+      "eval_samples_per_second": 12.331,
+      "eval_steps_per_second": 1.541,
+      "step": 900
+    },
+    {
+      "epoch": 1.52,
+      "learning_rate": 1.256281407035176e-06,
+      "loss": 0.7173,
+      "step": 950
+    },
+    {
+      "epoch": 1.52,
+      "eval_loss": 0.7481338381767273,
+      "eval_runtime": 405.49,
+      "eval_samples_per_second": 12.331,
+      "eval_steps_per_second": 1.541,
+      "step": 950
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 50,
+  "total_flos": 1.665103715893248e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-950/training_args.bin b/checkpoint-950/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3d129d8f5922706bd31dcb3040a65c014acc45c
--- /dev/null
+++ b/checkpoint-950/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e678be78bce095b3053fb21c63312965ff29a5b1ad6d6fbd92e73680c32a6f1
+size 4664