diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..cc1259d472ba4bdf769bb49f1d2cb573809a800c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1086/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..19dec661f75227aa6a7568fd3c4c31d5ca436efe --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +--- +library_name: peft +license: other +base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B +tags: +- llama-factory +- lora +- generated_from_trainer +model-index: +- name: DeepSeek-R1-Distill-Qwen-32B + results: [] +--- + + + +# DeepSeek-R1-Distill-Qwen-32B + +This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) on the alpaca_thinking dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 0.0001 +- train_batch_size: 1 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 8 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.12.0 +- Transformers 4.49.0 +- Pytorch 2.5.1+cu124 +- Datasets 3.2.0 +- Tokenizers 0.21.0 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a8646ffc043b2e177162e505e4ddf95a34ee35d --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "pissa_niter_16", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "o_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..476a84d19c4420d1d508ed6ebb6e7ce24ac1217b --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d2b60858f8196e9c42a81145ebed482335b25f140656f334689c5cd28feb329 +size 268555264 diff 
--git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6f40b85634398802ef051edb9b192fe596a15ba6 --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.9937888198757765, + "total_flos": 6.17252944434797e+18, + "train_loss": 0.3891050570797086, + "train_runtime": 6764.7829, + "train_samples_per_second": 5.14, + "train_steps_per_second": 0.161 +} \ No newline at end of file diff --git a/checkpoint-1000/README.md b/checkpoint-1000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d823a0d9f612b6fd128ea17ebbddb8df140520ef --- /dev/null +++ b/checkpoint-1000/README.md @@ -0,0 +1,202 @@ +--- +base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-1000/adapter_config.json b/checkpoint-1000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..652c79f18ba64f20d9cd4fc1eff31c4b47afb1c6 --- /dev/null +++ b/checkpoint-1000/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "pissa_niter_16", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "o_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1000/adapter_model.safetensors b/checkpoint-1000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc3d36841ae3d8d34206aa710a640da75f6b0dfa --- /dev/null +++ b/checkpoint-1000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e2f0472d2f83d4810a9f24f4b41cb5e2509fd4645e31a85981bfa3ddc9ac9e6 +size 268555264 diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..23ddc2178ea6bde9bccfe4ab185f2bc35c3c4494 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:858a7d2c7117fc10a925bae6ddadff03e49c6742e566c172c0005a2591dd963b +size 537626770 diff --git a/checkpoint-1000/rng_state_0.pth b/checkpoint-1000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..5a7c482c30381cd512ccc35fe322d8a34fbf5207 --- /dev/null +++ b/checkpoint-1000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:308f94f9a5c24e1bad5c393d56ae7af7782600f4e791d9c6ac35b22fff2105b6 +size 15024 diff --git a/checkpoint-1000/rng_state_1.pth b/checkpoint-1000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..7b862c21b28bbd89ce6b4fb681d41be05f175599 --- /dev/null +++ b/checkpoint-1000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b056f3c23cb32dc77a2ec9e7651e0b64e4440e21f0fdf969b86bfc56a1cbdf06 +size 
15024 diff --git a/checkpoint-1000/rng_state_2.pth b/checkpoint-1000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..d86ce886844e0298f058d67065e5eeb27ffe7e48 --- /dev/null +++ b/checkpoint-1000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3f8a05714bc528f4885a2816181652f2303b3e8150f89b56aaee6bec56aa520 +size 15024 diff --git a/checkpoint-1000/rng_state_3.pth b/checkpoint-1000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..10733f5da657367adf3f67760028644c0839660f --- /dev/null +++ b/checkpoint-1000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f755bd3c330281961e5c03af9d10ce8c1e1678619d384f6f1fd5fd7dce2ff50 +size 15024 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..501b4491679d616789f8e0bc3fe01e337bbc5907 --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2602cbf6571d5d825f6bf2d7375d253f1e29c737b5ba79bcc221ad05bf6a6b4 +size 1064 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1000/tokenizer.json b/checkpoint-1000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/checkpoint-1000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0a414ab9b6f7fec711d4c1346f5847dd0d5bd0ff --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1,197 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151645": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151646": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151648": { + "content": "<think>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
false + }, + "151649": { + "content": "</think>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "<tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "</tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + 
tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 2048, + "pad_token": "<|end▁of▁sentence|>", + "padding_side": "right", + "sp_model_kwargs": {}, + "split_special_tokens": false, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +} diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..53fb03d060735d857fc8e921fba71d7e28b1dc1b --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,733 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.756383712905452, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.027605244996549344, + "grad_norm": 1.6335422992706299, + "learning_rate": 9.174311926605506e-06, + "loss": 0.8102, + "step": 10 + }, + { + "epoch": 0.05521048999309869, + "grad_norm": 0.8111785054206848, + "learning_rate": 1.834862385321101e-05, + "loss": 0.6999, + "step": 20 + }, + { + "epoch": 0.08281573498964803, + "grad_norm": 0.4619831144809723, + "learning_rate": 2.7522935779816515e-05, + "loss": 0.5682, + "step": 30 + }, + { + "epoch": 0.11042097998619738, + "grad_norm": 0.4434720575809479, + "learning_rate": 3.669724770642202e-05, + "loss": 0.5232, + "step": 40 + }, + { + "epoch": 0.13802622498274672, + "grad_norm": 0.44054797291755676, + "learning_rate": 4.587155963302753e-05, + "loss": 0.5084, + "step": 50 + }, + { + "epoch": 0.16563146997929606, + "grad_norm": 0.42256447672843933, + "learning_rate": 5.504587155963303e-05, + "loss": 0.477, + "step": 60 + }, + { + "epoch": 0.1932367149758454, + "grad_norm": 0.4349405765533447, + "learning_rate": 6.422018348623854e-05, + "loss": 0.4841, + "step": 70 + }, + { + "epoch": 0.22084195997239475, + "grad_norm": 0.4515930712223053, + "learning_rate": 7.339449541284404e-05, + "loss": 0.4704, + "step": 80 + }, + { + "epoch": 0.2484472049689441, + "grad_norm": 0.45412737131118774, + "learning_rate": 8.256880733944955e-05, + "loss": 0.4718, + "step": 90 + }, + { + "epoch": 0.27605244996549344, + "grad_norm": 0.49010995030403137, + "learning_rate": 9.174311926605506e-05, + "loss": 0.4496, + "step": 100 + }, + { + "epoch": 0.3036576949620428, + "grad_norm": 0.4931396245956421, + "learning_rate": 9.999974150612772e-05, + "loss": 
0.4524, + "step": 110 + }, + { + "epoch": 0.33126293995859213, + "grad_norm": 1.1270735263824463, + "learning_rate": 9.996872547536591e-05, + "loss": 0.4503, + "step": 120 + }, + { + "epoch": 0.3588681849551415, + "grad_norm": 0.48991507291793823, + "learning_rate": 9.988604741439287e-05, + "loss": 0.4399, + "step": 130 + }, + { + "epoch": 0.3864734299516908, + "grad_norm": 0.45801088213920593, + "learning_rate": 9.975179280300506e-05, + "loss": 0.4524, + "step": 140 + }, + { + "epoch": 0.4140786749482402, + "grad_norm": 0.420897901058197, + "learning_rate": 9.956610044533896e-05, + "loss": 0.4281, + "step": 150 + }, + { + "epoch": 0.4416839199447895, + "grad_norm": 0.4336962103843689, + "learning_rate": 9.932916232636318e-05, + "loss": 0.4305, + "step": 160 + }, + { + "epoch": 0.4692891649413389, + "grad_norm": 0.44120800495147705, + "learning_rate": 9.904122341338765e-05, + "loss": 0.4208, + "step": 170 + }, + { + "epoch": 0.4968944099378882, + "grad_norm": 0.9154078364372253, + "learning_rate": 9.870258140279503e-05, + "loss": 0.4436, + "step": 180 + }, + { + "epoch": 0.5244996549344375, + "grad_norm": 0.4551916718482971, + "learning_rate": 9.831358641225624e-05, + "loss": 0.4288, + "step": 190 + }, + { + "epoch": 0.5521048999309869, + "grad_norm": 0.4513665437698364, + "learning_rate": 9.787464061874825e-05, + "loss": 0.4384, + "step": 200 + }, + { + "epoch": 0.5797101449275363, + "grad_norm": 0.43779632449150085, + "learning_rate": 9.738619784274833e-05, + "loss": 0.4178, + "step": 210 + }, + { + "epoch": 0.6073153899240856, + "grad_norm": 0.4170076847076416, + "learning_rate": 9.684876307903494e-05, + "loss": 0.42, + "step": 220 + }, + { + "epoch": 0.6349206349206349, + "grad_norm": 0.4370488226413727, + "learning_rate": 9.626289197457994e-05, + "loss": 0.4296, + "step": 230 + }, + { + "epoch": 0.6625258799171843, + "grad_norm": 0.42547333240509033, + "learning_rate": 9.562919025407236e-05, + "loss": 0.4264, + "step": 240 + }, + { + "epoch": 0.6901311249137336, + "grad_norm": 0.4317057430744171, + "learning_rate": 9.494831309366723e-05, + "loss": 0.4052, + "step": 250 + }, + { + "epoch": 0.717736369910283, + "grad_norm": 0.40589675307273865, + "learning_rate": 9.422096444360735e-05, + "loss": 0.41, + "step": 260 + }, + { + "epoch": 0.7453416149068323, + "grad_norm": 0.44671744108200073, + "learning_rate": 9.34478963004181e-05, + "loss": 0.4162, + "step": 270 + }, + { + "epoch": 0.7729468599033816, + "grad_norm": 0.41162508726119995, + "learning_rate": 9.262990792942768e-05, + "loss": 0.4183, + "step": 280 + }, + { + "epoch": 0.800552104899931, + "grad_norm": 0.483149915933609, + "learning_rate": 9.176784503841697e-05, + "loss": 0.4174, + "step": 290 + }, + { + "epoch": 0.8281573498964804, + "grad_norm": 0.4605332612991333, + "learning_rate": 9.086259890325297e-05, + "loss": 0.4191, + "step": 300 + }, + { + "epoch": 0.8557625948930296, + "grad_norm": 0.4153307378292084, + "learning_rate": 8.991510544640991e-05, + "loss": 0.4253, + "step": 310 + }, + { + "epoch": 0.883367839889579, + "grad_norm": 0.43806084990501404, + "learning_rate": 8.892634426933106e-05, + "loss": 0.4265, + "step": 320 + }, + { + "epoch": 0.9109730848861284, + "grad_norm": 0.45412200689315796, + "learning_rate": 8.78973376396311e-05, + "loss": 0.4365, + "step": 330 + }, + { + "epoch": 0.9385783298826778, + "grad_norm": 0.3769752085208893, + "learning_rate": 8.682914943418676e-05, + "loss": 0.4058, + "step": 340 + }, + { + "epoch": 0.966183574879227, + "grad_norm": 0.4275883436203003, + "learning_rate": 
8.572288403920792e-05, + "loss": 0.4078, + "step": 350 + }, + { + "epoch": 0.9937888198757764, + "grad_norm": 0.43371307849884033, + "learning_rate": 8.45796852084268e-05, + "loss": 0.4063, + "step": 360 + }, + { + "epoch": 1.0193236714975846, + "grad_norm": 0.4527032673358917, + "learning_rate": 8.340073488058552e-05, + "loss": 0.3742, + "step": 370 + }, + { + "epoch": 1.0469289164941338, + "grad_norm": 0.5205631256103516, + "learning_rate": 8.218725195744463e-05, + "loss": 0.3809, + "step": 380 + }, + { + "epoch": 1.0745341614906831, + "grad_norm": 0.4031950533390045, + "learning_rate": 8.094049104357609e-05, + "loss": 0.3823, + "step": 390 + }, + { + "epoch": 1.1021394064872325, + "grad_norm": 0.41949087381362915, + "learning_rate": 7.966174114924351e-05, + "loss": 0.3765, + "step": 400 + }, + { + "epoch": 1.129744651483782, + "grad_norm": 0.43814027309417725, + "learning_rate": 7.83523243577109e-05, + "loss": 0.3751, + "step": 410 + }, + { + "epoch": 1.1573498964803313, + "grad_norm": 0.4457204341888428, + "learning_rate": 7.70135944583575e-05, + "loss": 0.3869, + "step": 420 + }, + { + "epoch": 1.1849551414768806, + "grad_norm": 0.41421836614608765, + "learning_rate": 7.56469355470122e-05, + "loss": 0.3634, + "step": 430 + }, + { + "epoch": 1.21256038647343, + "grad_norm": 0.4416670799255371, + "learning_rate": 7.425376059495442e-05, + "loss": 0.3768, + "step": 440 + }, + { + "epoch": 1.2401656314699794, + "grad_norm": 0.44710710644721985, + "learning_rate": 7.283550998806108e-05, + "loss": 0.3669, + "step": 450 + }, + { + "epoch": 1.2677708764665288, + "grad_norm": 0.39852890372276306, + "learning_rate": 7.139365003760999e-05, + "loss": 0.3824, + "step": 460 + }, + { + "epoch": 1.295376121463078, + "grad_norm": 0.4412725269794464, + "learning_rate": 6.992967146427913e-05, + "loss": 0.3646, + "step": 470 + }, + { + "epoch": 1.3229813664596273, + "grad_norm": 0.41978228092193604, + "learning_rate": 6.844508785690964e-05, + "loss": 0.3755, + "step": 480 + }, + { + "epoch": 1.3505866114561766, + "grad_norm": 0.4214731752872467, + "learning_rate": 6.694143410762542e-05, + "loss": 0.3841, + "step": 490 + }, + { + "epoch": 1.378191856452726, + "grad_norm": 0.4128514230251312, + "learning_rate": 6.54202648249278e-05, + "loss": 0.3839, + "step": 500 + }, + { + "epoch": 1.4057971014492754, + "grad_norm": 0.3899001181125641, + "learning_rate": 6.388315272640544e-05, + "loss": 0.3726, + "step": 510 + }, + { + "epoch": 1.4334023464458248, + "grad_norm": 0.4347754120826721, + "learning_rate": 6.233168701272167e-05, + "loss": 0.3722, + "step": 520 + }, + { + "epoch": 1.4610075914423741, + "grad_norm": 0.3798378109931946, + "learning_rate": 6.076747172456015e-05, + "loss": 0.3623, + "step": 530 + }, + { + "epoch": 1.4886128364389233, + "grad_norm": 0.3879692256450653, + "learning_rate": 5.919212408422753e-05, + "loss": 0.3684, + "step": 540 + }, + { + "epoch": 1.5162180814354729, + "grad_norm": 0.4210754930973053, + "learning_rate": 5.76072728236279e-05, + "loss": 0.3674, + "step": 550 + }, + { + "epoch": 1.543823326432022, + "grad_norm": 0.4184245467185974, + "learning_rate": 5.6014556500337534e-05, + "loss": 0.3602, + "step": 560 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 0.43027910590171814, + "learning_rate": 5.44156218035211e-05, + "loss": 0.3872, + "step": 570 + }, + { + "epoch": 1.5990338164251208, + "grad_norm": 0.38721945881843567, + "learning_rate": 5.28121218514406e-05, + "loss": 0.3678, + "step": 580 + }, + { + "epoch": 1.6266390614216701, + "grad_norm": 0.4199799597263336, + 
"learning_rate": 5.1205714482317455e-05, + "loss": 0.3652, + "step": 590 + }, + { + "epoch": 1.6542443064182195, + "grad_norm": 0.40728333592414856, + "learning_rate": 4.95980605403146e-05, + "loss": 0.3786, + "step": 600 + }, + { + "epoch": 1.6818495514147687, + "grad_norm": 0.41107377409935, + "learning_rate": 4.79908221584108e-05, + "loss": 0.3715, + "step": 610 + }, + { + "epoch": 1.7094547964113183, + "grad_norm": 0.45491889119148254, + "learning_rate": 4.638566103994258e-05, + "loss": 0.386, + "step": 620 + }, + { + "epoch": 1.7370600414078674, + "grad_norm": 0.4167945683002472, + "learning_rate": 4.478423674059015e-05, + "loss": 0.3723, + "step": 630 + }, + { + "epoch": 1.764665286404417, + "grad_norm": 0.4188650846481323, + "learning_rate": 4.318820495258396e-05, + "loss": 0.3794, + "step": 640 + }, + { + "epoch": 1.7922705314009661, + "grad_norm": 0.45200666785240173, + "learning_rate": 4.159921579290546e-05, + "loss": 0.3641, + "step": 650 + }, + { + "epoch": 1.8198757763975155, + "grad_norm": 0.42524534463882446, + "learning_rate": 4.0018912097252234e-05, + "loss": 0.3727, + "step": 660 + }, + { + "epoch": 1.847481021394065, + "grad_norm": 0.4238753318786621, + "learning_rate": 3.8448927721530967e-05, + "loss": 0.3666, + "step": 670 + }, + { + "epoch": 1.8750862663906143, + "grad_norm": 0.3949458599090576, + "learning_rate": 3.6890885852634635e-05, + "loss": 0.3707, + "step": 680 + }, + { + "epoch": 1.9026915113871636, + "grad_norm": 0.4040445387363434, + "learning_rate": 3.534639733025017e-05, + "loss": 0.3793, + "step": 690 + }, + { + "epoch": 1.9302967563837128, + "grad_norm": 0.42878955602645874, + "learning_rate": 3.3817058981431784e-05, + "loss": 0.3623, + "step": 700 + }, + { + "epoch": 1.9579020013802624, + "grad_norm": 0.42626291513442993, + "learning_rate": 3.230445196966181e-05, + "loss": 0.3564, + "step": 710 + }, + { + "epoch": 1.9855072463768115, + "grad_norm": 0.43052035570144653, + "learning_rate": 3.081014016010584e-05, + "loss": 0.3681, + "step": 720 + }, + { + "epoch": 2.0110420979986197, + "grad_norm": 0.4627828896045685, + "learning_rate": 2.9335668502752394e-05, + "loss": 0.359, + "step": 730 + }, + { + "epoch": 2.0386473429951693, + "grad_norm": 0.45345333218574524, + "learning_rate": 2.7882561435108824e-05, + "loss": 0.3189, + "step": 740 + }, + { + "epoch": 2.0662525879917184, + "grad_norm": 0.40497517585754395, + "learning_rate": 2.6452321306104634e-05, + "loss": 0.3409, + "step": 750 + }, + { + "epoch": 2.0938578329882676, + "grad_norm": 0.4666087329387665, + "learning_rate": 2.5046426822832175e-05, + "loss": 0.3354, + "step": 760 + }, + { + "epoch": 2.121463077984817, + "grad_norm": 0.38220757246017456, + "learning_rate": 2.3666331521730024e-05, + "loss": 0.3366, + "step": 770 + }, + { + "epoch": 2.1490683229813663, + "grad_norm": 0.4605223536491394, + "learning_rate": 2.2313462265790196e-05, + "loss": 0.3231, + "step": 780 + }, + { + "epoch": 2.176673567977916, + "grad_norm": 0.558403730392456, + "learning_rate": 2.098921776934269e-05, + "loss": 0.3333, + "step": 790 + }, + { + "epoch": 2.204278812974465, + "grad_norm": 0.45217105746269226, + "learning_rate": 1.96949671519424e-05, + "loss": 0.3401, + "step": 800 + }, + { + "epoch": 2.2318840579710146, + "grad_norm": 0.4413389563560486, + "learning_rate": 1.843204852285389e-05, + "loss": 0.3453, + "step": 810 + }, + { + "epoch": 2.259489302967564, + "grad_norm": 0.3977566063404083, + "learning_rate": 1.7201767597597196e-05, + "loss": 0.338, + "step": 820 + }, + { + "epoch": 2.287094547964113, + 
"grad_norm": 0.4817161560058594, + "learning_rate": 1.60053963479852e-05, + "loss": 0.3334, + "step": 830 + }, + { + "epoch": 2.3146997929606625, + "grad_norm": 0.4438902735710144, + "learning_rate": 1.4844171687048058e-05, + "loss": 0.3359, + "step": 840 + }, + { + "epoch": 2.3423050379572117, + "grad_norm": 0.45830076932907104, + "learning_rate": 1.371929419020459e-05, + "loss": 0.3534, + "step": 850 + }, + { + "epoch": 2.3699102829537613, + "grad_norm": 0.48253732919692993, + "learning_rate": 1.2631926854002574e-05, + "loss": 0.3247, + "step": 860 + }, + { + "epoch": 2.3975155279503104, + "grad_norm": 0.4572385549545288, + "learning_rate": 1.1583193893711475e-05, + "loss": 0.3309, + "step": 870 + }, + { + "epoch": 2.42512077294686, + "grad_norm": 0.4570174217224121, + "learning_rate": 1.0574179581010468e-05, + "loss": 0.3408, + "step": 880 + }, + { + "epoch": 2.452726017943409, + "grad_norm": 0.5289928913116455, + "learning_rate": 9.60592712297379e-06, + "loss": 0.3338, + "step": 890 + }, + { + "epoch": 2.4803312629399588, + "grad_norm": 0.49394240975379944, + "learning_rate": 8.679437583512168e-06, + "loss": 0.3398, + "step": 900 + }, + { + "epoch": 2.507936507936508, + "grad_norm": 0.412822425365448, + "learning_rate": 7.795668848385623e-06, + "loss": 0.333, + "step": 910 + }, + { + "epoch": 2.5355417529330575, + "grad_norm": 0.4305315911769867, + "learning_rate": 6.95553463485748e-06, + "loss": 0.342, + "step": 920 + }, + { + "epoch": 2.5631469979296067, + "grad_norm": 0.43158090114593506, + "learning_rate": 6.159903547013746e-06, + "loss": 0.3335, + "step": 930 + }, + { + "epoch": 2.590752242926156, + "grad_norm": 0.4319579005241394, + "learning_rate": 5.409598177724401e-06, + "loss": 0.3426, + "step": 940 + }, + { + "epoch": 2.6183574879227054, + "grad_norm": 0.4702156186103821, + "learning_rate": 4.7053942581750385e-06, + "loss": 0.3463, + "step": 950 + }, + { + "epoch": 2.6459627329192545, + "grad_norm": 0.38157370686531067, + "learning_rate": 4.048019855848273e-06, + "loss": 0.3331, + "step": 960 + }, + { + "epoch": 2.673567977915804, + "grad_norm": 0.4141283631324768, + "learning_rate": 3.438154621784029e-06, + "loss": 0.3422, + "step": 970 + }, + { + "epoch": 2.7011732229123533, + "grad_norm": 0.42628729343414307, + "learning_rate": 2.8764290878969756e-06, + "loss": 0.3262, + "step": 980 + }, + { + "epoch": 2.728778467908903, + "grad_norm": 0.4850899577140808, + "learning_rate": 2.3634240150775646e-06, + "loss": 0.3303, + "step": 990 + }, + { + "epoch": 2.756383712905452, + "grad_norm": 0.4277842938899994, + "learning_rate": 1.8996697927507468e-06, + "loss": 0.3446, + "step": 1000 + } + ], + "logging_steps": 10, + "max_steps": 1086, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.683655210640081e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fd86c75750949f0ca2ee56bc27dadb57430a90de --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b52a6484c213110d668e89b1ff8d77bac863e0460a3e92ff200a8df3f14879a5 +size 5688 diff --git a/checkpoint-1086/README.md 
b/checkpoint-1086/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d823a0d9f612b6fd128ea17ebbddb8df140520ef --- /dev/null +++ b/checkpoint-1086/README.md @@ -0,0 +1,202 @@ +--- +base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-1086/adapter_config.json b/checkpoint-1086/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..652c79f18ba64f20d9cd4fc1eff31c4b47afb1c6 --- /dev/null +++ b/checkpoint-1086/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "pissa_niter_16", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "o_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1086/adapter_model.safetensors b/checkpoint-1086/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..476a84d19c4420d1d508ed6ebb6e7ce24ac1217b --- /dev/null +++ b/checkpoint-1086/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d2b60858f8196e9c42a81145ebed482335b25f140656f334689c5cd28feb329 +size 268555264 diff --git a/checkpoint-1086/optimizer.pt b/checkpoint-1086/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f838dc40133181a49acc50142de136902a66da8 --- /dev/null +++ b/checkpoint-1086/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe095d3033abf7b29e4e0ba117b60632305190fb88b5c72cb944e0725c597755 +size 537626770 diff --git a/checkpoint-1086/rng_state_0.pth b/checkpoint-1086/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..5a7c482c30381cd512ccc35fe322d8a34fbf5207 --- /dev/null +++ b/checkpoint-1086/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:308f94f9a5c24e1bad5c393d56ae7af7782600f4e791d9c6ac35b22fff2105b6 +size 15024 diff --git a/checkpoint-1086/rng_state_1.pth b/checkpoint-1086/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..7b862c21b28bbd89ce6b4fb681d41be05f175599 --- /dev/null +++ b/checkpoint-1086/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b056f3c23cb32dc77a2ec9e7651e0b64e4440e21f0fdf969b86bfc56a1cbdf06 +size 
15024 diff --git a/checkpoint-1086/rng_state_2.pth b/checkpoint-1086/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..d86ce886844e0298f058d67065e5eeb27ffe7e48 --- /dev/null +++ b/checkpoint-1086/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3f8a05714bc528f4885a2816181652f2303b3e8150f89b56aaee6bec56aa520 +size 15024 diff --git a/checkpoint-1086/rng_state_3.pth b/checkpoint-1086/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..10733f5da657367adf3f67760028644c0839660f --- /dev/null +++ b/checkpoint-1086/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f755bd3c330281961e5c03af9d10ce8c1e1678619d384f6f1fd5fd7dce2ff50 +size 15024 diff --git a/checkpoint-1086/scheduler.pt b/checkpoint-1086/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..51a4f7baffe19964a4cdff00ef358614ffef4608 --- /dev/null +++ b/checkpoint-1086/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2ec3825cfc68e7c0813fa8f1ff9533ce9c3275fb09281f03c2130ac24e05f2d +size 1064 diff --git a/checkpoint-1086/special_tokens_map.json b/checkpoint-1086/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/checkpoint-1086/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1086/tokenizer.json b/checkpoint-1086/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/checkpoint-1086/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/checkpoint-1086/tokenizer_config.json b/checkpoint-1086/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0a414ab9b6f7fec711d4c1346f5847dd0d5bd0ff --- /dev/null +++ b/checkpoint-1086/tokenizer_config.json @@ -0,0 +1,197 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151645": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151646": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151648": { + "content": "<think>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
false + }, + "151649": { + "content": "</think>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "<tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "</tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + 
tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 2048, + "pad_token": "<|end▁of▁sentence|>", + "padding_side": "right", + "sp_model_kwargs": {}, + "split_special_tokens": false, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +} diff --git a/checkpoint-1086/trainer_state.json b/checkpoint-1086/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9dff5814e4b39dc5512660ab9005aaf49076d2a6 --- /dev/null +++ b/checkpoint-1086/trainer_state.json @@ -0,0 +1,789 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.9937888198757765, + "eval_steps": 500, + "global_step": 1086, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.027605244996549344, + "grad_norm": 1.6335422992706299, + "learning_rate": 9.174311926605506e-06, + "loss": 0.8102, + "step": 10 + }, + { + "epoch": 0.05521048999309869, + "grad_norm": 0.8111785054206848, + "learning_rate": 1.834862385321101e-05, + "loss": 0.6999, + "step": 20 + }, + { + "epoch": 0.08281573498964803, + "grad_norm": 0.4619831144809723, + "learning_rate": 2.7522935779816515e-05, + "loss": 0.5682, + "step": 30 + }, + { + "epoch": 0.11042097998619738, + "grad_norm": 0.4434720575809479, + "learning_rate": 3.669724770642202e-05, + "loss": 0.5232, + "step": 40 + }, + { + "epoch": 0.13802622498274672, + "grad_norm": 0.44054797291755676, + "learning_rate": 4.587155963302753e-05, + "loss": 0.5084, + "step": 50 + }, + { + "epoch": 0.16563146997929606, + "grad_norm": 0.42256447672843933, + "learning_rate": 5.504587155963303e-05, + "loss": 0.477, + "step": 60 + }, + { + "epoch": 0.1932367149758454, + "grad_norm": 0.4349405765533447, + "learning_rate": 6.422018348623854e-05, + "loss": 0.4841, + "step": 70 + }, + { + "epoch": 0.22084195997239475, + "grad_norm": 0.4515930712223053, + "learning_rate": 7.339449541284404e-05, + "loss": 0.4704, + "step": 80 + }, + { + "epoch": 0.2484472049689441, + "grad_norm": 0.45412737131118774, + "learning_rate": 8.256880733944955e-05, + "loss": 0.4718, + "step": 90 + }, + { + "epoch": 0.27605244996549344, + "grad_norm": 0.49010995030403137, + "learning_rate": 9.174311926605506e-05, + "loss": 0.4496, + "step": 100 + }, + { + "epoch": 0.3036576949620428, + "grad_norm": 0.4931396245956421, + "learning_rate": 9.999974150612772e-05, + "loss": 
0.4524, + "step": 110 + }, + { + "epoch": 0.33126293995859213, + "grad_norm": 1.1270735263824463, + "learning_rate": 9.996872547536591e-05, + "loss": 0.4503, + "step": 120 + }, + { + "epoch": 0.3588681849551415, + "grad_norm": 0.48991507291793823, + "learning_rate": 9.988604741439287e-05, + "loss": 0.4399, + "step": 130 + }, + { + "epoch": 0.3864734299516908, + "grad_norm": 0.45801088213920593, + "learning_rate": 9.975179280300506e-05, + "loss": 0.4524, + "step": 140 + }, + { + "epoch": 0.4140786749482402, + "grad_norm": 0.420897901058197, + "learning_rate": 9.956610044533896e-05, + "loss": 0.4281, + "step": 150 + }, + { + "epoch": 0.4416839199447895, + "grad_norm": 0.4336962103843689, + "learning_rate": 9.932916232636318e-05, + "loss": 0.4305, + "step": 160 + }, + { + "epoch": 0.4692891649413389, + "grad_norm": 0.44120800495147705, + "learning_rate": 9.904122341338765e-05, + "loss": 0.4208, + "step": 170 + }, + { + "epoch": 0.4968944099378882, + "grad_norm": 0.9154078364372253, + "learning_rate": 9.870258140279503e-05, + "loss": 0.4436, + "step": 180 + }, + { + "epoch": 0.5244996549344375, + "grad_norm": 0.4551916718482971, + "learning_rate": 9.831358641225624e-05, + "loss": 0.4288, + "step": 190 + }, + { + "epoch": 0.5521048999309869, + "grad_norm": 0.4513665437698364, + "learning_rate": 9.787464061874825e-05, + "loss": 0.4384, + "step": 200 + }, + { + "epoch": 0.5797101449275363, + "grad_norm": 0.43779632449150085, + "learning_rate": 9.738619784274833e-05, + "loss": 0.4178, + "step": 210 + }, + { + "epoch": 0.6073153899240856, + "grad_norm": 0.4170076847076416, + "learning_rate": 9.684876307903494e-05, + "loss": 0.42, + "step": 220 + }, + { + "epoch": 0.6349206349206349, + "grad_norm": 0.4370488226413727, + "learning_rate": 9.626289197457994e-05, + "loss": 0.4296, + "step": 230 + }, + { + "epoch": 0.6625258799171843, + "grad_norm": 0.42547333240509033, + "learning_rate": 9.562919025407236e-05, + "loss": 0.4264, + "step": 240 + }, + { + "epoch": 0.6901311249137336, + "grad_norm": 0.4317057430744171, + "learning_rate": 9.494831309366723e-05, + "loss": 0.4052, + "step": 250 + }, + { + "epoch": 0.717736369910283, + "grad_norm": 0.40589675307273865, + "learning_rate": 9.422096444360735e-05, + "loss": 0.41, + "step": 260 + }, + { + "epoch": 0.7453416149068323, + "grad_norm": 0.44671744108200073, + "learning_rate": 9.34478963004181e-05, + "loss": 0.4162, + "step": 270 + }, + { + "epoch": 0.7729468599033816, + "grad_norm": 0.41162508726119995, + "learning_rate": 9.262990792942768e-05, + "loss": 0.4183, + "step": 280 + }, + { + "epoch": 0.800552104899931, + "grad_norm": 0.483149915933609, + "learning_rate": 9.176784503841697e-05, + "loss": 0.4174, + "step": 290 + }, + { + "epoch": 0.8281573498964804, + "grad_norm": 0.4605332612991333, + "learning_rate": 9.086259890325297e-05, + "loss": 0.4191, + "step": 300 + }, + { + "epoch": 0.8557625948930296, + "grad_norm": 0.4153307378292084, + "learning_rate": 8.991510544640991e-05, + "loss": 0.4253, + "step": 310 + }, + { + "epoch": 0.883367839889579, + "grad_norm": 0.43806084990501404, + "learning_rate": 8.892634426933106e-05, + "loss": 0.4265, + "step": 320 + }, + { + "epoch": 0.9109730848861284, + "grad_norm": 0.45412200689315796, + "learning_rate": 8.78973376396311e-05, + "loss": 0.4365, + "step": 330 + }, + { + "epoch": 0.9385783298826778, + "grad_norm": 0.3769752085208893, + "learning_rate": 8.682914943418676e-05, + "loss": 0.4058, + "step": 340 + }, + { + "epoch": 0.966183574879227, + "grad_norm": 0.4275883436203003, + "learning_rate": 
8.572288403920792e-05, + "loss": 0.4078, + "step": 350 + }, + { + "epoch": 0.9937888198757764, + "grad_norm": 0.43371307849884033, + "learning_rate": 8.45796852084268e-05, + "loss": 0.4063, + "step": 360 + }, + { + "epoch": 1.0193236714975846, + "grad_norm": 0.4527032673358917, + "learning_rate": 8.340073488058552e-05, + "loss": 0.3742, + "step": 370 + }, + { + "epoch": 1.0469289164941338, + "grad_norm": 0.5205631256103516, + "learning_rate": 8.218725195744463e-05, + "loss": 0.3809, + "step": 380 + }, + { + "epoch": 1.0745341614906831, + "grad_norm": 0.4031950533390045, + "learning_rate": 8.094049104357609e-05, + "loss": 0.3823, + "step": 390 + }, + { + "epoch": 1.1021394064872325, + "grad_norm": 0.41949087381362915, + "learning_rate": 7.966174114924351e-05, + "loss": 0.3765, + "step": 400 + }, + { + "epoch": 1.129744651483782, + "grad_norm": 0.43814027309417725, + "learning_rate": 7.83523243577109e-05, + "loss": 0.3751, + "step": 410 + }, + { + "epoch": 1.1573498964803313, + "grad_norm": 0.4457204341888428, + "learning_rate": 7.70135944583575e-05, + "loss": 0.3869, + "step": 420 + }, + { + "epoch": 1.1849551414768806, + "grad_norm": 0.41421836614608765, + "learning_rate": 7.56469355470122e-05, + "loss": 0.3634, + "step": 430 + }, + { + "epoch": 1.21256038647343, + "grad_norm": 0.4416670799255371, + "learning_rate": 7.425376059495442e-05, + "loss": 0.3768, + "step": 440 + }, + { + "epoch": 1.2401656314699794, + "grad_norm": 0.44710710644721985, + "learning_rate": 7.283550998806108e-05, + "loss": 0.3669, + "step": 450 + }, + { + "epoch": 1.2677708764665288, + "grad_norm": 0.39852890372276306, + "learning_rate": 7.139365003760999e-05, + "loss": 0.3824, + "step": 460 + }, + { + "epoch": 1.295376121463078, + "grad_norm": 0.4412725269794464, + "learning_rate": 6.992967146427913e-05, + "loss": 0.3646, + "step": 470 + }, + { + "epoch": 1.3229813664596273, + "grad_norm": 0.41978228092193604, + "learning_rate": 6.844508785690964e-05, + "loss": 0.3755, + "step": 480 + }, + { + "epoch": 1.3505866114561766, + "grad_norm": 0.4214731752872467, + "learning_rate": 6.694143410762542e-05, + "loss": 0.3841, + "step": 490 + }, + { + "epoch": 1.378191856452726, + "grad_norm": 0.4128514230251312, + "learning_rate": 6.54202648249278e-05, + "loss": 0.3839, + "step": 500 + }, + { + "epoch": 1.4057971014492754, + "grad_norm": 0.3899001181125641, + "learning_rate": 6.388315272640544e-05, + "loss": 0.3726, + "step": 510 + }, + { + "epoch": 1.4334023464458248, + "grad_norm": 0.4347754120826721, + "learning_rate": 6.233168701272167e-05, + "loss": 0.3722, + "step": 520 + }, + { + "epoch": 1.4610075914423741, + "grad_norm": 0.3798378109931946, + "learning_rate": 6.076747172456015e-05, + "loss": 0.3623, + "step": 530 + }, + { + "epoch": 1.4886128364389233, + "grad_norm": 0.3879692256450653, + "learning_rate": 5.919212408422753e-05, + "loss": 0.3684, + "step": 540 + }, + { + "epoch": 1.5162180814354729, + "grad_norm": 0.4210754930973053, + "learning_rate": 5.76072728236279e-05, + "loss": 0.3674, + "step": 550 + }, + { + "epoch": 1.543823326432022, + "grad_norm": 0.4184245467185974, + "learning_rate": 5.6014556500337534e-05, + "loss": 0.3602, + "step": 560 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 0.43027910590171814, + "learning_rate": 5.44156218035211e-05, + "loss": 0.3872, + "step": 570 + }, + { + "epoch": 1.5990338164251208, + "grad_norm": 0.38721945881843567, + "learning_rate": 5.28121218514406e-05, + "loss": 0.3678, + "step": 580 + }, + { + "epoch": 1.6266390614216701, + "grad_norm": 0.4199799597263336, + 
"learning_rate": 5.1205714482317455e-05, + "loss": 0.3652, + "step": 590 + }, + { + "epoch": 1.6542443064182195, + "grad_norm": 0.40728333592414856, + "learning_rate": 4.95980605403146e-05, + "loss": 0.3786, + "step": 600 + }, + { + "epoch": 1.6818495514147687, + "grad_norm": 0.41107377409935, + "learning_rate": 4.79908221584108e-05, + "loss": 0.3715, + "step": 610 + }, + { + "epoch": 1.7094547964113183, + "grad_norm": 0.45491889119148254, + "learning_rate": 4.638566103994258e-05, + "loss": 0.386, + "step": 620 + }, + { + "epoch": 1.7370600414078674, + "grad_norm": 0.4167945683002472, + "learning_rate": 4.478423674059015e-05, + "loss": 0.3723, + "step": 630 + }, + { + "epoch": 1.764665286404417, + "grad_norm": 0.4188650846481323, + "learning_rate": 4.318820495258396e-05, + "loss": 0.3794, + "step": 640 + }, + { + "epoch": 1.7922705314009661, + "grad_norm": 0.45200666785240173, + "learning_rate": 4.159921579290546e-05, + "loss": 0.3641, + "step": 650 + }, + { + "epoch": 1.8198757763975155, + "grad_norm": 0.42524534463882446, + "learning_rate": 4.0018912097252234e-05, + "loss": 0.3727, + "step": 660 + }, + { + "epoch": 1.847481021394065, + "grad_norm": 0.4238753318786621, + "learning_rate": 3.8448927721530967e-05, + "loss": 0.3666, + "step": 670 + }, + { + "epoch": 1.8750862663906143, + "grad_norm": 0.3949458599090576, + "learning_rate": 3.6890885852634635e-05, + "loss": 0.3707, + "step": 680 + }, + { + "epoch": 1.9026915113871636, + "grad_norm": 0.4040445387363434, + "learning_rate": 3.534639733025017e-05, + "loss": 0.3793, + "step": 690 + }, + { + "epoch": 1.9302967563837128, + "grad_norm": 0.42878955602645874, + "learning_rate": 3.3817058981431784e-05, + "loss": 0.3623, + "step": 700 + }, + { + "epoch": 1.9579020013802624, + "grad_norm": 0.42626291513442993, + "learning_rate": 3.230445196966181e-05, + "loss": 0.3564, + "step": 710 + }, + { + "epoch": 1.9855072463768115, + "grad_norm": 0.43052035570144653, + "learning_rate": 3.081014016010584e-05, + "loss": 0.3681, + "step": 720 + }, + { + "epoch": 2.0110420979986197, + "grad_norm": 0.4627828896045685, + "learning_rate": 2.9335668502752394e-05, + "loss": 0.359, + "step": 730 + }, + { + "epoch": 2.0386473429951693, + "grad_norm": 0.45345333218574524, + "learning_rate": 2.7882561435108824e-05, + "loss": 0.3189, + "step": 740 + }, + { + "epoch": 2.0662525879917184, + "grad_norm": 0.40497517585754395, + "learning_rate": 2.6452321306104634e-05, + "loss": 0.3409, + "step": 750 + }, + { + "epoch": 2.0938578329882676, + "grad_norm": 0.4666087329387665, + "learning_rate": 2.5046426822832175e-05, + "loss": 0.3354, + "step": 760 + }, + { + "epoch": 2.121463077984817, + "grad_norm": 0.38220757246017456, + "learning_rate": 2.3666331521730024e-05, + "loss": 0.3366, + "step": 770 + }, + { + "epoch": 2.1490683229813663, + "grad_norm": 0.4605223536491394, + "learning_rate": 2.2313462265790196e-05, + "loss": 0.3231, + "step": 780 + }, + { + "epoch": 2.176673567977916, + "grad_norm": 0.558403730392456, + "learning_rate": 2.098921776934269e-05, + "loss": 0.3333, + "step": 790 + }, + { + "epoch": 2.204278812974465, + "grad_norm": 0.45217105746269226, + "learning_rate": 1.96949671519424e-05, + "loss": 0.3401, + "step": 800 + }, + { + "epoch": 2.2318840579710146, + "grad_norm": 0.4413389563560486, + "learning_rate": 1.843204852285389e-05, + "loss": 0.3453, + "step": 810 + }, + { + "epoch": 2.259489302967564, + "grad_norm": 0.3977566063404083, + "learning_rate": 1.7201767597597196e-05, + "loss": 0.338, + "step": 820 + }, + { + "epoch": 2.287094547964113, + 
"grad_norm": 0.4817161560058594, + "learning_rate": 1.60053963479852e-05, + "loss": 0.3334, + "step": 830 + }, + { + "epoch": 2.3146997929606625, + "grad_norm": 0.4438902735710144, + "learning_rate": 1.4844171687048058e-05, + "loss": 0.3359, + "step": 840 + }, + { + "epoch": 2.3423050379572117, + "grad_norm": 0.45830076932907104, + "learning_rate": 1.371929419020459e-05, + "loss": 0.3534, + "step": 850 + }, + { + "epoch": 2.3699102829537613, + "grad_norm": 0.48253732919692993, + "learning_rate": 1.2631926854002574e-05, + "loss": 0.3247, + "step": 860 + }, + { + "epoch": 2.3975155279503104, + "grad_norm": 0.4572385549545288, + "learning_rate": 1.1583193893711475e-05, + "loss": 0.3309, + "step": 870 + }, + { + "epoch": 2.42512077294686, + "grad_norm": 0.4570174217224121, + "learning_rate": 1.0574179581010468e-05, + "loss": 0.3408, + "step": 880 + }, + { + "epoch": 2.452726017943409, + "grad_norm": 0.5289928913116455, + "learning_rate": 9.60592712297379e-06, + "loss": 0.3338, + "step": 890 + }, + { + "epoch": 2.4803312629399588, + "grad_norm": 0.49394240975379944, + "learning_rate": 8.679437583512168e-06, + "loss": 0.3398, + "step": 900 + }, + { + "epoch": 2.507936507936508, + "grad_norm": 0.412822425365448, + "learning_rate": 7.795668848385623e-06, + "loss": 0.333, + "step": 910 + }, + { + "epoch": 2.5355417529330575, + "grad_norm": 0.4305315911769867, + "learning_rate": 6.95553463485748e-06, + "loss": 0.342, + "step": 920 + }, + { + "epoch": 2.5631469979296067, + "grad_norm": 0.43158090114593506, + "learning_rate": 6.159903547013746e-06, + "loss": 0.3335, + "step": 930 + }, + { + "epoch": 2.590752242926156, + "grad_norm": 0.4319579005241394, + "learning_rate": 5.409598177724401e-06, + "loss": 0.3426, + "step": 940 + }, + { + "epoch": 2.6183574879227054, + "grad_norm": 0.4702156186103821, + "learning_rate": 4.7053942581750385e-06, + "loss": 0.3463, + "step": 950 + }, + { + "epoch": 2.6459627329192545, + "grad_norm": 0.38157370686531067, + "learning_rate": 4.048019855848273e-06, + "loss": 0.3331, + "step": 960 + }, + { + "epoch": 2.673567977915804, + "grad_norm": 0.4141283631324768, + "learning_rate": 3.438154621784029e-06, + "loss": 0.3422, + "step": 970 + }, + { + "epoch": 2.7011732229123533, + "grad_norm": 0.42628729343414307, + "learning_rate": 2.8764290878969756e-06, + "loss": 0.3262, + "step": 980 + }, + { + "epoch": 2.728778467908903, + "grad_norm": 0.4850899577140808, + "learning_rate": 2.3634240150775646e-06, + "loss": 0.3303, + "step": 990 + }, + { + "epoch": 2.756383712905452, + "grad_norm": 0.4277842938899994, + "learning_rate": 1.8996697927507468e-06, + "loss": 0.3446, + "step": 1000 + }, + { + "epoch": 2.783988957902001, + "grad_norm": 0.45691201090812683, + "learning_rate": 1.4856458905130822e-06, + "loss": 0.3309, + "step": 1010 + }, + { + "epoch": 2.8115942028985508, + "grad_norm": 0.4542577862739563, + "learning_rate": 1.1217803624152311e-06, + "loss": 0.326, + "step": 1020 + }, + { + "epoch": 2.8391994478951, + "grad_norm": 0.39988699555397034, + "learning_rate": 8.084494044022839e-07, + "loss": 0.3364, + "step": 1030 + }, + { + "epoch": 2.8668046928916495, + "grad_norm": 0.43636584281921387, + "learning_rate": 5.459769653695657e-07, + "loss": 0.3313, + "step": 1040 + }, + { + "epoch": 2.8944099378881987, + "grad_norm": 0.4335787892341614, + "learning_rate": 3.346344122360179e-07, + "loss": 0.328, + "step": 1050 + }, + { + "epoch": 2.9220151828847483, + "grad_norm": 0.4669038951396942, + "learning_rate": 1.746402493813415e-07, + "loss": 0.3426, + "step": 1060 + }, + { + 
"epoch": 2.9496204278812974, + "grad_norm": 0.43036729097366333, + "learning_rate": 6.615989273713874e-08, + "loss": 0.3378, + "step": 1070 + }, + { + "epoch": 2.9772256728778466, + "grad_norm": 0.4190558195114136, + "learning_rate": 9.305498765438404e-09, + "loss": 0.3358, + "step": 1080 + } + ], + "logging_steps": 10, + "max_steps": 1086, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 6.17252944434797e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1086/training_args.bin b/checkpoint-1086/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fd86c75750949f0ca2ee56bc27dadb57430a90de --- /dev/null +++ b/checkpoint-1086/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b52a6484c213110d668e89b1ff8d77bac863e0460a3e92ff200a8df3f14879a5 +size 5688 diff --git a/checkpoint-500/README.md b/checkpoint-500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d823a0d9f612b6fd128ea17ebbddb8df140520ef --- /dev/null +++ b/checkpoint-500/README.md @@ -0,0 +1,202 @@ +--- +base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. 
(2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-500/adapter_config.json b/checkpoint-500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..652c79f18ba64f20d9cd4fc1eff31c4b47afb1c6 --- /dev/null +++ b/checkpoint-500/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "pissa_niter_16", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "o_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-500/adapter_model.safetensors b/checkpoint-500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb64c1678b0993d094dd0652422db9a60b40ece8 --- /dev/null +++ b/checkpoint-500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a43131df51ef87099c41c2eea064b6c7bed5f2ef3b3f3f89d0eeab4739d280a1 +size 268555264 diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..74ddf5668a869bc1a80b8f6b3b7c9e6cc10d05ad --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b8e1f62ba644f7ff12a4315ffe2e414d517d1aefb5665ce137d8d7e46935864 +size 537626770 diff --git a/checkpoint-500/rng_state_0.pth b/checkpoint-500/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..37ac50652a3badbfb1bdeaccb8b1934575b584eb --- /dev/null +++ b/checkpoint-500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbe0d720c4c75a6a04213fa3b64bacbe794718a53e2b56ebb67a1a795014dfad +size 15024 diff --git a/checkpoint-500/rng_state_1.pth b/checkpoint-500/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..0bc3650851dae439677613c9e23a5528de47b679 --- /dev/null +++ b/checkpoint-500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:72452d3138d0ca2ff89429e3294a834ae7a68e8596fc757735ca56ae52509d57 +size 15024 diff --git a/checkpoint-500/rng_state_2.pth b/checkpoint-500/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e00a6e8b4b743026f68d749a8cb3bdd4b746838 --- /dev/null +++ b/checkpoint-500/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36e306fb8ebcf53a167bfd6c9af74db410a269ada1e619e3e816f5269543b9d +size 15024 diff --git a/checkpoint-500/rng_state_3.pth b/checkpoint-500/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..5354141d42e077c356f9ca8c6b12bd7e5e41f2af --- /dev/null +++ b/checkpoint-500/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb47ce0c6f815a6f8302b0e3819b4c2315ca71dae3138d97fdceb765cdd0a039 +size 15024 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..94197b147067ec43b36354ccdb9b2783606e6183 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9b15c8fe8bd1f8470d946d672234e9bd15a1a3744a13876ee58a8a23e297c9d +size 1064 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-500/tokenizer.json b/checkpoint-500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/checkpoint-500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0a414ab9b6f7fec711d4c1346f5847dd0d5bd0ff --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1,197 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151645": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151646": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151648": { + "content": "", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151649": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + 
tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 2048, + "pad_token": "<|end▁of▁sentence|>", + "padding_side": "right", + "sp_model_kwargs": {}, + "split_special_tokens": false, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +} diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5dfd5733280944330d7e84b420bbc35c487e3577 --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,383 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.378191856452726, + "eval_steps": 500, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.027605244996549344, + "grad_norm": 1.6335422992706299, + "learning_rate": 9.174311926605506e-06, + "loss": 0.8102, + "step": 10 + }, + { + "epoch": 0.05521048999309869, + "grad_norm": 0.8111785054206848, + "learning_rate": 1.834862385321101e-05, + "loss": 0.6999, + "step": 20 + }, + { + "epoch": 0.08281573498964803, + "grad_norm": 0.4619831144809723, + "learning_rate": 2.7522935779816515e-05, + "loss": 0.5682, + "step": 30 + }, + { + "epoch": 0.11042097998619738, + "grad_norm": 0.4434720575809479, + "learning_rate": 3.669724770642202e-05, + "loss": 0.5232, + "step": 40 + }, + { + "epoch": 0.13802622498274672, + "grad_norm": 0.44054797291755676, + "learning_rate": 4.587155963302753e-05, + "loss": 0.5084, + "step": 50 + }, + { + "epoch": 0.16563146997929606, + "grad_norm": 0.42256447672843933, + "learning_rate": 5.504587155963303e-05, + "loss": 0.477, + "step": 60 + }, + { + "epoch": 0.1932367149758454, + "grad_norm": 0.4349405765533447, + "learning_rate": 6.422018348623854e-05, + "loss": 0.4841, + "step": 70 + }, + { + "epoch": 0.22084195997239475, + "grad_norm": 0.4515930712223053, + "learning_rate": 7.339449541284404e-05, + "loss": 0.4704, + "step": 80 + }, + { + "epoch": 0.2484472049689441, + "grad_norm": 0.45412737131118774, + "learning_rate": 8.256880733944955e-05, + "loss": 0.4718, + "step": 90 + }, + { + "epoch": 0.27605244996549344, + "grad_norm": 0.49010995030403137, + "learning_rate": 9.174311926605506e-05, + "loss": 0.4496, + "step": 100 + }, + { + "epoch": 0.3036576949620428, + "grad_norm": 0.4931396245956421, + 
"learning_rate": 9.999974150612772e-05, + "loss": 0.4524, + "step": 110 + }, + { + "epoch": 0.33126293995859213, + "grad_norm": 1.1270735263824463, + "learning_rate": 9.996872547536591e-05, + "loss": 0.4503, + "step": 120 + }, + { + "epoch": 0.3588681849551415, + "grad_norm": 0.48991507291793823, + "learning_rate": 9.988604741439287e-05, + "loss": 0.4399, + "step": 130 + }, + { + "epoch": 0.3864734299516908, + "grad_norm": 0.45801088213920593, + "learning_rate": 9.975179280300506e-05, + "loss": 0.4524, + "step": 140 + }, + { + "epoch": 0.4140786749482402, + "grad_norm": 0.420897901058197, + "learning_rate": 9.956610044533896e-05, + "loss": 0.4281, + "step": 150 + }, + { + "epoch": 0.4416839199447895, + "grad_norm": 0.4336962103843689, + "learning_rate": 9.932916232636318e-05, + "loss": 0.4305, + "step": 160 + }, + { + "epoch": 0.4692891649413389, + "grad_norm": 0.44120800495147705, + "learning_rate": 9.904122341338765e-05, + "loss": 0.4208, + "step": 170 + }, + { + "epoch": 0.4968944099378882, + "grad_norm": 0.9154078364372253, + "learning_rate": 9.870258140279503e-05, + "loss": 0.4436, + "step": 180 + }, + { + "epoch": 0.5244996549344375, + "grad_norm": 0.4551916718482971, + "learning_rate": 9.831358641225624e-05, + "loss": 0.4288, + "step": 190 + }, + { + "epoch": 0.5521048999309869, + "grad_norm": 0.4513665437698364, + "learning_rate": 9.787464061874825e-05, + "loss": 0.4384, + "step": 200 + }, + { + "epoch": 0.5797101449275363, + "grad_norm": 0.43779632449150085, + "learning_rate": 9.738619784274833e-05, + "loss": 0.4178, + "step": 210 + }, + { + "epoch": 0.6073153899240856, + "grad_norm": 0.4170076847076416, + "learning_rate": 9.684876307903494e-05, + "loss": 0.42, + "step": 220 + }, + { + "epoch": 0.6349206349206349, + "grad_norm": 0.4370488226413727, + "learning_rate": 9.626289197457994e-05, + "loss": 0.4296, + "step": 230 + }, + { + "epoch": 0.6625258799171843, + "grad_norm": 0.42547333240509033, + "learning_rate": 9.562919025407236e-05, + "loss": 0.4264, + "step": 240 + }, + { + "epoch": 0.6901311249137336, + "grad_norm": 0.4317057430744171, + "learning_rate": 9.494831309366723e-05, + "loss": 0.4052, + "step": 250 + }, + { + "epoch": 0.717736369910283, + "grad_norm": 0.40589675307273865, + "learning_rate": 9.422096444360735e-05, + "loss": 0.41, + "step": 260 + }, + { + "epoch": 0.7453416149068323, + "grad_norm": 0.44671744108200073, + "learning_rate": 9.34478963004181e-05, + "loss": 0.4162, + "step": 270 + }, + { + "epoch": 0.7729468599033816, + "grad_norm": 0.41162508726119995, + "learning_rate": 9.262990792942768e-05, + "loss": 0.4183, + "step": 280 + }, + { + "epoch": 0.800552104899931, + "grad_norm": 0.483149915933609, + "learning_rate": 9.176784503841697e-05, + "loss": 0.4174, + "step": 290 + }, + { + "epoch": 0.8281573498964804, + "grad_norm": 0.4605332612991333, + "learning_rate": 9.086259890325297e-05, + "loss": 0.4191, + "step": 300 + }, + { + "epoch": 0.8557625948930296, + "grad_norm": 0.4153307378292084, + "learning_rate": 8.991510544640991e-05, + "loss": 0.4253, + "step": 310 + }, + { + "epoch": 0.883367839889579, + "grad_norm": 0.43806084990501404, + "learning_rate": 8.892634426933106e-05, + "loss": 0.4265, + "step": 320 + }, + { + "epoch": 0.9109730848861284, + "grad_norm": 0.45412200689315796, + "learning_rate": 8.78973376396311e-05, + "loss": 0.4365, + "step": 330 + }, + { + "epoch": 0.9385783298826778, + "grad_norm": 0.3769752085208893, + "learning_rate": 8.682914943418676e-05, + "loss": 0.4058, + "step": 340 + }, + { + "epoch": 0.966183574879227, + "grad_norm": 
0.4275883436203003, + "learning_rate": 8.572288403920792e-05, + "loss": 0.4078, + "step": 350 + }, + { + "epoch": 0.9937888198757764, + "grad_norm": 0.43371307849884033, + "learning_rate": 8.45796852084268e-05, + "loss": 0.4063, + "step": 360 + }, + { + "epoch": 1.0193236714975846, + "grad_norm": 0.4527032673358917, + "learning_rate": 8.340073488058552e-05, + "loss": 0.3742, + "step": 370 + }, + { + "epoch": 1.0469289164941338, + "grad_norm": 0.5205631256103516, + "learning_rate": 8.218725195744463e-05, + "loss": 0.3809, + "step": 380 + }, + { + "epoch": 1.0745341614906831, + "grad_norm": 0.4031950533390045, + "learning_rate": 8.094049104357609e-05, + "loss": 0.3823, + "step": 390 + }, + { + "epoch": 1.1021394064872325, + "grad_norm": 0.41949087381362915, + "learning_rate": 7.966174114924351e-05, + "loss": 0.3765, + "step": 400 + }, + { + "epoch": 1.129744651483782, + "grad_norm": 0.43814027309417725, + "learning_rate": 7.83523243577109e-05, + "loss": 0.3751, + "step": 410 + }, + { + "epoch": 1.1573498964803313, + "grad_norm": 0.4457204341888428, + "learning_rate": 7.70135944583575e-05, + "loss": 0.3869, + "step": 420 + }, + { + "epoch": 1.1849551414768806, + "grad_norm": 0.41421836614608765, + "learning_rate": 7.56469355470122e-05, + "loss": 0.3634, + "step": 430 + }, + { + "epoch": 1.21256038647343, + "grad_norm": 0.4416670799255371, + "learning_rate": 7.425376059495442e-05, + "loss": 0.3768, + "step": 440 + }, + { + "epoch": 1.2401656314699794, + "grad_norm": 0.44710710644721985, + "learning_rate": 7.283550998806108e-05, + "loss": 0.3669, + "step": 450 + }, + { + "epoch": 1.2677708764665288, + "grad_norm": 0.39852890372276306, + "learning_rate": 7.139365003760999e-05, + "loss": 0.3824, + "step": 460 + }, + { + "epoch": 1.295376121463078, + "grad_norm": 0.4412725269794464, + "learning_rate": 6.992967146427913e-05, + "loss": 0.3646, + "step": 470 + }, + { + "epoch": 1.3229813664596273, + "grad_norm": 0.41978228092193604, + "learning_rate": 6.844508785690964e-05, + "loss": 0.3755, + "step": 480 + }, + { + "epoch": 1.3505866114561766, + "grad_norm": 0.4214731752872467, + "learning_rate": 6.694143410762542e-05, + "loss": 0.3841, + "step": 490 + }, + { + "epoch": 1.378191856452726, + "grad_norm": 0.4128514230251312, + "learning_rate": 6.54202648249278e-05, + "loss": 0.3839, + "step": 500 + } + ], + "logging_steps": 10, + "max_steps": 1086, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.837431317598044e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fd86c75750949f0ca2ee56bc27dadb57430a90de --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b52a6484c213110d668e89b1ff8d77bac863e0460a3e92ff200a8df3f14879a5 +size 5688 diff --git a/pissa_backup/README.md b/pissa_backup/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d823a0d9f612b6fd128ea17ebbddb8df140520ef --- /dev/null +++ b/pissa_backup/README.md @@ -0,0 +1,202 @@ +--- +base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model 
Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
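As a rough, self-contained illustration of that estimate, a short Python sketch follows. Everything in it is an assumption for illustration only: the per-GPU power draw, PUE, and grid carbon intensity are placeholders, and only the roughly 1.9-hour runtime recorded in train_results.json and the four training ranks suggested by the rng_state_0–3 files come from this repository.

```python
# Back-of-the-envelope CO2 estimate in the spirit of the ML Impact calculator:
#   energy (kWh)        = per-GPU power (kW) * GPU count * hours * datacenter PUE
#   emissions (kg CO2e) = energy (kWh) * grid carbon intensity (kg CO2e / kWh)

def estimate_co2_kg(gpu_power_kw: float, num_gpus: int, hours: float,
                    pue: float = 1.1, kg_co2_per_kwh: float = 0.4) -> float:
    """Approximate training emissions; every input here is an assumed placeholder."""
    energy_kwh = gpu_power_kw * num_gpus * hours * pue
    return energy_kwh * kg_co2_per_kwh

# Placeholder figures (0.7 kW per GPU is assumed, not measured for this run).
print(f"{estimate_co2_kg(gpu_power_kw=0.7, num_gpus=4, hours=1.9):.2f} kg CO2e")
```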
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/pissa_backup/adapter_config.json b/pissa_backup/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..df89fe583f3943b9bfeca2c52b5233866c00a110 --- /dev/null +++ b/pissa_backup/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "o_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/pissa_backup/adapter_model.safetensors b/pissa_backup/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..476a84d19c4420d1d508ed6ebb6e7ce24ac1217b --- /dev/null +++ b/pissa_backup/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d2b60858f8196e9c42a81145ebed482335b25f140656f334689c5cd28feb329 +size 268555264 diff --git a/pissa_converted/README.md b/pissa_converted/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d823a0d9f612b6fd128ea17ebbddb8df140520ef --- /dev/null +++ b/pissa_converted/README.md @@ -0,0 +1,202 @@ +--- +base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + 
+ +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/pissa_converted/adapter_config.json b/pissa_converted/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d04cab88dd3f79b639159ddee01d4fe060aa22c2 --- /dev/null +++ b/pissa_converted/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "o_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/pissa_converted/adapter_model.safetensors b/pissa_converted/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e5c02f3fe68ab34acad6fa189a2c85e7f786d71 --- /dev/null +++ b/pissa_converted/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:dbfc40b8ec9b2f06cd550e2a9d01cf1c316504db097c80f10ae188c4bc579292 +size 536991984 diff --git a/pissa_init/README.md b/pissa_init/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d823a0d9f612b6fd128ea17ebbddb8df140520ef --- /dev/null +++ b/pissa_init/README.md @@ -0,0 +1,202 @@ +--- +base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
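The pissa_init/, pissa_backup/, and pissa_converted/ folders in this repository come from the PiSSA workflow selected by `init_lora_weights: "pissa_niter_16"` in the adapter configs. As a hedged sketch of the idea only (this is not code from this repository and not the PEFT implementation verbatim), PiSSA seeds the LoRA factors of each targeted linear layer from its top-r singular directions and trains against the residual weight; the `niter_16` suffix corresponds to the number of subspace iterations of the randomized SVD.

```python
import torch

def pissa_init(weight: torch.Tensor, r: int = 16, niter: int = 16):
    """Sketch of PiSSA-style LoRA initialization for one linear layer (assumed formulation).

    weight: frozen base weight of shape (out_features, in_features).
    Returns the residual weight to keep frozen plus the initialized LoRA factors.
    """
    # Randomized truncated SVD; `niter` mirrors the "pissa_niter_16" setting.
    U, S, V = torch.svd_lowrank(weight.float(), q=r, niter=niter)
    sqrt_S = torch.diag(S.sqrt())
    lora_B = U @ sqrt_S        # (out_features, r)
    lora_A = sqrt_S @ V.T      # (r, in_features)
    residual = weight.float() - lora_B @ lora_A  # replaces the frozen weight during training
    return residual, lora_A, lora_B
```

Converting such an adapter so that it applies on top of the unmodified base weights typically doubles the stored rank (final factors minus initial factors), which would be consistent with the rank-16 / alpha-32 settings in pissa_converted/adapter_config.json and its adapter_model.safetensors being roughly twice the size of the rank-8 pissa_init and pissa_backup adapters.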
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/pissa_init/adapter_config.json b/pissa_init/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c76b6552676cd51a98750e2f26c3d85b375090d5 --- /dev/null +++ b/pissa_init/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "o_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/pissa_init/adapter_model.safetensors b/pissa_init/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..215bd57118598136cf03c9630dd90dfa6b35d525 --- /dev/null +++ b/pissa_init/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:304735ac6297f3e0dfc0131f190887142e8d1c539967263d1154c4e62620a739 +size 268555264 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0a414ab9b6f7fec711d4c1346f5847dd0d5bd0ff --- /dev/null +++ 
b/tokenizer_config.json @@ -0,0 +1,197 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151645": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151646": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151648": { + "content": "<think>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151649": { + "content": "</think>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "<tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "</tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not
add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 2048, + "pad_token": "<|end▁of▁sentence|>", + "padding_side": "right", + "sp_model_kwargs": {}, + "split_special_tokens": false, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6f40b85634398802ef051edb9b192fe596a15ba6 --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.9937888198757765, + "total_flos": 6.17252944434797e+18, + "train_loss": 0.3891050570797086, + "train_runtime": 6764.7829, + "train_samples_per_second": 5.14, + "train_steps_per_second": 0.161 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c7490982d06fd4b759317c4abbbd8741eda6efa9 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,109 @@ +{"current_steps": 10, "total_steps": 1086, "loss": 0.8102, "lr": 9.174311926605506e-06, "epoch": 0.027605244996549344, "percentage": 0.92, "elapsed_time": "0:01:04", "remaining_time": "1:56:24"} +{"current_steps": 20, "total_steps": 1086, "loss": 0.6999, "lr": 1.834862385321101e-05, "epoch": 0.05521048999309869, "percentage": 1.84, "elapsed_time": "0:02:07", "remaining_time": "1:53:04"} +{"current_steps": 30, "total_steps":
1086, "loss": 0.5682, "lr": 2.7522935779816515e-05, "epoch": 0.08281573498964803, "percentage": 2.76, "elapsed_time": "0:03:09", "remaining_time": "1:50:53"} +{"current_steps": 40, "total_steps": 1086, "loss": 0.5232, "lr": 3.669724770642202e-05, "epoch": 0.11042097998619738, "percentage": 3.68, "elapsed_time": "0:04:09", "remaining_time": "1:48:39"} +{"current_steps": 50, "total_steps": 1086, "loss": 0.5084, "lr": 4.587155963302753e-05, "epoch": 0.13802622498274672, "percentage": 4.6, "elapsed_time": "0:05:10", "remaining_time": "1:47:12"} +{"current_steps": 60, "total_steps": 1086, "loss": 0.477, "lr": 5.504587155963303e-05, "epoch": 0.16563146997929606, "percentage": 5.52, "elapsed_time": "0:06:11", "remaining_time": "1:45:48"} +{"current_steps": 70, "total_steps": 1086, "loss": 0.4841, "lr": 6.422018348623854e-05, "epoch": 0.1932367149758454, "percentage": 6.45, "elapsed_time": "0:07:14", "remaining_time": "1:45:06"} +{"current_steps": 80, "total_steps": 1086, "loss": 0.4704, "lr": 7.339449541284404e-05, "epoch": 0.22084195997239475, "percentage": 7.37, "elapsed_time": "0:08:16", "remaining_time": "1:44:06"} +{"current_steps": 90, "total_steps": 1086, "loss": 0.4718, "lr": 8.256880733944955e-05, "epoch": 0.2484472049689441, "percentage": 8.29, "elapsed_time": "0:09:17", "remaining_time": "1:42:54"} +{"current_steps": 100, "total_steps": 1086, "loss": 0.4496, "lr": 9.174311926605506e-05, "epoch": 0.27605244996549344, "percentage": 9.21, "elapsed_time": "0:10:21", "remaining_time": "1:42:05"} +{"current_steps": 110, "total_steps": 1086, "loss": 0.4524, "lr": 9.999974150612772e-05, "epoch": 0.3036576949620428, "percentage": 10.13, "elapsed_time": "0:11:24", "remaining_time": "1:41:15"} +{"current_steps": 120, "total_steps": 1086, "loss": 0.4503, "lr": 9.996872547536591e-05, "epoch": 0.33126293995859213, "percentage": 11.05, "elapsed_time": "0:12:26", "remaining_time": "1:40:13"} +{"current_steps": 130, "total_steps": 1086, "loss": 0.4399, "lr": 9.988604741439287e-05, "epoch": 0.3588681849551415, "percentage": 11.97, "elapsed_time": "0:13:30", "remaining_time": "1:39:17"} +{"current_steps": 140, "total_steps": 1086, "loss": 0.4524, "lr": 9.975179280300506e-05, "epoch": 0.3864734299516908, "percentage": 12.89, "elapsed_time": "0:14:32", "remaining_time": "1:38:12"} +{"current_steps": 150, "total_steps": 1086, "loss": 0.4281, "lr": 9.956610044533896e-05, "epoch": 0.4140786749482402, "percentage": 13.81, "elapsed_time": "0:15:36", "remaining_time": "1:37:25"} +{"current_steps": 160, "total_steps": 1086, "loss": 0.4305, "lr": 9.932916232636318e-05, "epoch": 0.4416839199447895, "percentage": 14.73, "elapsed_time": "0:16:39", "remaining_time": "1:36:23"} +{"current_steps": 170, "total_steps": 1086, "loss": 0.4208, "lr": 9.904122341338765e-05, "epoch": 0.4692891649413389, "percentage": 15.65, "elapsed_time": "0:17:42", "remaining_time": "1:35:25"} +{"current_steps": 180, "total_steps": 1086, "loss": 0.4436, "lr": 9.870258140279503e-05, "epoch": 0.4968944099378882, "percentage": 16.57, "elapsed_time": "0:18:43", "remaining_time": "1:34:14"} +{"current_steps": 190, "total_steps": 1086, "loss": 0.4288, "lr": 9.831358641225624e-05, "epoch": 0.5244996549344375, "percentage": 17.5, "elapsed_time": "0:19:45", "remaining_time": "1:33:09"} +{"current_steps": 200, "total_steps": 1086, "loss": 0.4384, "lr": 9.787464061874825e-05, "epoch": 0.5521048999309869, "percentage": 18.42, "elapsed_time": "0:20:48", "remaining_time": "1:32:11"} +{"current_steps": 210, "total_steps": 1086, "loss": 0.4178, "lr": 
9.738619784274833e-05, "epoch": 0.5797101449275363, "percentage": 19.34, "elapsed_time": "0:21:50", "remaining_time": "1:31:07"} +{"current_steps": 220, "total_steps": 1086, "loss": 0.42, "lr": 9.684876307903494e-05, "epoch": 0.6073153899240856, "percentage": 20.26, "elapsed_time": "0:22:50", "remaining_time": "1:29:56"} +{"current_steps": 230, "total_steps": 1086, "loss": 0.4296, "lr": 9.626289197457994e-05, "epoch": 0.6349206349206349, "percentage": 21.18, "elapsed_time": "0:23:52", "remaining_time": "1:28:53"} +{"current_steps": 240, "total_steps": 1086, "loss": 0.4264, "lr": 9.562919025407236e-05, "epoch": 0.6625258799171843, "percentage": 22.1, "elapsed_time": "0:24:56", "remaining_time": "1:27:55"} +{"current_steps": 250, "total_steps": 1086, "loss": 0.4052, "lr": 9.494831309366723e-05, "epoch": 0.6901311249137336, "percentage": 23.02, "elapsed_time": "0:26:00", "remaining_time": "1:26:58"} +{"current_steps": 260, "total_steps": 1086, "loss": 0.41, "lr": 9.422096444360735e-05, "epoch": 0.717736369910283, "percentage": 23.94, "elapsed_time": "0:27:02", "remaining_time": "1:25:54"} +{"current_steps": 270, "total_steps": 1086, "loss": 0.4162, "lr": 9.34478963004181e-05, "epoch": 0.7453416149068323, "percentage": 24.86, "elapsed_time": "0:28:04", "remaining_time": "1:24:51"} +{"current_steps": 280, "total_steps": 1086, "loss": 0.4183, "lr": 9.262990792942768e-05, "epoch": 0.7729468599033816, "percentage": 25.78, "elapsed_time": "0:29:09", "remaining_time": "1:23:54"} +{"current_steps": 290, "total_steps": 1086, "loss": 0.4174, "lr": 9.176784503841697e-05, "epoch": 0.800552104899931, "percentage": 26.7, "elapsed_time": "0:30:09", "remaining_time": "1:22:45"} +{"current_steps": 300, "total_steps": 1086, "loss": 0.4191, "lr": 9.086259890325297e-05, "epoch": 0.8281573498964804, "percentage": 27.62, "elapsed_time": "0:31:10", "remaining_time": "1:21:41"} +{"current_steps": 310, "total_steps": 1086, "loss": 0.4253, "lr": 8.991510544640991e-05, "epoch": 0.8557625948930296, "percentage": 28.55, "elapsed_time": "0:32:11", "remaining_time": "1:20:35"} +{"current_steps": 320, "total_steps": 1086, "loss": 0.4265, "lr": 8.892634426933106e-05, "epoch": 0.883367839889579, "percentage": 29.47, "elapsed_time": "0:33:13", "remaining_time": "1:19:32"} +{"current_steps": 330, "total_steps": 1086, "loss": 0.4365, "lr": 8.78973376396311e-05, "epoch": 0.9109730848861284, "percentage": 30.39, "elapsed_time": "0:34:15", "remaining_time": "1:18:29"} +{"current_steps": 340, "total_steps": 1086, "loss": 0.4058, "lr": 8.682914943418676e-05, "epoch": 0.9385783298826778, "percentage": 31.31, "elapsed_time": "0:35:17", "remaining_time": "1:17:24"} +{"current_steps": 350, "total_steps": 1086, "loss": 0.4078, "lr": 8.572288403920792e-05, "epoch": 0.966183574879227, "percentage": 32.23, "elapsed_time": "0:36:18", "remaining_time": "1:16:21"} +{"current_steps": 360, "total_steps": 1086, "loss": 0.4063, "lr": 8.45796852084268e-05, "epoch": 0.9937888198757764, "percentage": 33.15, "elapsed_time": "0:37:20", "remaining_time": "1:15:18"} +{"current_steps": 370, "total_steps": 1086, "loss": 0.3742, "lr": 8.340073488058552e-05, "epoch": 1.0193236714975846, "percentage": 34.07, "elapsed_time": "0:38:16", "remaining_time": "1:14:03"} +{"current_steps": 380, "total_steps": 1086, "loss": 0.3809, "lr": 8.218725195744463e-05, "epoch": 1.0469289164941338, "percentage": 34.99, "elapsed_time": "0:39:18", "remaining_time": "1:13:01"} +{"current_steps": 390, "total_steps": 1086, "loss": 0.3823, "lr": 8.094049104357609e-05, "epoch": 
1.0745341614906831, "percentage": 35.91, "elapsed_time": "0:40:20", "remaining_time": "1:11:59"} +{"current_steps": 400, "total_steps": 1086, "loss": 0.3765, "lr": 7.966174114924351e-05, "epoch": 1.1021394064872325, "percentage": 36.83, "elapsed_time": "0:41:24", "remaining_time": "1:11:01"} +{"current_steps": 410, "total_steps": 1086, "loss": 0.3751, "lr": 7.83523243577109e-05, "epoch": 1.129744651483782, "percentage": 37.75, "elapsed_time": "0:42:26", "remaining_time": "1:09:58"} +{"current_steps": 420, "total_steps": 1086, "loss": 0.3869, "lr": 7.70135944583575e-05, "epoch": 1.1573498964803313, "percentage": 38.67, "elapsed_time": "0:43:29", "remaining_time": "1:08:57"} +{"current_steps": 430, "total_steps": 1086, "loss": 0.3634, "lr": 7.56469355470122e-05, "epoch": 1.1849551414768806, "percentage": 39.59, "elapsed_time": "0:44:30", "remaining_time": "1:07:53"} +{"current_steps": 440, "total_steps": 1086, "loss": 0.3768, "lr": 7.425376059495442e-05, "epoch": 1.21256038647343, "percentage": 40.52, "elapsed_time": "0:45:32", "remaining_time": "1:06:51"} +{"current_steps": 450, "total_steps": 1086, "loss": 0.3669, "lr": 7.283550998806108e-05, "epoch": 1.2401656314699794, "percentage": 41.44, "elapsed_time": "0:46:33", "remaining_time": "1:05:47"} +{"current_steps": 460, "total_steps": 1086, "loss": 0.3824, "lr": 7.139365003760999e-05, "epoch": 1.2677708764665288, "percentage": 42.36, "elapsed_time": "0:47:34", "remaining_time": "1:04:44"} +{"current_steps": 470, "total_steps": 1086, "loss": 0.3646, "lr": 6.992967146427913e-05, "epoch": 1.295376121463078, "percentage": 43.28, "elapsed_time": "0:48:37", "remaining_time": "1:03:43"} +{"current_steps": 480, "total_steps": 1086, "loss": 0.3755, "lr": 6.844508785690964e-05, "epoch": 1.3229813664596273, "percentage": 44.2, "elapsed_time": "0:49:36", "remaining_time": "1:02:38"} +{"current_steps": 490, "total_steps": 1086, "loss": 0.3841, "lr": 6.694143410762542e-05, "epoch": 1.3505866114561766, "percentage": 45.12, "elapsed_time": "0:50:37", "remaining_time": "1:01:34"} +{"current_steps": 500, "total_steps": 1086, "loss": 0.3839, "lr": 6.54202648249278e-05, "epoch": 1.378191856452726, "percentage": 46.04, "elapsed_time": "0:51:41", "remaining_time": "1:00:34"} +{"current_steps": 510, "total_steps": 1086, "loss": 0.3726, "lr": 6.388315272640544e-05, "epoch": 1.4057971014492754, "percentage": 46.96, "elapsed_time": "0:52:45", "remaining_time": "0:59:34"} +{"current_steps": 520, "total_steps": 1086, "loss": 0.3722, "lr": 6.233168701272167e-05, "epoch": 1.4334023464458248, "percentage": 47.88, "elapsed_time": "0:53:45", "remaining_time": "0:58:31"} +{"current_steps": 530, "total_steps": 1086, "loss": 0.3623, "lr": 6.076747172456015e-05, "epoch": 1.4610075914423741, "percentage": 48.8, "elapsed_time": "0:54:49", "remaining_time": "0:57:30"} +{"current_steps": 540, "total_steps": 1086, "loss": 0.3684, "lr": 5.919212408422753e-05, "epoch": 1.4886128364389233, "percentage": 49.72, "elapsed_time": "0:55:49", "remaining_time": "0:56:27"} +{"current_steps": 550, "total_steps": 1086, "loss": 0.3674, "lr": 5.76072728236279e-05, "epoch": 1.5162180814354729, "percentage": 50.64, "elapsed_time": "0:56:54", "remaining_time": "0:55:27"} +{"current_steps": 560, "total_steps": 1086, "loss": 0.3602, "lr": 5.6014556500337534e-05, "epoch": 1.543823326432022, "percentage": 51.57, "elapsed_time": "0:57:55", "remaining_time": "0:54:24"} +{"current_steps": 570, "total_steps": 1086, "loss": 0.3872, "lr": 5.44156218035211e-05, "epoch": 1.5714285714285714, "percentage": 
52.49, "elapsed_time": "0:58:59", "remaining_time": "0:53:24"} +{"current_steps": 580, "total_steps": 1086, "loss": 0.3678, "lr": 5.28121218514406e-05, "epoch": 1.5990338164251208, "percentage": 53.41, "elapsed_time": "1:00:01", "remaining_time": "0:52:22"} +{"current_steps": 590, "total_steps": 1086, "loss": 0.3652, "lr": 5.1205714482317455e-05, "epoch": 1.6266390614216701, "percentage": 54.33, "elapsed_time": "1:01:05", "remaining_time": "0:51:21"} +{"current_steps": 600, "total_steps": 1086, "loss": 0.3786, "lr": 4.95980605403146e-05, "epoch": 1.6542443064182195, "percentage": 55.25, "elapsed_time": "1:02:10", "remaining_time": "0:50:21"} +{"current_steps": 610, "total_steps": 1086, "loss": 0.3715, "lr": 4.79908221584108e-05, "epoch": 1.6818495514147687, "percentage": 56.17, "elapsed_time": "1:03:13", "remaining_time": "0:49:19"} +{"current_steps": 620, "total_steps": 1086, "loss": 0.386, "lr": 4.638566103994258e-05, "epoch": 1.7094547964113183, "percentage": 57.09, "elapsed_time": "1:04:14", "remaining_time": "0:48:17"} +{"current_steps": 630, "total_steps": 1086, "loss": 0.3723, "lr": 4.478423674059015e-05, "epoch": 1.7370600414078674, "percentage": 58.01, "elapsed_time": "1:05:19", "remaining_time": "0:47:16"} +{"current_steps": 640, "total_steps": 1086, "loss": 0.3794, "lr": 4.318820495258396e-05, "epoch": 1.764665286404417, "percentage": 58.93, "elapsed_time": "1:06:21", "remaining_time": "0:46:14"} +{"current_steps": 650, "total_steps": 1086, "loss": 0.3641, "lr": 4.159921579290546e-05, "epoch": 1.7922705314009661, "percentage": 59.85, "elapsed_time": "1:07:24", "remaining_time": "0:45:13"} +{"current_steps": 660, "total_steps": 1086, "loss": 0.3727, "lr": 4.0018912097252234e-05, "epoch": 1.8198757763975155, "percentage": 60.77, "elapsed_time": "1:08:27", "remaining_time": "0:44:11"} +{"current_steps": 670, "total_steps": 1086, "loss": 0.3666, "lr": 3.8448927721530967e-05, "epoch": 1.847481021394065, "percentage": 61.69, "elapsed_time": "1:09:31", "remaining_time": "0:43:09"} +{"current_steps": 680, "total_steps": 1086, "loss": 0.3707, "lr": 3.6890885852634635e-05, "epoch": 1.8750862663906143, "percentage": 62.62, "elapsed_time": "1:10:33", "remaining_time": "0:42:07"} +{"current_steps": 690, "total_steps": 1086, "loss": 0.3793, "lr": 3.534639733025017e-05, "epoch": 1.9026915113871636, "percentage": 63.54, "elapsed_time": "1:11:36", "remaining_time": "0:41:05"} +{"current_steps": 700, "total_steps": 1086, "loss": 0.3623, "lr": 3.3817058981431784e-05, "epoch": 1.9302967563837128, "percentage": 64.46, "elapsed_time": "1:12:38", "remaining_time": "0:40:03"} +{"current_steps": 710, "total_steps": 1086, "loss": 0.3564, "lr": 3.230445196966181e-05, "epoch": 1.9579020013802624, "percentage": 65.38, "elapsed_time": "1:13:41", "remaining_time": "0:39:01"} +{"current_steps": 720, "total_steps": 1086, "loss": 0.3681, "lr": 3.081014016010584e-05, "epoch": 1.9855072463768115, "percentage": 66.3, "elapsed_time": "1:14:41", "remaining_time": "0:37:58"} +{"current_steps": 730, "total_steps": 1086, "loss": 0.359, "lr": 2.9335668502752394e-05, "epoch": 2.0110420979986197, "percentage": 67.22, "elapsed_time": "1:15:37", "remaining_time": "0:36:52"} +{"current_steps": 740, "total_steps": 1086, "loss": 0.3189, "lr": 2.7882561435108824e-05, "epoch": 2.0386473429951693, "percentage": 68.14, "elapsed_time": "1:16:40", "remaining_time": "0:35:50"} +{"current_steps": 750, "total_steps": 1086, "loss": 0.3409, "lr": 2.6452321306104634e-05, "epoch": 2.0662525879917184, "percentage": 69.06, "elapsed_time": 
"1:17:44", "remaining_time": "0:34:49"} +{"current_steps": 760, "total_steps": 1086, "loss": 0.3354, "lr": 2.5046426822832175e-05, "epoch": 2.0938578329882676, "percentage": 69.98, "elapsed_time": "1:18:46", "remaining_time": "0:33:47"} +{"current_steps": 770, "total_steps": 1086, "loss": 0.3366, "lr": 2.3666331521730024e-05, "epoch": 2.121463077984817, "percentage": 70.9, "elapsed_time": "1:19:48", "remaining_time": "0:32:45"} +{"current_steps": 780, "total_steps": 1086, "loss": 0.3231, "lr": 2.2313462265790196e-05, "epoch": 2.1490683229813663, "percentage": 71.82, "elapsed_time": "1:20:49", "remaining_time": "0:31:42"} +{"current_steps": 790, "total_steps": 1086, "loss": 0.3333, "lr": 2.098921776934269e-05, "epoch": 2.176673567977916, "percentage": 72.74, "elapsed_time": "1:21:52", "remaining_time": "0:30:40"} +{"current_steps": 800, "total_steps": 1086, "loss": 0.3401, "lr": 1.96949671519424e-05, "epoch": 2.204278812974465, "percentage": 73.66, "elapsed_time": "1:22:52", "remaining_time": "0:29:37"} +{"current_steps": 810, "total_steps": 1086, "loss": 0.3453, "lr": 1.843204852285389e-05, "epoch": 2.2318840579710146, "percentage": 74.59, "elapsed_time": "1:23:53", "remaining_time": "0:28:35"} +{"current_steps": 820, "total_steps": 1086, "loss": 0.338, "lr": 1.7201767597597196e-05, "epoch": 2.259489302967564, "percentage": 75.51, "elapsed_time": "1:24:57", "remaining_time": "0:27:33"} +{"current_steps": 830, "total_steps": 1086, "loss": 0.3334, "lr": 1.60053963479852e-05, "epoch": 2.287094547964113, "percentage": 76.43, "elapsed_time": "1:25:58", "remaining_time": "0:26:31"} +{"current_steps": 840, "total_steps": 1086, "loss": 0.3359, "lr": 1.4844171687048058e-05, "epoch": 2.3146997929606625, "percentage": 77.35, "elapsed_time": "1:27:02", "remaining_time": "0:25:29"} +{"current_steps": 850, "total_steps": 1086, "loss": 0.3534, "lr": 1.371929419020459e-05, "epoch": 2.3423050379572117, "percentage": 78.27, "elapsed_time": "1:28:03", "remaining_time": "0:24:27"} +{"current_steps": 860, "total_steps": 1086, "loss": 0.3247, "lr": 1.2631926854002574e-05, "epoch": 2.3699102829537613, "percentage": 79.19, "elapsed_time": "1:29:06", "remaining_time": "0:23:25"} +{"current_steps": 870, "total_steps": 1086, "loss": 0.3309, "lr": 1.1583193893711475e-05, "epoch": 2.3975155279503104, "percentage": 80.11, "elapsed_time": "1:30:09", "remaining_time": "0:22:23"} +{"current_steps": 880, "total_steps": 1086, "loss": 0.3408, "lr": 1.0574179581010468e-05, "epoch": 2.42512077294686, "percentage": 81.03, "elapsed_time": "1:31:11", "remaining_time": "0:21:20"} +{"current_steps": 890, "total_steps": 1086, "loss": 0.3338, "lr": 9.60592712297379e-06, "epoch": 2.452726017943409, "percentage": 81.95, "elapsed_time": "1:32:12", "remaining_time": "0:20:18"} +{"current_steps": 900, "total_steps": 1086, "loss": 0.3398, "lr": 8.679437583512168e-06, "epoch": 2.4803312629399588, "percentage": 82.87, "elapsed_time": "1:33:12", "remaining_time": "0:19:15"} +{"current_steps": 910, "total_steps": 1086, "loss": 0.333, "lr": 7.795668848385623e-06, "epoch": 2.507936507936508, "percentage": 83.79, "elapsed_time": "1:34:17", "remaining_time": "0:18:14"} +{"current_steps": 920, "total_steps": 1086, "loss": 0.342, "lr": 6.95553463485748e-06, "epoch": 2.5355417529330575, "percentage": 84.71, "elapsed_time": "1:35:17", "remaining_time": "0:17:11"} +{"current_steps": 930, "total_steps": 1086, "loss": 0.3335, "lr": 6.159903547013746e-06, "epoch": 2.5631469979296067, "percentage": 85.64, "elapsed_time": "1:36:21", "remaining_time": 
"0:16:09"} +{"current_steps": 940, "total_steps": 1086, "loss": 0.3426, "lr": 5.409598177724401e-06, "epoch": 2.590752242926156, "percentage": 86.56, "elapsed_time": "1:37:22", "remaining_time": "0:15:07"} +{"current_steps": 950, "total_steps": 1086, "loss": 0.3463, "lr": 4.7053942581750385e-06, "epoch": 2.6183574879227054, "percentage": 87.48, "elapsed_time": "1:38:22", "remaining_time": "0:14:05"} +{"current_steps": 960, "total_steps": 1086, "loss": 0.3331, "lr": 4.048019855848273e-06, "epoch": 2.6459627329192545, "percentage": 88.4, "elapsed_time": "1:39:25", "remaining_time": "0:13:03"} +{"current_steps": 970, "total_steps": 1086, "loss": 0.3422, "lr": 3.438154621784029e-06, "epoch": 2.673567977915804, "percentage": 89.32, "elapsed_time": "1:40:27", "remaining_time": "0:12:00"} +{"current_steps": 980, "total_steps": 1086, "loss": 0.3262, "lr": 2.8764290878969756e-06, "epoch": 2.7011732229123533, "percentage": 90.24, "elapsed_time": "1:41:27", "remaining_time": "0:10:58"} +{"current_steps": 990, "total_steps": 1086, "loss": 0.3303, "lr": 2.3634240150775646e-06, "epoch": 2.728778467908903, "percentage": 91.16, "elapsed_time": "1:42:32", "remaining_time": "0:09:56"} +{"current_steps": 1000, "total_steps": 1086, "loss": 0.3446, "lr": 1.8996697927507468e-06, "epoch": 2.756383712905452, "percentage": 92.08, "elapsed_time": "1:43:34", "remaining_time": "0:08:54"} +{"current_steps": 1010, "total_steps": 1086, "loss": 0.3309, "lr": 1.4856458905130822e-06, "epoch": 2.783988957902001, "percentage": 93.0, "elapsed_time": "1:44:40", "remaining_time": "0:07:52"} +{"current_steps": 1020, "total_steps": 1086, "loss": 0.326, "lr": 1.1217803624152311e-06, "epoch": 2.8115942028985508, "percentage": 93.92, "elapsed_time": "1:45:46", "remaining_time": "0:06:50"} +{"current_steps": 1030, "total_steps": 1086, "loss": 0.3364, "lr": 8.084494044022839e-07, "epoch": 2.8391994478951, "percentage": 94.84, "elapsed_time": "1:46:48", "remaining_time": "0:05:48"} +{"current_steps": 1040, "total_steps": 1086, "loss": 0.3313, "lr": 5.459769653695657e-07, "epoch": 2.8668046928916495, "percentage": 95.76, "elapsed_time": "1:47:50", "remaining_time": "0:04:46"} +{"current_steps": 1050, "total_steps": 1086, "loss": 0.328, "lr": 3.346344122360179e-07, "epoch": 2.8944099378881987, "percentage": 96.69, "elapsed_time": "1:48:51", "remaining_time": "0:03:43"} +{"current_steps": 1060, "total_steps": 1086, "loss": 0.3426, "lr": 1.746402493813415e-07, "epoch": 2.9220151828847483, "percentage": 97.61, "elapsed_time": "1:49:53", "remaining_time": "0:02:41"} +{"current_steps": 1070, "total_steps": 1086, "loss": 0.3378, "lr": 6.615989273713874e-08, "epoch": 2.9496204278812974, "percentage": 98.53, "elapsed_time": "1:50:59", "remaining_time": "0:01:39"} +{"current_steps": 1080, "total_steps": 1086, "loss": 0.3358, "lr": 9.305498765438404e-09, "epoch": 2.9772256728778466, "percentage": 99.45, "elapsed_time": "1:52:03", "remaining_time": "0:00:37"} +{"current_steps": 1086, "total_steps": 1086, "epoch": 2.9937888198757765, "percentage": 100.0, "elapsed_time": "1:52:43", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e2e64e3591e4dec2b26b7f77dbde6e53f919be07 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,798 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.9937888198757765, + "eval_steps": 500, + "global_step": 1086, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": 
true, + "log_history": [ + { + "epoch": 0.027605244996549344, + "grad_norm": 1.6335422992706299, + "learning_rate": 9.174311926605506e-06, + "loss": 0.8102, + "step": 10 + }, + { + "epoch": 0.05521048999309869, + "grad_norm": 0.8111785054206848, + "learning_rate": 1.834862385321101e-05, + "loss": 0.6999, + "step": 20 + }, + { + "epoch": 0.08281573498964803, + "grad_norm": 0.4619831144809723, + "learning_rate": 2.7522935779816515e-05, + "loss": 0.5682, + "step": 30 + }, + { + "epoch": 0.11042097998619738, + "grad_norm": 0.4434720575809479, + "learning_rate": 3.669724770642202e-05, + "loss": 0.5232, + "step": 40 + }, + { + "epoch": 0.13802622498274672, + "grad_norm": 0.44054797291755676, + "learning_rate": 4.587155963302753e-05, + "loss": 0.5084, + "step": 50 + }, + { + "epoch": 0.16563146997929606, + "grad_norm": 0.42256447672843933, + "learning_rate": 5.504587155963303e-05, + "loss": 0.477, + "step": 60 + }, + { + "epoch": 0.1932367149758454, + "grad_norm": 0.4349405765533447, + "learning_rate": 6.422018348623854e-05, + "loss": 0.4841, + "step": 70 + }, + { + "epoch": 0.22084195997239475, + "grad_norm": 0.4515930712223053, + "learning_rate": 7.339449541284404e-05, + "loss": 0.4704, + "step": 80 + }, + { + "epoch": 0.2484472049689441, + "grad_norm": 0.45412737131118774, + "learning_rate": 8.256880733944955e-05, + "loss": 0.4718, + "step": 90 + }, + { + "epoch": 0.27605244996549344, + "grad_norm": 0.49010995030403137, + "learning_rate": 9.174311926605506e-05, + "loss": 0.4496, + "step": 100 + }, + { + "epoch": 0.3036576949620428, + "grad_norm": 0.4931396245956421, + "learning_rate": 9.999974150612772e-05, + "loss": 0.4524, + "step": 110 + }, + { + "epoch": 0.33126293995859213, + "grad_norm": 1.1270735263824463, + "learning_rate": 9.996872547536591e-05, + "loss": 0.4503, + "step": 120 + }, + { + "epoch": 0.3588681849551415, + "grad_norm": 0.48991507291793823, + "learning_rate": 9.988604741439287e-05, + "loss": 0.4399, + "step": 130 + }, + { + "epoch": 0.3864734299516908, + "grad_norm": 0.45801088213920593, + "learning_rate": 9.975179280300506e-05, + "loss": 0.4524, + "step": 140 + }, + { + "epoch": 0.4140786749482402, + "grad_norm": 0.420897901058197, + "learning_rate": 9.956610044533896e-05, + "loss": 0.4281, + "step": 150 + }, + { + "epoch": 0.4416839199447895, + "grad_norm": 0.4336962103843689, + "learning_rate": 9.932916232636318e-05, + "loss": 0.4305, + "step": 160 + }, + { + "epoch": 0.4692891649413389, + "grad_norm": 0.44120800495147705, + "learning_rate": 9.904122341338765e-05, + "loss": 0.4208, + "step": 170 + }, + { + "epoch": 0.4968944099378882, + "grad_norm": 0.9154078364372253, + "learning_rate": 9.870258140279503e-05, + "loss": 0.4436, + "step": 180 + }, + { + "epoch": 0.5244996549344375, + "grad_norm": 0.4551916718482971, + "learning_rate": 9.831358641225624e-05, + "loss": 0.4288, + "step": 190 + }, + { + "epoch": 0.5521048999309869, + "grad_norm": 0.4513665437698364, + "learning_rate": 9.787464061874825e-05, + "loss": 0.4384, + "step": 200 + }, + { + "epoch": 0.5797101449275363, + "grad_norm": 0.43779632449150085, + "learning_rate": 9.738619784274833e-05, + "loss": 0.4178, + "step": 210 + }, + { + "epoch": 0.6073153899240856, + "grad_norm": 0.4170076847076416, + "learning_rate": 9.684876307903494e-05, + "loss": 0.42, + "step": 220 + }, + { + "epoch": 0.6349206349206349, + "grad_norm": 0.4370488226413727, + "learning_rate": 9.626289197457994e-05, + "loss": 0.4296, + "step": 230 + }, + { + "epoch": 0.6625258799171843, + "grad_norm": 0.42547333240509033, + "learning_rate": 
9.562919025407236e-05, + "loss": 0.4264, + "step": 240 + }, + { + "epoch": 0.6901311249137336, + "grad_norm": 0.4317057430744171, + "learning_rate": 9.494831309366723e-05, + "loss": 0.4052, + "step": 250 + }, + { + "epoch": 0.717736369910283, + "grad_norm": 0.40589675307273865, + "learning_rate": 9.422096444360735e-05, + "loss": 0.41, + "step": 260 + }, + { + "epoch": 0.7453416149068323, + "grad_norm": 0.44671744108200073, + "learning_rate": 9.34478963004181e-05, + "loss": 0.4162, + "step": 270 + }, + { + "epoch": 0.7729468599033816, + "grad_norm": 0.41162508726119995, + "learning_rate": 9.262990792942768e-05, + "loss": 0.4183, + "step": 280 + }, + { + "epoch": 0.800552104899931, + "grad_norm": 0.483149915933609, + "learning_rate": 9.176784503841697e-05, + "loss": 0.4174, + "step": 290 + }, + { + "epoch": 0.8281573498964804, + "grad_norm": 0.4605332612991333, + "learning_rate": 9.086259890325297e-05, + "loss": 0.4191, + "step": 300 + }, + { + "epoch": 0.8557625948930296, + "grad_norm": 0.4153307378292084, + "learning_rate": 8.991510544640991e-05, + "loss": 0.4253, + "step": 310 + }, + { + "epoch": 0.883367839889579, + "grad_norm": 0.43806084990501404, + "learning_rate": 8.892634426933106e-05, + "loss": 0.4265, + "step": 320 + }, + { + "epoch": 0.9109730848861284, + "grad_norm": 0.45412200689315796, + "learning_rate": 8.78973376396311e-05, + "loss": 0.4365, + "step": 330 + }, + { + "epoch": 0.9385783298826778, + "grad_norm": 0.3769752085208893, + "learning_rate": 8.682914943418676e-05, + "loss": 0.4058, + "step": 340 + }, + { + "epoch": 0.966183574879227, + "grad_norm": 0.4275883436203003, + "learning_rate": 8.572288403920792e-05, + "loss": 0.4078, + "step": 350 + }, + { + "epoch": 0.9937888198757764, + "grad_norm": 0.43371307849884033, + "learning_rate": 8.45796852084268e-05, + "loss": 0.4063, + "step": 360 + }, + { + "epoch": 1.0193236714975846, + "grad_norm": 0.4527032673358917, + "learning_rate": 8.340073488058552e-05, + "loss": 0.3742, + "step": 370 + }, + { + "epoch": 1.0469289164941338, + "grad_norm": 0.5205631256103516, + "learning_rate": 8.218725195744463e-05, + "loss": 0.3809, + "step": 380 + }, + { + "epoch": 1.0745341614906831, + "grad_norm": 0.4031950533390045, + "learning_rate": 8.094049104357609e-05, + "loss": 0.3823, + "step": 390 + }, + { + "epoch": 1.1021394064872325, + "grad_norm": 0.41949087381362915, + "learning_rate": 7.966174114924351e-05, + "loss": 0.3765, + "step": 400 + }, + { + "epoch": 1.129744651483782, + "grad_norm": 0.43814027309417725, + "learning_rate": 7.83523243577109e-05, + "loss": 0.3751, + "step": 410 + }, + { + "epoch": 1.1573498964803313, + "grad_norm": 0.4457204341888428, + "learning_rate": 7.70135944583575e-05, + "loss": 0.3869, + "step": 420 + }, + { + "epoch": 1.1849551414768806, + "grad_norm": 0.41421836614608765, + "learning_rate": 7.56469355470122e-05, + "loss": 0.3634, + "step": 430 + }, + { + "epoch": 1.21256038647343, + "grad_norm": 0.4416670799255371, + "learning_rate": 7.425376059495442e-05, + "loss": 0.3768, + "step": 440 + }, + { + "epoch": 1.2401656314699794, + "grad_norm": 0.44710710644721985, + "learning_rate": 7.283550998806108e-05, + "loss": 0.3669, + "step": 450 + }, + { + "epoch": 1.2677708764665288, + "grad_norm": 0.39852890372276306, + "learning_rate": 7.139365003760999e-05, + "loss": 0.3824, + "step": 460 + }, + { + "epoch": 1.295376121463078, + "grad_norm": 0.4412725269794464, + "learning_rate": 6.992967146427913e-05, + "loss": 0.3646, + "step": 470 + }, + { + "epoch": 1.3229813664596273, + "grad_norm": 0.41978228092193604, + 
"learning_rate": 6.844508785690964e-05, + "loss": 0.3755, + "step": 480 + }, + { + "epoch": 1.3505866114561766, + "grad_norm": 0.4214731752872467, + "learning_rate": 6.694143410762542e-05, + "loss": 0.3841, + "step": 490 + }, + { + "epoch": 1.378191856452726, + "grad_norm": 0.4128514230251312, + "learning_rate": 6.54202648249278e-05, + "loss": 0.3839, + "step": 500 + }, + { + "epoch": 1.4057971014492754, + "grad_norm": 0.3899001181125641, + "learning_rate": 6.388315272640544e-05, + "loss": 0.3726, + "step": 510 + }, + { + "epoch": 1.4334023464458248, + "grad_norm": 0.4347754120826721, + "learning_rate": 6.233168701272167e-05, + "loss": 0.3722, + "step": 520 + }, + { + "epoch": 1.4610075914423741, + "grad_norm": 0.3798378109931946, + "learning_rate": 6.076747172456015e-05, + "loss": 0.3623, + "step": 530 + }, + { + "epoch": 1.4886128364389233, + "grad_norm": 0.3879692256450653, + "learning_rate": 5.919212408422753e-05, + "loss": 0.3684, + "step": 540 + }, + { + "epoch": 1.5162180814354729, + "grad_norm": 0.4210754930973053, + "learning_rate": 5.76072728236279e-05, + "loss": 0.3674, + "step": 550 + }, + { + "epoch": 1.543823326432022, + "grad_norm": 0.4184245467185974, + "learning_rate": 5.6014556500337534e-05, + "loss": 0.3602, + "step": 560 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 0.43027910590171814, + "learning_rate": 5.44156218035211e-05, + "loss": 0.3872, + "step": 570 + }, + { + "epoch": 1.5990338164251208, + "grad_norm": 0.38721945881843567, + "learning_rate": 5.28121218514406e-05, + "loss": 0.3678, + "step": 580 + }, + { + "epoch": 1.6266390614216701, + "grad_norm": 0.4199799597263336, + "learning_rate": 5.1205714482317455e-05, + "loss": 0.3652, + "step": 590 + }, + { + "epoch": 1.6542443064182195, + "grad_norm": 0.40728333592414856, + "learning_rate": 4.95980605403146e-05, + "loss": 0.3786, + "step": 600 + }, + { + "epoch": 1.6818495514147687, + "grad_norm": 0.41107377409935, + "learning_rate": 4.79908221584108e-05, + "loss": 0.3715, + "step": 610 + }, + { + "epoch": 1.7094547964113183, + "grad_norm": 0.45491889119148254, + "learning_rate": 4.638566103994258e-05, + "loss": 0.386, + "step": 620 + }, + { + "epoch": 1.7370600414078674, + "grad_norm": 0.4167945683002472, + "learning_rate": 4.478423674059015e-05, + "loss": 0.3723, + "step": 630 + }, + { + "epoch": 1.764665286404417, + "grad_norm": 0.4188650846481323, + "learning_rate": 4.318820495258396e-05, + "loss": 0.3794, + "step": 640 + }, + { + "epoch": 1.7922705314009661, + "grad_norm": 0.45200666785240173, + "learning_rate": 4.159921579290546e-05, + "loss": 0.3641, + "step": 650 + }, + { + "epoch": 1.8198757763975155, + "grad_norm": 0.42524534463882446, + "learning_rate": 4.0018912097252234e-05, + "loss": 0.3727, + "step": 660 + }, + { + "epoch": 1.847481021394065, + "grad_norm": 0.4238753318786621, + "learning_rate": 3.8448927721530967e-05, + "loss": 0.3666, + "step": 670 + }, + { + "epoch": 1.8750862663906143, + "grad_norm": 0.3949458599090576, + "learning_rate": 3.6890885852634635e-05, + "loss": 0.3707, + "step": 680 + }, + { + "epoch": 1.9026915113871636, + "grad_norm": 0.4040445387363434, + "learning_rate": 3.534639733025017e-05, + "loss": 0.3793, + "step": 690 + }, + { + "epoch": 1.9302967563837128, + "grad_norm": 0.42878955602645874, + "learning_rate": 3.3817058981431784e-05, + "loss": 0.3623, + "step": 700 + }, + { + "epoch": 1.9579020013802624, + "grad_norm": 0.42626291513442993, + "learning_rate": 3.230445196966181e-05, + "loss": 0.3564, + "step": 710 + }, + { + "epoch": 1.9855072463768115, + "grad_norm": 
0.43052035570144653, + "learning_rate": 3.081014016010584e-05, + "loss": 0.3681, + "step": 720 + }, + { + "epoch": 2.0110420979986197, + "grad_norm": 0.4627828896045685, + "learning_rate": 2.9335668502752394e-05, + "loss": 0.359, + "step": 730 + }, + { + "epoch": 2.0386473429951693, + "grad_norm": 0.45345333218574524, + "learning_rate": 2.7882561435108824e-05, + "loss": 0.3189, + "step": 740 + }, + { + "epoch": 2.0662525879917184, + "grad_norm": 0.40497517585754395, + "learning_rate": 2.6452321306104634e-05, + "loss": 0.3409, + "step": 750 + }, + { + "epoch": 2.0938578329882676, + "grad_norm": 0.4666087329387665, + "learning_rate": 2.5046426822832175e-05, + "loss": 0.3354, + "step": 760 + }, + { + "epoch": 2.121463077984817, + "grad_norm": 0.38220757246017456, + "learning_rate": 2.3666331521730024e-05, + "loss": 0.3366, + "step": 770 + }, + { + "epoch": 2.1490683229813663, + "grad_norm": 0.4605223536491394, + "learning_rate": 2.2313462265790196e-05, + "loss": 0.3231, + "step": 780 + }, + { + "epoch": 2.176673567977916, + "grad_norm": 0.558403730392456, + "learning_rate": 2.098921776934269e-05, + "loss": 0.3333, + "step": 790 + }, + { + "epoch": 2.204278812974465, + "grad_norm": 0.45217105746269226, + "learning_rate": 1.96949671519424e-05, + "loss": 0.3401, + "step": 800 + }, + { + "epoch": 2.2318840579710146, + "grad_norm": 0.4413389563560486, + "learning_rate": 1.843204852285389e-05, + "loss": 0.3453, + "step": 810 + }, + { + "epoch": 2.259489302967564, + "grad_norm": 0.3977566063404083, + "learning_rate": 1.7201767597597196e-05, + "loss": 0.338, + "step": 820 + }, + { + "epoch": 2.287094547964113, + "grad_norm": 0.4817161560058594, + "learning_rate": 1.60053963479852e-05, + "loss": 0.3334, + "step": 830 + }, + { + "epoch": 2.3146997929606625, + "grad_norm": 0.4438902735710144, + "learning_rate": 1.4844171687048058e-05, + "loss": 0.3359, + "step": 840 + }, + { + "epoch": 2.3423050379572117, + "grad_norm": 0.45830076932907104, + "learning_rate": 1.371929419020459e-05, + "loss": 0.3534, + "step": 850 + }, + { + "epoch": 2.3699102829537613, + "grad_norm": 0.48253732919692993, + "learning_rate": 1.2631926854002574e-05, + "loss": 0.3247, + "step": 860 + }, + { + "epoch": 2.3975155279503104, + "grad_norm": 0.4572385549545288, + "learning_rate": 1.1583193893711475e-05, + "loss": 0.3309, + "step": 870 + }, + { + "epoch": 2.42512077294686, + "grad_norm": 0.4570174217224121, + "learning_rate": 1.0574179581010468e-05, + "loss": 0.3408, + "step": 880 + }, + { + "epoch": 2.452726017943409, + "grad_norm": 0.5289928913116455, + "learning_rate": 9.60592712297379e-06, + "loss": 0.3338, + "step": 890 + }, + { + "epoch": 2.4803312629399588, + "grad_norm": 0.49394240975379944, + "learning_rate": 8.679437583512168e-06, + "loss": 0.3398, + "step": 900 + }, + { + "epoch": 2.507936507936508, + "grad_norm": 0.412822425365448, + "learning_rate": 7.795668848385623e-06, + "loss": 0.333, + "step": 910 + }, + { + "epoch": 2.5355417529330575, + "grad_norm": 0.4305315911769867, + "learning_rate": 6.95553463485748e-06, + "loss": 0.342, + "step": 920 + }, + { + "epoch": 2.5631469979296067, + "grad_norm": 0.43158090114593506, + "learning_rate": 6.159903547013746e-06, + "loss": 0.3335, + "step": 930 + }, + { + "epoch": 2.590752242926156, + "grad_norm": 0.4319579005241394, + "learning_rate": 5.409598177724401e-06, + "loss": 0.3426, + "step": 940 + }, + { + "epoch": 2.6183574879227054, + "grad_norm": 0.4702156186103821, + "learning_rate": 4.7053942581750385e-06, + "loss": 0.3463, + "step": 950 + }, + { + "epoch": 
2.6459627329192545, + "grad_norm": 0.38157370686531067, + "learning_rate": 4.048019855848273e-06, + "loss": 0.3331, + "step": 960 + }, + { + "epoch": 2.673567977915804, + "grad_norm": 0.4141283631324768, + "learning_rate": 3.438154621784029e-06, + "loss": 0.3422, + "step": 970 + }, + { + "epoch": 2.7011732229123533, + "grad_norm": 0.42628729343414307, + "learning_rate": 2.8764290878969756e-06, + "loss": 0.3262, + "step": 980 + }, + { + "epoch": 2.728778467908903, + "grad_norm": 0.4850899577140808, + "learning_rate": 2.3634240150775646e-06, + "loss": 0.3303, + "step": 990 + }, + { + "epoch": 2.756383712905452, + "grad_norm": 0.4277842938899994, + "learning_rate": 1.8996697927507468e-06, + "loss": 0.3446, + "step": 1000 + }, + { + "epoch": 2.783988957902001, + "grad_norm": 0.45691201090812683, + "learning_rate": 1.4856458905130822e-06, + "loss": 0.3309, + "step": 1010 + }, + { + "epoch": 2.8115942028985508, + "grad_norm": 0.4542577862739563, + "learning_rate": 1.1217803624152311e-06, + "loss": 0.326, + "step": 1020 + }, + { + "epoch": 2.8391994478951, + "grad_norm": 0.39988699555397034, + "learning_rate": 8.084494044022839e-07, + "loss": 0.3364, + "step": 1030 + }, + { + "epoch": 2.8668046928916495, + "grad_norm": 0.43636584281921387, + "learning_rate": 5.459769653695657e-07, + "loss": 0.3313, + "step": 1040 + }, + { + "epoch": 2.8944099378881987, + "grad_norm": 0.4335787892341614, + "learning_rate": 3.346344122360179e-07, + "loss": 0.328, + "step": 1050 + }, + { + "epoch": 2.9220151828847483, + "grad_norm": 0.4669038951396942, + "learning_rate": 1.746402493813415e-07, + "loss": 0.3426, + "step": 1060 + }, + { + "epoch": 2.9496204278812974, + "grad_norm": 0.43036729097366333, + "learning_rate": 6.615989273713874e-08, + "loss": 0.3378, + "step": 1070 + }, + { + "epoch": 2.9772256728778466, + "grad_norm": 0.4190558195114136, + "learning_rate": 9.305498765438404e-09, + "loss": 0.3358, + "step": 1080 + }, + { + "epoch": 2.9937888198757765, + "step": 1086, + "total_flos": 6.17252944434797e+18, + "train_loss": 0.3891050570797086, + "train_runtime": 6764.7829, + "train_samples_per_second": 5.14, + "train_steps_per_second": 0.161 + } + ], + "logging_steps": 10, + "max_steps": 1086, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 6.17252944434797e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fd86c75750949f0ca2ee56bc27dadb57430a90de --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b52a6484c213110d668e89b1ff8d77bac863e0460a3e92ff200a8df3f14879a5 +size 5688 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..3ec530bd4465dd893f0e87f525592ef900463ab1 Binary files /dev/null and b/training_loss.png differ
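
The diff above also adds `training_loss.png`, which is simply a rendering of the per-step `loss` values recorded in `trainer_log.jsonl`. As a minimal, hypothetical sketch (not part of the repository, and assuming `matplotlib` is installed), the curve can be reproduced directly from the log; the field names `current_steps` and `loss` are taken from the records shown in the diff:

```python
import json

import matplotlib.pyplot as plt

steps, losses = [], []
with open("trainer_log.jsonl") as f:
    for line in f:
        record = json.loads(line)
        # The final summary record omits "loss", so keep only logged training steps.
        if "loss" in record:
            steps.append(record["current_steps"])
            losses.append(record["loss"])

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.savefig("training_loss_replot.png")
```

According to the logged records, the loss falls from about 0.81 at step 10 to roughly 0.33–0.34 over the final steps, consistent with the averaged `train_loss` of about 0.389 reported in `train_results.json`.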