vdaita
/

diff-deepseek-code-ir

@@ -2,10 +2,11 @@
 license: other
 library_name: peft
 tags:
 - generated_from_trainer
 base_model: deepseek-ai/deepseek-coder-6.7b-instruct
 model-index:
-- name: outputs/dscoder-code-ir-2
   results: []
 ---
@@ -27,10 +28,16 @@ strict: false
 datasets:
   - path: vdaita/editpackft_inst_code
     type: oasst
 dataset_prepared_path:
-val_set_size: 0.05
-output_dir: ./outputs/dscoder-code-ir-2
 sequence_len: 4096
 sample_packing: true
@@ -46,8 +53,15 @@ lora_dropout: 0.05
 lora_target_linear: true
 lora_fan_in_fan_out:
 wandb_project: huggingface
-wandb_log_model: axolotl-dscoder-code-2
 gradient_accumulation_steps: 4
 micro_batch_size: 2
@@ -80,15 +94,20 @@ weight_decay: 0.0
 fsdp:
 fsdp_config:
 ```
 </details><br>
-# outputs/dscoder-code-ir-2
 This model is a fine-tuned version of [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct) on the vdaita/editpackft_inst_code dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.2242
 ## Model description
@@ -125,10 +144,10 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 0.6668        | 0.03  | 1    | 0.7461          |
-| 0.5084        | 0.26  | 10   | 0.4586          |
-| 0.241         | 0.53  | 20   | 0.2486          |
-| 0.2553        | 0.79  | 30   | 0.2242          |
 ### Framework versions

 license: other
 library_name: peft
 tags:
+- axolotl
 - generated_from_trainer
 base_model: deepseek-ai/deepseek-coder-6.7b-instruct
 model-index:
+- name: diff-deepseek-code-ir
   results: []
 ---
 datasets:
   - path: vdaita/editpackft_inst_code
+    split: train
     type: oasst
 dataset_prepared_path:
+test_datasets:
+  - path: vdaita/editpackft_inst_code
+    split: test
+    type: oasst
+output_dir: ./outputs/dscoder-code-ir-3
 sequence_len: 4096
 sample_packing: true
 lora_target_linear: true
 lora_fan_in_fan_out:
+lora_modules_to_save:
+  - embed_tokens
+  - lm_head
 wandb_project: huggingface
+wandb_log_model: axolotl-dscoder-code-3
+hub_model_id: vdaita/diff-deepseek-code-ir
+hub_strategy: every_save
 gradient_accumulation_steps: 4
 micro_batch_size: 2
 fsdp:
 fsdp_config:
+special_tokens:
+  bos_token: "<|begin_of_sentence|>"
+  eos_token: "<|end_of_sentence|>"
+  pad_token: "<|end_of_sentence|>"
 ```
 </details><br>
+# diff-deepseek-code-ir
 This model is a fine-tuned version of [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct) on the vdaita/editpackft_inst_code dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.2677
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 0.6921        | 0.03  | 1    | 0.7832          |
+| 0.5453        | 0.25  | 10   | 0.5221          |
+| 0.3129        | 0.51  | 20   | 0.2985          |
+| 0.2527        | 0.76  | 30   | 0.2677          |
 ### Framework versions

adapter_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15b45762ee6c593a22c5eb0196cd42325259a42e5d2a3040326db83e66f6db12
-size 319977674

 version https://git-lfs.github.com/spec/v1
+oid sha256:2a374dcebcc148069ae22aec3b398b504514f6fb5b9980a3a763e7f69b983b02
+size 848460690