JiyangZhang committed f8beda1 (parent: 314dad7)

Add exLong no eTest name

Files changed:
- adapter_config.json +5 -5
- adapter_model.bin +1 -1
- logs.txt +25 -25
adapter_config.json
CHANGED
@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
-    "k_proj",
     "o_proj",
-    "…",
+    "k_proj",
     "gate_proj",
-    "…",
-    "up_proj"
+    "v_proj",
+    "up_proj",
+    "down_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
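This hunk only reshuffles the LoRA target_modules list: the new side names the seven standard Llama projection layers, and the matching -5/+5 stats suggest the old list held the same set in a different order (two of the old entries are truncated in the page extract). PEFT matches modules by name, so list order has no effect on which layers receive adapters. As a minimal sketch, not this repo's exact settings, the equivalent peft LoraConfig would look like the following; the rank and alpha live elsewhere in the file and are placeholders here:

# Hypothetical reconstruction of the LoRA setup implied by this config.
# r and lora_alpha are placeholder values, NOT taken from this commit.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,                     # placeholder rank
    lora_alpha=16,            # placeholder scaling factor
    target_modules=[          # the seven projection layers in the new file
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    task_type="CAUSAL_LM",    # matches the unchanged task_type field
)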
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:783fc5d060218d483d88b91733f99c6da0383ee829f013d2c7e54399af2414c8
 size 319977674
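adapter_model.bin is stored with git-lfs, so the diff above is over the three-line pointer file rather than the weights themselves: the pointer records the spec version, the sha256 oid of the real payload, and its size in bytes. Only the oid changes here (the old hash is truncated in the extract) while the size stays at 319977674 bytes, consistent with retrained weights of identical shape. A minimal sketch for checking a downloaded copy against this pointer:

# Verify a local adapter_model.bin against the LFS pointer above.
# The file path is illustrative; oid and size are taken from the diff.
import hashlib
from pathlib import Path

EXPECTED_OID = "783fc5d060218d483d88b91733f99c6da0383ee829f013d2c7e54399af2414c8"
EXPECTED_SIZE = 319977674

path = Path("adapter_model.bin")
assert path.stat().st_size == EXPECTED_SIZE, "size mismatch"

digest = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)
assert digest.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("adapter_model.bin matches the LFS pointer")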
logs.txt
CHANGED
@@ -5,7 +5,7 @@ tags:
 - generated_from_trainer
 library_name: peft
 model-index:
-- name: work/10283/sarella/ls6/exlong-internal/_work/exp/conditionnestack2e-
+- name: work/10283/sarella/ls6/exlong-internal/_work/exp/conditionnestack2e-no-name-ft/lora-codellama-7b-123
   results: []
 ---
 
@@ -23,7 +23,7 @@ base_model_config: codellama/CodeLlama-7b-Instruct-hf
 bf16: true
 dataset_prepared_path: null
 datasets:
-- path: /work/10283/sarella/ls6/exlong-internal/_work/setup/conditionnestack2e-
+- path: /work/10283/sarella/ls6/exlong-internal/_work/setup/conditionnestack2e-no-name-ft/train/train/train-conditionnestack2e-no-name-ft.jsonl
   type:
     field_input: input
     field_instruction: instruction
@@ -63,7 +63,7 @@ micro_batch_size: 4
 model_type: LlamaForCausalLM
 num_epochs: 3
 optimizer: adamw_bnb_8bit
-output_dir: /work/10283/sarella/ls6/exlong-internal/_work/exp/conditionnestack2e-
+output_dir: /work/10283/sarella/ls6/exlong-internal/_work/exp/conditionnestack2e-no-name-ft/lora-codellama-7b-123
 pad_to_sequence_len: true
 resume_from_checkpoint: null
 sample_packing: true
@@ -92,11 +92,11 @@ xformers_attention: null
 
 </details><br>
 
-# work/10283/sarella/ls6/exlong-internal/_work/exp/conditionnestack2e-
+# work/10283/sarella/ls6/exlong-internal/_work/exp/conditionnestack2e-no-name-ft/lora-codellama-7b-123
 
 This model is a fine-tuned version of [codellama/CodeLlama-7b-Instruct-hf](https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
+- Loss: 0.4931
 
 ## Model description
 
@@ -130,26 +130,26 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
+| 0.8379 | 0.01 | 1 | 1.0354 |
+| 0.3779 | 0.16 | 20 | 0.4820 |
+| 0.3361 | 0.31 | 40 | 0.4560 |
+| 0.3153 | 0.47 | 60 | 0.4467 |
+| 0.2735 | 0.63 | 80 | 0.4457 |
+| 0.2437 | 0.78 | 100 | 0.4400 |
+| 0.2941 | 0.94 | 120 | 0.4416 |
+| 0.2153 | 1.08 | 140 | 0.4466 |
+| 0.2583 | 1.23 | 160 | 0.4499 |
+| 0.2026 | 1.39 | 180 | 0.4540 |
+| 0.185 | 1.55 | 200 | 0.4541 |
+| 0.2296 | 1.7 | 220 | 0.4604 |
+| 0.2059 | 1.86 | 240 | 0.4591 |
+| 0.1998 | 2.02 | 260 | 0.4626 |
+| 0.1879 | 2.15 | 280 | 0.4828 |
+| 0.1861 | 2.31 | 300 | 0.4944 |
+| 0.1561 | 2.47 | 320 | 0.4947 |
+| 0.1888 | 2.62 | 340 | 0.4939 |
+| 0.1665 | 2.78 | 360 | 0.4945 |
+| 0.1627 | 2.94 | 380 | 0.4931 |
 
 
 ### Framework versions
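logs.txt embeds the auto-generated model card: an axolotl-style training config (base model codellama/CodeLlama-7b-Instruct-hf, bf16, 3 epochs, adamw_bnb_8bit, sample packing) followed by the evaluation table, whose final validation loss of 0.4931 is the headline Loss above. Note that validation loss bottoms out near step 100 (0.4400) and drifts upward through epoch 3, so an earlier checkpoint may generalize slightly better if one was kept. A minimal sketch of loading the result, assuming the usual peft workflow ("path/to/adapter" is a placeholder for a checkout of this repo):

# Attach this LoRA adapter to the base model named in the config.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained(
    "codellama/CodeLlama-7b-Instruct-hf",
    torch_dtype=torch.bfloat16,  # the config trains in bf16
)
tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")
model = PeftModel.from_pretrained(base, "path/to/adapter")  # placeholder path
model.eval()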