llm/llama38binstruct-summary-100s

Files changed (6) hide show

README.md CHANGED Viewed

@@ -20,7 +20,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [NousResearch/Meta-Llama-3-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3-8B-Instruct) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.8113
 ## Model description
@@ -39,7 +39,7 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 1e-05
 - train_batch_size: 2
 - eval_batch_size: 8
 - seed: 42
@@ -54,10 +54,10 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 2.3176        | 10.0  | 25   | 2.8113          |
-| 2.3111        | 20.0  | 50   | 2.8113          |
-| 2.3098        | 30.0  | 75   | 2.8113          |
-| 2.3188        | 40.0  | 100  | 2.8113          |
 ### Framework versions

 This model is a fine-tuned version of [NousResearch/Meta-Llama-3-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3-8B-Instruct) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.4113
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 0.0002
 - train_batch_size: 2
 - eval_batch_size: 8
 - seed: 42
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 0.6248        | 10.0  | 25   | 1.7454          |
+| 0.0129        | 20.0  | 50   | 2.0997          |
+| 0.0048        | 30.0  | 75   | 2.3748          |
+| 0.0035        | 40.0  | 100  | 2.4113          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -11,7 +11,7 @@
   "layers_to_transform": null,
   "loftq_config": {},
   "lora_alpha": 32,
-  "lora_dropout": 0.15,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "gate_proj",
-    "up_proj",
     "q_proj",
     "down_proj",
     "o_proj",
-    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "layers_to_transform": null,
   "loftq_config": {},
   "lora_alpha": 32,
+  "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
     "down_proj",
+    "gate_proj",
+    "up_proj",
+    "k_proj",
     "o_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
-size 48

 version https://git-lfs.github.com/spec/v1
+oid sha256:af9a36074c57992daf0f50184679987c713bb570eef1e0c528792fbd4b6a82d2
+size 167832240

runs/Jun19_05-00-52_0113f146e29c/events.out.tfevents.1718773290.0113f146e29c.1122.7 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:408ba6435648441cb355cb87e976e65aee860542bfe6a5188621f657b1707dab
+size 7322

runs/Jun19_05-33-52_0113f146e29c/events.out.tfevents.1718775254.0113f146e29c.57332.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:ce730eb23290d472309448c87feb6048bb8ccb264d105c9ff1d67a9d80e887f2
+size 9237

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3e39e9be223e4e51725d5a54334094cd4d30b30442d71c05ce7347ff488a3f1
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b6a133b2959b8874953eff0eb1fd4348bc71812a1110398b0cc36cbdf2de4d3
 size 5432