vpkrishna committed
Commit
aea8052
1 Parent(s): 80b9f7e

llm/llama38binstruct-summary-100s

README.md CHANGED
@@ -20,7 +20,7 @@ should probably proofread and complete it, then remove this comment. -->

  This model is a fine-tuned version of [NousResearch/Meta-Llama-3-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3-8B-Instruct) on the generator dataset.
  It achieves the following results on the evaluation set:
- - Loss: 2.8113
+ - Loss: 2.4113

  ## Model description

@@ -39,7 +39,7 @@ More information needed
  ### Training hyperparameters

  The following hyperparameters were used during training:
- - learning_rate: 1e-05
+ - learning_rate: 0.0002
  - train_batch_size: 2
  - eval_batch_size: 8
  - seed: 42
@@ -54,10 +54,10 @@ The following hyperparameters were used during training:

  | Training Loss | Epoch | Step | Validation Loss |
  |:-------------:|:-----:|:----:|:---------------:|
- | 2.3176        | 10.0  | 25   | 2.8113          |
- | 2.3111        | 20.0  | 50   | 2.8113          |
- | 2.3098        | 30.0  | 75   | 2.8113          |
- | 2.3188        | 40.0  | 100  | 2.8113          |
+ | 0.6248        | 10.0  | 25   | 1.7454          |
+ | 0.0129        | 20.0  | 50   | 2.0997          |
+ | 0.0048        | 30.0  | 75   | 2.3748          |
+ | 0.0035        | 40.0  | 100  | 2.4113          |


  ### Framework versions
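
The substantive README edits are the learning rate (1e-05 → 0.0002) and the refreshed results. Note the pattern in the new loss table: training loss collapses to 0.0035 while validation loss climbs steadily after step 25, which points to overfitting on a small training set. Below is a minimal sketch of how the listed hyperparameters would map onto `transformers.TrainingArguments`; only the learning rate, batch sizes, and seed come from the README, while the output directory, step count, and evaluation cadence are inferred from the loss table rather than stated anywhere.

```python
# Sketch only: map the README hyperparameters onto TrainingArguments.
# learning_rate, batch sizes, and seed are from the diff above; the rest
# are illustrative placeholders.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="llama38binstruct-summary-100s",  # placeholder, not confirmed by the diff
    learning_rate=2e-4,              # new value (was 1e-05 before this commit)
    per_device_train_batch_size=2,   # train_batch_size: 2
    per_device_eval_batch_size=8,    # eval_batch_size: 8
    seed=42,                         # seed: 42
    max_steps=100,                   # the loss table ends at step 100
    evaluation_strategy="steps",
    eval_steps=25,                   # evaluations appear every 25 steps
)
```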
adapter_config.json CHANGED
@@ -11,7 +11,7 @@
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 32,
- "lora_dropout": 0.15,
+ "lora_dropout": 0.05,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
@@ -20,13 +20,13 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "v_proj",
- "gate_proj",
- "up_proj",
  "q_proj",
  "down_proj",
+ "gate_proj",
+ "up_proj",
+ "k_proj",
  "o_proj",
- "k_proj"
+ "v_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
- size 48
+ oid sha256:af9a36074c57992daf0f50184679987c713bb570eef1e0c528792fbd4b6a82d2
+ size 167832240
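
The previous LFS pointer referenced a 48-byte object, too small to hold any tensors, which suggests the earlier adapter upload was an empty placeholder; the new 167,832,240-byte file carries the actual LoRA weights. A minimal sketch of loading this adapter on top of its base model, assuming the repo id matches the commit message (not confirmed by the diff):

```python
# Sketch only: load the base model, then apply the LoRA adapter from this repo.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("NousResearch/Meta-Llama-3-8B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("NousResearch/Meta-Llama-3-8B-Instruct")

# Assumed repo id, inferred from the commit message; adjust as needed.
model = PeftModel.from_pretrained(base, "vpkrishna/llama38binstruct-summary-100s")
```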
runs/Jun19_05-00-52_0113f146e29c/events.out.tfevents.1718773290.0113f146e29c.1122.7 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:408ba6435648441cb355cb87e976e65aee860542bfe6a5188621f657b1707dab
+ size 7322
runs/Jun19_05-33-52_0113f146e29c/events.out.tfevents.1718775254.0113f146e29c.57332.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ce730eb23290d472309448c87feb6048bb8ccb264d105c9ff1d67a9d80e887f2
+ size 9237
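
The two added tfevents files are TensorBoard logs for the 05:00 and 05:33 runs. A minimal sketch of reading the logged scalars back out with TensorBoard's event accumulator; the directory comes from the paths above, while the `eval/loss` tag is the usual `transformers` Trainer naming and is an assumption here.

```python
# Sketch only: pull logged scalars out of the tfevents files added above.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Jun19_05-33-52_0113f146e29c")
acc.Reload()

print(acc.Tags()["scalars"])            # list the tags actually logged
for event in acc.Scalars("eval/loss"):  # assumed tag; pick one from the list above
    print(event.step, event.value)
```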
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a3e39e9be223e4e51725d5a54334094cd4d30b30442d71c05ce7347ff488a3f1
+ oid sha256:8b6a133b2959b8874953eff0eb1fd4348bc71812a1110398b0cc36cbdf2de4d3
  size 5432
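
training_args.bin is the pickled `TrainingArguments` object that the Trainer saves alongside checkpoints; its contents changed with this commit (new hash) while the serialized size is unchanged. A minimal sketch of inspecting it, assuming a recent torch where `weights_only=False` must be passed to unpickle arbitrary objects:

```python
# Sketch only: inspect the pickled TrainingArguments saved by the Trainer.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate)  # expected 0.0002 after this commit
print(args.seed)           # expected 42
```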