End of training
Browse files
README.md
CHANGED
@@ -5,8 +5,6 @@ datasets:
|
|
5 |
library_name: peft
|
6 |
license: llama3.1
|
7 |
tags:
|
8 |
-
- trl
|
9 |
-
- sft
|
10 |
- generated_from_trainer
|
11 |
model-index:
|
12 |
- name: Llama-3.1-8B-Summarization-QLoRa
|
@@ -20,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
|
|
20 |
|
21 |
This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the scitldr dataset.
|
22 |
It achieves the following results on the evaluation set:
|
23 |
-
- Loss: 2.
|
24 |
|
25 |
## Model description
|
26 |
|
@@ -53,9 +51,15 @@ The following hyperparameters were used during training:
|
|
53 |
|
54 |
| Training Loss | Epoch | Step | Validation Loss |
|
55 |
|:-------------:|:------:|:----:|:---------------:|
|
56 |
-
| 2.
|
57 |
-
| 2.
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
|
61 |
### Framework versions
|
|
|
5 |
library_name: peft
|
6 |
license: llama3.1
|
7 |
tags:
|
|
|
|
|
8 |
- generated_from_trainer
|
9 |
model-index:
|
10 |
- name: Llama-3.1-8B-Summarization-QLoRa
|
|
|
18 |
|
19 |
This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the scitldr dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 2.3813
|
22 |
|
23 |
## Model description
|
24 |
|
|
|
51 |
|
52 |
| Training Loss | Epoch | Step | Validation Loss |
|
53 |
|:-------------:|:------:|:----:|:---------------:|
|
54 |
+
| 2.1968 | 0.2008 | 200 | 2.2962 |
|
55 |
+
| 2.2026 | 0.4016 | 400 | 2.3085 |
|
56 |
+
| 2.205 | 0.6024 | 600 | 2.3048 |
|
57 |
+
| 2.2028 | 0.8032 | 800 | 2.2968 |
|
58 |
+
| 2.2001 | 1.0040 | 1000 | 2.2911 |
|
59 |
+
| 1.7063 | 1.2048 | 1200 | 2.3696 |
|
60 |
+
| 1.6856 | 1.4056 | 1400 | 2.3756 |
|
61 |
+
| 1.6556 | 1.6064 | 1600 | 2.3823 |
|
62 |
+
| 1.6331 | 1.8072 | 1800 | 2.3813 |
|
63 |
|
64 |
|
65 |
### Framework versions
|
adapter_config.json
CHANGED
@@ -21,12 +21,12 @@
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"gate_proj",
|
24 |
-
"k_proj",
|
25 |
"o_proj",
|
26 |
-
"up_proj",
|
27 |
"v_proj",
|
28 |
"down_proj",
|
29 |
-
"q_proj"
|
|
|
|
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
32 |
"use_dora": false,
|
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"gate_proj",
|
|
|
24 |
"o_proj",
|
|
|
25 |
"v_proj",
|
26 |
"down_proj",
|
27 |
+
"q_proj",
|
28 |
+
"up_proj",
|
29 |
+
"k_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
32 |
"use_dora": false,
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 167832240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02fc3b6d86bf430a7476dbc01b5911d713628daee74c73b91890349686b10e46
|
3 |
size 167832240
|
runs/Sep05_19-01-15_b9267fa06218/events.out.tfevents.1725563040.b9267fa06218.3117.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cb060374bbc0cc1501a920d235064f0b96d0b9fbbbd02f8076bfe2c3a540d0f
|
3 |
+
size 10357
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ecf2f360c442ba5ca72715fc639fb52c4419bf19f39d96244e4258ef71a5926
|
3 |
+
size 5176
|