Model save
Browse files
- README.md +7 -10
- generation_config.json +3 -5
- model-00001-of-00004.safetensors +2 -2
- model-00002-of-00004.safetensors +2 -2
- model-00003-of-00004.safetensors +2 -2
- model-00004-of-00004.safetensors +1 -1
README.md
CHANGED
@@ -1,11 +1,10 @@
|
|
1 |
---
|
2 |
license: other
|
3 |
-
|
4 |
tags:
|
5 |
- trl
|
6 |
- sft
|
7 |
- generated_from_trainer
|
8 |
-
base_model: meta-llama/Meta-Llama-3-8B-Instruct
|
9 |
datasets:
|
10 |
- generator
|
11 |
model-index:
|
@@ -30,8 +29,7 @@ More information needed
|
|
30 |
|
31 |
## Training and evaluation data
|
32 |
|
33 |
-
|
34 |
-
This dataset also contains the evaluation data ("dev" part) and the testing data ("test" part).
|
35 |
|
36 |
## Training procedure
|
37 |
|
@@ -39,24 +37,23 @@ This dataset also contains the evaluation data ("dev" part) and the testing data
|
|
39 |
|
40 |
The following hyperparameters were used during training:
|
41 |
- learning_rate: 0.0002
|
42 |
-
- train_batch_size:
|
43 |
- eval_batch_size: 8
|
44 |
- seed: 42
|
45 |
- gradient_accumulation_steps: 2
|
46 |
-
- total_train_batch_size:
|
47 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
48 |
-
- lr_scheduler_type:
|
49 |
- lr_scheduler_warmup_ratio: 0.03
|
50 |
- num_epochs: 1
|
51 |
|
52 |
### Training results
|
53 |
|
54 |
-
|
55 |
|
56 |
### Framework versions
|
57 |
|
58 |
-
- PEFT 0.7.2.dev0
|
59 |
- Transformers 4.36.2
|
60 |
- Pytorch 2.1.2+cu121
|
61 |
- Datasets 2.16.1
|
62 |
-
- Tokenizers 0.15.2
|
|
|
1 |
---
|
2 |
license: other
|
3 |
+
base_model: meta-llama/Meta-Llama-3-8B-Instruct
|
4 |
tags:
|
5 |
- trl
|
6 |
- sft
|
7 |
- generated_from_trainer
|
|
|
8 |
datasets:
|
9 |
- generator
|
10 |
model-index:
|
|
|
29 |
|
30 |
## Training and evaluation data
|
31 |
|
32 |
+
More information needed
|
|
|
33 |
|
34 |
## Training procedure
|
35 |
|
|
|
37 |
|
38 |
The following hyperparameters were used during training:
|
39 |
- learning_rate: 0.0002
|
40 |
+
- train_batch_size: 1
|
41 |
- eval_batch_size: 8
|
42 |
- seed: 42
|
43 |
- gradient_accumulation_steps: 2
|
44 |
+
- total_train_batch_size: 2
|
45 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
46 |
+
- lr_scheduler_type: cosine
|
47 |
- lr_scheduler_warmup_ratio: 0.03
|
48 |
- num_epochs: 1
|
49 |
|
50 |
### Training results
|
51 |
|
52 |
+
|
53 |
|
54 |
### Framework versions
|
55 |
|
|
|
56 |
- Transformers 4.36.2
|
57 |
- Pytorch 2.1.2+cu121
|
58 |
- Datasets 2.16.1
|
59 |
+
- Tokenizers 0.15.2
|
generation_config.json
CHANGED
@@ -1,11 +1,9 @@
|
|
1 |
{
|
2 |
-
"bos_token_id":
|
3 |
"do_sample": true,
|
4 |
-
"eos_token_id": [
|
5 |
-
128001,
|
6 |
-
128009
|
7 |
-
],
|
8 |
"max_length": 4096,
|
|
|
9 |
"temperature": 0.6,
|
10 |
"top_p": 0.9,
|
11 |
"transformers_version": "4.36.2"
|
|
|
1 |
{
|
2 |
+
"bos_token_id": 128257,
|
3 |
"do_sample": true,
|
4 |
+
"eos_token_id": 128256,
|
|
|
|
|
|
|
5 |
"max_length": 4096,
|
6 |
+
"pad_token_id": 128256,
|
7 |
"temperature": 0.6,
|
8 |
"top_p": 0.9,
|
9 |
"transformers_version": "4.36.2"
|
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53a989a55fd02cb7119232b24aaced01e4ec0df2528ed6410fb1a33b32d7f326
|
3 |
+
size 4976715056
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d14ccf1a38a3dbee6e56b1901065f587171efde9ce58633fbb2b8366570adf12
|
3 |
+
size 4999802720
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01fc65dfc0472490cfdc5b1522c9933c43f3b4fb2cf88ac50d1dd527f9aff24c
|
3 |
+
size 4915916176
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1168155192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee1687938bc153181b4355541c5fc8d294e9224533afc18136061680619a93ea
|
3 |
size 1168155192
|