Minbyul committed on
Commit
9c67982
1 Parent(s): 51da073

Model save

Browse files
README.md CHANGED
@@ -1,15 +1,11 @@
1
  ---
2
  base_model: dmis-lab/selfbiorag_7b
3
  tags:
4
- - alignment-handbook
5
- - trl
6
- - sft
7
- - generated_from_trainer
8
  - trl
9
  - sft
10
  - generated_from_trainer
11
  datasets:
12
- - HuggingFaceH4/deita-10k-v0-sft
13
  model-index:
14
  - name: selfbiorag-7b-wo-live_qa-iter-sft-step1
15
  results: []
@@ -20,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  # selfbiorag-7b-wo-live_qa-iter-sft-step1
22
 
23
- This model is a fine-tuned version of [dmis-lab/selfbiorag_7b](https://huggingface.co/dmis-lab/selfbiorag_7b) on the HuggingFaceH4/deita-10k-v0-sft dataset.
24
  It achieves the following results on the evaluation set:
25
- - Loss: 1.9657
26
 
27
  ## Model description
28
 
@@ -59,14 +55,14 @@ The following hyperparameters were used during training:
59
 
60
  | Training Loss | Epoch | Step | Validation Loss |
61
  |:-------------:|:-----:|:----:|:---------------:|
62
- | 1.5018 | 1.0 | 5 | 1.7469 |
63
- | 1.1848 | 2.0 | 10 | 1.9052 |
64
- | 0.9623 | 3.0 | 15 | 1.9657 |
65
 
66
 
67
  ### Framework versions
68
 
69
- - Transformers 4.39.0.dev0
70
- - Pytorch 2.1.2
71
  - Datasets 2.14.6
72
  - Tokenizers 0.15.2
 
1
  ---
2
  base_model: dmis-lab/selfbiorag_7b
3
  tags:
 
 
 
 
4
  - trl
5
  - sft
6
  - generated_from_trainer
7
  datasets:
8
+ - generator
9
  model-index:
10
  - name: selfbiorag-7b-wo-live_qa-iter-sft-step1
11
  results: []
 
16
 
17
  # selfbiorag-7b-wo-live_qa-iter-sft-step1
18
 
19
+ This model is a fine-tuned version of [dmis-lab/selfbiorag_7b](https://huggingface.co/dmis-lab/selfbiorag_7b) on the generator dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 1.1582
22
 
23
  ## Model description
24
 
 
55
 
56
  | Training Loss | Epoch | Step | Validation Loss |
57
  |:-------------:|:-----:|:----:|:---------------:|
58
+ | 1.2659 | 0.91 | 5 | 1.2054 |
59
+ | 1.0094 | 2.0 | 11 | 1.1690 |
60
+ | 0.8249 | 2.73 | 15 | 1.1582 |
61
 
62
 
63
  ### Framework versions
64
 
65
+ - Transformers 4.38.2
66
+ - Pytorch 2.1.2+cu121
67
  - Datasets 2.14.6
68
  - Tokenizers 0.15.2
all_results.json CHANGED
@@ -1,13 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_loss": 1.9656569957733154,
4
- "eval_runtime": 1.8218,
5
- "eval_samples": 100,
6
- "eval_samples_per_second": 4.391,
7
- "eval_steps_per_second": 0.549,
8
- "train_loss": 1.22816797097524,
9
- "train_runtime": 294.6904,
10
  "train_samples": 4848,
11
- "train_samples_per_second": 3.176,
12
- "train_steps_per_second": 0.051
13
  }
 
1
  {
2
+ "epoch": 2.73,
3
+ "train_loss": 1.0476287603378296,
4
+ "train_runtime": 318.5273,
 
 
 
 
 
5
  "train_samples": 4848,
6
+ "train_samples_per_second": 3.306,
7
+ "train_steps_per_second": 0.047
8
  }
config.json CHANGED
@@ -22,7 +22,7 @@
22
  "rope_theta": 10000.0,
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
- "transformers_version": "4.39.0.dev0",
26
- "use_cache": true,
27
  "vocab_size": 32016
28
  }
 
22
  "rope_theta": 10000.0,
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.38.2",
26
+ "use_cache": false,
27
  "vocab_size": 32016
28
  }
generation_config.json CHANGED
@@ -6,5 +6,5 @@
6
  "pad_token_id": 0,
7
  "temperature": 0.6,
8
  "top_p": 0.9,
9
- "transformers_version": "4.39.0.dev0"
10
  }
 
6
  "pad_token_id": 0,
7
  "temperature": 0.6,
8
  "top_p": 0.9,
9
+ "transformers_version": "4.38.2"
10
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2e508574c3f3efaf638d758870e4e73ee29de4828cbf4b295fd2cc366e85dfd
3
  size 4939116424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93c4c2b9b3ec5dc0efd95f4b203ce8723fffa0e89d2903299b3f2c5696b4c420
3
  size 4939116424
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb1b3c56456213b69e50d195f19bcd64b29abe831b726c28e7df2bccd6891081
3
  size 4947390880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b277ce0ae848f0124609ae2982d0558a2c8015cab2bad35611540a3ef75a72e
3
  size 4947390880
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9220410beb9d45a759a152c57e89d5b510fdc214aac830a65438c92f7e2bde35
3
  size 3590619888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20d276b07d457aaa761e20c71e1d7881afa6a139da4bbe78cbc3527d5e57b3b0
3
  size 3590619888
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "train_loss": 1.22816797097524,
4
- "train_runtime": 294.6904,
5
  "train_samples": 4848,
6
- "train_samples_per_second": 3.176,
7
- "train_steps_per_second": 0.051
8
  }
 
1
  {
2
+ "epoch": 2.73,
3
+ "train_loss": 1.0476287603378296,
4
+ "train_runtime": 318.5273,
5
  "train_samples": 4848,
6
+ "train_samples_per_second": 3.306,
7
+ "train_steps_per_second": 0.047
8
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
  "global_step": 15,
7
  "is_hyper_param_search": false,
@@ -9,65 +9,65 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.2,
13
- "grad_norm": 8.297603900150138,
14
  "learning_rate": 1e-05,
15
- "loss": 1.6795,
16
  "step": 1
17
  },
18
  {
19
- "epoch": 1.0,
20
- "grad_norm": 2.7714905692865806,
21
  "learning_rate": 1.7485107481711014e-05,
22
- "loss": 1.5018,
23
  "step": 5
24
  },
25
  {
26
- "epoch": 1.0,
27
- "eval_loss": 1.746938943862915,
28
- "eval_runtime": 1.6252,
29
- "eval_samples_per_second": 4.922,
30
- "eval_steps_per_second": 0.615,
31
  "step": 5
32
  },
33
  {
34
- "epoch": 2.0,
35
- "grad_norm": 2.355589361090063,
36
  "learning_rate": 6.453951129574644e-06,
37
- "loss": 1.1848,
38
  "step": 10
39
  },
40
  {
41
  "epoch": 2.0,
42
- "eval_loss": 1.9052479267120361,
43
- "eval_runtime": 1.5781,
44
- "eval_samples_per_second": 5.069,
45
- "eval_steps_per_second": 0.634,
46
- "step": 10
47
  },
48
  {
49
- "epoch": 3.0,
50
- "grad_norm": 1.8474736569347063,
51
  "learning_rate": 0.0,
52
- "loss": 0.9623,
53
  "step": 15
54
  },
55
  {
56
- "epoch": 3.0,
57
- "eval_loss": 1.9656569957733154,
58
- "eval_runtime": 1.5681,
59
- "eval_samples_per_second": 5.102,
60
- "eval_steps_per_second": 0.638,
61
  "step": 15
62
  },
63
  {
64
- "epoch": 3.0,
65
  "step": 15,
66
  "total_flos": 3088349921280.0,
67
- "train_loss": 1.22816797097524,
68
- "train_runtime": 294.6904,
69
- "train_samples_per_second": 3.176,
70
- "train_steps_per_second": 0.051
71
  }
72
  ],
73
  "logging_steps": 5,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.7272727272727275,
5
  "eval_steps": 500,
6
  "global_step": 15,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.18,
13
+ "grad_norm": 8.112652061034474,
14
  "learning_rate": 1e-05,
15
+ "loss": 1.4798,
16
  "step": 1
17
  },
18
  {
19
+ "epoch": 0.91,
20
+ "grad_norm": 2.848398983756101,
21
  "learning_rate": 1.7485107481711014e-05,
22
+ "loss": 1.2659,
23
  "step": 5
24
  },
25
  {
26
+ "epoch": 0.91,
27
+ "eval_loss": 1.2053821086883545,
28
+ "eval_runtime": 1.8264,
29
+ "eval_samples_per_second": 3.833,
30
+ "eval_steps_per_second": 0.548,
31
  "step": 5
32
  },
33
  {
34
+ "epoch": 1.82,
35
+ "grad_norm": 2.6874796106077996,
36
  "learning_rate": 6.453951129574644e-06,
37
+ "loss": 1.0094,
38
  "step": 10
39
  },
40
  {
41
  "epoch": 2.0,
42
+ "eval_loss": 1.1690208911895752,
43
+ "eval_runtime": 1.8589,
44
+ "eval_samples_per_second": 3.766,
45
+ "eval_steps_per_second": 0.538,
46
+ "step": 11
47
  },
48
  {
49
+ "epoch": 2.73,
50
+ "grad_norm": 1.7499037267304558,
51
  "learning_rate": 0.0,
52
+ "loss": 0.8249,
53
  "step": 15
54
  },
55
  {
56
+ "epoch": 2.73,
57
+ "eval_loss": 1.1581600904464722,
58
+ "eval_runtime": 1.8666,
59
+ "eval_samples_per_second": 3.75,
60
+ "eval_steps_per_second": 0.536,
61
  "step": 15
62
  },
63
  {
64
+ "epoch": 2.73,
65
  "step": 15,
66
  "total_flos": 3088349921280.0,
67
+ "train_loss": 1.0476287603378296,
68
+ "train_runtime": 318.5273,
69
+ "train_samples_per_second": 3.306,
70
+ "train_steps_per_second": 0.047
71
  }
72
  ],
73
  "logging_steps": 5,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91e3f59bd6deed88623a8848dda32804386130d521096cbe9354c14e9906dfd5
3
  size 6200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4860520a8292e2b6575761ceba29cc22d6e44d02462c96fceff35960661933a
3
  size 6200