Minbyul committed on
Commit
f6084ac
1 Parent(s): 96b0d4c

Model save

Browse files
README.md CHANGED
@@ -1,15 +1,11 @@
1
  ---
2
  base_model: dmis-lab/selfbiorag_7b
3
  tags:
4
- - alignment-handbook
5
- - trl
6
- - sft
7
- - generated_from_trainer
8
  - trl
9
  - sft
10
  - generated_from_trainer
11
  datasets:
12
- - HuggingFaceH4/deita-10k-v0-sft
13
  model-index:
14
  - name: selfbiorag-7b-wo-kqa_golden-iter-sft-step1
15
  results: []
@@ -20,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  # selfbiorag-7b-wo-kqa_golden-iter-sft-step1
22
 
23
- This model is a fine-tuned version of [dmis-lab/selfbiorag_7b](https://huggingface.co/dmis-lab/selfbiorag_7b) on the HuggingFaceH4/deita-10k-v0-sft dataset.
24
  It achieves the following results on the evaluation set:
25
- - Loss: 1.5888
26
 
27
  ## Model description
28
 
@@ -59,14 +55,14 @@ The following hyperparameters were used during training:
59
 
60
  | Training Loss | Epoch | Step | Validation Loss |
61
  |:-------------:|:-----:|:----:|:---------------:|
62
- | 1.6447 | 0.84 | 4 | 1.3963 |
63
- | 1.4922 | 1.89 | 9 | 1.5709 |
64
- | 1.1792 | 2.53 | 12 | 1.5888 |
65
 
66
 
67
  ### Framework versions
68
 
69
- - Transformers 4.39.0.dev0
70
- - Pytorch 2.1.2
71
  - Datasets 2.14.6
72
  - Tokenizers 0.15.2
 
1
  ---
2
  base_model: dmis-lab/selfbiorag_7b
3
  tags:
 
 
 
 
4
  - trl
5
  - sft
6
  - generated_from_trainer
7
  datasets:
8
+ - generator
9
  model-index:
10
  - name: selfbiorag-7b-wo-kqa_golden-iter-sft-step1
11
  results: []
 
16
 
17
  # selfbiorag-7b-wo-kqa_golden-iter-sft-step1
18
 
19
+ This model is a fine-tuned version of [dmis-lab/selfbiorag_7b](https://huggingface.co/dmis-lab/selfbiorag_7b) on the generator dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 1.0452
22
 
23
  ## Model description
24
 
 
55
 
56
  | Training Loss | Epoch | Step | Validation Loss |
57
  |:-------------:|:-----:|:----:|:---------------:|
58
+ | 1.2821 | 0.91 | 5 | 1.1136 |
59
+ | 0.9973 | 2.0 | 11 | 1.0571 |
60
+ | 0.82 | 2.73 | 15 | 1.0452 |
61
 
62
 
63
  ### Framework versions
64
 
65
+ - Transformers 4.38.2
66
+ - Pytorch 2.1.2+cu121
67
  - Datasets 2.14.6
68
  - Tokenizers 0.15.2
all_results.json CHANGED
@@ -1,13 +1,8 @@
1
  {
2
- "epoch": 2.53,
3
- "eval_loss": 1.5887579917907715,
4
- "eval_runtime": 36.4368,
5
- "eval_samples": 4044,
6
- "eval_samples_per_second": 10.045,
7
- "eval_steps_per_second": 0.631,
8
- "train_loss": 1.2966619332631428,
9
- "train_runtime": 333.6869,
10
  "train_samples": 4750,
11
- "train_samples_per_second": 2.724,
12
- "train_steps_per_second": 0.036
13
  }
 
1
  {
2
+ "epoch": 2.73,
3
+ "train_loss": 1.0436887741088867,
4
+ "train_runtime": 249.1645,
 
 
 
 
 
5
  "train_samples": 4750,
6
+ "train_samples_per_second": 4.142,
7
+ "train_steps_per_second": 0.06
8
  }
config.json CHANGED
@@ -22,7 +22,7 @@
22
  "rope_theta": 10000.0,
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
- "transformers_version": "4.39.0.dev0",
26
- "use_cache": true,
27
  "vocab_size": 32016
28
  }
 
22
  "rope_theta": 10000.0,
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.38.2",
26
+ "use_cache": false,
27
  "vocab_size": 32016
28
  }
generation_config.json CHANGED
@@ -6,5 +6,5 @@
6
  "pad_token_id": 0,
7
  "temperature": 0.6,
8
  "top_p": 0.9,
9
- "transformers_version": "4.39.0.dev0"
10
  }
 
6
  "pad_token_id": 0,
7
  "temperature": 0.6,
8
  "top_p": 0.9,
9
+ "transformers_version": "4.38.2"
10
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff5454e9289c713b5c475fa6ec400937ac46bbbd570c0020d62e20477d4ee767
3
  size 4939116424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bef77656dd3240b01ad7835aa55b2c321719a43e2df2ea0a38a954d2ccc1106e
3
  size 4939116424
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6eb8cf289dadd6a53a66c4ff0e7b44c08351c943ae5d5923de036ce70c6ffb23
3
  size 4947390880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01a9ac4ef29e483dbf24f6f277f27a4bfdd63daf1b2940c472411a583b1d57f6
3
  size 4947390880
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dea427888a8f336e8c365e11181ef5f6d6ba51f93b1c3268cb1bf6bcfb8ae9d6
3
  size 3590619888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba3c462b8c72af582ac9ac76a924e3e1a0b4228909be7b422d7bef01734a71ef
3
  size 3590619888
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.53,
3
- "train_loss": 1.2966619332631428,
4
- "train_runtime": 333.6869,
5
  "train_samples": 4750,
6
- "train_samples_per_second": 2.724,
7
- "train_steps_per_second": 0.036
8
  }
 
1
  {
2
+ "epoch": 2.73,
3
+ "train_loss": 1.0436887741088867,
4
+ "train_runtime": 249.1645,
5
  "train_samples": 4750,
6
+ "train_samples_per_second": 4.142,
7
+ "train_steps_per_second": 0.06
8
  }
trainer_state.json CHANGED
@@ -1,74 +1,81 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.526315789473684,
5
  "eval_steps": 500,
6
- "global_step": 12,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.21,
13
- "grad_norm": 7.908194508354831,
14
  "learning_rate": 1e-05,
15
- "loss": 1.6447,
16
  "step": 1
17
  },
18
  {
19
- "epoch": 0.84,
20
- "eval_loss": 1.3962677717208862,
21
- "eval_runtime": 36.3272,
22
- "eval_samples_per_second": 10.075,
23
- "eval_steps_per_second": 0.633,
24
- "step": 4
25
  },
26
  {
27
- "epoch": 1.05,
28
- "grad_norm": 2.7740427914859738,
29
- "learning_rate": 1.5877852522924733e-05,
30
- "loss": 1.4922,
 
31
  "step": 5
32
  },
33
  {
34
- "epoch": 1.89,
35
- "eval_loss": 1.5709341764450073,
36
- "eval_runtime": 35.8444,
37
- "eval_samples_per_second": 10.211,
38
- "eval_steps_per_second": 0.642,
39
- "step": 9
40
  },
41
  {
42
- "epoch": 2.11,
43
- "grad_norm": 2.1451956932311864,
44
- "learning_rate": 1.9098300562505266e-06,
45
- "loss": 1.1792,
46
- "step": 10
 
 
 
 
 
 
 
 
47
  },
48
  {
49
- "epoch": 2.53,
50
- "eval_loss": 1.5887579917907715,
51
- "eval_runtime": 36.3881,
52
- "eval_samples_per_second": 10.058,
53
- "eval_steps_per_second": 0.632,
54
- "step": 12
55
  },
56
  {
57
- "epoch": 2.53,
58
- "step": 12,
59
- "total_flos": 2460210954240.0,
60
- "train_loss": 1.2966619332631428,
61
- "train_runtime": 333.6869,
62
- "train_samples_per_second": 2.724,
63
- "train_steps_per_second": 0.036
64
  }
65
  ],
66
  "logging_steps": 5,
67
- "max_steps": 12,
68
  "num_input_tokens_seen": 0,
69
  "num_train_epochs": 3,
70
  "save_steps": 500,
71
- "total_flos": 2460210954240.0,
72
  "train_batch_size": 4,
73
  "trial_name": null,
74
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.7272727272727275,
5
  "eval_steps": 500,
6
+ "global_step": 15,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.18,
13
+ "grad_norm": 7.793758473406165,
14
  "learning_rate": 1e-05,
15
+ "loss": 1.4406,
16
  "step": 1
17
  },
18
  {
19
+ "epoch": 0.91,
20
+ "grad_norm": 2.8267893418386913,
21
+ "learning_rate": 1.7485107481711014e-05,
22
+ "loss": 1.2821,
23
+ "step": 5
 
24
  },
25
  {
26
+ "epoch": 0.91,
27
+ "eval_loss": 1.1136233806610107,
28
+ "eval_runtime": 1.3266,
29
+ "eval_samples_per_second": 10.553,
30
+ "eval_steps_per_second": 0.754,
31
  "step": 5
32
  },
33
  {
34
+ "epoch": 1.82,
35
+ "grad_norm": 2.7191793787610057,
36
+ "learning_rate": 6.453951129574644e-06,
37
+ "loss": 0.9973,
38
+ "step": 10
 
39
  },
40
  {
41
+ "epoch": 2.0,
42
+ "eval_loss": 1.0571211576461792,
43
+ "eval_runtime": 1.3822,
44
+ "eval_samples_per_second": 10.129,
45
+ "eval_steps_per_second": 0.723,
46
+ "step": 11
47
+ },
48
+ {
49
+ "epoch": 2.73,
50
+ "grad_norm": 1.7371058324616164,
51
+ "learning_rate": 0.0,
52
+ "loss": 0.82,
53
+ "step": 15
54
  },
55
  {
56
+ "epoch": 2.73,
57
+ "eval_loss": 1.045231580734253,
58
+ "eval_runtime": 1.3746,
59
+ "eval_samples_per_second": 10.185,
60
+ "eval_steps_per_second": 0.727,
61
+ "step": 15
62
  },
63
  {
64
+ "epoch": 2.73,
65
+ "step": 15,
66
+ "total_flos": 3088349921280.0,
67
+ "train_loss": 1.0436887741088867,
68
+ "train_runtime": 249.1645,
69
+ "train_samples_per_second": 4.142,
70
+ "train_steps_per_second": 0.06
71
  }
72
  ],
73
  "logging_steps": 5,
74
+ "max_steps": 15,
75
  "num_input_tokens_seen": 0,
76
  "num_train_epochs": 3,
77
  "save_steps": 500,
78
+ "total_flos": 3088349921280.0,
79
  "train_batch_size": 4,
80
  "trial_name": null,
81
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d406f9aa449db8b0d918c79ac45287c291c029fa2c5ad4638f01e580ad50656
3
  size 6200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6ca3c9e83615ad729882cccf279e609bf3059b16167cb67f5d5977df49d19fa
3
  size 6200