chansung committed
Commit 7795954
1 Parent(s): e53e007

Model save

README.md CHANGED
@@ -2,13 +2,13 @@
  license: gemma
  library_name: peft
  tags:
- - alignment-handbook
  - trl
  - sft
+ - alignment-handbook
  - generated_from_trainer
  base_model: google/gemma-2b
  datasets:
- - llama-duo/synth_summarize_dataset_dedup
+ - generator
  model-index:
  - name: gemma2b-summarize-gpt4o-2k
    results: []
@@ -19,9 +19,9 @@ should probably proofread and complete it, then remove this comment. -->

  # gemma2b-summarize-gpt4o-2k

- This model is a fine-tuned version of [google/gemma-2b](https://huggingface.co/google/gemma-2b) on the llama-duo/synth_summarize_dataset_dedup dataset.
+ This model is a fine-tuned version of [google/gemma-2b](https://huggingface.co/google/gemma-2b) on the generator dataset.
  It achieves the following results on the evaluation set:
- - Loss: 2.5693
+ - Loss: 2.5878

  ## Model description

@@ -58,16 +58,16 @@ The following hyperparameters were used during training:

  | Training Loss | Epoch | Step | Validation Loss |
  |:-------------:|:-----:|:----:|:---------------:|
- | 2.9969 | 1.0 | 5 | 3.1171 |
- | 2.5079 | 2.0 | 10 | 2.8491 |
- | 2.1709 | 3.0 | 15 | 2.7251 |
- | 1.9297 | 4.0 | 20 | 2.6267 |
- | 1.7591 | 5.0 | 25 | 2.5900 |
- | 1.6527 | 6.0 | 30 | 2.5908 |
- | 1.5938 | 7.0 | 35 | 2.5817 |
- | 1.5589 | 8.0 | 40 | 2.5729 |
- | 1.5434 | 9.0 | 45 | 2.5688 |
- | 1.5362 | 10.0 | 50 | 2.5693 |
+ | 2.9978 | 1.0 | 5 | 3.1071 |
+ | 2.5123 | 2.0 | 10 | 2.8503 |
+ | 2.2077 | 3.0 | 15 | 2.7154 |
+ | 1.9749 | 4.0 | 20 | 2.6507 |
+ | 1.8015 | 5.0 | 25 | 2.6242 |
+ | 1.6817 | 6.0 | 30 | 2.6105 |
+ | 1.6095 | 7.0 | 35 | 2.6003 |
+ | 1.5701 | 8.0 | 40 | 2.5917 |
+ | 1.5524 | 9.0 | 45 | 2.5882 |
+ | 1.5443 | 10.0 | 50 | 2.5878 |


  ### Framework versions
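
The README above documents a LoRA/PEFT adapter trained on top of google/gemma-2b. As a quick orientation, the sketch below shows how such an adapter is typically loaded with `peft`; the repo id `llama-duo/gemma2b-summarize-gpt4o-2k` is an assumption inferred from the model name and dataset organization, not something stated in this commit.

```python
# Minimal sketch, not part of this commit: load the base model and attach the adapter.
# The adapter repo id is an assumption inferred from the model name; adjust as needed.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "google/gemma-2b"
adapter_id = "llama-duo/gemma2b-summarize-gpt4o-2k"  # assumed repo id

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base_model, adapter_id)

prompt = "Summarize: The quick brown fox jumps over the lazy dog."
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```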
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
-   "q_proj",
-   "k_proj",
+   "gate_proj",
    "v_proj",
-   "o_proj",
+   "q_proj",
+   "down_proj",
    "up_proj",
-   "gate_proj",
-   "down_proj"
+   "o_proj",
+   "k_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:384152cc55936a07cedb1682ede49183b4d90927bdb646907aa3aefeabc34e92
+ oid sha256:7b8e1cea41566ad451f2b5f26fee052cd2307ba926ad0df6f788db23e6de3911
  size 19644912
all_results.json CHANGED
@@ -6,9 +6,9 @@
    "eval_samples_per_second": 18.91,
    "eval_steps_per_second": 1.891,
    "total_flos": 2.9368666998964224e+16,
-   "train_loss": 1.9250271797180176,
-   "train_runtime": 273.2217,
+   "train_loss": 1.9452767181396484,
+   "train_runtime": 274.5975,
    "train_samples": 2019,
-   "train_samples_per_second": 8.052,
-   "train_steps_per_second": 0.183
+   "train_samples_per_second": 8.012,
+   "train_steps_per_second": 0.182
  }
runs/Jun05_13-40-36_user-HP-Z8-Fury-G5-Workstation-Desktop-PC/events.out.tfevents.1717562450.user-HP-Z8-Fury-G5-Workstation-Desktop-PC.24815.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:32214e1d82dead710cb009529df36d23bbd15a7098e0be493779095a0293a45b
+ size 10878
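
The new TensorBoard event file is stored as a Git LFS pointer, so only its hash and size appear in the diff. Once the LFS object has been fetched (for example with `git lfs pull`), the logged scalars can be read back; a sketch, where the directory path and scalar tag names are assumptions (the exact names depend on the Trainer version):

```python
# Sketch: read scalars from the downloaded tfevents file with TensorBoard's reader.
# The log directory and scalar tag names are assumptions; adjust to the fetched files.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

log_dir = "runs/Jun05_13-40-36_user-HP-Z8-Fury-G5-Workstation-Desktop-PC"
acc = EventAccumulator(log_dir)
acc.Reload()

print(acc.Tags()["scalars"])            # list the available scalar tags
for event in acc.Scalars("eval/loss"):  # assumed tag name
    print(event.step, event.value)
```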
train_results.json CHANGED
@@ -1,9 +1,9 @@
  {
    "epoch": 10.0,
    "total_flos": 2.9368666998964224e+16,
-   "train_loss": 1.9250271797180176,
-   "train_runtime": 273.2217,
+   "train_loss": 1.9452767181396484,
+   "train_runtime": 274.5975,
    "train_samples": 2019,
-   "train_samples_per_second": 8.052,
-   "train_steps_per_second": 0.183
+   "train_samples_per_second": 8.012,
+   "train_steps_per_second": 0.182
  }
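
The updated throughput figures are internally consistent with the 50 optimizer steps recorded in trainer_state.json; a quick sanity check using only values from this diff:

```python
# Sanity check on the updated train_results.json numbers (values copied from the diff).
train_runtime = 274.5975          # seconds
train_steps_per_second = 0.182
total_steps = 50                  # 10 epochs x 5 steps, per trainer_state.json

print(round(train_steps_per_second * train_runtime, 1))  # ~50.0, matches total_steps
```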
trainer_state.json CHANGED
@@ -17,162 +17,162 @@
    },
    {
      "epoch": 1.0,
-     "grad_norm": 1.7265625,
+     "grad_norm": 1.65625,
      "learning_rate": 0.0002,
-     "loss": 2.9969,
+     "loss": 2.9978,
      "step": 5
    },
    {
      "epoch": 1.0,
-     "eval_loss": 3.1170578002929688,
-     "eval_runtime": 0.4884,
-     "eval_samples_per_second": 20.475,
-     "eval_steps_per_second": 2.047,
+     "eval_loss": 3.1071324348449707,
+     "eval_runtime": 0.5233,
+     "eval_samples_per_second": 19.11,
+     "eval_steps_per_second": 1.911,
      "step": 5
    },
    {
      "epoch": 2.0,
-     "grad_norm": 1.390625,
+     "grad_norm": 2.265625,
      "learning_rate": 0.00019396926207859084,
-     "loss": 2.5079,
+     "loss": 2.5123,
      "step": 10
    },
    {
      "epoch": 2.0,
-     "eval_loss": 2.849125862121582,
-     "eval_runtime": 0.5022,
-     "eval_samples_per_second": 19.912,
-     "eval_steps_per_second": 1.991,
+     "eval_loss": 2.8502731323242188,
+     "eval_runtime": 0.5332,
+     "eval_samples_per_second": 18.755,
+     "eval_steps_per_second": 1.876,
      "step": 10
    },
    {
      "epoch": 3.0,
-     "grad_norm": 1.2109375,
+     "grad_norm": 1.5,
      "learning_rate": 0.0001766044443118978,
-     "loss": 2.1709,
+     "loss": 2.2077,
      "step": 15
    },
    {
      "epoch": 3.0,
-     "eval_loss": 2.725147247314453,
-     "eval_runtime": 0.5172,
-     "eval_samples_per_second": 19.335,
-     "eval_steps_per_second": 1.934,
+     "eval_loss": 2.715447425842285,
+     "eval_runtime": 0.5429,
+     "eval_samples_per_second": 18.419,
+     "eval_steps_per_second": 1.842,
      "step": 15
    },
    {
      "epoch": 4.0,
-     "grad_norm": 0.8984375,
+     "grad_norm": 0.765625,
      "learning_rate": 0.00015000000000000001,
-     "loss": 1.9297,
+     "loss": 1.9749,
      "step": 20
    },
    {
      "epoch": 4.0,
-     "eval_loss": 2.6266896724700928,
-     "eval_runtime": 0.5348,
-     "eval_samples_per_second": 18.699,
-     "eval_steps_per_second": 1.87,
+     "eval_loss": 2.6506550312042236,
+     "eval_runtime": 0.54,
+     "eval_samples_per_second": 18.517,
+     "eval_steps_per_second": 1.852,
      "step": 20
    },
    {
      "epoch": 5.0,
-     "grad_norm": 0.796875,
+     "grad_norm": 0.7109375,
      "learning_rate": 0.00011736481776669306,
-     "loss": 1.7591,
+     "loss": 1.8015,
      "step": 25
    },
    {
      "epoch": 5.0,
-     "eval_loss": 2.5900256633758545,
-     "eval_runtime": 0.5346,
-     "eval_samples_per_second": 18.707,
-     "eval_steps_per_second": 1.871,
+     "eval_loss": 2.624201536178589,
+     "eval_runtime": 0.557,
+     "eval_samples_per_second": 17.953,
+     "eval_steps_per_second": 1.795,
      "step": 25
    },
    {
      "epoch": 6.0,
-     "grad_norm": 0.61328125,
+     "grad_norm": 0.369140625,
      "learning_rate": 8.263518223330697e-05,
-     "loss": 1.6527,
+     "loss": 1.6817,
      "step": 30
    },
    {
      "epoch": 6.0,
-     "eval_loss": 2.590770721435547,
-     "eval_runtime": 0.5433,
-     "eval_samples_per_second": 18.406,
-     "eval_steps_per_second": 1.841,
+     "eval_loss": 2.610503673553467,
+     "eval_runtime": 0.5392,
+     "eval_samples_per_second": 18.546,
+     "eval_steps_per_second": 1.855,
      "step": 30
    },
    {
      "epoch": 7.0,
-     "grad_norm": 0.341796875,
+     "grad_norm": 0.365234375,
      "learning_rate": 5.000000000000002e-05,
-     "loss": 1.5938,
+     "loss": 1.6095,
      "step": 35
    },
    {
      "epoch": 7.0,
-     "eval_loss": 2.5816967487335205,
-     "eval_runtime": 0.5475,
-     "eval_samples_per_second": 18.265,
-     "eval_steps_per_second": 1.826,
+     "eval_loss": 2.600292682647705,
+     "eval_runtime": 0.5472,
+     "eval_samples_per_second": 18.276,
+     "eval_steps_per_second": 1.828,
      "step": 35
    },
    {
      "epoch": 8.0,
-     "grad_norm": 0.322265625,
+     "grad_norm": 0.466796875,
      "learning_rate": 2.339555568810221e-05,
-     "loss": 1.5589,
+     "loss": 1.5701,
      "step": 40
    },
    {
      "epoch": 8.0,
-     "eval_loss": 2.5729336738586426,
-     "eval_runtime": 0.5448,
-     "eval_samples_per_second": 18.356,
-     "eval_steps_per_second": 1.836,
+     "eval_loss": 2.5916552543640137,
+     "eval_runtime": 0.5514,
+     "eval_samples_per_second": 18.135,
+     "eval_steps_per_second": 1.813,
      "step": 40
    },
    {
      "epoch": 9.0,
-     "grad_norm": 0.287109375,
+     "grad_norm": 0.3046875,
      "learning_rate": 6.030737921409169e-06,
-     "loss": 1.5434,
+     "loss": 1.5524,
      "step": 45
    },
    {
      "epoch": 9.0,
-     "eval_loss": 2.5688018798828125,
-     "eval_runtime": 0.5464,
-     "eval_samples_per_second": 18.302,
-     "eval_steps_per_second": 1.83,
+     "eval_loss": 2.5882315635681152,
+     "eval_runtime": 0.5445,
+     "eval_samples_per_second": 18.366,
+     "eval_steps_per_second": 1.837,
      "step": 45
    },
    {
      "epoch": 10.0,
-     "grad_norm": 0.310546875,
+     "grad_norm": 0.322265625,
      "learning_rate": 0.0,
-     "loss": 1.5362,
+     "loss": 1.5443,
      "step": 50
    },
    {
      "epoch": 10.0,
-     "eval_loss": 2.5693273544311523,
-     "eval_runtime": 0.5446,
-     "eval_samples_per_second": 18.363,
-     "eval_steps_per_second": 1.836,
+     "eval_loss": 2.587819814682007,
+     "eval_runtime": 0.5585,
+     "eval_samples_per_second": 17.905,
+     "eval_steps_per_second": 1.79,
      "step": 50
    },
    {
      "epoch": 10.0,
      "step": 50,
      "total_flos": 2.9368666998964224e+16,
-     "train_loss": 1.9250271797180176,
-     "train_runtime": 273.2217,
-     "train_samples_per_second": 8.052,
-     "train_steps_per_second": 0.183
+     "train_loss": 1.9452767181396484,
+     "train_runtime": 274.5975,
+     "train_samples_per_second": 8.012,
+     "train_steps_per_second": 0.182
    }
  ],
  "logging_steps": 5,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:538f26b6c9e5799ffada9b6a4eab53120a62cc6a5ea7b377dec56a152b4d1ee2
+ oid sha256:d7baabb3fdff536acdfbe9b9b772fd100476be129edda2a86728e6b8b73db04c
  size 5304
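
`training_args.bin` is a pickled `TrainingArguments` object, so the diff can only show its LFS hash. A sketch for inspecting it locally after download; note that recent PyTorch versions need `weights_only=False` for arbitrary pickled objects, and you should only unpickle files you trust:

```python
# Sketch: inspect the serialized TrainingArguments after downloading training_args.bin.
# weights_only=False is required on recent torch; only load files you trust.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
```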