kekunh committed on
Commit
bba747b
1 Parent(s): 4a8ff78

End of training

Browse files
README.md CHANGED
@@ -14,8 +14,6 @@ should probably proofread and complete it, then remove this comment. -->
14
  # fine_tuned_llama2_7b
15
 
16
  This model is a fine-tuned version of [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) on an unknown dataset.
17
- It achieves the following results on the evaluation set:
18
- - Loss: 2.3496
19
 
20
  ## Model description
21
 
@@ -38,8 +36,8 @@ The following hyperparameters were used during training:
38
  - train_batch_size: 1
39
  - eval_batch_size: 8
40
  - seed: 42
41
- - gradient_accumulation_steps: 4
42
- - total_train_batch_size: 4
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: cosine
45
  - lr_scheduler_warmup_ratio: 0.1
 
14
  # fine_tuned_llama2_7b
15
 
16
  This model is a fine-tuned version of [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) on an unknown dataset.
 
 
17
 
18
  ## Model description
19
 
 
36
  - train_batch_size: 1
37
  - eval_batch_size: 8
38
  - seed: 42
39
+ - gradient_accumulation_steps: 8
40
+ - total_train_batch_size: 8
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: cosine
43
  - lr_scheduler_warmup_ratio: 0.1
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9033711981d03c882b5d170b9aa711c76812c0b76771ee81cd0335be4a220bb
3
  size 639691872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:234f19e4eaf40da80182f856e7da52635d1e006d160f1eb4d153da9ba6891f03
3
  size 639691872
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "total_flos": 1.1131515504795648e+16,
4
- "train_loss": 2.3624265621191913,
5
- "train_runtime": 6766.2575,
6
- "train_samples_per_second": 1.706,
7
- "train_steps_per_second": 0.426
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "total_flos": 2.5585123528237056e+16,
4
+ "train_loss": 0.9235503957773034,
5
+ "train_runtime": 6177.2242,
6
+ "train_samples_per_second": 1.707,
7
+ "train_steps_per_second": 0.213
8
  }
runs/Apr01_01-06-45_83eaa54fbb08/events.out.tfevents.1711933606.83eaa54fbb08.5375.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c7f32ffedb0aaf052bb9a7be180e0d00bec3881dd1c72ace8dd09681a6c9b99
3
- size 7165
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a24416d7c619131464dad8de9f25e2e293f17a4406e36b610de7766635f3e34
3
+ size 8152
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "total_flos": 1.1131515504795648e+16,
4
- "train_loss": 2.3624265621191913,
5
- "train_runtime": 6766.2575,
6
- "train_samples_per_second": 1.706,
7
- "train_steps_per_second": 0.426
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "total_flos": 2.5585123528237056e+16,
4
+ "train_loss": 0.9235503957773034,
5
+ "train_runtime": 6177.2242,
6
+ "train_samples_per_second": 1.707,
7
+ "train_steps_per_second": 0.213
8
  }
trainer_state.json CHANGED
@@ -1,225 +1,120 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9997401022264576,
5
  "eval_steps": 500,
6
- "global_step": 2885,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.03,
13
- "grad_norm": 22.53300666809082,
14
- "learning_rate": 6.71280276816609e-06,
15
- "loss": 3.1491,
16
  "step": 100
17
  },
18
  {
19
- "epoch": 0.07,
20
- "grad_norm": 36.242984771728516,
21
- "learning_rate": 1.356401384083045e-05,
22
- "loss": 2.6131,
23
  "step": 200
24
  },
25
  {
26
- "epoch": 0.1,
27
- "grad_norm": 25.216327667236328,
28
- "learning_rate": 1.999973639055537e-05,
29
- "loss": 2.5726,
30
  "step": 300
31
  },
32
  {
33
- "epoch": 0.14,
34
- "grad_norm": 14.273802757263184,
35
- "learning_rate": 1.9917836961775225e-05,
36
- "loss": 2.4989,
37
  "step": 400
38
  },
39
  {
40
- "epoch": 0.17,
41
- "grad_norm": 27.216812133789062,
42
- "learning_rate": 1.969086765436979e-05,
43
- "loss": 2.5906,
44
  "step": 500
45
  },
46
  {
47
- "epoch": 0.21,
48
- "grad_norm": 18.74100112915039,
49
- "learning_rate": 1.9322148386785378e-05,
50
- "loss": 2.4275,
51
  "step": 600
52
  },
53
  {
54
- "epoch": 0.24,
55
- "grad_norm": 20.627084732055664,
56
- "learning_rate": 1.8817072478109763e-05,
57
- "loss": 2.5103,
58
  "step": 700
59
  },
60
  {
61
- "epoch": 0.28,
62
- "grad_norm": 15.611855506896973,
63
- "learning_rate": 1.818302775908169e-05,
64
- "loss": 2.3706,
65
  "step": 800
66
  },
67
  {
68
- "epoch": 0.31,
69
- "grad_norm": 25.303524017333984,
70
- "learning_rate": 1.7429288509041197e-05,
71
- "loss": 2.3601,
72
  "step": 900
73
  },
74
  {
75
- "epoch": 0.35,
76
- "grad_norm": 20.18657875061035,
77
- "learning_rate": 1.6566879799477148e-05,
78
- "loss": 2.5054,
79
  "step": 1000
80
  },
81
  {
82
- "epoch": 0.38,
83
- "grad_norm": 17.65004539489746,
84
- "learning_rate": 1.560841622844192e-05,
85
- "loss": 2.3717,
86
  "step": 1100
87
  },
88
  {
89
- "epoch": 0.42,
90
- "grad_norm": 19.5482177734375,
91
- "learning_rate": 1.4578679381126853e-05,
92
- "loss": 2.3772,
93
  "step": 1200
94
  },
95
  {
96
- "epoch": 0.45,
97
- "grad_norm": 14.92688274383545,
98
- "learning_rate": 1.3471954275891059e-05,
99
- "loss": 2.2991,
100
  "step": 1300
101
  },
102
- {
103
- "epoch": 0.49,
104
- "grad_norm": 10.425432205200195,
105
- "learning_rate": 1.2314444308256605e-05,
106
- "loss": 2.2865,
107
- "step": 1400
108
- },
109
- {
110
- "epoch": 0.52,
111
- "grad_norm": 16.403301239013672,
112
- "learning_rate": 1.1123080572287608e-05,
113
- "loss": 2.2595,
114
- "step": 1500
115
- },
116
- {
117
- "epoch": 0.55,
118
- "grad_norm": 11.935959815979004,
119
- "learning_rate": 9.915289346843219e-06,
120
- "loss": 2.3662,
121
- "step": 1600
122
- },
123
- {
124
- "epoch": 0.59,
125
- "grad_norm": 18.410987854003906,
126
- "learning_rate": 8.708737198449509e-06,
127
- "loss": 2.2021,
128
- "step": 1700
129
- },
130
- {
131
- "epoch": 0.62,
132
- "grad_norm": 15.293601036071777,
133
- "learning_rate": 7.521072569442963e-06,
134
- "loss": 2.2545,
135
- "step": 1800
136
- },
137
- {
138
- "epoch": 0.66,
139
- "grad_norm": 16.34610939025879,
140
- "learning_rate": 6.369667631219584e-06,
141
- "loss": 2.3199,
142
- "step": 1900
143
- },
144
- {
145
- "epoch": 0.69,
146
- "grad_norm": 15.948208808898926,
147
- "learning_rate": 5.2713641785457504e-06,
148
- "loss": 2.2029,
149
- "step": 2000
150
- },
151
- {
152
- "epoch": 0.73,
153
- "grad_norm": 27.17706298828125,
154
- "learning_rate": 4.242227281777747e-06,
155
- "loss": 2.2861,
156
- "step": 2100
157
- },
158
- {
159
- "epoch": 0.76,
160
- "grad_norm": 19.407489776611328,
161
- "learning_rate": 3.297310300360622e-06,
162
- "loss": 2.2157,
163
- "step": 2200
164
- },
165
- {
166
- "epoch": 0.8,
167
- "grad_norm": 11.622710227966309,
168
- "learning_rate": 2.450434694793621e-06,
169
- "loss": 2.2724,
170
- "step": 2300
171
- },
172
- {
173
- "epoch": 0.83,
174
- "grad_norm": 16.701732635498047,
175
- "learning_rate": 1.7139878577898772e-06,
176
- "loss": 2.1622,
177
- "step": 2400
178
- },
179
- {
180
- "epoch": 0.87,
181
- "grad_norm": 10.720149040222168,
182
- "learning_rate": 1.0987419217881333e-06,
183
- "loss": 2.2026,
184
- "step": 2500
185
- },
186
- {
187
- "epoch": 0.9,
188
- "grad_norm": 14.398381233215332,
189
- "learning_rate": 6.136961931496943e-07,
190
- "loss": 2.2619,
191
- "step": 2600
192
- },
193
- {
194
- "epoch": 0.94,
195
- "grad_norm": 16.95086669921875,
196
- "learning_rate": 2.6594551778223896e-07,
197
- "loss": 2.2626,
198
- "step": 2700
199
- },
200
- {
201
- "epoch": 0.97,
202
- "grad_norm": 12.132495880126953,
203
- "learning_rate": 6.057650362879753e-08,
204
- "loss": 2.1139,
205
- "step": 2800
206
- },
207
  {
208
  "epoch": 1.0,
209
- "step": 2885,
210
- "total_flos": 1.1131515504795648e+16,
211
- "train_loss": 2.3624265621191913,
212
- "train_runtime": 6766.2575,
213
- "train_samples_per_second": 1.706,
214
- "train_steps_per_second": 0.426
215
  }
216
  ],
217
  "logging_steps": 100,
218
- "max_steps": 2885,
219
  "num_input_tokens_seen": 0,
220
  "num_train_epochs": 1,
221
  "save_steps": 500,
222
- "total_flos": 1.1131515504795648e+16,
223
  "train_batch_size": 1,
224
  "trial_name": null,
225
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9993360523570142,
5
  "eval_steps": 500,
6
+ "global_step": 1317,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.08,
13
+ "grad_norm": 2.6666550636291504,
14
+ "learning_rate": 1.5151515151515153e-05,
15
+ "loss": 1.75,
16
  "step": 100
17
  },
18
  {
19
+ "epoch": 0.15,
20
+ "grad_norm": 1.9126778841018677,
21
+ "learning_rate": 1.983794055463009e-05,
22
+ "loss": 0.9131,
23
  "step": 200
24
  },
25
  {
26
+ "epoch": 0.23,
27
+ "grad_norm": 2.010279655456543,
28
+ "learning_rate": 1.90244256701717e-05,
29
+ "loss": 0.921,
30
  "step": 300
31
  },
32
  {
33
+ "epoch": 0.3,
34
+ "grad_norm": 1.661656141281128,
35
+ "learning_rate": 1.7580334804873595e-05,
36
+ "loss": 0.8664,
37
  "step": 400
38
  },
39
  {
40
+ "epoch": 0.38,
41
+ "grad_norm": 1.8409279584884644,
42
+ "learning_rate": 1.5606572885773613e-05,
43
+ "loss": 0.8562,
44
  "step": 500
45
  },
46
  {
47
+ "epoch": 0.46,
48
+ "grad_norm": 1.8635607957839966,
49
+ "learning_rate": 1.324105526655396e-05,
50
+ "loss": 0.8561,
51
  "step": 600
52
  },
53
  {
54
+ "epoch": 0.53,
55
+ "grad_norm": 1.4191073179244995,
56
+ "learning_rate": 1.0649070980273363e-05,
57
+ "loss": 0.8621,
58
  "step": 700
59
  },
60
  {
61
+ "epoch": 0.61,
62
+ "grad_norm": 1.4062063694000244,
63
+ "learning_rate": 8.011733273733208e-06,
64
+ "loss": 0.8523,
65
  "step": 800
66
  },
67
  {
68
+ "epoch": 0.68,
69
+ "grad_norm": 1.7386995553970337,
70
+ "learning_rate": 5.51332443501349e-06,
71
+ "loss": 0.8277,
72
  "step": 900
73
  },
74
  {
75
+ "epoch": 0.76,
76
+ "grad_norm": 1.6414889097213745,
77
+ "learning_rate": 3.3284191862731585e-06,
78
+ "loss": 0.8406,
79
  "step": 1000
80
  },
81
  {
82
+ "epoch": 0.83,
83
+ "grad_norm": 1.6184569597244263,
84
+ "learning_rate": 1.6096863865200606e-06,
85
+ "loss": 0.8189,
86
  "step": 1100
87
  },
88
  {
89
+ "epoch": 0.91,
90
+ "grad_norm": 1.6727625131607056,
91
+ "learning_rate": 4.772213925798331e-07,
92
+ "loss": 0.837,
93
  "step": 1200
94
  },
95
  {
96
+ "epoch": 0.99,
97
+ "grad_norm": 1.7798255681991577,
98
+ "learning_rate": 1.0154472728808318e-08,
99
+ "loss": 0.8195,
100
  "step": 1300
101
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  {
103
  "epoch": 1.0,
104
+ "step": 1317,
105
+ "total_flos": 2.5585123528237056e+16,
106
+ "train_loss": 0.9235503957773034,
107
+ "train_runtime": 6177.2242,
108
+ "train_samples_per_second": 1.707,
109
+ "train_steps_per_second": 0.213
110
  }
111
  ],
112
  "logging_steps": 100,
113
+ "max_steps": 1317,
114
  "num_input_tokens_seen": 0,
115
  "num_train_epochs": 1,
116
  "save_steps": 500,
117
+ "total_flos": 2.5585123528237056e+16,
118
  "train_batch_size": 1,
119
  "trial_name": null,
120
  "trial_params": null