juraj-juraj committed on
Commit
27bf939
1 Parent(s): 35b9167

End of training

Browse files
README.md CHANGED
@@ -1,11 +1,27 @@
1
  ---
 
 
2
  license: bsd-3-clause
3
  base_model: Salesforce/codet5p-220m
4
  tags:
5
  - generated_from_trainer
 
 
 
 
6
  model-index:
7
  - name: t5_codet5p_220m_tuned
8
- results: []
 
 
 
 
 
 
 
 
 
 
9
  ---
10
 
11
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -13,7 +29,11 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # t5_codet5p_220m_tuned
15
 
16
- This model is a fine-tuned version of [Salesforce/codet5p-220m](https://huggingface.co/Salesforce/codet5p-220m) on an unknown dataset.
 
 
 
 
17
 
18
  ## Model description
19
 
 
1
  ---
2
+ language:
3
+ - en
4
  license: bsd-3-clause
5
  base_model: Salesforce/codet5p-220m
6
  tags:
7
  - generated_from_trainer
8
+ datasets:
9
+ - juraj-juraj/python-docstring-human-gpt-generated-mix
10
+ metrics:
11
+ - bleu
12
  model-index:
13
  - name: t5_codet5p_220m_tuned
14
+ results:
15
+ - task:
16
+ name: Translation
17
+ type: translation
18
+ dataset:
19
+ name: juraj-juraj/python-docstring-human-gpt-generated-mix
20
+ type: juraj-juraj/python-docstring-human-gpt-generated-mix
21
+ metrics:
22
+ - name: Bleu
23
+ type: bleu
24
+ value: 16.1093
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
29
 
30
  # t5_codet5p_220m_tuned
31
 
32
+ This model is a fine-tuned version of [Salesforce/codet5p-220m](https://huggingface.co/Salesforce/codet5p-220m) on the juraj-juraj/python-docstring-human-gpt-generated-mix dataset.
33
+ It achieves the following results on the evaluation set:
34
+ - Loss: 1.9219
35
+ - Bleu: 16.1093
36
+ - Gen Len: 78.177
37
 
38
  ## Model description
39
 
all_results.json CHANGED
@@ -1,22 +1,22 @@
1
  {
2
- "epoch": 2.0,
3
- "eval_bleu": 19.7909,
4
- "eval_gen_len": 54.518,
5
- "eval_loss": 1.6466972827911377,
6
- "eval_runtime": 437.9521,
7
- "eval_samples": 1000,
8
- "eval_samples_per_second": 2.283,
9
- "eval_steps_per_second": 0.571,
10
- "predict_bleu": 17.8409,
11
- "predict_gen_len": 62.072,
12
- "predict_loss": 1.6726146936416626,
13
- "predict_runtime": 483.4666,
14
- "predict_samples": 1000,
15
- "predict_samples_per_second": 2.068,
16
- "predict_steps_per_second": 0.517,
17
- "train_loss": 1.6027834024951335,
18
- "train_runtime": 7360.333,
19
- "train_samples": 27895,
20
- "train_samples_per_second": 7.58,
21
- "train_steps_per_second": 2.527
22
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "eval_bleu": 16.1093,
4
+ "eval_gen_len": 78.177,
5
+ "eval_loss": 1.9219088554382324,
6
+ "eval_runtime": 2713.4764,
7
+ "eval_samples": 7000,
8
+ "eval_samples_per_second": 2.58,
9
+ "eval_steps_per_second": 0.43,
10
+ "predict_bleu": 15.6979,
11
+ "predict_gen_len": 76.0034,
12
+ "predict_loss": 1.9471611976623535,
13
+ "predict_runtime": 3162.5874,
14
+ "predict_samples": 7895,
15
+ "predict_samples_per_second": 2.496,
16
+ "predict_steps_per_second": 0.416,
17
+ "train_loss": 1.0640255470939504,
18
+ "train_runtime": 2426.9638,
19
+ "train_samples": 10000,
20
+ "train_samples_per_second": 16.481,
21
+ "train_steps_per_second": 5.495
22
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 2.0,
3
- "eval_bleu": 19.7909,
4
- "eval_gen_len": 54.518,
5
- "eval_loss": 1.6466972827911377,
6
- "eval_runtime": 437.9521,
7
- "eval_samples": 1000,
8
- "eval_samples_per_second": 2.283,
9
- "eval_steps_per_second": 0.571
10
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "eval_bleu": 16.1093,
4
+ "eval_gen_len": 78.177,
5
+ "eval_loss": 1.9219088554382324,
6
+ "eval_runtime": 2713.4764,
7
+ "eval_samples": 7000,
8
+ "eval_samples_per_second": 2.58,
9
+ "eval_steps_per_second": 0.43
10
  }
generation_config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "decoder_start_token_id": 0,
5
  "eos_token_id": 2,
 
1
  {
 
2
  "bos_token_id": 1,
3
  "decoder_start_token_id": 0,
4
  "eos_token_id": 2,
predict_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "predict_bleu": 17.8409,
3
- "predict_gen_len": 62.072,
4
- "predict_loss": 1.6726146936416626,
5
- "predict_runtime": 483.4666,
6
- "predict_samples": 1000,
7
- "predict_samples_per_second": 2.068,
8
- "predict_steps_per_second": 0.517
9
  }
 
1
  {
2
+ "predict_bleu": 15.6979,
3
+ "predict_gen_len": 76.0034,
4
+ "predict_loss": 1.9471611976623535,
5
+ "predict_runtime": 3162.5874,
6
+ "predict_samples": 7895,
7
+ "predict_samples_per_second": 2.496,
8
+ "predict_steps_per_second": 0.416
9
  }
runs/Mar27_17-47-30_abe32346ea06/events.out.tfevents.1711566880.abe32346ea06.4718.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed9a3ce996050fa0b7096ab5f66b99c0783b592f79398500f0a55e12417c1453
3
+ size 458
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.0,
3
- "train_loss": 1.6027834024951335,
4
- "train_runtime": 7360.333,
5
- "train_samples": 27895,
6
- "train_samples_per_second": 7.58,
7
- "train_steps_per_second": 2.527
8
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "train_loss": 1.0640255470939504,
4
+ "train_runtime": 2426.9638,
5
+ "train_samples": 10000,
6
+ "train_samples_per_second": 16.481,
7
+ "train_steps_per_second": 5.495
8
  }
trainer_state.json CHANGED
@@ -1,250 +1,212 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 18598,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.05,
13
- "learning_rate": 9.731153887514787e-06,
14
- "loss": 1.8656,
 
15
  "step": 500
16
  },
17
  {
18
- "epoch": 0.11,
19
- "learning_rate": 9.462307775029573e-06,
20
- "loss": 1.8079,
 
21
  "step": 1000
22
  },
23
  {
24
- "epoch": 0.16,
25
- "learning_rate": 9.193461662544361e-06,
26
- "loss": 1.7757,
 
27
  "step": 1500
28
  },
29
  {
30
- "epoch": 0.22,
31
- "learning_rate": 8.924615550059147e-06,
32
- "loss": 1.7549,
 
33
  "step": 2000
34
  },
35
  {
36
- "epoch": 0.27,
37
- "learning_rate": 8.655769437573934e-06,
38
- "loss": 1.7183,
 
39
  "step": 2500
40
  },
41
  {
42
- "epoch": 0.32,
43
- "learning_rate": 8.38692332508872e-06,
44
- "loss": 1.7325,
 
45
  "step": 3000
46
  },
47
  {
48
- "epoch": 0.38,
49
- "learning_rate": 8.118077212603506e-06,
50
- "loss": 1.7002,
 
51
  "step": 3500
52
  },
53
  {
54
- "epoch": 0.43,
55
- "learning_rate": 7.849231100118292e-06,
56
- "loss": 1.7217,
 
57
  "step": 4000
58
  },
59
  {
60
- "epoch": 0.48,
61
- "learning_rate": 7.580384987633079e-06,
62
- "loss": 1.7195,
 
63
  "step": 4500
64
  },
65
  {
66
- "epoch": 0.54,
67
- "learning_rate": 7.311538875147866e-06,
68
- "loss": 1.6911,
 
69
  "step": 5000
70
  },
71
  {
72
- "epoch": 0.59,
73
- "learning_rate": 7.042692762662653e-06,
74
- "loss": 1.6482,
 
75
  "step": 5500
76
  },
77
  {
78
- "epoch": 0.65,
79
- "learning_rate": 6.773846650177439e-06,
80
- "loss": 1.6911,
 
81
  "step": 6000
82
  },
83
  {
84
- "epoch": 0.7,
85
- "learning_rate": 6.505000537692225e-06,
86
- "loss": 1.6361,
 
87
  "step": 6500
88
  },
89
  {
90
- "epoch": 0.75,
91
- "learning_rate": 6.236154425207011e-06,
92
- "loss": 1.6181,
 
93
  "step": 7000
94
  },
95
  {
96
- "epoch": 0.81,
97
- "learning_rate": 5.967308312721799e-06,
98
- "loss": 1.6496,
 
99
  "step": 7500
100
  },
101
  {
102
- "epoch": 0.86,
103
- "learning_rate": 5.6984622002365855e-06,
104
- "loss": 1.6331,
 
105
  "step": 8000
106
  },
107
  {
108
- "epoch": 0.91,
109
- "learning_rate": 5.429616087751372e-06,
110
- "loss": 1.6057,
 
111
  "step": 8500
112
  },
113
  {
114
- "epoch": 0.97,
115
- "learning_rate": 5.160769975266158e-06,
116
- "loss": 1.6225,
 
117
  "step": 9000
118
  },
119
  {
120
- "epoch": 1.02,
121
- "learning_rate": 4.891923862780945e-06,
122
- "loss": 1.5955,
 
123
  "step": 9500
124
  },
125
  {
126
- "epoch": 1.08,
127
- "learning_rate": 4.623077750295731e-06,
128
- "loss": 1.528,
 
129
  "step": 10000
130
  },
131
  {
132
- "epoch": 1.13,
133
- "learning_rate": 4.354231637810517e-06,
134
- "loss": 1.5348,
 
135
  "step": 10500
136
  },
137
  {
138
- "epoch": 1.18,
139
- "learning_rate": 4.085385525325304e-06,
140
- "loss": 1.5142,
 
141
  "step": 11000
142
  },
143
  {
144
- "epoch": 1.24,
145
- "learning_rate": 3.816539412840091e-06,
146
- "loss": 1.4924,
 
147
  "step": 11500
148
  },
149
  {
150
- "epoch": 1.29,
151
- "learning_rate": 3.547693300354877e-06,
152
- "loss": 1.5045,
 
153
  "step": 12000
154
  },
155
  {
156
- "epoch": 1.34,
157
- "learning_rate": 3.2788471878696636e-06,
158
- "loss": 1.4959,
 
159
  "step": 12500
160
  },
161
  {
162
- "epoch": 1.4,
163
- "learning_rate": 3.01000107538445e-06,
164
- "loss": 1.5248,
 
165
  "step": 13000
166
  },
167
  {
168
- "epoch": 1.45,
169
- "learning_rate": 2.741154962899237e-06,
170
- "loss": 1.4984,
171
- "step": 13500
172
- },
173
- {
174
- "epoch": 1.51,
175
- "learning_rate": 2.4723088504140235e-06,
176
- "loss": 1.5079,
177
- "step": 14000
178
- },
179
- {
180
- "epoch": 1.56,
181
- "learning_rate": 2.2034627379288097e-06,
182
- "loss": 1.5105,
183
- "step": 14500
184
- },
185
- {
186
- "epoch": 1.61,
187
- "learning_rate": 1.9346166254435964e-06,
188
- "loss": 1.5058,
189
- "step": 15000
190
- },
191
- {
192
- "epoch": 1.67,
193
- "learning_rate": 1.6657705129583828e-06,
194
- "loss": 1.4941,
195
- "step": 15500
196
- },
197
- {
198
- "epoch": 1.72,
199
- "learning_rate": 1.3969244004731695e-06,
200
- "loss": 1.5275,
201
- "step": 16000
202
- },
203
- {
204
- "epoch": 1.77,
205
- "learning_rate": 1.1280782879879559e-06,
206
- "loss": 1.5091,
207
- "step": 16500
208
- },
209
- {
210
- "epoch": 1.83,
211
- "learning_rate": 8.592321755027423e-07,
212
- "loss": 1.4995,
213
- "step": 17000
214
- },
215
- {
216
- "epoch": 1.88,
217
- "learning_rate": 5.903860630175289e-07,
218
- "loss": 1.5139,
219
- "step": 17500
220
- },
221
- {
222
- "epoch": 1.94,
223
- "learning_rate": 3.215399505323153e-07,
224
- "loss": 1.4951,
225
- "step": 18000
226
- },
227
- {
228
- "epoch": 1.99,
229
- "learning_rate": 5.2693838047101844e-08,
230
- "loss": 1.478,
231
- "step": 18500
232
- },
233
- {
234
- "epoch": 2.0,
235
- "step": 18598,
236
- "total_flos": 2.094047285686272e+16,
237
- "train_loss": 1.6027834024951335,
238
- "train_runtime": 7360.333,
239
- "train_samples_per_second": 7.58,
240
- "train_steps_per_second": 2.527
241
  }
242
  ],
243
  "logging_steps": 500,
244
- "max_steps": 18598,
245
- "num_train_epochs": 2,
 
246
  "save_steps": 4000,
247
- "total_flos": 2.094047285686272e+16,
 
248
  "trial_name": null,
249
  "trial_params": null
250
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
  "eval_steps": 500,
6
+ "global_step": 13336,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.15,
13
+ "grad_norm": 5.067293643951416,
14
+ "learning_rate": 9.625074985003e-06,
15
+ "loss": 1.3508,
16
  "step": 500
17
  },
18
  {
19
+ "epoch": 0.3,
20
+ "grad_norm": 6.026179313659668,
21
+ "learning_rate": 9.250149970005999e-06,
22
+ "loss": 1.3055,
23
  "step": 1000
24
  },
25
  {
26
+ "epoch": 0.45,
27
+ "grad_norm": 6.276480674743652,
28
+ "learning_rate": 8.875224955008999e-06,
29
+ "loss": 1.2235,
30
  "step": 1500
31
  },
32
  {
33
+ "epoch": 0.6,
34
+ "grad_norm": 5.172286510467529,
35
+ "learning_rate": 8.500299940011997e-06,
36
+ "loss": 1.2637,
37
  "step": 2000
38
  },
39
  {
40
+ "epoch": 0.75,
41
+ "grad_norm": 5.915102958679199,
42
+ "learning_rate": 8.125374925014997e-06,
43
+ "loss": 1.2232,
44
  "step": 2500
45
  },
46
  {
47
+ "epoch": 0.9,
48
+ "grad_norm": 5.094091415405273,
49
+ "learning_rate": 7.750449910017997e-06,
50
+ "loss": 1.1814,
51
  "step": 3000
52
  },
53
  {
54
+ "epoch": 1.05,
55
+ "grad_norm": 4.9657793045043945,
56
+ "learning_rate": 7.375524895020996e-06,
57
+ "loss": 1.1449,
58
  "step": 3500
59
  },
60
  {
61
+ "epoch": 1.2,
62
+ "grad_norm": 5.007996082305908,
63
+ "learning_rate": 7.000599880023996e-06,
64
+ "loss": 1.0923,
65
  "step": 4000
66
  },
67
  {
68
+ "epoch": 1.35,
69
+ "grad_norm": 5.494340896606445,
70
+ "learning_rate": 6.6256748650269955e-06,
71
+ "loss": 1.0908,
72
  "step": 4500
73
  },
74
  {
75
+ "epoch": 1.5,
76
+ "grad_norm": 4.9410271644592285,
77
+ "learning_rate": 6.250749850029995e-06,
78
+ "loss": 1.0804,
79
  "step": 5000
80
  },
81
  {
82
+ "epoch": 1.65,
83
+ "grad_norm": 5.13407564163208,
84
+ "learning_rate": 5.875824835032994e-06,
85
+ "loss": 1.0556,
86
  "step": 5500
87
  },
88
  {
89
+ "epoch": 1.8,
90
+ "grad_norm": 4.397137641906738,
91
+ "learning_rate": 5.500899820035993e-06,
92
+ "loss": 1.0886,
93
  "step": 6000
94
  },
95
  {
96
+ "epoch": 1.95,
97
+ "grad_norm": 5.924018383026123,
98
+ "learning_rate": 5.125974805038992e-06,
99
+ "loss": 1.0694,
100
  "step": 6500
101
  },
102
  {
103
+ "epoch": 2.1,
104
+ "grad_norm": 3.952533006668091,
105
+ "learning_rate": 4.751049790041992e-06,
106
+ "loss": 1.0158,
107
  "step": 7000
108
  },
109
  {
110
+ "epoch": 2.25,
111
+ "grad_norm": 6.275745868682861,
112
+ "learning_rate": 4.376124775044991e-06,
113
+ "loss": 1.0082,
114
  "step": 7500
115
  },
116
  {
117
+ "epoch": 2.4,
118
+ "grad_norm": 7.6413116455078125,
119
+ "learning_rate": 4.001199760047991e-06,
120
+ "loss": 0.9991,
121
  "step": 8000
122
  },
123
  {
124
+ "epoch": 2.55,
125
+ "grad_norm": 5.2266387939453125,
126
+ "learning_rate": 3.6262747450509898e-06,
127
+ "loss": 0.976,
128
  "step": 8500
129
  },
130
  {
131
+ "epoch": 2.7,
132
+ "grad_norm": 4.824028968811035,
133
+ "learning_rate": 3.2513497300539893e-06,
134
+ "loss": 0.9826,
135
  "step": 9000
136
  },
137
  {
138
+ "epoch": 2.85,
139
+ "grad_norm": 7.193837642669678,
140
+ "learning_rate": 2.876424715056989e-06,
141
+ "loss": 0.9731,
142
  "step": 9500
143
  },
144
  {
145
+ "epoch": 3.0,
146
+ "grad_norm": 6.571595191955566,
147
+ "learning_rate": 2.5014997000599884e-06,
148
+ "loss": 1.0054,
149
  "step": 10000
150
  },
151
  {
152
+ "epoch": 3.15,
153
+ "grad_norm": 4.61974573135376,
154
+ "learning_rate": 2.1265746850629876e-06,
155
+ "loss": 0.9583,
156
  "step": 10500
157
  },
158
  {
159
+ "epoch": 3.3,
160
+ "grad_norm": 5.337657451629639,
161
+ "learning_rate": 1.751649670065987e-06,
162
+ "loss": 0.9296,
163
  "step": 11000
164
  },
165
  {
166
+ "epoch": 3.45,
167
+ "grad_norm": 5.575818061828613,
168
+ "learning_rate": 1.3767246550689864e-06,
169
+ "loss": 0.9576,
170
  "step": 11500
171
  },
172
  {
173
+ "epoch": 3.6,
174
+ "grad_norm": 6.3436431884765625,
175
+ "learning_rate": 1.0017996400719856e-06,
176
+ "loss": 0.9241,
177
  "step": 12000
178
  },
179
  {
180
+ "epoch": 3.75,
181
+ "grad_norm": 3.276711940765381,
182
+ "learning_rate": 6.26874625074985e-07,
183
+ "loss": 0.9307,
184
  "step": 12500
185
  },
186
  {
187
+ "epoch": 3.9,
188
+ "grad_norm": 5.67604923248291,
189
+ "learning_rate": 2.519496100779844e-07,
190
+ "loss": 0.9255,
191
  "step": 13000
192
  },
193
  {
194
+ "epoch": 4.0,
195
+ "step": 13336,
196
+ "total_flos": 1.66894187000832e+16,
197
+ "train_loss": 1.0640255470939504,
198
+ "train_runtime": 2426.9638,
199
+ "train_samples_per_second": 16.481,
200
+ "train_steps_per_second": 5.495
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  }
202
  ],
203
  "logging_steps": 500,
204
+ "max_steps": 13336,
205
+ "num_input_tokens_seen": 0,
206
+ "num_train_epochs": 4,
207
  "save_steps": 4000,
208
+ "total_flos": 1.66894187000832e+16,
209
+ "train_batch_size": 3,
210
  "trial_name": null,
211
  "trial_params": null
212
  }