tiagoblima committed
Commit 2c96f9f
1 Parent(s): 5769ed6

End of training

Files changed (5)
  1. README.md +5 -1
  2. all_results.json +9 -9
  3. eval_results.json +5 -5
  4. train_results.json +5 -5
  5. trainer_state.json +92 -44
README.md CHANGED
@@ -3,6 +3,8 @@ license: mit
 base_model: unicamp-dl/ptt5-small-t5-vocab
 tags:
 - generated_from_trainer
+datasets:
+- tiagoblima/qg_squad_v1_pt
 model-index:
 - name: t5_small-qg-aap
   results: []
@@ -13,7 +15,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # t5_small-qg-aap
 
-This model is a fine-tuned version of [unicamp-dl/ptt5-small-t5-vocab](https://huggingface.co/unicamp-dl/ptt5-small-t5-vocab) on an unknown dataset.
+This model is a fine-tuned version of [unicamp-dl/ptt5-small-t5-vocab](https://huggingface.co/unicamp-dl/ptt5-small-t5-vocab) on the tiagoblima/qg_squad_v1_pt dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.4213
 
 ## Model description
 
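
The updated model card still lacks a usage snippet. A minimal sketch follows, assuming the checkpoint is published as tiagoblima/t5_small-qg-aap (committer namespace plus the model-index name above); the input format expected by this question-generation fine-tune is not documented in this commit, so the example text and decoding settings are purely illustrative.

# Sketch only: repo id and prompt format are assumptions, not documented in this commit.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_id = "tiagoblima/t5_small-qg-aap"  # assumed repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Hypothetical input: a Portuguese passage from which to generate a question.
text = "O Brasil é o maior país da América do Sul."
inputs = tokenizer(text, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64, num_beams=4)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

If the actual repo id or prompt template differs, only those two strings need to change.
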
all_results.json CHANGED
@@ -1,13 +1,13 @@
 {
-    "epoch": 10.0,
-    "eval_loss": 1.4470528364181519,
-    "eval_runtime": 77.4953,
+    "epoch": 15.0,
+    "eval_loss": 1.4212826490402222,
+    "eval_runtime": 78.3846,
     "eval_samples": 8869,
-    "eval_samples_per_second": 114.446,
-    "eval_steps_per_second": 14.311,
-    "train_loss": 1.2936539687732658,
-    "train_runtime": 11629.1597,
+    "eval_samples_per_second": 113.147,
+    "eval_steps_per_second": 14.148,
+    "train_loss": 1.1541598279090604,
+    "train_runtime": 16985.3317,
     "train_samples": 51704,
-    "train_samples_per_second": 44.461,
-    "train_steps_per_second": 0.695
+    "train_samples_per_second": 45.661,
+    "train_steps_per_second": 0.714
 }
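
The per-second throughput fields in all_results.json are derived quantities; a quick arithmetic check against the raw counts and runtimes in this commit reproduces them (max_steps = 12120 is taken from trainer_state.json further down):

# Reproduce the derived throughput metrics from the new all_results.json values.
eval_samples, eval_runtime = 8869, 78.3846
train_samples, train_runtime = 51704, 16985.3317
epochs, max_steps = 15, 12120

print(eval_samples / eval_runtime)             # ~113.15 -> "eval_samples_per_second": 113.147
print(train_samples * epochs / train_runtime)  # ~45.66  -> "train_samples_per_second": 45.661
print(max_steps / train_runtime)               # ~0.714  -> "train_steps_per_second": 0.714
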
eval_results.json CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch": 10.0,
-    "eval_loss": 1.4470528364181519,
-    "eval_runtime": 77.4953,
+    "epoch": 15.0,
+    "eval_loss": 1.4212826490402222,
+    "eval_runtime": 78.3846,
     "eval_samples": 8869,
-    "eval_samples_per_second": 114.446,
-    "eval_steps_per_second": 14.311
+    "eval_samples_per_second": 113.147,
+    "eval_steps_per_second": 14.148
 }
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch": 10.0,
-    "train_loss": 1.2936539687732658,
-    "train_runtime": 11629.1597,
+    "epoch": 15.0,
+    "train_loss": 1.1541598279090604,
+    "train_runtime": 16985.3317,
     "train_samples": 51704,
-    "train_samples_per_second": 44.461,
-    "train_steps_per_second": 0.695
+    "train_samples_per_second": 45.661,
+    "train_steps_per_second": 0.714
 }
trainer_state.json CHANGED
@@ -1,124 +1,172 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 10.0,
+  "epoch": 15.0,
   "eval_steps": 500,
-  "global_step": 8080,
+  "global_step": 12120,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.62,
-      "learning_rate": 9.381188118811881e-05,
-      "loss": 1.7918,
+      "learning_rate": 9.690594059405941e-05,
+      "loss": 1.7898,
       "step": 500
     },
     {
       "epoch": 1.24,
-      "learning_rate": 8.762376237623763e-05,
-      "loss": 1.4547,
+      "learning_rate": 9.174917491749175e-05,
+      "loss": 1.4524,
       "step": 1000
     },
     {
       "epoch": 1.86,
-      "learning_rate": 8.143564356435644e-05,
-      "loss": 1.3849,
+      "learning_rate": 8.762376237623763e-05,
+      "loss": 1.3819,
       "step": 1500
     },
     {
       "epoch": 2.48,
-      "learning_rate": 7.524752475247526e-05,
-      "loss": 1.3378,
+      "learning_rate": 8.34983498349835e-05,
+      "loss": 1.3341,
       "step": 2000
     },
     {
       "epoch": 3.09,
-      "learning_rate": 6.905940594059406e-05,
-      "loss": 1.3125,
+      "learning_rate": 7.937293729372938e-05,
+      "loss": 1.3073,
       "step": 2500
     },
     {
       "epoch": 3.71,
-      "learning_rate": 6.287128712871287e-05,
-      "loss": 1.2841,
+      "learning_rate": 7.524752475247526e-05,
+      "loss": 1.2771,
       "step": 3000
     },
     {
       "epoch": 4.33,
-      "learning_rate": 5.668316831683168e-05,
-      "loss": 1.2655,
+      "learning_rate": 7.112211221122112e-05,
+      "loss": 1.2567,
       "step": 3500
     },
     {
       "epoch": 4.95,
-      "learning_rate": 5.0495049504950497e-05,
-      "loss": 1.2478,
+      "learning_rate": 6.6996699669967e-05,
+      "loss": 1.2378,
       "step": 4000
     },
     {
       "epoch": 5.57,
-      "learning_rate": 4.430693069306931e-05,
-      "loss": 1.2333,
+      "learning_rate": 6.287128712871287e-05,
+      "loss": 1.2201,
       "step": 4500
     },
     {
       "epoch": 6.19,
-      "learning_rate": 3.811881188118812e-05,
-      "loss": 1.2248,
+      "learning_rate": 5.874587458745875e-05,
+      "loss": 1.2095,
       "step": 5000
     },
     {
       "epoch": 6.81,
-      "learning_rate": 3.1930693069306936e-05,
-      "loss": 1.2144,
+      "learning_rate": 5.462046204620462e-05,
+      "loss": 1.1963,
       "step": 5500
     },
     {
       "epoch": 7.43,
-      "learning_rate": 2.5742574257425746e-05,
-      "loss": 1.1982,
+      "learning_rate": 5.0495049504950497e-05,
+      "loss": 1.1768,
       "step": 6000
     },
     {
       "epoch": 8.04,
-      "learning_rate": 1.9554455445544556e-05,
-      "loss": 1.2003,
+      "learning_rate": 4.636963696369637e-05,
+      "loss": 1.1763,
       "step": 6500
     },
     {
       "epoch": 8.66,
-      "learning_rate": 1.3366336633663367e-05,
-      "loss": 1.19,
+      "learning_rate": 4.224422442244225e-05,
+      "loss": 1.1606,
       "step": 7000
     },
     {
       "epoch": 9.28,
-      "learning_rate": 7.178217821782178e-06,
-      "loss": 1.1888,
+      "learning_rate": 3.811881188118812e-05,
+      "loss": 1.156,
       "step": 7500
     },
     {
       "epoch": 9.9,
-      "learning_rate": 9.900990099009902e-07,
-      "loss": 1.1881,
+      "learning_rate": 3.3993399339933996e-05,
+      "loss": 1.1516,
       "step": 8000
     },
     {
-      "epoch": 10.0,
-      "step": 8080,
-      "total_flos": 5.248284377481216e+16,
-      "train_loss": 1.2936539687732658,
-      "train_runtime": 11629.1597,
-      "train_samples_per_second": 44.461,
-      "train_steps_per_second": 0.695
+      "epoch": 10.52,
+      "learning_rate": 2.986798679867987e-05,
+      "loss": 1.1371,
+      "step": 8500
+    },
+    {
+      "epoch": 11.14,
+      "learning_rate": 2.5742574257425746e-05,
+      "loss": 1.1417,
+      "step": 9000
+    },
+    {
+      "epoch": 11.76,
+      "learning_rate": 2.161716171617162e-05,
+      "loss": 1.1285,
+      "step": 9500
+    },
+    {
+      "epoch": 12.38,
+      "learning_rate": 1.7491749174917492e-05,
+      "loss": 1.1302,
+      "step": 10000
+    },
+    {
+      "epoch": 13.0,
+      "learning_rate": 1.3366336633663367e-05,
+      "loss": 1.1223,
+      "step": 10500
+    },
+    {
+      "epoch": 13.61,
+      "learning_rate": 9.24092409240924e-06,
+      "loss": 1.1192,
+      "step": 11000
+    },
+    {
+      "epoch": 14.23,
+      "learning_rate": 5.115511551155116e-06,
+      "loss": 1.122,
+      "step": 11500
+    },
+    {
+      "epoch": 14.85,
+      "learning_rate": 9.900990099009902e-07,
+      "loss": 1.1116,
+      "step": 12000
+    },
+    {
+      "epoch": 15.0,
+      "step": 12120,
+      "total_flos": 7.872426566221824e+16,
+      "train_loss": 1.1541598279090604,
+      "train_runtime": 16985.3317,
+      "train_samples_per_second": 45.661,
+      "train_steps_per_second": 0.714
     }
   ],
   "logging_steps": 500,
-  "max_steps": 8080,
-  "num_train_epochs": 10,
+  "max_steps": 12120,
+  "num_train_epochs": 15,
   "save_steps": 500,
-  "total_flos": 5.248284377481216e+16,
+  "total_flos": 7.872426566221824e+16,
   "trial_name": null,
   "trial_params": null
 }
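
A closing consistency note on trainer_state.json: both runs log 808 optimizer steps per epoch (8080/10 before, 12120/15 after), and the learning-rate values from step 1000 onward match a linear decay to zero at max_steps from a base rate of 1e-4. The base rate and the implied effective batch size of about 64 are inferences from these numbers, not values stated anywhere in the commit; the sketch below only re-derives them.

# Consistency checks on the new trainer_state.json (values copied from the diff above).
max_steps, num_train_epochs = 12120, 15
train_samples = 51704  # from train_results.json

steps_per_epoch = max_steps / num_train_epochs
print(steps_per_epoch)                  # 808.0 (matches the old run: 8080 / 10)

# Implied effective batch size (an inference, not stated in the files):
print(train_samples / steps_per_epoch)  # ~63.99 -> roughly 64 samples per optimizer step

# Logged learning rates from step 1000 onward fit a linear decay to zero
# at max_steps with an assumed base rate of 1e-4:
base_lr = 1e-4
print(base_lr * (max_steps - 1000) / max_steps)   # ~9.175e-05, logged 9.174917e-05
print(base_lr * (max_steps - 12000) / max_steps)  # ~9.901e-07, logged 9.900990e-07
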