jpodivin committed on
Commit
a12f0d6
1 Parent(s): fa1a429

End of training

Browse files
Files changed (5) hide show
  1. README.md +14 -2
  2. all_results.json +14 -14
  3. eval_results.json +10 -10
  4. train_results.json +5 -5
  5. trainer_state.json +142 -12
README.md CHANGED
@@ -3,11 +3,23 @@ license: apache-2.0
3
  base_model: google-t5/t5-small
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - rouge
8
  model-index:
9
  - name: pep_summarization
10
- results: []
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,7 +27,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # pep_summarization
17
 
18
- This model is a fine-tuned version of [google-t5/t5-small](https://huggingface.co/google-t5/t5-small) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.1481
21
  - Rouge1: 72.899
 
3
  base_model: google-t5/t5-small
4
  tags:
5
  - generated_from_trainer
6
+ datasets:
7
+ - fedora-copr/pep-sum
8
  metrics:
9
  - rouge
10
  model-index:
11
  - name: pep_summarization
12
+ results:
13
+ - task:
14
+ name: Summarization
15
+ type: summarization
16
+ dataset:
17
+ name: fedora-copr/pep-sum
18
+ type: fedora-copr/pep-sum
19
+ metrics:
20
+ - name: Rouge1
21
+ type: rouge
22
+ value: 72.899
23
  ---
24
 
25
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
27
 
28
  # pep_summarization
29
 
30
+ This model is a fine-tuned version of [google-t5/t5-small](https://huggingface.co/google-t5/t5-small) on the fedora-copr/pep-sum dataset.
31
  It achieves the following results on the evaluation set:
32
  - Loss: 0.1481
33
  - Rouge1: 72.899
all_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_gen_len": 58.20289855072464,
4
- "eval_loss": 0.143265962600708,
5
- "eval_rouge1": 80.2131,
6
- "eval_rouge2": 78.9792,
7
- "eval_rougeL": 80.1239,
8
- "eval_rougeLsum": 80.028,
9
- "eval_runtime": 4.3293,
10
  "eval_samples": 69,
11
- "eval_samples_per_second": 15.938,
12
- "eval_steps_per_second": 4.158,
13
- "train_loss": 0.26120923913043476,
14
- "train_runtime": 13.4481,
15
  "train_samples": 276,
16
- "train_samples_per_second": 61.57,
17
- "train_steps_per_second": 15.392
18
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_gen_len": 63.768115942028984,
4
+ "eval_loss": 0.1480671912431717,
5
+ "eval_rouge1": 72.899,
6
+ "eval_rouge2": 70.9929,
7
+ "eval_rougeL": 72.3728,
8
+ "eval_rougeLsum": 72.4923,
9
+ "eval_runtime": 2.9382,
10
  "eval_samples": 69,
11
+ "eval_samples_per_second": 23.484,
12
+ "eval_steps_per_second": 3.063,
13
+ "train_loss": 0.2781148420061384,
14
+ "train_runtime": 69.51,
15
  "train_samples": 276,
16
+ "train_samples_per_second": 39.707,
17
+ "train_steps_per_second": 5.035
18
  }
eval_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_gen_len": 58.20289855072464,
4
- "eval_loss": 0.143265962600708,
5
- "eval_rouge1": 80.2131,
6
- "eval_rouge2": 78.9792,
7
- "eval_rougeL": 80.1239,
8
- "eval_rougeLsum": 80.028,
9
- "eval_runtime": 4.3293,
10
  "eval_samples": 69,
11
- "eval_samples_per_second": 15.938,
12
- "eval_steps_per_second": 4.158
13
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_gen_len": 63.768115942028984,
4
+ "eval_loss": 0.1480671912431717,
5
+ "eval_rouge1": 72.899,
6
+ "eval_rouge2": 70.9929,
7
+ "eval_rougeL": 72.3728,
8
+ "eval_rougeLsum": 72.4923,
9
+ "eval_runtime": 2.9382,
10
  "eval_samples": 69,
11
+ "eval_samples_per_second": 23.484,
12
+ "eval_steps_per_second": 3.063
13
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "train_loss": 0.26120923913043476,
4
- "train_runtime": 13.4481,
5
  "train_samples": 276,
6
- "train_samples_per_second": 61.57,
7
- "train_steps_per_second": 15.392
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.2781148420061384,
4
+ "train_runtime": 69.51,
5
  "train_samples": 276,
6
+ "train_samples_per_second": 39.707,
7
+ "train_steps_per_second": 5.035
8
  }
trainer_state.json CHANGED
@@ -1,30 +1,160 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 207,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {
12
  "epoch": 3.0,
13
- "step": 207,
14
- "total_flos": 224126023237632.0,
15
- "train_loss": 0.26120923913043476,
16
- "train_runtime": 13.4481,
17
- "train_samples_per_second": 61.57,
18
- "train_steps_per_second": 15.392
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ],
21
  "logging_steps": 500,
22
- "max_steps": 207,
23
  "num_input_tokens_seen": 0,
24
- "num_train_epochs": 3,
25
  "save_steps": 500,
26
- "total_flos": 224126023237632.0,
27
- "train_batch_size": 4,
28
  "trial_name": null,
29
  "trial_params": null
30
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_gen_len": 81.08695652173913,
14
+ "eval_loss": 0.4794468879699707,
15
+ "eval_rouge1": 53.7599,
16
+ "eval_rouge2": 44.6918,
17
+ "eval_rougeL": 49.173,
18
+ "eval_rougeLsum": 51.6067,
19
+ "eval_runtime": 3.2476,
20
+ "eval_samples_per_second": 21.247,
21
+ "eval_steps_per_second": 2.771,
22
+ "step": 35
23
+ },
24
+ {
25
+ "epoch": 2.0,
26
+ "eval_gen_len": 67.78260869565217,
27
+ "eval_loss": 0.29988083243370056,
28
+ "eval_rouge1": 61.8274,
29
+ "eval_rouge2": 55.9578,
30
+ "eval_rougeL": 59.2336,
31
+ "eval_rougeLsum": 60.4066,
32
+ "eval_runtime": 3.0007,
33
+ "eval_samples_per_second": 22.995,
34
+ "eval_steps_per_second": 2.999,
35
+ "step": 70
36
+ },
37
  {
38
  "epoch": 3.0,
39
+ "eval_gen_len": 63.0,
40
+ "eval_loss": 0.23501555621623993,
41
+ "eval_rouge1": 64.2495,
42
+ "eval_rouge2": 60.3342,
43
+ "eval_rougeL": 62.721,
44
+ "eval_rougeLsum": 63.73,
45
+ "eval_runtime": 2.821,
46
+ "eval_samples_per_second": 24.46,
47
+ "eval_steps_per_second": 3.19,
48
+ "step": 105
49
+ },
50
+ {
51
+ "epoch": 4.0,
52
+ "eval_gen_len": 64.23188405797102,
53
+ "eval_loss": 0.20167909562587738,
54
+ "eval_rouge1": 64.2686,
55
+ "eval_rouge2": 60.2656,
56
+ "eval_rougeL": 62.6414,
57
+ "eval_rougeLsum": 63.6299,
58
+ "eval_runtime": 2.8218,
59
+ "eval_samples_per_second": 24.452,
60
+ "eval_steps_per_second": 3.189,
61
+ "step": 140
62
+ },
63
+ {
64
+ "epoch": 5.0,
65
+ "eval_gen_len": 60.405797101449274,
66
+ "eval_loss": 0.1814391165971756,
67
+ "eval_rouge1": 71.4025,
68
+ "eval_rouge2": 68.7633,
69
+ "eval_rougeL": 70.366,
70
+ "eval_rougeLsum": 70.7136,
71
+ "eval_runtime": 2.7953,
72
+ "eval_samples_per_second": 24.684,
73
+ "eval_steps_per_second": 3.22,
74
+ "step": 175
75
+ },
76
+ {
77
+ "epoch": 6.0,
78
+ "eval_gen_len": 61.594202898550726,
79
+ "eval_loss": 0.1666056513786316,
80
+ "eval_rouge1": 70.4243,
81
+ "eval_rouge2": 67.771,
82
+ "eval_rougeL": 69.4202,
83
+ "eval_rougeLsum": 69.8005,
84
+ "eval_runtime": 2.7382,
85
+ "eval_samples_per_second": 25.199,
86
+ "eval_steps_per_second": 3.287,
87
+ "step": 210
88
+ },
89
+ {
90
+ "epoch": 7.0,
91
+ "eval_gen_len": 62.20289855072464,
92
+ "eval_loss": 0.15789227187633514,
93
+ "eval_rouge1": 71.1609,
94
+ "eval_rouge2": 68.4618,
95
+ "eval_rougeL": 70.0878,
96
+ "eval_rougeLsum": 70.5446,
97
+ "eval_runtime": 2.7285,
98
+ "eval_samples_per_second": 25.289,
99
+ "eval_steps_per_second": 3.299,
100
+ "step": 245
101
+ },
102
+ {
103
+ "epoch": 8.0,
104
+ "eval_gen_len": 64.30434782608695,
105
+ "eval_loss": 0.15203262865543365,
106
+ "eval_rouge1": 72.3025,
107
+ "eval_rouge2": 69.9693,
108
+ "eval_rougeL": 71.422,
109
+ "eval_rougeLsum": 71.7157,
110
+ "eval_runtime": 2.9693,
111
+ "eval_samples_per_second": 23.238,
112
+ "eval_steps_per_second": 3.031,
113
+ "step": 280
114
+ },
115
+ {
116
+ "epoch": 9.0,
117
+ "eval_gen_len": 63.768115942028984,
118
+ "eval_loss": 0.14907658100128174,
119
+ "eval_rouge1": 72.899,
120
+ "eval_rouge2": 70.9929,
121
+ "eval_rougeL": 72.3728,
122
+ "eval_rougeLsum": 72.4923,
123
+ "eval_runtime": 3.0097,
124
+ "eval_samples_per_second": 22.926,
125
+ "eval_steps_per_second": 2.99,
126
+ "step": 315
127
+ },
128
+ {
129
+ "epoch": 10.0,
130
+ "eval_gen_len": 63.768115942028984,
131
+ "eval_loss": 0.1480671912431717,
132
+ "eval_rouge1": 72.899,
133
+ "eval_rouge2": 70.9929,
134
+ "eval_rougeL": 72.3728,
135
+ "eval_rougeLsum": 72.4923,
136
+ "eval_runtime": 2.988,
137
+ "eval_samples_per_second": 23.092,
138
+ "eval_steps_per_second": 3.012,
139
+ "step": 350
140
+ },
141
+ {
142
+ "epoch": 10.0,
143
+ "step": 350,
144
+ "total_flos": 747086744125440.0,
145
+ "train_loss": 0.2781148420061384,
146
+ "train_runtime": 69.51,
147
+ "train_samples_per_second": 39.707,
148
+ "train_steps_per_second": 5.035
149
  }
150
  ],
151
  "logging_steps": 500,
152
+ "max_steps": 350,
153
  "num_input_tokens_seen": 0,
154
+ "num_train_epochs": 10,
155
  "save_steps": 500,
156
+ "total_flos": 747086744125440.0,
157
+ "train_batch_size": 8,
158
  "trial_name": null,
159
  "trial_params": null
160
  }