jpodivin committed
Commit 4747369
1 Parent(s): 46ea677

End of training

Files changed (5):
  1. README.md +14 -2
  2. all_results.json +13 -13
  3. eval_results.json +9 -9
  4. train_results.json +4 -4
  5. trainer_state.json +51 -51
README.md CHANGED
@@ -3,11 +3,23 @@ license: mit
 base_model: facebook/bart-large-cnn
 tags:
 - generated_from_trainer
+datasets:
+- fedora-copr/pep-sum
 metrics:
 - rouge
 model-index:
 - name: pep_summarization
-  results: []
+  results:
+  - task:
+      name: Summarization
+      type: summarization
+    dataset:
+      name: fedora-copr/pep-sum
+      type: fedora-copr/pep-sum
+    metrics:
+    - name: Rouge1
+      type: rouge
+      value: 75.3806
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,7 +27,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # pep_summarization
 
-This model is a fine-tuned version of [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn) on an unknown dataset.
+This model is a fine-tuned version of [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn) on the fedora-copr/pep-sum dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.1242
 - Rouge1: 75.3806
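The updated model card describes a summarization fine-tune of facebook/bart-large-cnn. As a rough illustration of how such a checkpoint is typically used, here is a minimal inference sketch with the transformers summarization pipeline; the model id `jpodivin/pep_summarization` is an assumption based on the committer and model name, not a path confirmed by this commit.

```python
# Minimal inference sketch; "jpodivin/pep_summarization" is an assumed repo id.
from transformers import pipeline

summarizer = pipeline("summarization", model="jpodivin/pep_summarization")

pep_text = "PEP 8 gives coding conventions for the Python code comprising the standard library ..."
result = summarizer(pep_text, max_length=128, min_length=16, do_sample=False)
print(result[0]["summary_text"])
```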
all_results.json CHANGED
@@ -1,18 +1,18 @@
 {
     "epoch": 5.0,
-    "eval_gen_len": 68.04347826086956,
-    "eval_loss": 0.9288389682769775,
-    "eval_rouge1": 39.7591,
-    "eval_rouge2": 24.1983,
-    "eval_rougeL": 32.2869,
-    "eval_rougeLsum": 36.1493,
-    "eval_runtime": 9.8095,
+    "eval_gen_len": 85.31884057971014,
+    "eval_loss": 0.12416736036539078,
+    "eval_rouge1": 75.3806,
+    "eval_rouge2": 74.6735,
+    "eval_rougeL": 75.5866,
+    "eval_rougeLsum": 75.5446,
+    "eval_runtime": 11.3295,
     "eval_samples": 69,
-    "eval_samples_per_second": 7.034,
-    "eval_steps_per_second": 1.835,
-    "train_loss": 0.8950336843297102,
-    "train_runtime": 114.301,
+    "eval_samples_per_second": 6.09,
+    "eval_steps_per_second": 1.589,
+    "train_loss": 0.036720043679942256,
+    "train_runtime": 132.663,
     "train_samples": 276,
-    "train_samples_per_second": 12.073,
-    "train_steps_per_second": 3.018
+    "train_samples_per_second": 10.402,
+    "train_steps_per_second": 2.601
 }
eval_results.json CHANGED
@@ -1,13 +1,13 @@
 {
     "epoch": 5.0,
-    "eval_gen_len": 68.04347826086956,
-    "eval_loss": 0.9288389682769775,
-    "eval_rouge1": 39.7591,
-    "eval_rouge2": 24.1983,
-    "eval_rougeL": 32.2869,
-    "eval_rougeLsum": 36.1493,
-    "eval_runtime": 9.8095,
+    "eval_gen_len": 85.31884057971014,
+    "eval_loss": 0.12416736036539078,
+    "eval_rouge1": 75.3806,
+    "eval_rouge2": 74.6735,
+    "eval_rougeL": 75.5866,
+    "eval_rougeLsum": 75.5446,
+    "eval_runtime": 11.3295,
     "eval_samples": 69,
-    "eval_samples_per_second": 7.034,
-    "eval_steps_per_second": 1.835
+    "eval_samples_per_second": 6.09,
+    "eval_steps_per_second": 1.589
 }
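The eval_results.json values above are ROUGE scores scaled to a 0-100 range. A minimal sketch of how such numbers are commonly produced, assuming the standard `evaluate` ROUGE metric (the usual pattern in Hugging Face summarization examples, not code taken from this repository):

```python
# Sketch of computing ROUGE the way eval_rouge1/eval_rouge2/... are usually derived;
# the example texts are placeholders, not data from the fedora-copr/pep-sum dataset.
import evaluate

rouge = evaluate.load("rouge")
scores = rouge.compute(
    predictions=["a short summary of the pep generated by the model"],
    references=["a short summary of the pep"],
)
# evaluate returns values in [0, 1]; the JSON files report them multiplied by 100.
print({name: round(value * 100, 4) for name, value in scores.items()})
```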
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 5.0,
-    "train_loss": 0.8950336843297102,
-    "train_runtime": 114.301,
+    "train_loss": 0.036720043679942256,
+    "train_runtime": 132.663,
     "train_samples": 276,
-    "train_samples_per_second": 12.073,
-    "train_steps_per_second": 3.018
+    "train_samples_per_second": 10.402,
+    "train_steps_per_second": 2.601
 }
trainer_state.json CHANGED
@@ -10,77 +10,77 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "eval_gen_len": 68.04347826086956,
-      "eval_loss": 0.9288747310638428,
-      "eval_rouge1": 39.7591,
-      "eval_rouge2": 24.1983,
-      "eval_rougeL": 32.2869,
-      "eval_rougeLsum": 36.1493,
-      "eval_runtime": 9.3009,
-      "eval_samples_per_second": 7.419,
-      "eval_steps_per_second": 1.935,
+      "eval_gen_len": 95.44927536231884,
+      "eval_loss": 0.09570997208356857,
+      "eval_rouge1": 72.6601,
+      "eval_rouge2": 71.6824,
+      "eval_rougeL": 72.6858,
+      "eval_rougeLsum": 72.4668,
+      "eval_runtime": 12.0064,
+      "eval_samples_per_second": 5.747,
+      "eval_steps_per_second": 1.499,
       "step": 69
     },
     {
       "epoch": 2.0,
-      "eval_gen_len": 68.04347826086956,
-      "eval_loss": 0.9288538694381714,
-      "eval_rouge1": 39.7591,
-      "eval_rouge2": 24.1983,
-      "eval_rougeL": 32.2869,
-      "eval_rougeLsum": 36.1493,
-      "eval_runtime": 9.2625,
-      "eval_samples_per_second": 7.449,
-      "eval_steps_per_second": 1.943,
+      "eval_gen_len": 92.01449275362319,
+      "eval_loss": 0.13447459042072296,
+      "eval_rouge1": 75.0063,
+      "eval_rouge2": 74.0782,
+      "eval_rougeL": 75.0597,
+      "eval_rougeLsum": 74.8943,
+      "eval_runtime": 11.945,
+      "eval_samples_per_second": 5.776,
+      "eval_steps_per_second": 1.507,
       "step": 138
     },
     {
       "epoch": 3.0,
-      "eval_gen_len": 68.04347826086956,
-      "eval_loss": 0.9288433790206909,
-      "eval_rouge1": 39.7591,
-      "eval_rouge2": 24.1983,
-      "eval_rougeL": 32.2869,
-      "eval_rougeLsum": 36.1493,
-      "eval_runtime": 9.3044,
-      "eval_samples_per_second": 7.416,
-      "eval_steps_per_second": 1.935,
+      "eval_gen_len": 85.46376811594203,
+      "eval_loss": 0.14119356870651245,
+      "eval_rouge1": 75.3012,
+      "eval_rouge2": 74.5492,
+      "eval_rougeL": 75.4246,
+      "eval_rougeLsum": 75.324,
+      "eval_runtime": 10.9494,
+      "eval_samples_per_second": 6.302,
+      "eval_steps_per_second": 1.644,
       "step": 207
     },
     {
       "epoch": 4.0,
-      "eval_gen_len": 68.04347826086956,
-      "eval_loss": 0.9288394451141357,
-      "eval_rouge1": 39.7591,
-      "eval_rouge2": 24.1983,
-      "eval_rougeL": 32.2869,
-      "eval_rougeLsum": 36.1493,
-      "eval_runtime": 9.3367,
-      "eval_samples_per_second": 7.39,
-      "eval_steps_per_second": 1.928,
+      "eval_gen_len": 85.04347826086956,
+      "eval_loss": 0.10889122635126114,
+      "eval_rouge1": 74.8426,
+      "eval_rouge2": 74.0317,
+      "eval_rougeL": 74.8939,
+      "eval_rougeLsum": 74.8128,
+      "eval_runtime": 11.2109,
+      "eval_samples_per_second": 6.155,
+      "eval_steps_per_second": 1.606,
       "step": 276
     },
     {
       "epoch": 5.0,
-      "eval_gen_len": 68.04347826086956,
-      "eval_loss": 0.9288389682769775,
-      "eval_rouge1": 39.7591,
-      "eval_rouge2": 24.1983,
-      "eval_rougeL": 32.2869,
-      "eval_rougeLsum": 36.1493,
-      "eval_runtime": 9.2668,
-      "eval_samples_per_second": 7.446,
-      "eval_steps_per_second": 1.942,
+      "eval_gen_len": 85.31884057971014,
+      "eval_loss": 0.12416736036539078,
+      "eval_rouge1": 75.3806,
+      "eval_rouge2": 74.6735,
+      "eval_rougeL": 75.5866,
+      "eval_rougeLsum": 75.5446,
+      "eval_runtime": 11.151,
+      "eval_samples_per_second": 6.188,
+      "eval_steps_per_second": 1.614,
       "step": 345
     },
     {
       "epoch": 5.0,
       "step": 345,
-      "total_flos": 1680723777945600.0,
-      "train_loss": 0.8950336843297102,
-      "train_runtime": 114.301,
-      "train_samples_per_second": 12.073,
-      "train_steps_per_second": 3.018
+      "total_flos": 2990604350914560.0,
+      "train_loss": 0.036720043679942256,
+      "train_runtime": 132.663,
+      "train_samples_per_second": 10.402,
+      "train_steps_per_second": 2.601
     }
   ],
   "logging_steps": 500,
@@ -88,7 +88,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 500,
-  "total_flos": 1680723777945600.0,
+  "total_flos": 2990604350914560.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null