spacemanidol committed on
Commit
db47a13
1 Parent(s): 4360c45

Upload 13 files

Browse files
README.md CHANGED
@@ -2,7 +2,7 @@
2
  tags:
3
  - generated_from_trainer
4
  datasets:
5
- - xsum
6
  metrics:
7
  - rouge
8
  model-index:
@@ -12,15 +12,15 @@ model-index:
12
  name: Summarization
13
  type: summarization
14
  dataset:
15
- name: xsum
16
- type: xsum
17
- config: default
18
  split: validation
19
- args: default
20
  metrics:
21
  - name: Rouge1
22
  type: rouge
23
- value: 33.469
24
  ---
25
 
26
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -28,14 +28,14 @@ should probably proofread and complete it, then remove this comment. -->
28
 
29
  # small-6-4
30
 
31
- This model is a fine-tuned version of [x/small-6-4/](https://huggingface.co/x/small-6-4/) on the xsum dataset.
32
  It achieves the following results on the evaluation set:
33
- - Loss: 2.2026
34
- - Rouge1: 33.469
35
- - Rouge2: 11.4324
36
- - Rougel: 26.6495
37
- - Rougelsum: 26.6397
38
- - Gen Len: 27.4027
39
 
40
  ## Model description
41
 
 
2
  tags:
3
  - generated_from_trainer
4
  datasets:
5
+ - cnn_dailymail
6
  metrics:
7
  - rouge
8
  model-index:
 
12
  name: Summarization
13
  type: summarization
14
  dataset:
15
+ name: cnn_dailymail 3.0.0
16
+ type: cnn_dailymail
17
+ config: 3.0.0
18
  split: validation
19
+ args: 3.0.0
20
  metrics:
21
  - name: Rouge1
22
  type: rouge
23
+ value: 38.7509
24
  ---
25
 
26
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
28
 
29
  # small-6-4
30
 
31
+ This model is a fine-tuned version of [cnn/small-6-4/](https://huggingface.co/cnn/small-6-4/) on the cnn_dailymail 3.0.0 dataset.
32
  It achieves the following results on the evaluation set:
33
+ - Loss: 1.7743
34
+ - Rouge1: 38.7509
35
+ - Rouge2: 17.2661
36
+ - Rougel: 27.9055
37
+ - Rougelsum: 36.0129
38
+ - Gen Len: 78.6349
39
 
40
  ## Model description
41
 
all_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_gen_len": 27.402666195815307,
4
- "eval_loss": 2.2025928497314453,
5
- "eval_rouge1": 33.469,
6
- "eval_rouge2": 11.4324,
7
- "eval_rougeL": 26.6495,
8
- "eval_rougeLsum": 26.6397,
9
- "eval_runtime": 804.3546,
10
- "eval_samples": 11327,
11
- "eval_samples_per_second": 14.082,
12
- "eval_steps_per_second": 3.521,
13
- "train_loss": 2.512529566078278,
14
- "train_runtime": 20397.6049,
15
- "train_samples": 204017,
16
- "train_samples_per_second": 30.006,
17
- "train_steps_per_second": 0.469
18
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_gen_len": 78.63487432675045,
4
+ "eval_loss": 1.7743135690689087,
5
+ "eval_rouge1": 38.7509,
6
+ "eval_rouge2": 17.2661,
7
+ "eval_rougeL": 27.9055,
8
+ "eval_rougeLsum": 36.0129,
9
+ "eval_runtime": 2205.6092,
10
+ "eval_samples": 13368,
11
+ "eval_samples_per_second": 6.061,
12
+ "eval_steps_per_second": 1.515,
13
+ "train_loss": 2.0691928055687137,
14
+ "train_runtime": 37382.5344,
15
+ "train_samples": 287113,
16
+ "train_samples_per_second": 23.041,
17
+ "train_steps_per_second": 0.36
18
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "x/small-6-4/",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
 
1
  {
2
+ "_name_or_path": "cnn/small-6-4/",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
eval_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_gen_len": 27.402666195815307,
4
- "eval_loss": 2.2025928497314453,
5
- "eval_rouge1": 33.469,
6
- "eval_rouge2": 11.4324,
7
- "eval_rougeL": 26.6495,
8
- "eval_rougeLsum": 26.6397,
9
- "eval_runtime": 804.3546,
10
- "eval_samples": 11327,
11
- "eval_samples_per_second": 14.082,
12
- "eval_steps_per_second": 3.521
13
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_gen_len": 78.63487432675045,
4
+ "eval_loss": 1.7743135690689087,
5
+ "eval_rouge1": 38.7509,
6
+ "eval_rouge2": 17.2661,
7
+ "eval_rougeL": 27.9055,
8
+ "eval_rougeLsum": 36.0129,
9
+ "eval_runtime": 2205.6092,
10
+ "eval_samples": 13368,
11
+ "eval_samples_per_second": 6.061,
12
+ "eval_steps_per_second": 1.515
13
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af62300b698f4a3082b385d35ad102ffccf97bdc3245360855c3e36bcf5209c2
3
  size 270128413
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:830711ceac3c5b7638d8e427e46c75c0e1b7f83fc5d76de8331d38c4a54f7633
3
  size 270128413
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 2.512529566078278,
4
- "train_runtime": 20397.6049,
5
- "train_samples": 204017,
6
- "train_samples_per_second": 30.006,
7
- "train_steps_per_second": 0.469
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 2.0691928055687137,
4
+ "train_runtime": 37382.5344,
5
+ "train_samples": 287113,
6
+ "train_samples_per_second": 23.041,
7
+ "train_steps_per_second": 0.36
8
  }
trainer_state.json CHANGED
@@ -1,139 +1,181 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.999745123027154,
5
- "global_step": 9561,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.16,
12
  "learning_rate": 0.0001,
13
- "loss": 2.7302,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 0.31,
18
  "learning_rate": 0.0001,
19
- "loss": 2.6441,
20
  "step": 1000
21
  },
22
  {
23
- "epoch": 0.47,
24
  "learning_rate": 0.0001,
25
- "loss": 2.6096,
26
  "step": 1500
27
  },
28
  {
29
- "epoch": 0.63,
30
  "learning_rate": 0.0001,
31
- "loss": 2.5792,
32
  "step": 2000
33
  },
34
  {
35
- "epoch": 0.78,
36
  "learning_rate": 0.0001,
37
- "loss": 2.5678,
38
  "step": 2500
39
  },
40
  {
41
- "epoch": 0.94,
42
  "learning_rate": 0.0001,
43
- "loss": 2.5517,
44
  "step": 3000
45
  },
46
  {
47
- "epoch": 1.1,
48
  "learning_rate": 0.0001,
49
- "loss": 2.533,
50
  "step": 3500
51
  },
52
  {
53
- "epoch": 1.26,
54
  "learning_rate": 0.0001,
55
- "loss": 2.5076,
56
  "step": 4000
57
  },
58
  {
59
- "epoch": 1.41,
60
  "learning_rate": 0.0001,
61
- "loss": 2.4986,
62
  "step": 4500
63
  },
64
  {
65
- "epoch": 1.57,
66
  "learning_rate": 0.0001,
67
- "loss": 2.4899,
68
  "step": 5000
69
  },
70
  {
71
- "epoch": 1.73,
72
  "learning_rate": 0.0001,
73
- "loss": 2.4855,
74
  "step": 5500
75
  },
76
  {
77
- "epoch": 1.88,
78
  "learning_rate": 0.0001,
79
- "loss": 2.4764,
80
  "step": 6000
81
  },
82
  {
83
- "epoch": 2.04,
84
  "learning_rate": 0.0001,
85
- "loss": 2.4684,
86
  "step": 6500
87
  },
88
  {
89
- "epoch": 2.2,
90
  "learning_rate": 0.0001,
91
- "loss": 2.4426,
92
  "step": 7000
93
  },
94
  {
95
- "epoch": 2.35,
96
  "learning_rate": 0.0001,
97
- "loss": 2.4474,
98
  "step": 7500
99
  },
100
  {
101
- "epoch": 2.51,
102
  "learning_rate": 0.0001,
103
- "loss": 2.4284,
104
  "step": 8000
105
  },
106
  {
107
- "epoch": 2.67,
108
  "learning_rate": 0.0001,
109
- "loss": 2.431,
110
  "step": 8500
111
  },
112
  {
113
- "epoch": 2.82,
114
  "learning_rate": 0.0001,
115
- "loss": 2.4341,
116
  "step": 9000
117
  },
118
  {
119
- "epoch": 2.98,
120
  "learning_rate": 0.0001,
121
- "loss": 2.4217,
122
  "step": 9500
123
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  {
125
  "epoch": 3.0,
126
- "step": 9561,
127
- "total_flos": 1.5154775507718144e+17,
128
- "train_loss": 2.512529566078278,
129
- "train_runtime": 20397.6049,
130
- "train_samples_per_second": 30.006,
131
- "train_steps_per_second": 0.469
132
  }
133
  ],
134
- "max_steps": 9561,
135
  "num_train_epochs": 3,
136
- "total_flos": 1.5154775507718144e+17,
137
  "trial_name": null,
138
  "trial_params": null
139
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.999958205046044,
5
+ "global_step": 13458,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.11,
12
  "learning_rate": 0.0001,
13
+ "loss": 2.2308,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 0.22,
18
  "learning_rate": 0.0001,
19
+ "loss": 2.1659,
20
  "step": 1000
21
  },
22
  {
23
+ "epoch": 0.33,
24
  "learning_rate": 0.0001,
25
+ "loss": 2.1386,
26
  "step": 1500
27
  },
28
  {
29
+ "epoch": 0.45,
30
  "learning_rate": 0.0001,
31
+ "loss": 2.1317,
32
  "step": 2000
33
  },
34
  {
35
+ "epoch": 0.56,
36
  "learning_rate": 0.0001,
37
+ "loss": 2.1235,
38
  "step": 2500
39
  },
40
  {
41
+ "epoch": 0.67,
42
  "learning_rate": 0.0001,
43
+ "loss": 2.117,
44
  "step": 3000
45
  },
46
  {
47
+ "epoch": 0.78,
48
  "learning_rate": 0.0001,
49
+ "loss": 2.1029,
50
  "step": 3500
51
  },
52
  {
53
+ "epoch": 0.89,
54
  "learning_rate": 0.0001,
55
+ "loss": 2.0994,
56
  "step": 4000
57
  },
58
  {
59
+ "epoch": 1.0,
60
  "learning_rate": 0.0001,
61
+ "loss": 2.0893,
62
  "step": 4500
63
  },
64
  {
65
+ "epoch": 1.11,
66
  "learning_rate": 0.0001,
67
+ "loss": 2.0724,
68
  "step": 5000
69
  },
70
  {
71
+ "epoch": 1.23,
72
  "learning_rate": 0.0001,
73
+ "loss": 2.0689,
74
  "step": 5500
75
  },
76
  {
77
+ "epoch": 1.34,
78
  "learning_rate": 0.0001,
79
+ "loss": 2.0622,
80
  "step": 6000
81
  },
82
  {
83
+ "epoch": 1.45,
84
  "learning_rate": 0.0001,
85
+ "loss": 2.0587,
86
  "step": 6500
87
  },
88
  {
89
+ "epoch": 1.56,
90
  "learning_rate": 0.0001,
91
+ "loss": 2.056,
92
  "step": 7000
93
  },
94
  {
95
+ "epoch": 1.67,
96
  "learning_rate": 0.0001,
97
+ "loss": 2.0505,
98
  "step": 7500
99
  },
100
  {
101
+ "epoch": 1.78,
102
  "learning_rate": 0.0001,
103
+ "loss": 2.048,
104
  "step": 8000
105
  },
106
  {
107
+ "epoch": 1.89,
108
  "learning_rate": 0.0001,
109
+ "loss": 2.0461,
110
  "step": 8500
111
  },
112
  {
113
+ "epoch": 2.01,
114
  "learning_rate": 0.0001,
115
+ "loss": 2.0387,
116
  "step": 9000
117
  },
118
  {
119
+ "epoch": 2.12,
120
  "learning_rate": 0.0001,
121
+ "loss": 2.0286,
122
  "step": 9500
123
  },
124
+ {
125
+ "epoch": 2.23,
126
+ "learning_rate": 0.0001,
127
+ "loss": 2.0268,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 2.34,
132
+ "learning_rate": 0.0001,
133
+ "loss": 2.0224,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 2.45,
138
+ "learning_rate": 0.0001,
139
+ "loss": 2.0139,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 2.56,
144
+ "learning_rate": 0.0001,
145
+ "loss": 2.0187,
146
+ "step": 11500
147
+ },
148
+ {
149
+ "epoch": 2.67,
150
+ "learning_rate": 0.0001,
151
+ "loss": 2.0135,
152
+ "step": 12000
153
+ },
154
+ {
155
+ "epoch": 2.79,
156
+ "learning_rate": 0.0001,
157
+ "loss": 2.0193,
158
+ "step": 12500
159
+ },
160
+ {
161
+ "epoch": 2.9,
162
+ "learning_rate": 0.0001,
163
+ "loss": 2.0137,
164
+ "step": 13000
165
+ },
166
  {
167
  "epoch": 3.0,
168
+ "step": 13458,
169
+ "total_flos": 2.6528400748483584e+17,
170
+ "train_loss": 2.0691928055687137,
171
+ "train_runtime": 37382.5344,
172
+ "train_samples_per_second": 23.041,
173
+ "train_steps_per_second": 0.36
174
  }
175
  ],
176
+ "max_steps": 13458,
177
  "num_train_epochs": 3,
178
+ "total_flos": 2.6528400748483584e+17,
179
  "trial_name": null,
180
  "trial_params": null
181
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47a9fe7c4b1ce3f2d32cd85e3cc17358f4d71a65e7b35501bd96d66be068290d
3
- size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f6ab77cad8510acffe866439060aebb1009f8605c7c65f9df0d1a735e22f2be
3
+ size 3707