EC2 Default User committed on
Commit
ecd081b
•
1 Parent(s): 2ce4c1a

Add model and remove ckpt.

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
checkpoint-5526/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1397835223b6207a32dbdc99adfcd45307400821dc1f4b6c0feee00947d8b68b
3
- size 1980790149
 
 
 
 
checkpoint-5526/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:177f090776e225c6058120d9a9745d56e588d540b14c97f836a5a195fb3901a4
3
- size 14575
 
 
 
 
checkpoint-5526/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:67e332c14e7fb333fad327e11feed99660d09aaf307f258c695c8dc79bc86585
3
- size 627
 
 
 
 
checkpoint-5526/trainer_state.json DELETED
@@ -1,121 +0,0 @@
1
- {
2
- "best_metric": 1.3720556497573853,
3
- "best_model_checkpoint": "flan-t5-base-samsum/checkpoint-5526",
4
- "epoch": 3.0,
5
- "global_step": 5526,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.27,
12
- "learning_rate": 4.728555917480999e-05,
13
- "loss": 1.4803,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.54,
18
- "learning_rate": 4.457111834961998e-05,
19
- "loss": 1.4552,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 0.81,
24
- "learning_rate": 4.185667752442997e-05,
25
- "loss": 1.4398,
26
- "step": 1500
27
- },
28
- {
29
- "epoch": 1.0,
30
- "eval_gen_len": 17.035409035409035,
31
- "eval_loss": 1.3823134899139404,
32
- "eval_rouge1": 47.2415,
33
- "eval_rouge2": 23.7419,
34
- "eval_rougeL": 39.5142,
35
- "eval_rougeLsum": 43.4177,
36
- "eval_runtime": 74.2005,
37
- "eval_samples_per_second": 11.038,
38
- "eval_steps_per_second": 1.388,
39
- "step": 1842
40
- },
41
- {
42
- "epoch": 1.09,
43
- "learning_rate": 3.914223669923996e-05,
44
- "loss": 1.4078,
45
- "step": 2000
46
- },
47
- {
48
- "epoch": 1.36,
49
- "learning_rate": 3.642779587404995e-05,
50
- "loss": 1.3341,
51
- "step": 2500
52
- },
53
- {
54
- "epoch": 1.63,
55
- "learning_rate": 3.3713355048859935e-05,
56
- "loss": 1.337,
57
- "step": 3000
58
- },
59
- {
60
- "epoch": 1.9,
61
- "learning_rate": 3.099891422366993e-05,
62
- "loss": 1.3564,
63
- "step": 3500
64
- },
65
- {
66
- "epoch": 2.0,
67
- "eval_gen_len": 17.307692307692307,
68
- "eval_loss": 1.3747227191925049,
69
- "eval_rouge1": 46.833,
70
- "eval_rouge2": 23.308,
71
- "eval_rougeL": 39.2838,
72
- "eval_rougeLsum": 42.9821,
73
- "eval_runtime": 74.9443,
74
- "eval_samples_per_second": 10.928,
75
- "eval_steps_per_second": 1.374,
76
- "step": 3684
77
- },
78
- {
79
- "epoch": 2.17,
80
- "learning_rate": 2.8284473398479917e-05,
81
- "loss": 1.3162,
82
- "step": 4000
83
- },
84
- {
85
- "epoch": 2.44,
86
- "learning_rate": 2.5570032573289905e-05,
87
- "loss": 1.2739,
88
- "step": 4500
89
- },
90
- {
91
- "epoch": 2.71,
92
- "learning_rate": 2.2855591748099893e-05,
93
- "loss": 1.277,
94
- "step": 5000
95
- },
96
- {
97
- "epoch": 2.99,
98
- "learning_rate": 2.0141150922909884e-05,
99
- "loss": 1.2776,
100
- "step": 5500
101
- },
102
- {
103
- "epoch": 3.0,
104
- "eval_gen_len": 17.24053724053724,
105
- "eval_loss": 1.3720556497573853,
106
- "eval_rouge1": 47.5,
107
- "eval_rouge2": 23.9237,
108
- "eval_rougeL": 40.0646,
109
- "eval_rougeLsum": 43.6387,
110
- "eval_runtime": 74.4867,
111
- "eval_samples_per_second": 10.995,
112
- "eval_steps_per_second": 1.383,
113
- "step": 5526
114
- }
115
- ],
116
- "max_steps": 9210,
117
- "num_train_epochs": 5,
118
- "total_flos": 3.026353594879181e+16,
119
- "trial_name": null,
120
- "trial_params": null
121
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-5526/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9945552c747ee998aaff412e61bf9ed72af622c7f04fa9ae25709456833f3bd8
3
- size 3643
 
 
 
 
checkpoint-9210/config.json DELETED
@@ -1,61 +0,0 @@
1
- {
2
- "_name_or_path": "google/flan-t5-base",
3
- "architectures": [
4
- "T5ForConditionalGeneration"
5
- ],
6
- "d_ff": 2048,
7
- "d_kv": 64,
8
- "d_model": 768,
9
- "decoder_start_token_id": 0,
10
- "dense_act_fn": "gelu_new",
11
- "dropout_rate": 0.1,
12
- "eos_token_id": 1,
13
- "feed_forward_proj": "gated-gelu",
14
- "initializer_factor": 1.0,
15
- "is_encoder_decoder": true,
16
- "is_gated_act": true,
17
- "layer_norm_epsilon": 1e-06,
18
- "model_type": "t5",
19
- "n_positions": 512,
20
- "num_decoder_layers": 12,
21
- "num_heads": 12,
22
- "num_layers": 12,
23
- "output_past": true,
24
- "pad_token_id": 0,
25
- "relative_attention_max_distance": 128,
26
- "relative_attention_num_buckets": 32,
27
- "task_specific_params": {
28
- "summarization": {
29
- "early_stopping": true,
30
- "length_penalty": 2.0,
31
- "max_length": 200,
32
- "min_length": 30,
33
- "no_repeat_ngram_size": 3,
34
- "num_beams": 4,
35
- "prefix": "summarize: "
36
- },
37
- "translation_en_to_de": {
38
- "early_stopping": true,
39
- "max_length": 300,
40
- "num_beams": 4,
41
- "prefix": "translate English to German: "
42
- },
43
- "translation_en_to_fr": {
44
- "early_stopping": true,
45
- "max_length": 300,
46
- "num_beams": 4,
47
- "prefix": "translate English to French: "
48
- },
49
- "translation_en_to_ro": {
50
- "early_stopping": true,
51
- "max_length": 300,
52
- "num_beams": 4,
53
- "prefix": "translate English to Romanian: "
54
- }
55
- },
56
- "tie_word_embeddings": false,
57
- "torch_dtype": "float32",
58
- "transformers_version": "4.27.4",
59
- "use_cache": true,
60
- "vocab_size": 32128
61
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-9210/generation_config.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "decoder_start_token_id": 0,
3
- "eos_token_id": 1,
4
- "pad_token_id": 0,
5
- "transformers_version": "4.27.4"
6
- }
 
 
 
 
 
 
 
checkpoint-9210/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a43ad9e9833b49ec56f72baff113a0d1761b7d63d87f3227e0e8fb4cbbfc125
3
- size 1980790149
 
 
 
 
checkpoint-9210/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8aa1df3c66854ea36cc36214186c8782f29dccd11dccd45abe2c05a71cf1fbf
3
- size 990408885
 
 
 
 
checkpoint-9210/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:22ff991c0a65e33ec5fe1c574d60db40b3e082781cb2cd3c0d21a9f06d131188
3
- size 14575
 
 
 
 
checkpoint-9210/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:81cdb2c1c20df01faadc95a4b03fdefbf2d02893abb39c4f93c6a86a42372740
3
- size 627
 
 
 
 
checkpoint-9210/trainer_state.json DELETED
@@ -1,189 +0,0 @@
1
- {
2
- "best_metric": 1.3720556497573853,
3
- "best_model_checkpoint": "flan-t5-base-samsum/checkpoint-5526",
4
- "epoch": 5.0,
5
- "global_step": 9210,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.27,
12
- "learning_rate": 4.728555917480999e-05,
13
- "loss": 1.4803,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.54,
18
- "learning_rate": 4.457111834961998e-05,
19
- "loss": 1.4552,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 0.81,
24
- "learning_rate": 4.185667752442997e-05,
25
- "loss": 1.4398,
26
- "step": 1500
27
- },
28
- {
29
- "epoch": 1.0,
30
- "eval_gen_len": 17.035409035409035,
31
- "eval_loss": 1.3823134899139404,
32
- "eval_rouge1": 47.2415,
33
- "eval_rouge2": 23.7419,
34
- "eval_rougeL": 39.5142,
35
- "eval_rougeLsum": 43.4177,
36
- "eval_runtime": 74.2005,
37
- "eval_samples_per_second": 11.038,
38
- "eval_steps_per_second": 1.388,
39
- "step": 1842
40
- },
41
- {
42
- "epoch": 1.09,
43
- "learning_rate": 3.914223669923996e-05,
44
- "loss": 1.4078,
45
- "step": 2000
46
- },
47
- {
48
- "epoch": 1.36,
49
- "learning_rate": 3.642779587404995e-05,
50
- "loss": 1.3341,
51
- "step": 2500
52
- },
53
- {
54
- "epoch": 1.63,
55
- "learning_rate": 3.3713355048859935e-05,
56
- "loss": 1.337,
57
- "step": 3000
58
- },
59
- {
60
- "epoch": 1.9,
61
- "learning_rate": 3.099891422366993e-05,
62
- "loss": 1.3564,
63
- "step": 3500
64
- },
65
- {
66
- "epoch": 2.0,
67
- "eval_gen_len": 17.307692307692307,
68
- "eval_loss": 1.3747227191925049,
69
- "eval_rouge1": 46.833,
70
- "eval_rouge2": 23.308,
71
- "eval_rougeL": 39.2838,
72
- "eval_rougeLsum": 42.9821,
73
- "eval_runtime": 74.9443,
74
- "eval_samples_per_second": 10.928,
75
- "eval_steps_per_second": 1.374,
76
- "step": 3684
77
- },
78
- {
79
- "epoch": 2.17,
80
- "learning_rate": 2.8284473398479917e-05,
81
- "loss": 1.3162,
82
- "step": 4000
83
- },
84
- {
85
- "epoch": 2.44,
86
- "learning_rate": 2.5570032573289905e-05,
87
- "loss": 1.2739,
88
- "step": 4500
89
- },
90
- {
91
- "epoch": 2.71,
92
- "learning_rate": 2.2855591748099893e-05,
93
- "loss": 1.277,
94
- "step": 5000
95
- },
96
- {
97
- "epoch": 2.99,
98
- "learning_rate": 2.0141150922909884e-05,
99
- "loss": 1.2776,
100
- "step": 5500
101
- },
102
- {
103
- "epoch": 3.0,
104
- "eval_gen_len": 17.24053724053724,
105
- "eval_loss": 1.3720556497573853,
106
- "eval_rouge1": 47.5,
107
- "eval_rouge2": 23.9237,
108
- "eval_rougeL": 40.0646,
109
- "eval_rougeLsum": 43.6387,
110
- "eval_runtime": 74.4867,
111
- "eval_samples_per_second": 10.995,
112
- "eval_steps_per_second": 1.383,
113
- "step": 5526
114
- },
115
- {
116
- "epoch": 3.26,
117
- "learning_rate": 1.742671009771987e-05,
118
- "loss": 1.2209,
119
- "step": 6000
120
- },
121
- {
122
- "epoch": 3.53,
123
- "learning_rate": 1.471226927252986e-05,
124
- "loss": 1.2427,
125
- "step": 6500
126
- },
127
- {
128
- "epoch": 3.8,
129
- "learning_rate": 1.1997828447339848e-05,
130
- "loss": 1.2345,
131
- "step": 7000
132
- },
133
- {
134
- "epoch": 4.0,
135
- "eval_gen_len": 17.245421245421245,
136
- "eval_loss": 1.3743723630905151,
137
- "eval_rouge1": 47.5599,
138
- "eval_rouge2": 23.9714,
139
- "eval_rougeL": 40.06,
140
- "eval_rougeLsum": 43.8107,
141
- "eval_runtime": 74.1957,
142
- "eval_samples_per_second": 11.038,
143
- "eval_steps_per_second": 1.388,
144
- "step": 7368
145
- },
146
- {
147
- "epoch": 4.07,
148
- "learning_rate": 9.283387622149838e-06,
149
- "loss": 1.2388,
150
- "step": 7500
151
- },
152
- {
153
- "epoch": 4.34,
154
- "learning_rate": 6.568946796959827e-06,
155
- "loss": 1.2124,
156
- "step": 8000
157
- },
158
- {
159
- "epoch": 4.61,
160
- "learning_rate": 3.854505971769816e-06,
161
- "loss": 1.196,
162
- "step": 8500
163
- },
164
- {
165
- "epoch": 4.89,
166
- "learning_rate": 1.1400651465798045e-06,
167
- "loss": 1.194,
168
- "step": 9000
169
- },
170
- {
171
- "epoch": 5.0,
172
- "eval_gen_len": 17.246642246642246,
173
- "eval_loss": 1.376030683517456,
174
- "eval_rouge1": 47.7868,
175
- "eval_rouge2": 24.0949,
176
- "eval_rougeL": 40.2021,
177
- "eval_rougeLsum": 43.789,
178
- "eval_runtime": 74.584,
179
- "eval_samples_per_second": 10.981,
180
- "eval_steps_per_second": 1.381,
181
- "step": 9210
182
- }
183
- ],
184
- "max_steps": 9210,
185
- "num_train_epochs": 5,
186
- "total_flos": 5.043922658131968e+16,
187
- "trial_name": null,
188
- "trial_params": null
189
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-5526/config.json → config.json RENAMED
File without changes
checkpoint-5526/generation_config.json → generation_config.json RENAMED
File without changes
checkpoint-5526/pytorch_model.bin → pytorch_model.bin RENAMED
File without changes
checkpoint-9210/training_args.bin → training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9945552c747ee998aaff412e61bf9ed72af622c7f04fa9ae25709456833f3bd8
3
- size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dd2de24479f8b2ea417955eab126e916f7c203b800af18a8eb7c78b9f902135
3
+ size 3707