oMateos2020 committed
Commit 3887733
1 parent: fcaa1fc

Model save

last-checkpoint/config.json DELETED
@@ -1,56 +0,0 @@
- {
- "_name_or_path": "google/pegasus-newsroom",
- "activation_dropout": 0.1,
- "activation_function": "relu",
- "add_bias_logits": false,
- "add_final_layer_norm": true,
- "architectures": [
- "PegasusForConditionalGeneration"
- ],
- "attention_dropout": 0.1,
- "bos_token_id": 0,
- "classif_dropout": 0.0,
- "classifier_dropout": 0.0,
- "d_model": 1024,
- "decoder_attention_heads": 16,
- "decoder_ffn_dim": 4096,
- "decoder_layerdrop": 0.0,
- "decoder_layers": 16,
- "decoder_start_token_id": 0,
- "dropout": 0.1,
- "encoder_attention_heads": 16,
- "encoder_ffn_dim": 4096,
- "encoder_layerdrop": 0.0,
- "encoder_layers": 16,
- "eos_token_id": 1,
- "extra_pos_embeddings": 1,
- "forced_eos_token_id": 1,
- "id2label": {
- "0": "LABEL_0",
- "1": "LABEL_1",
- "2": "LABEL_2"
- },
- "init_std": 0.02,
- "is_encoder_decoder": true,
- "label2id": {
- "LABEL_0": 0,
- "LABEL_1": 1,
- "LABEL_2": 2
- },
- "length_penalty": 0.8,
- "max_length": 128,
- "max_position_embeddings": 512,
- "min_length": 32,
- "model_type": "pegasus",
- "normalize_before": true,
- "normalize_embedding": false,
- "num_beams": 2,
- "num_hidden_layers": 16,
- "pad_token_id": 0,
- "scale_embedding": true,
- "static_position_embeddings": true,
- "torch_dtype": "float32",
- "transformers_version": "4.20.1",
- "use_cache": true,
- "vocab_size": 96103
- }
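For reference, a config like the one deleted above is normally read back with the transformers library. A minimal sketch, not part of this commit, assuming a hypothetical local directory that still contains config.json:

# Minimal sketch: inspect a Pegasus config such as the one shown above.
# "path/to/checkpoint" is a hypothetical local directory containing config.json.
from transformers import PegasusConfig

config = PegasusConfig.from_pretrained("path/to/checkpoint")
print(config.d_model, config.encoder_layers, config.vocab_size)  # 1024, 16, 96103 for the file above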
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:229c943157d5a5e9e8991cc8900ef56099b6776944489977abfc2030ac563ae0
- size 5840398
last-checkpoint/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:d3ee858dd84837f7c4c8c190b5f93e8054b41ccf8a21bc8e242fa28cf589305f
- size 2279605745
last-checkpoint/rng_state.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:4a14935a413905e218f61bc57662a288c912cfa6d0939815b278dfb74ad02841
- size 14439
last-checkpoint/scaler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:d635e22380cc34a06be3e38f1e8aab1cd60caff2247be4baed053a9eeadd0935
- size 559
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:2fb8c2cfdbb7789c5d212766c1d81f546c527c967cb92c197f611d2e7cbd5f8d
- size 623
last-checkpoint/special_tokens_map.json DELETED
@@ -1,110 +0,0 @@
- {
- "additional_special_tokens": [
- "<mask_1>",
- "<unk_2>",
- "<unk_3>",
- "<unk_4>",
- "<unk_5>",
- "<unk_6>",
- "<unk_7>",
- "<unk_8>",
- "<unk_9>",
- "<unk_10>",
- "<unk_11>",
- "<unk_12>",
- "<unk_13>",
- "<unk_14>",
- "<unk_15>",
- "<unk_16>",
- "<unk_17>",
- "<unk_18>",
- "<unk_19>",
- "<unk_20>",
- "<unk_21>",
- "<unk_22>",
- "<unk_23>",
- "<unk_24>",
- "<unk_25>",
- "<unk_26>",
- "<unk_27>",
- "<unk_28>",
- "<unk_29>",
- "<unk_30>",
- "<unk_31>",
- "<unk_32>",
- "<unk_33>",
- "<unk_34>",
- "<unk_35>",
- "<unk_36>",
- "<unk_37>",
- "<unk_38>",
- "<unk_39>",
- "<unk_40>",
- "<unk_41>",
- "<unk_42>",
- "<unk_43>",
- "<unk_44>",
- "<unk_45>",
- "<unk_46>",
- "<unk_47>",
- "<unk_48>",
- "<unk_49>",
- "<unk_50>",
- "<unk_51>",
- "<unk_52>",
- "<unk_53>",
- "<unk_54>",
- "<unk_55>",
- "<unk_56>",
- "<unk_57>",
- "<unk_58>",
- "<unk_59>",
- "<unk_60>",
- "<unk_61>",
- "<unk_62>",
- "<unk_63>",
- "<unk_64>",
- "<unk_65>",
- "<unk_66>",
- "<unk_67>",
- "<unk_68>",
- "<unk_69>",
- "<unk_70>",
- "<unk_71>",
- "<unk_72>",
- "<unk_73>",
- "<unk_74>",
- "<unk_75>",
- "<unk_76>",
- "<unk_77>",
- "<unk_78>",
- "<unk_79>",
- "<unk_80>",
- "<unk_81>",
- "<unk_82>",
- "<unk_83>",
- "<unk_84>",
- "<unk_85>",
- "<unk_86>",
- "<unk_87>",
- "<unk_88>",
- "<unk_89>",
- "<unk_90>",
- "<unk_91>",
- "<unk_92>",
- "<unk_93>",
- "<unk_94>",
- "<unk_95>",
- "<unk_96>",
- "<unk_97>",
- "<unk_98>",
- "<unk_99>",
- "<unk_100>",
- "<unk_101>",
- "<unk_102>"
- ],
- "eos_token": "</s>",
- "mask_token": "<mask_2>",
- "pad_token": "<pad>",
- "unk_token": "<unk>"
- }
last-checkpoint/spiece.model DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:0015189ef36359283fec8b93cf6d9ce51bca37eb1101defc68a53b394913b96c
- size 1912529
last-checkpoint/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json DELETED
@@ -1,118 +0,0 @@
- {
- "additional_special_tokens": [
- "<mask_1>",
- "<unk_2>",
- "<unk_3>",
- "<unk_4>",
- "<unk_5>",
- "<unk_6>",
- "<unk_7>",
- "<unk_8>",
- "<unk_9>",
- "<unk_10>",
- "<unk_11>",
- "<unk_12>",
- "<unk_13>",
- "<unk_14>",
- "<unk_15>",
- "<unk_16>",
- "<unk_17>",
- "<unk_18>",
- "<unk_19>",
- "<unk_20>",
- "<unk_21>",
- "<unk_22>",
- "<unk_23>",
- "<unk_24>",
- "<unk_25>",
- "<unk_26>",
- "<unk_27>",
- "<unk_28>",
- "<unk_29>",
- "<unk_30>",
- "<unk_31>",
- "<unk_32>",
- "<unk_33>",
- "<unk_34>",
- "<unk_35>",
- "<unk_36>",
- "<unk_37>",
- "<unk_38>",
- "<unk_39>",
- "<unk_40>",
- "<unk_41>",
- "<unk_42>",
- "<unk_43>",
- "<unk_44>",
- "<unk_45>",
- "<unk_46>",
- "<unk_47>",
- "<unk_48>",
- "<unk_49>",
- "<unk_50>",
- "<unk_51>",
- "<unk_52>",
- "<unk_53>",
- "<unk_54>",
- "<unk_55>",
- "<unk_56>",
- "<unk_57>",
- "<unk_58>",
- "<unk_59>",
- "<unk_60>",
- "<unk_61>",
- "<unk_62>",
- "<unk_63>",
- "<unk_64>",
- "<unk_65>",
- "<unk_66>",
- "<unk_67>",
- "<unk_68>",
- "<unk_69>",
- "<unk_70>",
- "<unk_71>",
- "<unk_72>",
- "<unk_73>",
- "<unk_74>",
- "<unk_75>",
- "<unk_76>",
- "<unk_77>",
- "<unk_78>",
- "<unk_79>",
- "<unk_80>",
- "<unk_81>",
- "<unk_82>",
- "<unk_83>",
- "<unk_84>",
- "<unk_85>",
- "<unk_86>",
- "<unk_87>",
- "<unk_88>",
- "<unk_89>",
- "<unk_90>",
- "<unk_91>",
- "<unk_92>",
- "<unk_93>",
- "<unk_94>",
- "<unk_95>",
- "<unk_96>",
- "<unk_97>",
- "<unk_98>",
- "<unk_99>",
- "<unk_100>",
- "<unk_101>",
- "<unk_102>"
- ],
- "eos_token": "</s>",
- "full_tokenizer_file": null,
- "mask_token": "<mask_2>",
- "mask_token_sent": "<mask_1>",
- "model_max_length": 512,
- "name_or_path": "google/pegasus-newsroom",
- "offset": 103,
- "pad_token": "<pad>",
- "sp_model_kwargs": {},
- "special_tokens_map_file": null,
- "tokenizer_class": "PegasusTokenizer",
- "unk_token": "<unk>"
- }
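The special-tokens map and tokenizer config deleted above describe a standard PegasusTokenizer with an offset of 103 reserved tokens (<mask_1>, <unk_2> through <unk_102>). A minimal sketch, not part of this commit, of how such a tokenizer is typically instantiated from the base model named in the config:

# Minimal sketch: load the Pegasus tokenizer described by the files above.
from transformers import PegasusTokenizer

tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-newsroom")
print(len(tokenizer.additional_special_tokens))  # 102 extra tokens, as listed above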
last-checkpoint/trainer_state.json DELETED
@@ -1,35 +0,0 @@
- {
- "best_metric": 37.4459,
- "best_model_checkpoint": "pegasus-newsroom-cnn_full-adafactor-bs6/checkpoint-299",
- "epoch": 0.09997283346916598,
- "global_step": 299,
- "is_hyper_param_search": false,
- "is_local_process_zero": true,
- "is_world_process_zero": true,
- "log_history": [
- {
- "epoch": 0.1,
- "learning_rate": 7.839464882943145e-05,
- "loss": 3.3516,
- "step": 299
- },
- {
- "epoch": 0.1,
- "eval_gen_len": 75.2876,
- "eval_loss": 3.0071940422058105,
- "eval_rouge1": 37.4459,
- "eval_rouge2": 16.5527,
- "eval_rougeL": 26.0178,
- "eval_rougeLsum": 34.4955,
- "eval_runtime": 1262.9542,
- "eval_samples_per_second": 1.979,
- "eval_steps_per_second": 0.33,
- "step": 299
- }
- ],
- "max_steps": 2990,
- "num_train_epochs": 1,
- "total_flos": 4.146959327271322e+16,
- "trial_name": null,
- "trial_params": null
- }
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:c380d343d655a247010cd81ba7f73d2c000362c3c07fbc4014043dafee87866f
- size 3567
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d3ee858dd84837f7c4c8c190b5f93e8054b41ccf8a21bc8e242fa28cf589305f
+ oid sha256:e47e09c2798018a81069504ae2b7ea7a01b7ba5446e6e2793ed9e916e1772c03
  size 2279605745
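The pointer change above only swaps the sha256 of pytorch_model.bin; the recorded size is unchanged. A minimal sketch, not part of this commit, of how the new hash could be checked against a locally downloaded copy of the weights (the file path is hypothetical):

# Minimal sketch: verify a downloaded LFS object against the sha256 in the pointer above.
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "e47e09c2798018a81069504ae2b7ea7a01b7ba5446e6e2793ed9e916e1772c03"
assert sha256_of("pytorch_model.bin") == expected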
trainer_state.json ADDED
@@ -0,0 +1,130 @@
+ {
+ "best_metric": 38.5992,
+ "best_model_checkpoint": "pegasus-newsroom-cnn_full-adafactor-bs6/checkpoint-897",
+ "epoch": 0.6356132321902493,
+ "global_step": 1901,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.1,
+ "learning_rate": 7.839464882943145e-05,
+ "loss": 3.3516,
+ "step": 299
+ },
+ {
+ "epoch": 0.1,
+ "eval_gen_len": 75.2876,
+ "eval_loss": 3.0071940422058105,
+ "eval_rouge1": 37.4459,
+ "eval_rouge2": 16.5527,
+ "eval_rougeL": 26.0178,
+ "eval_rougeLsum": 34.4955,
+ "eval_runtime": 1262.9542,
+ "eval_samples_per_second": 1.979,
+ "eval_steps_per_second": 0.33,
+ "step": 299
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00015812709030100337,
+ "loss": 3.1391,
+ "step": 598
+ },
+ {
+ "epoch": 0.2,
+ "eval_gen_len": 72.434,
+ "eval_loss": 2.974590539932251,
+ "eval_rouge1": 37.9369,
+ "eval_rouge2": 16.9058,
+ "eval_rougeL": 26.566,
+ "eval_rougeLsum": 35.0327,
+ "eval_runtime": 1247.518,
+ "eval_samples_per_second": 2.004,
+ "eval_steps_per_second": 0.334,
+ "step": 598
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.0001405351170568562,
+ "loss": 3.1086,
+ "step": 897
+ },
+ {
+ "epoch": 0.3,
+ "eval_gen_len": 66.2268,
+ "eval_loss": 2.9494450092315674,
+ "eval_rouge1": 38.5992,
+ "eval_rouge2": 17.5271,
+ "eval_rougeL": 27.3099,
+ "eval_rougeLsum": 35.6531,
+ "eval_runtime": 1158.9422,
+ "eval_samples_per_second": 2.157,
+ "eval_steps_per_second": 0.36,
+ "step": 897
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.00012060200668896322,
+ "loss": 3.1037,
+ "step": 1196
+ },
+ {
+ "epoch": 0.4,
+ "eval_gen_len": 70.6692,
+ "eval_loss": 2.95255184173584,
+ "eval_rouge1": 38.3805,
+ "eval_rouge2": 17.5267,
+ "eval_rougeL": 27.1742,
+ "eval_rougeLsum": 35.5035,
+ "eval_runtime": 1183.4051,
+ "eval_samples_per_second": 2.113,
+ "eval_steps_per_second": 0.352,
+ "step": 1196
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00010100334448160536,
+ "loss": 3.2294,
+ "step": 1495
+ },
+ {
+ "epoch": 0.5,
+ "eval_gen_len": 56.5128,
+ "eval_loss": 3.183689832687378,
+ "eval_rouge1": 36.2619,
+ "eval_rouge2": 15.599,
+ "eval_rougeL": 25.8333,
+ "eval_rougeLsum": 33.3692,
+ "eval_runtime": 997.3694,
+ "eval_samples_per_second": 2.507,
+ "eval_steps_per_second": 0.418,
+ "step": 1495
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 8.127090301003345e-05,
+ "loss": 3.695,
+ "step": 1794
+ },
+ {
+ "epoch": 0.6,
+ "eval_gen_len": 47.1888,
+ "eval_loss": 3.4275190830230713,
+ "eval_rouge1": 33.0969,
+ "eval_rouge2": 13.6577,
+ "eval_rougeL": 23.9839,
+ "eval_rougeLsum": 30.6005,
+ "eval_runtime": 927.662,
+ "eval_samples_per_second": 2.695,
+ "eval_steps_per_second": 0.45,
+ "step": 1794
+ }
+ ],
+ "max_steps": 2990,
+ "num_train_epochs": 1,
+ "total_flos": 2.4881742419263488e+17,
+ "trial_name": null,
+ "trial_params": null
+ }
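The trainer_state.json added above records six evaluation runs; eval_rouge1 peaks at step 897 (38.5992, matching best_metric) and drops afterwards as the training loss climbs. A minimal sketch, not part of this commit, of how the best evaluation step can be pulled out of the file:

# Minimal sketch: find the evaluation entry with the highest eval_rouge1 in trainer_state.json.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

evals = [e for e in state["log_history"] if "eval_rouge1" in e]
best = max(evals, key=lambda e: e["eval_rouge1"])
print(best["step"], best["eval_rouge1"])  # 897 38.5992 for the state above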