nbtpj committed
Commit ed6e800
1 Parent(s): 105ca5e

Training in progress, step 2500
.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint-*/
config.json ADDED
@@ -0,0 +1,75 @@
+ {
+   "_name_or_path": "facebook/bart-base",
+   "activation_dropout": 0.1,
+   "activation_function": "gelu",
+   "add_bias_logits": false,
+   "add_final_layer_norm": false,
+   "architectures": [
+     "EncoderDecoderForConditionalGeneration"
+   ],
+   "attention_dropout": 0.1,
+   "bos_token_id": 0,
+   "classif_dropout": 0.1,
+   "classifier_dropout": 0.0,
+   "d_model": 768,
+   "decoder_attention_heads": 12,
+   "decoder_ffn_dim": 3072,
+   "decoder_layerdrop": 0.0,
+   "decoder_layers": 6,
+   "decoder_start_token_id": 2,
+   "dropout": 0.1,
+   "early_stopping": true,
+   "encoder_attention_heads": 12,
+   "encoder_ffn_dim": 3072,
+   "encoder_layerdrop": 0.0,
+   "encoder_layers": 6,
+   "eos_token_id": 2,
+   "forced_bos_token_id": 0,
+   "forced_eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2"
+   },
+   "init_std": 0.02,
+   "is_encoder_decoder": true,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2
+   },
+   "max_position_embeddings": 1024,
+   "model_type": "bart",
+   "no_repeat_ngram_size": 3,
+   "normalize_before": false,
+   "normalize_embedding": true,
+   "num_beams": 4,
+   "num_hidden_layers": 6,
+   "pad_token_id": 1,
+   "scale_embedding": false,
+   "task_specific_params": {
+     "summarization": {
+       "length_penalty": 1.0,
+       "max_length": 128,
+       "min_length": 12,
+       "num_beams": 4
+     },
+     "summarization_cnn": {
+       "length_penalty": 2.0,
+       "max_length": 142,
+       "min_length": 56,
+       "num_beams": 4
+     },
+     "summarization_xsum": {
+       "length_penalty": 1.0,
+       "max_length": 62,
+       "min_length": 11,
+       "num_beams": 6
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.20.1",
+   "use_cache": true,
+   "vocab_size": 50265
+ }
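
config.json describes a stock facebook/bart-base seq2seq architecture (6 encoder and 6 decoder layers, d_model 768, 12 attention heads, 50,265-token vocabulary) plus beam-search presets for the usual summarization tasks. The "architectures" field names an EncoderDecoderForConditionalGeneration class that is not part of stock transformers, but since "model_type" is "bart" the standard BART classes can consume the same file. A minimal loading sketch, assuming a local checkout of this repo in the current directory (the path is illustrative):

from transformers import AutoConfig, BartForConditionalGeneration

config = AutoConfig.from_pretrained(".")  # parses this config.json via model_type "bart"
assert (config.d_model, config.encoder_layers, config.decoder_layers) == (768, 6, 6)

# Building from the config alone yields randomly initialized weights of the
# right shapes; from_pretrained(".") would additionally load pytorch_model.bin.
model = BartForConditionalGeneration(config)
print(sum(p.numel() for p in model.parameters()))  # ~139M params, matching the ~558 MB fp32 checkpoint below
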
last-checkpoint/config.json ADDED
@@ -0,0 +1,75 @@
+ {
+   "_name_or_path": "facebook/bart-base",
+   "activation_dropout": 0.1,
+   "activation_function": "gelu",
+   "add_bias_logits": false,
+   "add_final_layer_norm": false,
+   "architectures": [
+     "EncoderDecoderForConditionalGeneration"
+   ],
+   "attention_dropout": 0.1,
+   "bos_token_id": 0,
+   "classif_dropout": 0.1,
+   "classifier_dropout": 0.0,
+   "d_model": 768,
+   "decoder_attention_heads": 12,
+   "decoder_ffn_dim": 3072,
+   "decoder_layerdrop": 0.0,
+   "decoder_layers": 6,
+   "decoder_start_token_id": 2,
+   "dropout": 0.1,
+   "early_stopping": true,
+   "encoder_attention_heads": 12,
+   "encoder_ffn_dim": 3072,
+   "encoder_layerdrop": 0.0,
+   "encoder_layers": 6,
+   "eos_token_id": 2,
+   "forced_bos_token_id": 0,
+   "forced_eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2"
+   },
+   "init_std": 0.02,
+   "is_encoder_decoder": true,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2
+   },
+   "max_position_embeddings": 1024,
+   "model_type": "bart",
+   "no_repeat_ngram_size": 3,
+   "normalize_before": false,
+   "normalize_embedding": true,
+   "num_beams": 4,
+   "num_hidden_layers": 6,
+   "pad_token_id": 1,
+   "scale_embedding": false,
+   "task_specific_params": {
+     "summarization": {
+       "length_penalty": 1.0,
+       "max_length": 128,
+       "min_length": 12,
+       "num_beams": 4
+     },
+     "summarization_cnn": {
+       "length_penalty": 2.0,
+       "max_length": 142,
+       "min_length": 56,
+       "num_beams": 4
+     },
+     "summarization_xsum": {
+       "length_penalty": 1.0,
+       "max_length": 62,
+       "min_length": 11,
+       "num_beams": 6
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.20.1",
+   "use_cache": true,
+   "vocab_size": 50265
+ }
last-checkpoint/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6e70a5b8d2b6d8bb2b71cbf79cabc3993446edecaa7b23b0dea42478b41078d9
+ size 1115513717
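
This is not the optimizer state itself: large binaries in this commit are stored as Git LFS pointer files, three "key value" lines giving the spec version, the SHA-256 of the real blob, and its size in bytes. The 1,115,513,717-byte optimizer.pt is roughly twice the 557,969,145-byte model weights below, consistent with Adam keeping two extra fp32 buffers per parameter. A small self-contained sketch for parsing such a pointer (the helper name is illustrative):

def parse_lfs_pointer(path: str) -> dict:
    """Parse a Git LFS pointer file into its key/value fields."""
    fields = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            if key:
                fields[key] = value
    return fields

# On a checkout without `git lfs pull`, this prints:
# {'version': 'https://git-lfs.github.com/spec/v1',
#  'oid': 'sha256:6e70a5b8...', 'size': '1115513717'}
print(parse_lfs_pointer("last-checkpoint/optimizer.pt"))
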
last-checkpoint/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4fde73781876577ba51e23daaaa70298a6d5d24f8539c4cbbc62cc8be970fbbf
+ size 557969145
last-checkpoint/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8d97bff542b3a181a6f90fac3fb93bd368232f8c16c24b45de7dd660874b059f
+ size 15459
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17bf1eb09751eea54d404e38767abbd71f94a7738ff7edea23400551f49f406c
+ size 623
last-checkpoint/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
last-checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "errors": "replace",
+   "mask_token": "<mask>",
+   "model_max_length": 1024,
+   "name_or_path": "facebook/bart-base",
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "special_tokens_map_file": null,
+   "tokenizer_class": "BartTokenizer",
+   "trim_offsets": true,
+   "unk_token": "<unk>"
+ }
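
Together, tokenizer_config.json, special_tokens_map.json, vocab.json, merges.txt, and tokenizer.json define an unmodified facebook/bart-base byte-level BPE tokenizer. A minimal sketch of loading it from this checkpoint directory, assuming a local checkout:

from transformers import AutoTokenizer

# AutoTokenizer resolves BartTokenizer (or its fast variant) from
# tokenizer_config.json and reads vocab.json / merges.txt / tokenizer.json.
tok = AutoTokenizer.from_pretrained("last-checkpoint")
enc = tok("an example document to summarize", truncation=True, max_length=1024)
print(enc["input_ids"], tok.decode(enc["input_ids"]))
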
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,166 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.14746652509880256,
+   "global_step": 2500,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.01,
+       "learning_rate": 4.96526184702104e-05,
+       "loss": 1.1303,
+       "step": 100
+     },
+     {
+       "epoch": 0.01,
+       "learning_rate": 4.932490004588058e-05,
+       "loss": 0.8589,
+       "step": 200
+     },
+     {
+       "epoch": 0.02,
+       "learning_rate": 4.8997181621550765e-05,
+       "loss": 0.9136,
+       "step": 300
+     },
+     {
+       "epoch": 0.02,
+       "learning_rate": 4.866946319722095e-05,
+       "loss": 0.8209,
+       "step": 400
+     },
+     {
+       "epoch": 0.03,
+       "learning_rate": 4.8341744772891136e-05,
+       "loss": 0.8212,
+       "step": 500
+     },
+     {
+       "epoch": 0.04,
+       "learning_rate": 4.801402634856132e-05,
+       "loss": 0.7584,
+       "step": 600
+     },
+     {
+       "epoch": 0.04,
+       "learning_rate": 4.768630792423151e-05,
+       "loss": 0.8386,
+       "step": 700
+     },
+     {
+       "epoch": 0.05,
+       "learning_rate": 4.735858949990168e-05,
+       "loss": 0.7838,
+       "step": 800
+     },
+     {
+       "epoch": 0.05,
+       "learning_rate": 4.703087107557187e-05,
+       "loss": 0.7109,
+       "step": 900
+     },
+     {
+       "epoch": 0.06,
+       "learning_rate": 4.670315265124205e-05,
+       "loss": 0.8817,
+       "step": 1000
+     },
+     {
+       "epoch": 0.06,
+       "learning_rate": 4.637543422691224e-05,
+       "loss": 0.7,
+       "step": 1100
+     },
+     {
+       "epoch": 0.07,
+       "learning_rate": 4.6047715802582424e-05,
+       "loss": 0.7984,
+       "step": 1200
+     },
+     {
+       "epoch": 0.08,
+       "learning_rate": 4.571999737825261e-05,
+       "loss": 0.7983,
+       "step": 1300
+     },
+     {
+       "epoch": 0.08,
+       "learning_rate": 4.539227895392279e-05,
+       "loss": 0.8209,
+       "step": 1400
+     },
+     {
+       "epoch": 0.09,
+       "learning_rate": 4.506456052959298e-05,
+       "loss": 0.7698,
+       "step": 1500
+     },
+     {
+       "epoch": 0.09,
+       "learning_rate": 4.473684210526316e-05,
+       "loss": 0.6812,
+       "step": 1600
+     },
+     {
+       "epoch": 0.1,
+       "learning_rate": 4.440912368093335e-05,
+       "loss": 0.7507,
+       "step": 1700
+     },
+     {
+       "epoch": 0.11,
+       "learning_rate": 4.408140525660353e-05,
+       "loss": 0.7081,
+       "step": 1800
+     },
+     {
+       "epoch": 0.11,
+       "learning_rate": 4.375368683227371e-05,
+       "loss": 0.809,
+       "step": 1900
+     },
+     {
+       "epoch": 0.12,
+       "learning_rate": 4.3425968407943895e-05,
+       "loss": 0.7504,
+       "step": 2000
+     },
+     {
+       "epoch": 0.12,
+       "learning_rate": 4.3098249983614084e-05,
+       "loss": 0.764,
+       "step": 2100
+     },
+     {
+       "epoch": 0.13,
+       "learning_rate": 4.2770531559284266e-05,
+       "loss": 0.7374,
+       "step": 2200
+     },
+     {
+       "epoch": 0.14,
+       "learning_rate": 4.244281313495445e-05,
+       "loss": 0.786,
+       "step": 2300
+     },
+     {
+       "epoch": 0.14,
+       "learning_rate": 4.211509471062463e-05,
+       "loss": 0.8193,
+       "step": 2400
+     },
+     {
+       "epoch": 0.15,
+       "learning_rate": 4.178737628629482e-05,
+       "loss": 0.6467,
+       "step": 2500
+     }
+   ],
+   "max_steps": 30516,
+   "num_train_epochs": 2,
+   "total_flos": 1.527980033728512e+16,
+   "trial_name": null,
+   "trial_params": null
+ }
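
trainer_state.json captures the run so far: 2,500 of 30,516 steps (epoch 0.147 of 2), loss logged every 100 steps, and a learning rate decaying roughly linearly from about 5e-5. A self-contained sketch that summarizes it without loading the model (path as in this commit):

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

done = state["global_step"] / state["max_steps"]
print(f"step {state['global_step']}/{state['max_steps']} ({done:.1%} of the run)")
for rec in state["log_history"][-3:]:  # last three logged intervals
    print(rec["step"], rec["loss"], rec["learning_rate"])

On a full checkout, training continues with trainer.train(resume_from_checkpoint="last-checkpoint"), which restores optimizer.pt, scheduler.pt, and rng_state.pth alongside the weights and resumes at global step 2500.
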
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:50d8747e43721b80b6e9bd368539717424fbb7049d6db13269fc09fffe501769
+ size 3311
last-checkpoint/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4fde73781876577ba51e23daaaa70298a6d5d24f8539c4cbbc62cc8be970fbbf
+ size 557969145
runs/Dec05_03-30-37_fbdce2302f52/1670211053.077461/events.out.tfevents.1670211053.fbdce2302f52.24.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17196ec21a82f76e217c7ffce0ac112d7a1377e731f7e4e715396e0958931099
+ size 5371
runs/Dec05_03-30-37_fbdce2302f52/1670211070.5940251/events.out.tfevents.1670211070.fbdce2302f52.24.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6eeeeee803174737b1d3754f00658c4cff8ee63fea0060026cd174d92f1b22d4
+ size 5371
runs/Dec05_03-30-37_fbdce2302f52/events.out.tfevents.1670211053.fbdce2302f52.24.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:26ca8396fc1d5ed47cc18d718589ef3682bd76d405c05f1616f84f6bcb3e1620
+ size 13328
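
The three files under runs/ are TensorBoard event logs written during training (the two small files in the timestamped subdirectories record the hyperparameter dump; the larger one holds the scalar curves). They can be browsed with TensorBoard pointed at runs/, or read programmatically; a sketch using the tensorboard package's event loader (the scalar tag names are assumptions about what the HF Trainer logged, not taken from this commit):

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Dec05_03-30-37_fbdce2302f52")
acc.Reload()  # parses every events.out.tfevents.* file in the directory
tags = acc.Tags()["scalars"]  # e.g. ['train/loss', 'train/learning_rate'] (assumed)
for ev in acc.Scalars(tags[0])[:3]:
    print(ev.step, ev.value)
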
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "errors": "replace",
+   "mask_token": "<mask>",
+   "model_max_length": 1024,
+   "name_or_path": "facebook/bart-base",
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "special_tokens_map_file": null,
+   "tokenizer_class": "BartTokenizer",
+   "trim_offsets": true,
+   "unk_token": "<unk>"
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:50d8747e43721b80b6e9bd368539717424fbb7049d6db13269fc09fffe501769
+ size 3311
vocab.json ADDED
The diff for this file is too large to render. See raw diff