chibao24 committed
Commit 9013b5d
1 Parent(s): f6cf9ed

Upload folder using huggingface_hub
checkpoint-5000/config.json ADDED
@@ -0,0 +1,32 @@
+ {
+   "_name_or_path": "huggingface-course/mt5-finetuned-amazon-en-es",
+   "architectures": [
+     "MT5ForConditionalGeneration"
+   ],
+   "classifier_dropout": 0.0,
+   "d_ff": 1024,
+   "d_kv": 64,
+   "d_model": 512,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "mt5",
+   "num_decoder_layers": 8,
+   "num_heads": 6,
+   "num_layers": 8,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "tie_word_embeddings": false,
+   "tokenizer_class": "T5Tokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.41.2",
+   "use_cache": true,
+   "vocab_size": 250112
+ }
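For reference, the checkpoint folders added in this commit are complete transformers checkpoints, so they can be loaded directly once downloaded. A minimal sketch, assuming a local copy of checkpoint-5000/ (the path and example input are placeholders):

# Minimal sketch: load the mT5 checkpoint described by the config.json above.
# "checkpoint-5000" is assumed to be a local copy of this folder.
from transformers import AutoTokenizer, MT5ForConditionalGeneration

model = MT5ForConditionalGeneration.from_pretrained("checkpoint-5000")
tokenizer = AutoTokenizer.from_pretrained("checkpoint-5000")

inputs = tokenizer("I loved this book, a great read for the holidays!", return_tensors="pt")
summary_ids = model.generate(**inputs, max_length=30)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))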
checkpoint-5000/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "decoder_start_token_id": 0,
+   "eos_token_id": 1,
+   "pad_token_id": 0,
+   "transformers_version": "4.41.2"
+ }
checkpoint-5000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db18d0ebb02211516f5aa82cf32b01f36fae049bcad07b767416a6491863e396
+ size 1200729512
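The model.safetensors entry is a Git LFS pointer; the actual 1,200,729,512-byte weights file lives in LFS storage. One way to fetch it is with huggingface_hub, sketched below (the repo id is a placeholder for this repository):

# Sketch: download the LFS-backed weights rather than the small pointer file.
# "your-username/your-repo" is a placeholder repo id.
from huggingface_hub import hf_hub_download

weights_path = hf_hub_download(
    repo_id="your-username/your-repo",
    filename="checkpoint-5000/model.safetensors",
)
print(weights_path)  # local cache path to the resolved weights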
checkpoint-5000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:97ead2d29f9d1e68de4337a111386e3dc58a93e92cb540f028d65fff2ebb8783
+ size 2879866
checkpoint-5000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f7f4d58b37e5548e4c4c86278d1b79aeccd7f51a8699348b00972091853eaa6d
+ size 14244
checkpoint-5000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cbfcf346ffa321c1893d921e3994abb0018919e4e051735a9c02d5a32adaf3ba
+ size 1064
checkpoint-5000/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
checkpoint-5000/spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
+ size 4309802
checkpoint-5000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4ee660ae764cd56ecac9dbe82d766502034efac119fc579414afcd68d4f6b922
+ size 16315191
checkpoint-5000/tokenizer_config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "</s>",
+   "extra_ids": 0,
+   "max_length": 30,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<pad>",
+   "sp_model_kwargs": {},
+   "stride": 0,
+   "tokenizer_class": "T5Tokenizer",
+   "truncation_side": "right",
+   "truncation_strategy": "longest_first",
+   "unk_token": "<unk>"
+ }
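The spiece.model, tokenizer.json, special_tokens_map.json, and tokenizer_config.json files above together define the checkpoint's T5Tokenizer. A small sketch of loading and using it, assuming a local copy of the folder; the max_length of 30 in tokenizer_config.json presumably reflects the target summary length used during training:

# Sketch: load the SentencePiece-based tokenizer saved with this checkpoint.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("checkpoint-5000")
print(tokenizer.pad_token, tokenizer.eos_token, tokenizer.unk_token)  # <pad> </s> <unk>

# Tokenizing a target summary with the saved max_length (assumption: 30 was the target length).
labels = tokenizer("Un gran libro", max_length=30, truncation=True)
print(labels["input_ids"])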
checkpoint-5000/trainer_state.json ADDED
@@ -0,0 +1,173 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 8.0,
+   "eval_steps": 75,
+   "global_step": 5000,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "grad_norm": 1.6603230237960815,
+       "learning_rate": 0.00020833333333333335,
+       "loss": 2.7781,
+       "step": 625
+     },
+     {
+       "epoch": 1.0,
+       "eval_loss": 1.839109182357788,
+       "eval_rouge1": 28.5024,
+       "eval_rouge2": 11.2717,
+       "eval_rougeL": 22.108,
+       "eval_rougeLsum": 22.4361,
+       "eval_runtime": 722.0927,
+       "eval_samples_per_second": 13.849,
+       "eval_steps_per_second": 0.866,
+       "step": 625
+     },
+     {
+       "epoch": 2.0,
+       "grad_norm": 1.18350088596344,
+       "learning_rate": 0.0004166666666666667,
+       "loss": 2.0622,
+       "step": 1250
+     },
+     {
+       "epoch": 2.0,
+       "eval_loss": 1.7575578689575195,
+       "eval_rouge1": 28.0245,
+       "eval_rouge2": 10.6112,
+       "eval_rougeL": 21.7353,
+       "eval_rougeLsum": 22.0685,
+       "eval_runtime": 728.2015,
+       "eval_samples_per_second": 13.732,
+       "eval_steps_per_second": 0.858,
+       "step": 1250
+     },
+     {
+       "epoch": 3.0,
+       "grad_norm": 0.9635187983512878,
+       "learning_rate": 0.000625,
+       "loss": 1.8636,
+       "step": 1875
+     },
+     {
+       "epoch": 3.0,
+       "eval_loss": 1.617906928062439,
+       "eval_rouge1": 27.353,
+       "eval_rouge2": 10.6238,
+       "eval_rougeL": 21.4686,
+       "eval_rougeLsum": 21.7512,
+       "eval_runtime": 730.8407,
+       "eval_samples_per_second": 13.683,
+       "eval_steps_per_second": 0.855,
+       "step": 1875
+     },
+     {
+       "epoch": 4.0,
+       "grad_norm": 1.1745034456253052,
+       "learning_rate": 0.0008333333333333334,
+       "loss": 1.7408,
+       "step": 2500
+     },
+     {
+       "epoch": 4.0,
+       "eval_loss": 1.6142878532409668,
+       "eval_rouge1": 28.0928,
+       "eval_rouge2": 11.2857,
+       "eval_rougeL": 22.06,
+       "eval_rougeLsum": 22.3629,
+       "eval_runtime": 802.3401,
+       "eval_samples_per_second": 12.464,
+       "eval_steps_per_second": 0.779,
+       "step": 2500
+     },
+     {
+       "epoch": 5.0,
+       "grad_norm": 0.7837355136871338,
+       "learning_rate": 0.0009615384615384616,
+       "loss": 1.6492,
+       "step": 3125
+     },
+     {
+       "epoch": 5.0,
+       "eval_loss": 1.5411357879638672,
+       "eval_rouge1": 27.8209,
+       "eval_rouge2": 10.9184,
+       "eval_rougeL": 21.6819,
+       "eval_rougeLsum": 21.9773,
+       "eval_runtime": 711.0964,
+       "eval_samples_per_second": 14.063,
+       "eval_steps_per_second": 0.879,
+       "step": 3125
+     },
+     {
+       "epoch": 6.0,
+       "grad_norm": 0.5993546843528748,
+       "learning_rate": 0.0007692307692307693,
+       "loss": 1.5448,
+       "step": 3750
+     },
+     {
+       "epoch": 6.0,
+       "eval_loss": 1.4802035093307495,
+       "eval_rouge1": 28.0433,
+       "eval_rouge2": 11.4232,
+       "eval_rougeL": 22.0696,
+       "eval_rougeLsum": 22.373,
+       "eval_runtime": 728.4308,
+       "eval_samples_per_second": 13.728,
+       "eval_steps_per_second": 0.858,
+       "step": 3750
+     },
+     {
+       "epoch": 7.0,
+       "grad_norm": 0.7141011357307434,
+       "learning_rate": 0.0005769230769230769,
+       "loss": 1.4454,
+       "step": 4375
+     },
+     {
+       "epoch": 7.0,
+       "eval_loss": 1.462142825126648,
+       "eval_rouge1": 27.8552,
+       "eval_rouge2": 11.1708,
+       "eval_rougeL": 21.8958,
+       "eval_rougeLsum": 22.1949,
+       "eval_runtime": 640.7723,
+       "eval_samples_per_second": 15.606,
+       "eval_steps_per_second": 0.975,
+       "step": 4375
+     },
+     {
+       "epoch": 8.0,
+       "grad_norm": 0.6400988101959229,
+       "learning_rate": 0.00038461538461538467,
+       "loss": 1.3636,
+       "step": 5000
+     }
+   ],
+   "logging_steps": 625,
+   "max_steps": 6250,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 10,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 4.22999752704e+16,
+   "train_batch_size": 16,
+   "trial_name": null,
+   "trial_params": null
+ }
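This trainer_state.json shows the run at global step 5000 of 6250 (epoch 8 of 10), with per-epoch loss and ROUGE scores recorded in log_history. A minimal sketch for inspecting those metrics from the file; resuming the run itself would go through a Seq2SeqTrainer configured like the original, which is not part of this commit:

# Sketch: read the logged eval metrics out of this checkpoint's trainer_state.json.
import json

with open("checkpoint-5000/trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "eval_rougeL" in entry:
        print(f'epoch {entry["epoch"]:.0f}: eval_loss={entry["eval_loss"]:.4f}, rougeL={entry["eval_rougeL"]:.2f}')

# Resuming (assumption: `trainer` mirrors the original training setup):
# trainer.train(resume_from_checkpoint="checkpoint-5000")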
checkpoint-5000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f5223adea68bac22d3e503304376f99d2113ed58ad54d362e129fa184482b15
+ size 5240
checkpoint-5500/config.json ADDED
@@ -0,0 +1,32 @@
+ {
+   "_name_or_path": "huggingface-course/mt5-finetuned-amazon-en-es",
+   "architectures": [
+     "MT5ForConditionalGeneration"
+   ],
+   "classifier_dropout": 0.0,
+   "d_ff": 1024,
+   "d_kv": 64,
+   "d_model": 512,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "mt5",
+   "num_decoder_layers": 8,
+   "num_heads": 6,
+   "num_layers": 8,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "tie_word_embeddings": false,
+   "tokenizer_class": "T5Tokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.41.2",
+   "use_cache": true,
+   "vocab_size": 250112
+ }
checkpoint-5500/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "decoder_start_token_id": 0,
+   "eos_token_id": 1,
+   "pad_token_id": 0,
+   "transformers_version": "4.41.2"
+ }
checkpoint-5500/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7544a75019487033c3697fe99b3d493fc268f49f4f76ff1509cb98a95895c400
+ size 1200729512
checkpoint-5500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:92780dc81daec4c9e2c8e024f806a36df6e3d35eec087590f361167876766d95
+ size 2879866
checkpoint-5500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eacade89a99076612b2685c06f1897e40328f5d9e642936aed8178e83d63949b
+ size 14244
checkpoint-5500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:09e85770e7a5d92bd875c851b84e34b820f78c9d6ff7a152f8bd524e42bd0243
+ size 1064
checkpoint-5500/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
checkpoint-5500/spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
+ size 4309802
checkpoint-5500/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4ee660ae764cd56ecac9dbe82d766502034efac119fc579414afcd68d4f6b922
+ size 16315191
checkpoint-5500/tokenizer_config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "</s>",
+   "extra_ids": 0,
+   "max_length": 30,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<pad>",
+   "sp_model_kwargs": {},
+   "stride": 0,
+   "tokenizer_class": "T5Tokenizer",
+   "truncation_side": "right",
+   "truncation_strategy": "longest_first",
+   "unk_token": "<unk>"
+ }
checkpoint-5500/trainer_state.json ADDED
@@ -0,0 +1,185 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 8.8,
+   "eval_steps": 75,
+   "global_step": 5500,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "grad_norm": 1.6603230237960815,
+       "learning_rate": 0.00020833333333333335,
+       "loss": 2.7781,
+       "step": 625
+     },
+     {
+       "epoch": 1.0,
+       "eval_loss": 1.839109182357788,
+       "eval_rouge1": 28.5024,
+       "eval_rouge2": 11.2717,
+       "eval_rougeL": 22.108,
+       "eval_rougeLsum": 22.4361,
+       "eval_runtime": 722.0927,
+       "eval_samples_per_second": 13.849,
+       "eval_steps_per_second": 0.866,
+       "step": 625
+     },
+     {
+       "epoch": 2.0,
+       "grad_norm": 1.18350088596344,
+       "learning_rate": 0.0004166666666666667,
+       "loss": 2.0622,
+       "step": 1250
+     },
+     {
+       "epoch": 2.0,
+       "eval_loss": 1.7575578689575195,
+       "eval_rouge1": 28.0245,
+       "eval_rouge2": 10.6112,
+       "eval_rougeL": 21.7353,
+       "eval_rougeLsum": 22.0685,
+       "eval_runtime": 728.2015,
+       "eval_samples_per_second": 13.732,
+       "eval_steps_per_second": 0.858,
+       "step": 1250
+     },
+     {
+       "epoch": 3.0,
+       "grad_norm": 0.9635187983512878,
+       "learning_rate": 0.000625,
+       "loss": 1.8636,
+       "step": 1875
+     },
+     {
+       "epoch": 3.0,
+       "eval_loss": 1.617906928062439,
+       "eval_rouge1": 27.353,
+       "eval_rouge2": 10.6238,
+       "eval_rougeL": 21.4686,
+       "eval_rougeLsum": 21.7512,
+       "eval_runtime": 730.8407,
+       "eval_samples_per_second": 13.683,
+       "eval_steps_per_second": 0.855,
+       "step": 1875
+     },
+     {
+       "epoch": 4.0,
+       "grad_norm": 1.1745034456253052,
+       "learning_rate": 0.0008333333333333334,
+       "loss": 1.7408,
+       "step": 2500
+     },
+     {
+       "epoch": 4.0,
+       "eval_loss": 1.6142878532409668,
+       "eval_rouge1": 28.0928,
+       "eval_rouge2": 11.2857,
+       "eval_rougeL": 22.06,
+       "eval_rougeLsum": 22.3629,
+       "eval_runtime": 802.3401,
+       "eval_samples_per_second": 12.464,
+       "eval_steps_per_second": 0.779,
+       "step": 2500
+     },
+     {
+       "epoch": 5.0,
+       "grad_norm": 0.7837355136871338,
+       "learning_rate": 0.0009615384615384616,
+       "loss": 1.6492,
+       "step": 3125
+     },
+     {
+       "epoch": 5.0,
+       "eval_loss": 1.5411357879638672,
+       "eval_rouge1": 27.8209,
+       "eval_rouge2": 10.9184,
+       "eval_rougeL": 21.6819,
+       "eval_rougeLsum": 21.9773,
+       "eval_runtime": 711.0964,
+       "eval_samples_per_second": 14.063,
+       "eval_steps_per_second": 0.879,
+       "step": 3125
+     },
+     {
+       "epoch": 6.0,
+       "grad_norm": 0.5993546843528748,
+       "learning_rate": 0.0007692307692307693,
+       "loss": 1.5448,
+       "step": 3750
+     },
+     {
+       "epoch": 6.0,
+       "eval_loss": 1.4802035093307495,
+       "eval_rouge1": 28.0433,
+       "eval_rouge2": 11.4232,
+       "eval_rougeL": 22.0696,
+       "eval_rougeLsum": 22.373,
+       "eval_runtime": 728.4308,
+       "eval_samples_per_second": 13.728,
+       "eval_steps_per_second": 0.858,
+       "step": 3750
+     },
+     {
+       "epoch": 7.0,
+       "grad_norm": 0.7141011357307434,
+       "learning_rate": 0.0005769230769230769,
+       "loss": 1.4454,
+       "step": 4375
+     },
+     {
+       "epoch": 7.0,
+       "eval_loss": 1.462142825126648,
+       "eval_rouge1": 27.8552,
+       "eval_rouge2": 11.1708,
+       "eval_rougeL": 21.8958,
+       "eval_rougeLsum": 22.1949,
+       "eval_runtime": 640.7723,
+       "eval_samples_per_second": 15.606,
+       "eval_steps_per_second": 0.975,
+       "step": 4375
+     },
+     {
+       "epoch": 8.0,
+       "grad_norm": 0.6400988101959229,
+       "learning_rate": 0.00038461538461538467,
+       "loss": 1.3636,
+       "step": 5000
+     },
+     {
+       "epoch": 8.0,
+       "eval_loss": 1.4522408246994019,
+       "eval_rouge1": 28.3264,
+       "eval_rouge2": 11.7945,
+       "eval_rougeL": 22.3563,
+       "eval_rougeLsum": 22.6524,
+       "eval_runtime": 715.9922,
+       "eval_samples_per_second": 13.967,
+       "eval_steps_per_second": 0.873,
+       "step": 5000
+     }
+   ],
+   "logging_steps": 625,
+   "max_steps": 6250,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 10,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 4.652997279744e+16,
+   "train_batch_size": 16,
+   "trial_name": null,
+   "trial_params": null
+ }
checkpoint-5500/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f5223adea68bac22d3e503304376f99d2113ed58ad54d362e129fa184482b15
+ size 5240
checkpoint-6000/config.json ADDED
@@ -0,0 +1,32 @@
+ {
+   "_name_or_path": "huggingface-course/mt5-finetuned-amazon-en-es",
+   "architectures": [
+     "MT5ForConditionalGeneration"
+   ],
+   "classifier_dropout": 0.0,
+   "d_ff": 1024,
+   "d_kv": 64,
+   "d_model": 512,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "mt5",
+   "num_decoder_layers": 8,
+   "num_heads": 6,
+   "num_layers": 8,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "tie_word_embeddings": false,
+   "tokenizer_class": "T5Tokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.41.2",
+   "use_cache": true,
+   "vocab_size": 250112
+ }
checkpoint-6000/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "decoder_start_token_id": 0,
+   "eos_token_id": 1,
+   "pad_token_id": 0,
+   "transformers_version": "4.41.2"
+ }
checkpoint-6000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:89f623f4a01b722905510276a1a06b7b68cd255827e342e4c3ca4ddee56ed498
+ size 1200729512
checkpoint-6000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a089a3a25c69d2de6113cea9c64a35519129aaf471e2172dfc6e07e6723a273b
+ size 2879866
checkpoint-6000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:656e79bc514cbc36d830d5dd590d52aac19362cdb25a3184fb8a1d03468f9a60
+ size 14244
checkpoint-6000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:608dba54e3cf2298f5418707e0dd61313fe32d937145e62ea0bb8a1f2fd326d8
+ size 1064
checkpoint-6000/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
checkpoint-6000/spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
+ size 4309802
checkpoint-6000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4ee660ae764cd56ecac9dbe82d766502034efac119fc579414afcd68d4f6b922
+ size 16315191
checkpoint-6000/tokenizer_config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "</s>",
+   "extra_ids": 0,
+   "max_length": 30,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<pad>",
+   "sp_model_kwargs": {},
+   "stride": 0,
+   "tokenizer_class": "T5Tokenizer",
+   "truncation_side": "right",
+   "truncation_strategy": "longest_first",
+   "unk_token": "<unk>"
+ }
checkpoint-6000/trainer_state.json ADDED
@@ -0,0 +1,204 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 9.6,
+   "eval_steps": 75,
+   "global_step": 6000,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "grad_norm": 1.6603230237960815,
+       "learning_rate": 0.00020833333333333335,
+       "loss": 2.7781,
+       "step": 625
+     },
+     {
+       "epoch": 1.0,
+       "eval_loss": 1.839109182357788,
+       "eval_rouge1": 28.5024,
+       "eval_rouge2": 11.2717,
+       "eval_rougeL": 22.108,
+       "eval_rougeLsum": 22.4361,
+       "eval_runtime": 722.0927,
+       "eval_samples_per_second": 13.849,
+       "eval_steps_per_second": 0.866,
+       "step": 625
+     },
+     {
+       "epoch": 2.0,
+       "grad_norm": 1.18350088596344,
+       "learning_rate": 0.0004166666666666667,
+       "loss": 2.0622,
+       "step": 1250
+     },
+     {
+       "epoch": 2.0,
+       "eval_loss": 1.7575578689575195,
+       "eval_rouge1": 28.0245,
+       "eval_rouge2": 10.6112,
+       "eval_rougeL": 21.7353,
+       "eval_rougeLsum": 22.0685,
+       "eval_runtime": 728.2015,
+       "eval_samples_per_second": 13.732,
+       "eval_steps_per_second": 0.858,
+       "step": 1250
+     },
+     {
+       "epoch": 3.0,
+       "grad_norm": 0.9635187983512878,
+       "learning_rate": 0.000625,
+       "loss": 1.8636,
+       "step": 1875
+     },
+     {
+       "epoch": 3.0,
+       "eval_loss": 1.617906928062439,
+       "eval_rouge1": 27.353,
+       "eval_rouge2": 10.6238,
+       "eval_rougeL": 21.4686,
+       "eval_rougeLsum": 21.7512,
+       "eval_runtime": 730.8407,
+       "eval_samples_per_second": 13.683,
+       "eval_steps_per_second": 0.855,
+       "step": 1875
+     },
+     {
+       "epoch": 4.0,
+       "grad_norm": 1.1745034456253052,
+       "learning_rate": 0.0008333333333333334,
+       "loss": 1.7408,
+       "step": 2500
+     },
+     {
+       "epoch": 4.0,
+       "eval_loss": 1.6142878532409668,
+       "eval_rouge1": 28.0928,
+       "eval_rouge2": 11.2857,
+       "eval_rougeL": 22.06,
+       "eval_rougeLsum": 22.3629,
+       "eval_runtime": 802.3401,
+       "eval_samples_per_second": 12.464,
+       "eval_steps_per_second": 0.779,
+       "step": 2500
+     },
+     {
+       "epoch": 5.0,
+       "grad_norm": 0.7837355136871338,
+       "learning_rate": 0.0009615384615384616,
+       "loss": 1.6492,
+       "step": 3125
+     },
+     {
+       "epoch": 5.0,
+       "eval_loss": 1.5411357879638672,
+       "eval_rouge1": 27.8209,
+       "eval_rouge2": 10.9184,
+       "eval_rougeL": 21.6819,
+       "eval_rougeLsum": 21.9773,
+       "eval_runtime": 711.0964,
+       "eval_samples_per_second": 14.063,
+       "eval_steps_per_second": 0.879,
+       "step": 3125
+     },
+     {
+       "epoch": 6.0,
+       "grad_norm": 0.5993546843528748,
+       "learning_rate": 0.0007692307692307693,
+       "loss": 1.5448,
+       "step": 3750
+     },
+     {
+       "epoch": 6.0,
+       "eval_loss": 1.4802035093307495,
+       "eval_rouge1": 28.0433,
+       "eval_rouge2": 11.4232,
+       "eval_rougeL": 22.0696,
+       "eval_rougeLsum": 22.373,
+       "eval_runtime": 728.4308,
+       "eval_samples_per_second": 13.728,
+       "eval_steps_per_second": 0.858,
+       "step": 3750
+     },
+     {
+       "epoch": 7.0,
+       "grad_norm": 0.7141011357307434,
+       "learning_rate": 0.0005769230769230769,
+       "loss": 1.4454,
+       "step": 4375
+     },
+     {
+       "epoch": 7.0,
+       "eval_loss": 1.462142825126648,
+       "eval_rouge1": 27.8552,
+       "eval_rouge2": 11.1708,
+       "eval_rougeL": 21.8958,
+       "eval_rougeLsum": 22.1949,
+       "eval_runtime": 640.7723,
+       "eval_samples_per_second": 15.606,
+       "eval_steps_per_second": 0.975,
+       "step": 4375
+     },
+     {
+       "epoch": 8.0,
+       "grad_norm": 0.6400988101959229,
+       "learning_rate": 0.00038461538461538467,
+       "loss": 1.3636,
+       "step": 5000
+     },
+     {
+       "epoch": 8.0,
+       "eval_loss": 1.4522408246994019,
+       "eval_rouge1": 28.3264,
+       "eval_rouge2": 11.7945,
+       "eval_rougeL": 22.3563,
+       "eval_rougeLsum": 22.6524,
+       "eval_runtime": 715.9922,
+       "eval_samples_per_second": 13.967,
+       "eval_steps_per_second": 0.873,
+       "step": 5000
+     },
+     {
+       "epoch": 9.0,
+       "grad_norm": 0.6958301067352295,
+       "learning_rate": 0.00019230769230769233,
+       "loss": 1.2978,
+       "step": 5625
+     },
+     {
+       "epoch": 9.0,
+       "eval_loss": 1.4346853494644165,
+       "eval_rouge1": 28.444,
+       "eval_rouge2": 11.9388,
+       "eval_rougeL": 22.4279,
+       "eval_rougeLsum": 22.7344,
+       "eval_runtime": 652.829,
+       "eval_samples_per_second": 15.318,
+       "eval_steps_per_second": 0.957,
+       "step": 5625
+     }
+   ],
+   "logging_steps": 625,
+   "max_steps": 6250,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 10,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 5.075997032448e+16,
+   "train_batch_size": 16,
+   "trial_name": null,
+   "trial_params": null
+ }
checkpoint-6000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f5223adea68bac22d3e503304376f99d2113ed58ad54d362e129fa184482b15
+ size 5240
generation_config.json CHANGED
@@ -2,5 +2,9 @@
   "decoder_start_token_id": 0,
   "eos_token_id": 1,
   "pad_token_id": 0,
-  "transformers_version": "4.41.2"
+  "transformers_version": "4.41.2",
+  "max_length": 256,
+  "num_beams": 5,
+  "no_repeat_ngram_size": 2,
+  "early_stopping": 1
   }
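The change above bakes beam-search defaults into the root generation_config.json, so a plain model.generate() call on the uploaded model picks up max_length=256, num_beams=5, no_repeat_ngram_size=2, and early stopping. A minimal sketch (the repo id is a placeholder for this repository):

# Sketch: generate() now reads its defaults from the updated generation_config.json.
from transformers import AutoTokenizer, MT5ForConditionalGeneration

repo_id = "your-username/your-repo"  # placeholder for this repository
model = MT5ForConditionalGeneration.from_pretrained(repo_id)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

inputs = tokenizer("This product exceeded my expectations in every way.", return_tensors="pt")
# Equivalent to generate(..., max_length=256, num_beams=5, no_repeat_ngram_size=2, early_stopping=True)
outputs = model.generate(**inputs)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))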