eglkan1 committed
Commit: 9a8cacd
Parent: 61d5e08

End of training

README.md ADDED
@@ -0,0 +1,146 @@
+ ---
+ license: apache-2.0
+ base_model: google/mt5-base
+ tags:
+ - generated_from_trainer
+ metrics:
+ - rouge
+ model-index:
+ - name: mt5-translated-lithuanian-simplifier
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # mt5-translated-lithuanian-simplifier
+
+ This model is a fine-tuned version of [google/mt5-base](https://huggingface.co/google/mt5-base) on an unspecified dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.0761
+ - Rouge1: 0.7877
+ - Rouge2: 0.6566
+ - Rougel: 0.7845
+ - Gen Len: 49.2293
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0001
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_steps: 500
+ - num_epochs: 8
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Gen Len |
+ |:-------------:|:-----:|:-----:|:---------------:|:------:|:------:|:------:|:-------:|
+ | 23.9322 | 0.1 | 200 | 19.1649 | 0.016 | 0.0004 | 0.0146 | 512.0 |
+ | 2.5416 | 0.19 | 400 | 1.4406 | 0.035 | 0.0002 | 0.0345 | 51.3394 |
+ | 0.7449 | 0.29 | 600 | 0.7221 | 0.0021 | 0.0 | 0.0021 | 50.2293 |
+ | 0.4405 | 0.38 | 800 | 0.2164 | 0.5491 | 0.3593 | 0.5367 | 49.4955 |
+ | 0.177 | 0.48 | 1000 | 0.1672 | 0.6294 | 0.4636 | 0.6209 | 49.2293 |
+ | 0.1838 | 0.57 | 1200 | 0.1561 | 0.6214 | 0.4375 | 0.613 | 49.2293 |
+ | 0.1471 | 0.67 | 1400 | 0.1295 | 0.7071 | 0.5673 | 0.6998 | 49.2293 |
+ | 0.1622 | 0.77 | 1600 | 0.1229 | 0.6929 | 0.5402 | 0.6858 | 49.2293 |
+ | 0.1255 | 0.86 | 1800 | 0.1192 | 0.7044 | 0.5547 | 0.6978 | 49.2293 |
+ | 0.1281 | 0.96 | 2000 | 0.1150 | 0.7169 | 0.5718 | 0.7103 | 49.2293 |
+ | 0.1561 | 1.05 | 2200 | 0.1088 | 0.7165 | 0.5688 | 0.7108 | 49.2293 |
+ | 0.145 | 1.15 | 2400 | 0.1064 | 0.7321 | 0.5921 | 0.7263 | 49.2293 |
+ | 0.1207 | 1.25 | 2600 | 0.1030 | 0.7348 | 0.5957 | 0.7291 | 49.2293 |
+ | 0.1151 | 1.34 | 2800 | 0.1014 | 0.7289 | 0.5859 | 0.7239 | 49.2293 |
+ | 0.1001 | 1.44 | 3000 | 0.0983 | 0.7402 | 0.6003 | 0.7349 | 49.2293 |
+ | 0.1354 | 1.53 | 3200 | 0.0963 | 0.738 | 0.598 | 0.7332 | 49.2293 |
+ | 0.1092 | 1.63 | 3400 | 0.0978 | 0.7446 | 0.607 | 0.7394 | 49.2293 |
+ | 0.1109 | 1.72 | 3600 | 0.0973 | 0.7427 | 0.6034 | 0.7377 | 49.2293 |
+ | 0.1083 | 1.82 | 3800 | 0.0950 | 0.7479 | 0.6094 | 0.7432 | 49.2293 |
+ | 0.1348 | 1.92 | 4000 | 0.0958 | 0.7498 | 0.6121 | 0.745 | 49.2293 |
+ | 0.1004 | 2.01 | 4200 | 0.0898 | 0.7539 | 0.6152 | 0.7494 | 49.2293 |
+ | 0.1131 | 2.11 | 4400 | 0.0925 | 0.753 | 0.6154 | 0.7488 | 49.2293 |
+ | 0.1312 | 2.2 | 4600 | 0.0919 | 0.755 | 0.6183 | 0.7508 | 49.2293 |
+ | 0.1139 | 2.3 | 4800 | 0.0908 | 0.756 | 0.6182 | 0.7518 | 49.2293 |
+ | 0.1168 | 2.39 | 5000 | 0.0880 | 0.7574 | 0.6202 | 0.7533 | 49.2293 |
+ | 0.0793 | 2.49 | 5200 | 0.0897 | 0.7575 | 0.6193 | 0.7531 | 49.2293 |
+ | 0.0869 | 2.59 | 5400 | 0.0866 | 0.7605 | 0.6228 | 0.7564 | 49.2293 |
+ | 0.1053 | 2.68 | 5600 | 0.0870 | 0.7594 | 0.6203 | 0.7551 | 49.2293 |
+ | 0.0889 | 2.78 | 5800 | 0.0893 | 0.7609 | 0.6237 | 0.7568 | 49.2293 |
+ | 0.0982 | 2.87 | 6000 | 0.0873 | 0.7637 | 0.6279 | 0.7599 | 49.2293 |
+ | 0.0838 | 2.97 | 6200 | 0.0846 | 0.7665 | 0.6309 | 0.7626 | 49.2293 |
+ | 0.0829 | 3.07 | 6400 | 0.0844 | 0.7665 | 0.6315 | 0.7629 | 49.2293 |
+ | 0.068 | 3.16 | 6600 | 0.0836 | 0.7695 | 0.6358 | 0.7658 | 49.2293 |
+ | 0.0747 | 3.26 | 6800 | 0.0848 | 0.7675 | 0.6322 | 0.7639 | 49.2293 |
+ | 0.0792 | 3.35 | 7000 | 0.0840 | 0.7691 | 0.6342 | 0.7656 | 49.2293 |
+ | 0.0739 | 3.45 | 7200 | 0.0820 | 0.7713 | 0.6365 | 0.7676 | 49.2293 |
+ | 0.0793 | 3.54 | 7400 | 0.0813 | 0.7723 | 0.6374 | 0.7685 | 49.2293 |
+ | 0.0908 | 3.64 | 7600 | 0.0819 | 0.7731 | 0.6388 | 0.7696 | 49.2293 |
+ | 0.1125 | 3.74 | 7800 | 0.0811 | 0.774 | 0.6402 | 0.7705 | 49.2293 |
+ | 0.1231 | 3.83 | 8000 | 0.0805 | 0.7736 | 0.6391 | 0.7699 | 49.2293 |
+ | 0.0805 | 3.93 | 8200 | 0.0806 | 0.7736 | 0.6383 | 0.7698 | 49.2293 |
+ | 0.0798 | 4.02 | 8400 | 0.0806 | 0.7758 | 0.6413 | 0.7726 | 49.2293 |
+ | 0.061 | 4.12 | 8600 | 0.0807 | 0.7738 | 0.6391 | 0.7705 | 49.2293 |
+ | 0.0636 | 4.21 | 8800 | 0.0810 | 0.7763 | 0.6424 | 0.7731 | 49.2293 |
+ | 0.0813 | 4.31 | 9000 | 0.0798 | 0.7765 | 0.6418 | 0.7731 | 49.2293 |
+ | 0.0664 | 4.41 | 9200 | 0.0804 | 0.7779 | 0.6441 | 0.7744 | 49.2293 |
+ | 0.077 | 4.5 | 9400 | 0.0783 | 0.7775 | 0.6432 | 0.774 | 49.2293 |
+ | 0.0769 | 4.6 | 9600 | 0.0788 | 0.7786 | 0.6446 | 0.7752 | 49.2293 |
+ | 0.0874 | 4.69 | 9800 | 0.0796 | 0.7782 | 0.6455 | 0.7749 | 49.2293 |
+ | 0.0682 | 4.79 | 10000 | 0.0784 | 0.7783 | 0.6452 | 0.7752 | 49.2293 |
+ | 0.0649 | 4.89 | 10200 | 0.0781 | 0.7788 | 0.6453 | 0.7757 | 49.2293 |
+ | 0.0594 | 4.98 | 10400 | 0.0791 | 0.7795 | 0.6468 | 0.7762 | 49.2293 |
+ | 0.1001 | 5.08 | 10600 | 0.0775 | 0.7794 | 0.6464 | 0.7762 | 49.2293 |
+ | 0.065 | 5.17 | 10800 | 0.0794 | 0.7794 | 0.6474 | 0.7762 | 49.2293 |
+ | 0.0505 | 5.27 | 11000 | 0.0787 | 0.7809 | 0.6481 | 0.7775 | 49.2293 |
+ | 0.0904 | 5.36 | 11200 | 0.0772 | 0.7825 | 0.6504 | 0.7793 | 49.2293 |
+ | 0.0782 | 5.46 | 11400 | 0.0777 | 0.7835 | 0.651 | 0.7803 | 49.2293 |
+ | 0.0758 | 5.56 | 11600 | 0.0774 | 0.7823 | 0.6505 | 0.7792 | 49.2293 |
+ | 0.0685 | 5.65 | 11800 | 0.0778 | 0.7819 | 0.6498 | 0.7787 | 49.2293 |
+ | 0.0664 | 5.75 | 12000 | 0.0774 | 0.7818 | 0.6493 | 0.7786 | 49.2293 |
+ | 0.0841 | 5.84 | 12200 | 0.0770 | 0.7848 | 0.6527 | 0.7813 | 49.2293 |
+ | 0.0867 | 5.94 | 12400 | 0.0765 | 0.7844 | 0.6522 | 0.7812 | 49.2293 |
+ | 0.0572 | 6.03 | 12600 | 0.0772 | 0.7849 | 0.6522 | 0.7816 | 49.2293 |
+ | 0.0554 | 6.13 | 12800 | 0.0775 | 0.7844 | 0.6526 | 0.7812 | 49.2293 |
+ | 0.0725 | 6.23 | 13000 | 0.0774 | 0.7851 | 0.6534 | 0.7822 | 49.2293 |
+ | 0.0952 | 6.32 | 13200 | 0.0778 | 0.7848 | 0.6527 | 0.7817 | 49.2293 |
+ | 0.0795 | 6.42 | 13400 | 0.0764 | 0.7858 | 0.6542 | 0.7826 | 49.2293 |
+ | 0.0682 | 6.51 | 13600 | 0.0772 | 0.7852 | 0.6527 | 0.7819 | 49.2293 |
+ | 0.0483 | 6.61 | 13800 | 0.0777 | 0.785 | 0.6525 | 0.7815 | 49.2293 |
+ | 0.0725 | 6.7 | 14000 | 0.0767 | 0.7864 | 0.6545 | 0.7831 | 49.2293 |
+ | 0.0675 | 6.8 | 14200 | 0.0773 | 0.786 | 0.6551 | 0.7827 | 49.2293 |
+ | 0.0706 | 6.9 | 14400 | 0.0758 | 0.7867 | 0.6556 | 0.7837 | 49.2293 |
+ | 0.0785 | 6.99 | 14600 | 0.0772 | 0.7866 | 0.6559 | 0.7835 | 49.2293 |
+ | 0.0796 | 7.09 | 14800 | 0.0763 | 0.7872 | 0.6564 | 0.7841 | 49.2293 |
+ | 0.0761 | 7.18 | 15000 | 0.0757 | 0.7879 | 0.6566 | 0.7848 | 49.2293 |
+ | 0.0598 | 7.28 | 15200 | 0.0758 | 0.788 | 0.6568 | 0.7849 | 49.2293 |
+ | 0.0587 | 7.38 | 15400 | 0.0768 | 0.7872 | 0.6556 | 0.7839 | 49.2293 |
+ | 0.0859 | 7.47 | 15600 | 0.0765 | 0.7875 | 0.6559 | 0.7842 | 49.2293 |
+ | 0.061 | 7.57 | 15800 | 0.0764 | 0.7876 | 0.6564 | 0.7845 | 49.2293 |
+ | 0.0718 | 7.66 | 16000 | 0.0764 | 0.7871 | 0.6558 | 0.784 | 49.2293 |
+ | 0.0695 | 7.76 | 16200 | 0.0763 | 0.7873 | 0.656 | 0.7842 | 49.2293 |
+ | 0.0678 | 7.85 | 16400 | 0.0762 | 0.7875 | 0.6565 | 0.7844 | 49.2293 |
+ | 0.0751 | 7.95 | 16600 | 0.0761 | 0.7877 | 0.6566 | 0.7845 | 49.2293 |
+
+
+ ### Framework versions
+
+ - Transformers 4.36.2
+ - Pytorch 2.1.1
+ - Datasets 2.16.1
+ - Tokenizers 0.15.0
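
The card's usage sections are still stubs, so a minimal inference sketch may help. It assumes the checkpoint is published under the committer's namespace as `eglkan1/mt5-translated-lithuanian-simplifier`, and the Lithuanian input sentence is a placeholder rather than an example from the training data:

```python
# Minimal inference sketch -- the repo id and example sentence are assumptions,
# not confirmed by the model card.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

repo_id = "eglkan1/mt5-translated-lithuanian-simplifier"  # assumed repo id
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForSeq2SeqLM.from_pretrained(repo_id)

text = "Įdėkite sudėtingą lietuvišką sakinį čia."  # placeholder input
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

# Gen Len in the results table hovers around 49 tokens, so a cap of 64 new
# tokens leaves headroom without letting generation run long.
outputs = model.generate(**inputs, max_new_tokens=64, num_beams=4)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

Beam search is a common default for simplification-style seq2seq decoding; greedy decoding also works if latency matters more than output quality.
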
config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "google/mt5-base",
+   "architectures": [
+     "T5ForConditionalGeneration"
+   ],
+   "classifier_dropout": 0.0,
+   "d_ff": 2048,
+   "d_kv": 64,
+   "d_model": 768,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "t5",
+   "num_decoder_layers": 12,
+   "num_heads": 12,
+   "num_layers": 12,
+   "output_past": true,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "tie_word_embeddings": false,
+   "tokenizer_class": "T5Tokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.36.2",
+   "use_cache": true,
+   "vocab_size": 250112
+ }
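
The fields above pin down a standard mT5-base encoder-decoder. As a sanity check, they can be read back through `AutoConfig`; a small sketch (repo id assumed, as above; the printed values simply mirror the JSON):

```python
from transformers import AutoConfig

# Reads config.json from the hub; the prints mirror the values shown above.
config = AutoConfig.from_pretrained("eglkan1/mt5-translated-lithuanian-simplifier")

print(config.model_type)                              # "t5"
print(config.num_layers, config.num_decoder_layers)   # 12, 12
print(config.d_model, config.num_heads, config.d_ff)  # 768, 12, 2048
print(config.vocab_size, config.tie_word_embeddings)  # 250112, False
```
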
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "decoder_start_token_id": 0,
+   "eos_token_id": 1,
+   "pad_token_id": 0,
+   "transformers_version": "4.36.2"
+ }
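
`generate()` picks these decoding defaults up automatically; they can also be loaded and inspected explicitly, as sketched below (repo id again assumed):

```python
from transformers import GenerationConfig

# Loads generation_config.json from the hub (repo id is an assumption).
gen = GenerationConfig.from_pretrained("eglkan1/mt5-translated-lithuanian-simplifier")
print(gen.decoder_start_token_id, gen.eos_token_id, gen.pad_token_id)  # 0, 1, 0

# Keyword arguments passed to model.generate(...) override these defaults,
# e.g. model.generate(**inputs, num_beams=4) still keeps the token ids above.
```
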
logs/events.out.tfevents.1706093926.ESBeastModeOn.23992.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be6a1fde886999a4e0866d47ce162519d5b3e9be5b2f2c7b75a9cc12ad6bd325
+ size 306405
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c7f93b8ff6eeea2265910a88603f0fed2e4e9cf2d16fbaf2188b0365bcb4ee60
+ size 2329638768
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
+ size 4309802
tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "</s>",
+   "extra_ids": 0,
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<pad>",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "T5Tokenizer",
+   "unk_token": "<unk>"
+ }
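
The tokenizer is a plain `T5Tokenizer` over the mT5 SentencePiece model with no extra sentinel tokens (`extra_ids: 0`). A quick sketch for loading it and confirming the special tokens declared above (repo id assumed):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("eglkan1/mt5-translated-lithuanian-simplifier")

# Matches special_tokens_map.json and tokenizer_config.json above.
print(tokenizer.eos_token, tokenizer.pad_token, tokenizer.unk_token)  # </s> <pad> <unk>

# model_max_length is the "unbounded" sentinel value above, so truncate
# explicitly at encode time rather than relying on a default limit.
ids = tokenizer("Labas rytas", truncation=True, max_length=512).input_ids
print(tokenizer.convert_ids_to_tokens(ids))
```
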
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79116439cccaae422d028f86eb2a0a7ce5026cbccf411c2b323010e359fbfac6
+ size 4792
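
`training_args.bin` is a pickled `Seq2SeqTrainingArguments` object rather than a text file. Assuming compatible `transformers` and `torch` versions are installed, a sketch of inspecting it and cross-checking the hyperparameters listed in the README:

```python
import torch

# The pickle references transformers classes, so transformers must be importable;
# recent torch versions also need weights_only=False to load non-tensor pickles.
args = torch.load("training_args.bin", weights_only=False)

print(args.learning_rate)                # 0.0001 per the README
print(args.per_device_train_batch_size)  # 4
print(args.num_train_epochs)             # 8
print(args.warmup_steps)                 # 500
print(args.lr_scheduler_type)            # linear
print(args.seed)                         # 42
```
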