xshubhamx committed on
Commit
055f475
1 Parent(s): c858136

End of training

Browse files
README.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: google/flan-t5-small
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
+ model-index:
9
+ - name: flan-t5-small-20-epochs-fine-tune
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # flan-t5-small-20-epochs-fine-tune
17
+
18
+ This model is a fine-tuned version of [google/flan-t5-small](https://huggingface.co/google/flan-t5-small) on an unknown dataset.
19
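+ It achieves the following results on the evaluation set (note: accuracy and F1 scores of exactly 1 alongside a loss of 1.1136 are unlikely to be consistent; the metric computation should be verified before relying on these values):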
20
+ - Loss: 1.1136
21
+ - Accuracy: 1
22
+ - F1 Micro: 1
23
+ - F1 Macro: 1
24
+ - F1 Weighted: 1
25
+
26
+ ## Model description
27
+
28
+ More information needed
29
+
30
+ ## Intended uses & limitations
31
+
32
+ More information needed
33
+
34
+ ## Training and evaluation data
35
+
36
+ More information needed
37
+
38
+ ## Training procedure
39
+
40
+ ### Training hyperparameters
41
+
42
+ The following hyperparameters were used during training:
43
+ - learning_rate: 5e-05
44
+ - train_batch_size: 8
45
+ - eval_batch_size: 8
46
+ - seed: 42
47
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
48
+ - lr_scheduler_type: linear
49
+ - num_epochs: 20
50
+
51
+ ### Training results
52
+
53
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 Micro | F1 Macro | F1 Weighted |
54
+ |:-------------:|:-----:|:-----:|:---------------:|:--------:|:--------:|:--------:|:-----------:|
55
+ | 2.0193 | 1.0 | 643 | 1.3910 | 1 | 1 | 1 | 1 |
56
+ | 1.3609 | 2.0 | 1286 | 0.9353 | 1 | 1 | 1 | 1 |
57
+ | 1.0198 | 3.0 | 1929 | 0.8408 | 1 | 1 | 1 | 1 |
58
+ | 0.7264 | 4.0 | 2572 | 0.7945 | 1 | 1 | 1 | 1 |
59
+ | 0.6595 | 5.0 | 3215 | 0.7847 | 1 | 1 | 1 | 1 |
60
+ | 0.5689 | 6.0 | 3858 | 0.7898 | 1 | 1 | 1 | 1 |
61
+ | 0.4745 | 7.0 | 4501 | 0.7969 | 1 | 1 | 1 | 1 |
62
+ | 0.4204 | 8.0 | 5144 | 0.8213 | 1 | 1 | 1 | 1 |
63
+ | 0.403 | 9.0 | 5787 | 0.8726 | 1 | 1 | 1 | 1 |
64
+ | 0.3578 | 10.0 | 6430 | 0.8777 | 1 | 1 | 1 | 1 |
65
+ | 0.3238 | 11.0 | 7073 | 0.9143 | 1 | 1 | 1 | 1 |
66
+ | 0.287 | 12.0 | 7716 | 0.9656 | 1 | 1 | 1 | 1 |
67
+ | 0.2903 | 13.0 | 8359 | 0.9580 | 1 | 1 | 1 | 1 |
68
+ | 0.2463 | 14.0 | 9002 | 1.0306 | 1 | 1 | 1 | 1 |
69
+ | 0.2318 | 15.0 | 9645 | 1.0428 | 1 | 1 | 1 | 1 |
70
+ | 0.2265 | 16.0 | 10288 | 1.0483 | 1 | 1 | 1 | 1 |
71
+ | 0.1954 | 17.0 | 10931 | 1.0825 | 1 | 1 | 1 | 1 |
72
+ | 0.191 | 18.0 | 11574 | 1.0972 | 1 | 1 | 1 | 1 |
73
+ | 0.1774 | 19.0 | 12217 | 1.1163 | 1 | 1 | 1 | 1 |
74
+ | 0.1847 | 20.0 | 12860 | 1.1136 | 1 | 1 | 1 | 1 |
75
+
76
+
77
+ ### Framework versions
78
+
79
+ - Transformers 4.38.2
80
+ - PyTorch 2.1.2
81
+ - Datasets 2.1.0
82
+ - Tokenizers 0.15.2
config.json ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-small",
3
+ "architectures": [
4
+ "T5ForSequenceClassification"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 1024,
8
+ "d_kv": 64,
9
+ "d_model": 512,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "id2label": {
16
+ "0": "LABEL_0",
17
+ "1": "LABEL_1",
18
+ "2": "LABEL_2",
19
+ "3": "LABEL_3",
20
+ "4": "LABEL_4",
21
+ "5": "LABEL_5",
22
+ "6": "LABEL_6",
23
+ "7": "LABEL_7",
24
+ "8": "LABEL_8",
25
+ "9": "LABEL_9",
26
+ "10": "LABEL_10",
27
+ "11": "LABEL_11",
28
+ "12": "LABEL_12",
29
+ "13": "LABEL_13",
30
+ "14": "LABEL_14"
31
+ },
32
+ "initializer_factor": 1.0,
33
+ "is_encoder_decoder": true,
34
+ "is_gated_act": true,
35
+ "label2id": {
36
+ "LABEL_0": 0,
37
+ "LABEL_1": 1,
38
+ "LABEL_10": 10,
39
+ "LABEL_11": 11,
40
+ "LABEL_12": 12,
41
+ "LABEL_13": 13,
42
+ "LABEL_14": 14,
43
+ "LABEL_2": 2,
44
+ "LABEL_3": 3,
45
+ "LABEL_4": 4,
46
+ "LABEL_5": 5,
47
+ "LABEL_6": 6,
48
+ "LABEL_7": 7,
49
+ "LABEL_8": 8,
50
+ "LABEL_9": 9
51
+ },
52
+ "layer_norm_epsilon": 1e-06,
53
+ "model_type": "t5",
54
+ "n_positions": 512,
55
+ "num_decoder_layers": 8,
56
+ "num_heads": 6,
57
+ "num_layers": 8,
58
+ "output_past": true,
59
+ "pad_token_id": 0,
60
+ "problem_type": "single_label_classification",
61
+ "relative_attention_max_distance": 128,
62
+ "relative_attention_num_buckets": 32,
63
+ "task_specific_params": {
64
+ "summarization": {
65
+ "early_stopping": true,
66
+ "length_penalty": 2.0,
67
+ "max_length": 200,
68
+ "min_length": 30,
69
+ "no_repeat_ngram_size": 3,
70
+ "num_beams": 4,
71
+ "prefix": "summarize: "
72
+ },
73
+ "translation_en_to_de": {
74
+ "early_stopping": true,
75
+ "max_length": 300,
76
+ "num_beams": 4,
77
+ "prefix": "translate English to German: "
78
+ },
79
+ "translation_en_to_fr": {
80
+ "early_stopping": true,
81
+ "max_length": 300,
82
+ "num_beams": 4,
83
+ "prefix": "translate English to French: "
84
+ },
85
+ "translation_en_to_ro": {
86
+ "early_stopping": true,
87
+ "max_length": 300,
88
+ "num_beams": 4,
89
+ "prefix": "translate English to Romanian: "
90
+ }
91
+ },
92
+ "tie_word_embeddings": false,
93
+ "torch_dtype": "float32",
94
+ "transformers_version": "4.38.2",
95
+ "use_cache": true,
96
+ "vocab_size": 32128
97
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a327801b6d09083d06c396aa309748558e1a220791cfbd285204f296027db7a7
3
+ size 243152884
runs/Apr07_12-10-23_6b1da69729bf/events.out.tfevents.1712491854.6b1da69729bf.34.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1b611263388bd144afbcd38d3050025ebc61581ef015d3e45713594ae61c15c
3
+ size 21407
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd1b3d5beca7a74bc163e0ef81d85c68f551c2091cd791eb78e150a1350fa3fb
3
+ size 4920