BeenaSamuel committed on
Commit
e7cf2d3
1 Parent(s): ee45355

End of training

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: apache-2.0
3
- base_model: t5-base
4
  tags:
5
  - generated_from_trainer
6
  metrics:
@@ -15,13 +15,13 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # results_t5base
17
 
18
- This model is a fine-tuned version of [t5-base](https://huggingface.co/t5-base) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.3062
21
- - Rouge1: 0.9157
22
- - Rouge2: 0.8521
23
- - Rougel: 0.8998
24
- - Gen Len: 233.0899
25
 
26
  ## Model description
27
 
@@ -51,24 +51,24 @@ The following hyperparameters were used during training:
51
 
52
  ### Training results
53
 
54
- | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Gen Len |
55
- |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:--------:|
56
- | 0.5016 | 0.9 | 200 | 0.3833 | 0.8981 | 0.8255 | 0.8793 | 232.7416 |
57
- | 0.3662 | 1.79 | 400 | 0.3419 | 0.9046 | 0.835 | 0.8874 | 233.0899 |
58
- | 0.3066 | 2.69 | 600 | 0.3249 | 0.9083 | 0.84 | 0.8911 | 233.0899 |
59
- | 0.3385 | 3.59 | 800 | 0.3178 | 0.9107 | 0.8436 | 0.8939 | 233.0899 |
60
- | 0.2169 | 4.48 | 1000 | 0.3131 | 0.9122 | 0.8458 | 0.8955 | 233.0899 |
61
- | 0.2771 | 5.38 | 1200 | 0.3089 | 0.913 | 0.8474 | 0.8962 | 233.0899 |
62
- | 0.2718 | 6.28 | 1400 | 0.3090 | 0.914 | 0.85 | 0.898 | 233.0899 |
63
- | 0.23 | 7.17 | 1600 | 0.3066 | 0.9145 | 0.8506 | 0.8983 | 233.0899 |
64
- | 0.27 | 8.07 | 1800 | 0.3057 | 0.9155 | 0.8521 | 0.8997 | 233.0876 |
65
- | 0.2803 | 8.97 | 2000 | 0.3052 | 0.9152 | 0.8514 | 0.8992 | 233.0899 |
66
- | 0.228 | 9.87 | 2200 | 0.3062 | 0.9157 | 0.8521 | 0.8998 | 233.0899 |
67
 
68
 
69
  ### Framework versions
70
 
71
- - Transformers 4.38.2
72
  - Pytorch 2.1.2
73
- - Datasets 2.18.0
74
- - Tokenizers 0.15.2
 
1
  ---
2
  license: apache-2.0
3
+ base_model: t5-small
4
  tags:
5
  - generated_from_trainer
6
  metrics:
 
15
 
16
  # results_t5base
17
 
18
+ This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.3660
21
+ - Rouge1: 0.904
22
+ - Rouge2: 0.8349
23
+ - Rougel: 0.8863
24
+ - Gen Len: 237.7528
25
 
26
  ## Model description
27
 
 
51
 
52
  ### Training results
53
 
54
+ | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Gen Len |
55
+ |:-------------:|:------:|:----:|:---------------:|:------:|:------:|:------:|:--------:|
56
+ | 0.6675 | 0.8969 | 200 | 0.5012 | 0.8797 | 0.7929 | 0.8578 | 236.6854 |
57
+ | 0.5426 | 1.7937 | 400 | 0.4133 | 0.8937 | 0.8192 | 0.8751 | 237.7101 |
58
+ | 0.2768 | 2.6906 | 600 | 0.3971 | 0.8984 | 0.8262 | 0.8797 | 237.7551 |
59
+ | 0.4136 | 3.5874 | 800 | 0.3864 | 0.9001 | 0.8295 | 0.8824 | 237.7483 |
60
+ | 0.3067 | 4.4843 | 1000 | 0.3815 | 0.9011 | 0.8307 | 0.8833 | 237.7506 |
61
+ | 0.4425 | 5.3812 | 1200 | 0.3735 | 0.9015 | 0.8319 | 0.884 | 237.7528 |
62
+ | 0.4285 | 6.2780 | 1400 | 0.3720 | 0.9026 | 0.8334 | 0.885 | 237.7528 |
63
+ | 0.3025 | 7.1749 | 1600 | 0.3687 | 0.9039 | 0.8345 | 0.8859 | 237.7528 |
64
+ | 0.2699 | 8.0717 | 1800 | 0.3681 | 0.9034 | 0.8341 | 0.8857 | 237.7528 |
65
+ | 0.4072 | 8.9686 | 2000 | 0.3657 | 0.9039 | 0.8349 | 0.8862 | 237.7528 |
66
+ | 0.4555 | 9.8655 | 2200 | 0.3660 | 0.904 | 0.8349 | 0.8863 | 237.7528 |
67
 
68
 
69
  ### Framework versions
70
 
71
+ - Transformers 4.40.2
72
  - Pytorch 2.1.2
73
+ - Datasets 2.19.1
74
+ - Tokenizers 0.19.1
config.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "_name_or_path": "t5-base",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
6
  "classifier_dropout": 0.0,
7
- "d_ff": 3072,
8
  "d_kv": 64,
9
- "d_model": 768,
10
  "decoder_start_token_id": 0,
11
  "dense_act_fn": "relu",
12
  "dropout_rate": 0.1,
@@ -18,9 +18,9 @@
18
  "layer_norm_epsilon": 1e-06,
19
  "model_type": "t5",
20
  "n_positions": 512,
21
- "num_decoder_layers": 12,
22
- "num_heads": 12,
23
- "num_layers": 12,
24
  "output_past": true,
25
  "pad_token_id": 0,
26
  "relative_attention_max_distance": 128,
@@ -55,7 +55,7 @@
55
  }
56
  },
57
  "torch_dtype": "float32",
58
- "transformers_version": "4.38.2",
59
  "use_cache": true,
60
  "vocab_size": 32128
61
  }
 
1
  {
2
+ "_name_or_path": "t5-small",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
6
  "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
  "d_kv": 64,
9
+ "d_model": 512,
10
  "decoder_start_token_id": 0,
11
  "dense_act_fn": "relu",
12
  "dropout_rate": 0.1,
 
18
  "layer_norm_epsilon": 1e-06,
19
  "model_type": "t5",
20
  "n_positions": 512,
21
+ "num_decoder_layers": 6,
22
+ "num_heads": 8,
23
+ "num_layers": 6,
24
  "output_past": true,
25
  "pad_token_id": 0,
26
  "relative_attention_max_distance": 128,
 
55
  }
56
  },
57
  "torch_dtype": "float32",
58
+ "transformers_version": "4.40.2",
59
  "use_cache": true,
60
  "vocab_size": 32128
61
  }
events.out.tfevents.1715613741.a6a2816c89e4.34.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5e613a39afeadc25ca266776865c6c7f7af3bc499b3671027063f3d0c73d377
3
+ size 58011
generation_config.json CHANGED
@@ -3,5 +3,5 @@
3
  "decoder_start_token_id": 0,
4
  "eos_token_id": 1,
5
  "pad_token_id": 0,
6
- "transformers_version": "4.38.2"
7
  }
 
3
  "decoder_start_token_id": 0,
4
  "eos_token_id": 1,
5
  "pad_token_id": 0,
6
+ "transformers_version": "4.40.2"
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5b48ee1cf3ded2aafeaf278c1bd0a9e0d33aa334adcf34d0a0bebe10abfd05c
3
- size 891644712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b06469c857c5dfcdf363618d39087e97d6af82c123e1115e540f9622cdb155f
3
+ size 242041896
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:012086e17505ac1827191e6eb46058b81c97a7e60a895d398156809b076300c4
3
- size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:406ccf800ef08e9506704fe0465a4487c6b5070170f3a004c0a36d2b1833ee44
3
+ size 4920