Thomas Simonini committed on
Commit bdde52f
1 Parent(s): 3614c20

t5-end2end-questions-generation

Files changed (5)
  1. .gitignore +1 -0
  2. README.md +86 -0
  3. config.json +58 -0
  4. pytorch_model.bin +3 -0
  5. training_args.bin +3 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint-*/
README.md ADDED
@@ -0,0 +1,86 @@
+ ---
+ license: apache-2.0
+ tags:
+ - generated_from_trainer
+ datasets:
+ - squad
+ model-index:
+ - name: t5-end2end-question-generation
+   results:
+   - task:
+       name: Sequence-to-sequence Language Modeling
+       type: text2text-generation
+     dataset:
+       name: squad
+       type: squad
+       args: plain_text
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # t5-end2end-question-generation
+
+ This model is a fine-tuned version of [t5-base](https://huggingface.co/t5-base) on the squad dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 1.5691
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0001
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - gradient_accumulation_steps: 16
+ - total_train_batch_size: 64
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 7
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-----:|:----:|:---------------:|
+ | 2.5834        | 0.34  | 100  | 1.9107          |
+ | 1.9642        | 0.68  | 200  | 1.7227          |
+ | 1.8526        | 1.02  | 300  | 1.6627          |
+ | 1.7383        | 1.36  | 400  | 1.6354          |
+ | 1.7223        | 1.69  | 500  | 1.6154          |
+ | 1.6871        | 2.03  | 600  | 1.6096          |
+ | 1.6309        | 2.37  | 700  | 1.6048          |
+ | 1.6242        | 2.71  | 800  | 1.5923          |
+ | 1.6226        | 3.05  | 900  | 1.5855          |
+ | 1.5645        | 3.39  | 1000 | 1.5874          |
+ | 1.5705        | 3.73  | 1100 | 1.5822          |
+ | 1.5543        | 4.07  | 1200 | 1.5817          |
+ | 1.5284        | 4.41  | 1300 | 1.5841          |
+ | 1.5275        | 4.75  | 1400 | 1.5741          |
+ | 1.5269        | 5.08  | 1500 | 1.5715          |
+ | 1.5079        | 5.42  | 1600 | 1.5701          |
+ | 1.4876        | 5.76  | 1700 | 1.5754          |
+ | 1.498         | 6.1   | 1800 | 1.5699          |
+ | 1.4852        | 6.44  | 1900 | 1.5693          |
+ | 1.4776        | 6.78  | 2000 | 1.5691          |
+
+
+ ### Framework versions
+
+ - Transformers 4.10.3
+ - Pytorch 1.9.0+cu102
+ - Datasets 1.12.1
+ - Tokenizers 0.10.3
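
The card above does not yet document the inference format, so here is a minimal usage sketch. The Hub repo id, the `generate questions: ` input prefix, and the `<sep>` output separator are assumptions drawn from common end-to-end question-generation fine-tunes of T5; verify them against the training script before relying on this.

```python
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Assumed repo id, inferred from the card name; substitute the real one.
checkpoint = "ThomasSimonini/t5-end2end-question-generation"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

context = (
    "The Amazon rainforest covers most of the Amazon basin of South America. "
    "The majority of the forest is contained within Brazil."
)

# The "generate questions: " prefix and "<sep>"-separated output are
# assumptions based on common end-to-end QG recipes, not on this card.
inputs = tokenizer(
    "generate questions: " + context,
    return_tensors="pt",
    max_length=512,
    truncation=True,
)

outputs = model.generate(**inputs, max_length=128, num_beams=4)
text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(text.split("<sep>"))
```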
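For reference, the hyperparameters listed under "Training hyperparameters" map onto `Seq2SeqTrainingArguments` roughly as below. The training script is not part of this commit, so this is a reconstruction under stated assumptions (the steps-based evaluation schedule is inferred from the results table), not the original configuration.

```python
from transformers import Seq2SeqTrainingArguments

training_args = Seq2SeqTrainingArguments(
    output_dir="t5-end2end-question-generation",
    learning_rate=1e-4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    seed=42,
    gradient_accumulation_steps=16,  # effective batch size: 4 * 16 = 64
    lr_scheduler_type="linear",      # default Adam: betas=(0.9, 0.999), eps=1e-8
    num_train_epochs=7,
    evaluation_strategy="steps",     # assumption: the table logs eval every 100 steps
    eval_steps=100,
)
```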
config.json ADDED
@@ -0,0 +1,58 @@
+ {
+   "_name_or_path": "t5-base",
+   "architectures": [
+     "T5ForConditionalGeneration"
+   ],
+   "d_ff": 3072,
+   "d_kv": 64,
+   "d_model": 768,
+   "decoder_start_token_id": 0,
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "relu",
+   "gradient_checkpointing": false,
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "t5",
+   "n_positions": 512,
+   "num_decoder_layers": 12,
+   "num_heads": 12,
+   "num_layers": 12,
+   "output_past": true,
+   "pad_token_id": 0,
+   "relative_attention_num_buckets": 32,
+   "task_specific_params": {
+     "summarization": {
+       "early_stopping": true,
+       "length_penalty": 2.0,
+       "max_length": 200,
+       "min_length": 30,
+       "no_repeat_ngram_size": 3,
+       "num_beams": 4,
+       "prefix": "summarize: "
+     },
+     "translation_en_to_de": {
+       "early_stopping": true,
+       "max_length": 300,
+       "num_beams": 4,
+       "prefix": "translate English to German: "
+     },
+     "translation_en_to_fr": {
+       "early_stopping": true,
+       "max_length": 300,
+       "num_beams": 4,
+       "prefix": "translate English to French: "
+     },
+     "translation_en_to_ro": {
+       "early_stopping": true,
+       "max_length": 300,
+       "num_beams": 4,
+       "prefix": "translate English to Romanian: "
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.10.3",
+   "use_cache": true,
+   "vocab_size": 32101
+ }
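
Since `config.json` fully describes the architecture, it can be used to build a randomly initialized model of the same shape without downloading the weights. A minimal sketch, assuming a local clone of the repo (the path is illustrative):

```python
from transformers import T5Config, T5ForConditionalGeneration

# Point this at a local clone of the repo or at the Hub repo id.
config = T5Config.from_pretrained("./t5-end2end-question-generation")
model = T5ForConditionalGeneration(config)  # random weights, same architecture

# Note: vocab_size is 32101 rather than t5-base's 32128, consistent with the
# embedding matrix having been resized after adding one extra token
# (plausibly a question separator; an inference, not documented in the card).
print(config.d_model, config.num_layers, config.vocab_size)  # 768 12 32101
```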
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aeaf116a47c0aa8760ab159f859c2cd6c06c6eb55afcca18f2ef9b44053f49e6
+ size 891647935
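
`pytorch_model.bin` is stored through Git LFS, so what the repository actually tracks is the three-line pointer above (spec version, sha256 oid, byte size). A small stdlib-only sketch for checking a downloaded blob against such a pointer (file paths in the example call are illustrative):

```python
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path):
    """Parse a Git LFS pointer file into a {key: value} dict."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify(pointer_path, blob_path):
    """Check a downloaded blob against the size and sha256 in its pointer."""
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].split(":", 1)[1]  # strip the "sha256:" prefix
    blob = Path(blob_path)
    if blob.stat().st_size != int(fields["size"]):
        return False
    sha = hashlib.sha256()
    with blob.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return sha.hexdigest() == expected_oid

# Example: verify("pytorch_model.bin.pointer", "pytorch_model.bin")
```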
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5f73781b96870b20b64ff8e88639710bade397a4aeff84d690387dae89421ec1
+ size 2671
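
`training_args.bin` is the pickled `TrainingArguments` object that `Trainer` saves next to the weights. It can be inspected with `torch.load`, provided a compatible `transformers` version is importable, since unpickling re-imports the class:

```python
import torch

# training_args.bin is a pickle of the TrainingArguments instance; transformers
# must be installed so pickle can re-import the class. On PyTorch >= 2.6 the
# weights_only default changed, so pass weights_only=False there.
args = torch.load("training_args.bin")
print(args.learning_rate, args.num_train_epochs, args.gradient_accumulation_steps)
```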