sakt90 committed on
Commit
c6d2087
1 Parent(s): 3aebe9b

End of training

Browse files
README.md CHANGED
@@ -17,9 +17,9 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [google-t5/t5-small](https://huggingface.co/google-t5/t5-small) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.4800
21
- - Bleu: 6.565
22
- - Gen Len: 17.5641
23
 
24
  ## Model description
25
 
@@ -38,22 +38,24 @@ More information needed
38
  ### Training hyperparameters
39
 
40
  The following hyperparameters were used during training:
41
- - learning_rate: 0.0001
42
- - train_batch_size: 32
43
- - eval_batch_size: 32
44
  - seed: 42
45
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: linear
47
- - num_epochs: 3
48
  - mixed_precision_training: Native AMP
49
 
50
  ### Training results
51
 
52
- | Training Loss | Epoch | Step | Validation Loss | Bleu | Gen Len |
53
- |:-------------:|:-----:|:----:|:---------------:|:------:|:-------:|
54
- | 1.7648 | 1.0 | 2542 | 1.5394 | 6.1939 | 17.572 |
55
- | 1.6761 | 2.0 | 5084 | 1.4921 | 6.5053 | 17.5665 |
56
- | 1.6496 | 3.0 | 7626 | 1.4800 | 6.565 | 17.5641 |
 
 
57
 
58
 
59
  ### Framework versions
 
17
 
18
  This model is a fine-tuned version of [google-t5/t5-small](https://huggingface.co/google-t5/t5-small) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 1.5191
21
+ - Bleu: 6.3813
22
+ - Gen Len: 17.539
23
 
24
  ## Model description
25
 
 
38
  ### Training hyperparameters
39
 
40
  The following hyperparameters were used during training:
41
+ - learning_rate: 2e-05
42
+ - train_batch_size: 16
43
+ - eval_batch_size: 16
44
  - seed: 42
45
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: linear
47
+ - num_epochs: 5
48
  - mixed_precision_training: Native AMP
49
 
50
  ### Training results
51
 
52
+ | Training Loss | Epoch | Step | Validation Loss | Bleu | Gen Len |
53
+ |:-------------:|:-----:|:-----:|:---------------:|:------:|:-------:|
54
+ | 1.8456 | 1.0 | 6355 | 1.6112 | 5.7972 | 17.5672 |
55
+ | 1.7857 | 2.0 | 12710 | 1.5620 | 6.1557 | 17.5515 |
56
+ | 1.7359 | 3.0 | 19065 | 1.5358 | 6.2797 | 17.5462 |
57
+ | 1.7219 | 4.0 | 25420 | 1.5226 | 6.3581 | 17.5427 |
58
+ | 1.7219 | 5.0 | 31775 | 1.5191 | 6.3813 | 17.539 |
59
 
60
 
61
  ### Framework versions
config.json CHANGED
@@ -55,7 +55,7 @@
55
  }
56
  },
57
  "torch_dtype": "float32",
58
- "transformers_version": "4.39.0",
59
  "use_cache": true,
60
  "vocab_size": 32128
61
  }
 
55
  }
56
  },
57
  "torch_dtype": "float32",
58
+ "transformers_version": "4.40.1",
59
  "use_cache": true,
60
  "vocab_size": 32128
61
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f078b019febfa528c2d1afc03f6a276881c487813d399020b0e687394e9468fa
3
  size 242041896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47a1a88f77a33b53d7f668affbc94b5de6cb8427cd76efd6bfdf358bb1f20a52
3
  size 242041896
runs/May01_21-22-28_a8d4deb0cc84/events.out.tfevents.1714598549.a8d4deb0cc84.153.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de403ac9666a5fff081a86807211d59104d527795fbcdd95a55b5f5deffedc31
3
- size 20606
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db19c5cb7f54e6aed51283cca67bf4e09677915c239c96e6495f28ab17c6c21e
3
+ size 21343
tokenizer.json CHANGED
@@ -949,8 +949,8 @@
949
  {
950
  "type": "Metaspace",
951
  "replacement": "▁",
952
- "add_prefix_space": true,
953
- "prepend_scheme": "always"
954
  }
955
  ]
956
  },
@@ -1011,8 +1011,8 @@
1011
  "decoder": {
1012
  "type": "Metaspace",
1013
  "replacement": "▁",
1014
- "add_prefix_space": true,
1015
- "prepend_scheme": "always"
1016
  },
1017
  "model": {
1018
  "type": "Unigram",
 
949
  {
950
  "type": "Metaspace",
951
  "replacement": "▁",
952
+ "prepend_scheme": "always",
953
+ "split": true
954
  }
955
  ]
956
  },
 
1011
  "decoder": {
1012
  "type": "Metaspace",
1013
  "replacement": "▁",
1014
+ "prepend_scheme": "always",
1015
+ "split": true
1016
  },
1017
  "model": {
1018
  "type": "Unigram",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff3cfa8fd6e9a29141be3badbe1e4bd488ec01de78c7db62942081f8ecceb5cf
3
- size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b45c36594ab696d7b9b4d714230a85c2bfc0cbc438654bf1398a104f054fab7c
3
+ size 5176