DatPySci committed on
Commit 34fd191
Parent: cad54d6

End of training

README.md CHANGED
@@ -39,12 +39,12 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 5e-07
 - train_batch_size: 8
-- eval_batch_size: 8
+- eval_batch_size: 4
 - seed: 42
 - distributed_type: multi-GPU
 - num_devices: 8
 - total_train_batch_size: 64
-- total_eval_batch_size: 64
+- total_eval_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.1
@@ -58,5 +58,5 @@ The following hyperparameters were used during training:
 
 - Transformers 4.39.3
 - Pytorch 2.1.2+cu121
-- Datasets 2.14.7
+- Datasets 2.18.0
 - Tokenizers 0.15.2
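For reference, a minimal sketch of how the hyperparameters above map onto transformers.TrainingArguments (Transformers 4.39.3). The output_dir, num_train_epochs, and bf16 flag are assumptions not stated in the README diff (epoch 1.0 and the bfloat16 torch_dtype elsewhere in this commit suggest them); with 8 GPUs the per-device sizes give 8 x 8 = 64 train and 4 x 8 = 32 eval, matching the totals listed.

```python
from transformers import TrainingArguments

# Sketch only: values copied from the README diff, assumptions marked inline.
args = TrainingArguments(
    output_dir="output",            # assumption: not stated in the diff
    learning_rate=5e-07,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=4,   # changed from 8 in this commit
    seed=42,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-08,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    num_train_epochs=1,             # assumption: all_results.json reports epoch 1.0
    bf16=True,                      # assumption: config.json stores weights in bfloat16
)
```

The distributed_type (multi-GPU) and num_devices come from the launcher (torchrun or accelerate) rather than from TrainingArguments, which is why they have no keyword above.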
all_results.json CHANGED
@@ -1,7 +1,7 @@
 {
     "epoch": 1.0,
-    "train_loss": 0.11715243643320149,
-    "train_runtime": 11089.6574,
-    "train_samples_per_second": 9.017,
+    "train_loss": 0.1710020688315347,
+    "train_runtime": 11106.5263,
+    "train_samples_per_second": 9.004,
     "train_steps_per_second": 0.141
 }
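As an informal sanity check (not part of the commit), the new throughput numbers are internally consistent with the total train batch size of 64 reported in the README:

```python
# Rough consistency check on the updated all_results.json values; the relations
# hold only approximately because of logging granularity.
train_runtime = 11106.5263            # seconds
samples_per_second = 9.004
steps_per_second = 0.141
total_train_batch_size = 64

approx_samples = train_runtime * samples_per_second   # ~100,003 samples
approx_steps = train_runtime * steps_per_second       # ~1,566 optimizer steps
print(approx_samples, approx_steps, approx_steps * total_train_batch_size)
# ~1,566 steps x 64 samples/step ~ 100,000 samples, consistent with the throughput figure.
```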
config.json CHANGED
@@ -21,6 +21,6 @@
     "tie_word_embeddings": false,
     "torch_dtype": "bfloat16",
     "transformers_version": "4.39.3",
-    "use_cache": false,
+    "use_cache": true,
     "vocab_size": 32000
 }
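Disabling use_cache during training and flipping it back to true at the end of training is a common pattern (the KV cache is typically turned off when gradient checkpointing is enabled, then re-enabled for inference); whether that is the reason here is not shown in the diff. A minimal sketch for reading the flag back, with a placeholder repo id since the diff does not name the repository:

```python
from transformers import AutoConfig

# Placeholder repo id; substitute the actual repository name.
config = AutoConfig.from_pretrained("DatPySci/example-model")

print(config.use_cache)   # True after this commit: KV caching enabled for generation
print(config.vocab_size)  # 32000, as in the config.json above
```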
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c406cf4f7be892e5b1f2ee03c1c1d5dd1012e31d19f64879fdb89e9ec9bc29a4
+oid sha256:2c045ac89a939e151ad8f6cd36201494b0b674084b03f4009da02b9c641f400a
 size 4943162336
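The safetensors shards are stored through Git LFS, so the diff only shows pointer files (oid and size). A small sketch, assuming the shard has been downloaded locally under the same filename, for checking that the file matches the oid in the new pointer:

```python
import hashlib

# Expected hash taken from the new LFS pointer for model-00001-of-00003.safetensors;
# the local path is an assumption.
expected = "2c045ac89a939e151ad8f6cd36201494b0b674084b03f4009da02b9c641f400a"

h = hashlib.sha256()
with open("model-00001-of-00003.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert h.hexdigest() == expected, "shard does not match the LFS pointer"
```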
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9b9bd2bf245ad4034e46f152a5718edd749b73e6f7c054edf4694d1e3cd03815
+oid sha256:589c45c2cffbd77aeba04b60f13111b384b3def14875dc4a23190a04e2117792
 size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71f0bc0be638197dd0b20572bed9bd0867f9d74b8b77614b5fbc101ff2b043e3
+oid sha256:6b2148cb02b65d16e8a261007fb4fe98d397f65aa464a0cd2170dc9fd321872a
 size 4540516344
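Together the three shards hold one bfloat16 checkpoint of roughly 14.5 GB (the size fields sum to 14,483,498,016 bytes). A hedged sketch for loading it, assuming a causal language model (which the config fields suggest but the diff does not confirm), the usual model.safetensors.index.json in the repo, and a placeholder repo id:

```python
import torch
from transformers import AutoModelForCausalLM

# Placeholder repo id; the sharded safetensors files are resolved from the
# checkpoint index, so no per-shard handling is needed here.
model = AutoModelForCausalLM.from_pretrained(
    "DatPySci/example-model",
    torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16" in config.json
)
```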
train_results.json CHANGED
@@ -1,7 +1,7 @@
 {
     "epoch": 1.0,
-    "train_loss": 0.11715243643320149,
-    "train_runtime": 11089.6574,
-    "train_samples_per_second": 9.017,
+    "train_loss": 0.1710020688315347,
+    "train_runtime": 11106.5263,
+    "train_samples_per_second": 9.004,
     "train_steps_per_second": 0.141
 }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
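Although the viewer cannot render its diff, trainer_state.json is ordinary JSON written by the Trainer. A minimal sketch for inspecting the raw file after downloading it (the local path is an assumption; the field names follow the standard Trainer state):

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"])      # total number of optimizer steps
print(state["log_history"][-1])  # last logged entry (loss, learning rate, etc.)
```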