fpuentes committed
Commit 6c164fe • 1 parent: 8affacd

Training in progress, step 1500

config.json CHANGED
@@ -7,22 +7,21 @@
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
- "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
- "hidden_size": 1024,
+ "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
- "layer_norm_eps": 1e-12,
+ "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
- "num_attention_heads": 16,
+ "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
- "transformers_version": "4.25.1",
+ "transformers_version": "4.24.0",
  "type_vocab_size": 1,
  "use_cache": true,
- "vocab_size": 31002
+ "vocab_size": 50265
  }
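The updated config drops `gradient_checkpointing` and moves from large-style to RoBERTa-base-style hyperparameters (hidden size 768, 12 attention heads, the standard 50,265-token byte-level BPE vocabulary). A minimal sanity-check sketch, assuming `transformers` is installed and `path/to/checkpoint` is a placeholder for a local clone of this repo:

```python
# Minimal sketch: verify the updated hyperparameters after pulling this commit.
# "path/to/checkpoint" is a placeholder, not a real repo id.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("path/to/checkpoint")
assert config.hidden_size == 768
assert config.num_attention_heads == 12
assert config.vocab_size == 50265
print(config.model_type)  # roberta
```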
events.out.tfevents.1675021526.turing.3715.0 → events.out.tfevents.1675621105.turing.942264.0 RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d1038493fef059d45cb8f30efaeedca4c0fa3d49da4222e5f042315468cf90cb
+ oid sha256:4c4d0b29a87af88921abbc7c91b319bfe532dd747826d11b12e0c0dd1cebc89a
  size 40
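The binary artifacts in this repo are tracked with Git LFS, so the CHANGED/RENAMED diffs here and below touch only the small pointer files (an `oid sha256:` line plus a `size` in bytes), never the binaries themselves. A sketch of verifying a downloaded blob against its pointer hash (the filename comes from the rename above; run from a local clone):

```python
# Sketch: check a downloaded LFS object against the sha256 in its pointer file.
import hashlib

def sha256_of(path: str) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            h.update(chunk)
    return h.hexdigest()

pointer_oid = "4c4d0b29a87af88921abbc7c91b319bfe532dd747826d11b12e0c0dd1cebc89a"
print(sha256_of("events.out.tfevents.1675621105.turing.942264.0") == pointer_oid)
```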
last-checkpoint/config.json CHANGED
@@ -7,22 +7,21 @@
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
- "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
- "hidden_size": 1024,
+ "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
- "layer_norm_eps": 1e-12,
+ "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
- "num_attention_heads": 16,
+ "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
- "transformers_version": "4.25.1",
+ "transformers_version": "4.24.0",
  "type_vocab_size": 1,
  "use_cache": true,
- "vocab_size": 31002
+ "vocab_size": 50265
  }
last-checkpoint/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e554af5d331128c984ad1b13951ca2c1f493a4b58f00f9dd426cc4e4f5c8e902
- size 1274845829
+ oid sha256:778b67aa071ed05f42167ed3e5e86b6dae8ccfa222e2ddaabc7ca96c4f125bcb
+ size 997747845
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:699cf7e95aa4b428b6dcb17ea96d0a494fb5a668588cb15688d1dfa322b651bf
- size 637412409
+ oid sha256:890934702d88a0dd6337ebc6b82e6cf5721b9eacf995d6cf26ca8604fbb86a4c
+ size 498863417
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:350d8425314e155ec159c0e54caa815e16c9d40828c0e9924a6a994aadfbf4b9
- size 14639
+ oid sha256:22df86520474a41b29bc3648b0a45c593feb726c65d968ff4be10a14325dda87
+ size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:783327ca172834179676ad8d44cd332681cd4b6b75c6ae1ec2a8870e758409a8
+ oid sha256:69f0ca3db667ccea99a40abd479b8edea284c7dd390646a368deb8fde2a0bf56
  size 627
last-checkpoint/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "bos_token": "<s>",
+ "cls_token": "<s>",
+ "eos_token": "</s>",
+ "mask_token": {
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "<pad>",
+ "sep_token": "</s>",
+ "unk_token": "<unk>"
+ }
last-checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json ADDED
@@ -0,0 +1,17 @@
+ {
+ "add_prefix_space": false,
+ "bos_token": "<s>",
+ "cls_token": "<s>",
+ "eos_token": "</s>",
+ "errors": "replace",
+ "mask_token": "<mask>",
+ "max_len": 512,
+ "model_max_length": 512,
+ "name_or_path": "/home/pcjf/CESGA/works/lmodels/models/large",
+ "pad_token": "<pad>",
+ "sep_token": "</s>",
+ "special_tokens_map_file": null,
+ "tokenizer_class": "RobertaTokenizer",
+ "trim_offsets": true,
+ "unk_token": "<unk>"
+ }
last-checkpoint/trainer_state.json CHANGED
@@ -1,100 +1,30 @@
  {
- "best_metric": 7.145984172821045,
- "best_model_checkpoint": "/home/pcjf/CESGA/works/lmodels/models/large/checkpoint-9000",
- "epoch": 8.130066613977645,
- "global_step": 9000,
+ "best_metric": 6.199821949005127,
+ "best_model_checkpoint": "/home/pcjf/CESGA/works/lmodels/models/large/checkpoint-1500",
+ "epoch": 0.215964257915315,
+ "global_step": 1500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 1.35,
- "learning_rate": 0.00045483288166215,
- "loss": 7.1968,
+ "epoch": 0.22,
+ "learning_rate": 7.840172786177106e-06,
+ "loss": 6.8838,
  "step": 1500
  },
  {
- "epoch": 1.35,
- "eval_loss": 7.1723456382751465,
- "eval_runtime": 109.6198,
- "eval_samples_per_second": 104.835,
- "eval_steps_per_second": 13.109,
+ "epoch": 0.22,
+ "eval_loss": 6.199821949005127,
+ "eval_runtime": 80.1923,
+ "eval_samples_per_second": 226.281,
+ "eval_steps_per_second": 28.294,
  "step": 1500
- },
- {
- "epoch": 2.71,
- "learning_rate": 0.00040966576332429996,
- "loss": 7.1649,
- "step": 3000
- },
- {
- "epoch": 2.71,
- "eval_loss": 7.157505035400391,
- "eval_runtime": 108.8688,
- "eval_samples_per_second": 105.558,
- "eval_steps_per_second": 13.199,
- "step": 3000
- },
- {
- "epoch": 4.07,
- "learning_rate": 0.0003644986449864499,
- "loss": 7.1627,
- "step": 4500
- },
- {
- "epoch": 4.07,
- "eval_loss": 7.15585994720459,
- "eval_runtime": 109.3225,
- "eval_samples_per_second": 105.12,
- "eval_steps_per_second": 13.145,
- "step": 4500
- },
- {
- "epoch": 5.42,
- "learning_rate": 0.00031933152664859985,
- "loss": 7.1568,
- "step": 6000
- },
- {
- "epoch": 5.42,
- "eval_loss": 7.157613754272461,
- "eval_runtime": 108.7319,
- "eval_samples_per_second": 105.691,
- "eval_steps_per_second": 13.216,
- "step": 6000
- },
- {
- "epoch": 6.77,
- "learning_rate": 0.00027416440831074977,
- "loss": 7.1594,
- "step": 7500
- },
- {
- "epoch": 6.77,
- "eval_loss": 7.1492695808410645,
- "eval_runtime": 122.6861,
- "eval_samples_per_second": 93.67,
- "eval_steps_per_second": 11.713,
- "step": 7500
- },
- {
- "epoch": 8.13,
- "learning_rate": 0.00022899728997289974,
- "loss": 7.156,
- "step": 9000
- },
- {
- "epoch": 8.13,
- "eval_loss": 7.145984172821045,
- "eval_runtime": 111.6951,
- "eval_samples_per_second": 102.887,
- "eval_steps_per_second": 12.865,
- "step": 9000
  }
  ],
- "max_steps": 16605,
- "num_train_epochs": 15,
- "total_flos": 4.202170346996529e+17,
+ "max_steps": 6945,
+ "num_train_epochs": 1,
+ "total_flos": 2.248528520339957e+16,
  "trial_name": null,
  "trial_params": null
  }
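The trainer state confirms a restart: the old history (six eval points between steps 1500 and 9000, loss plateauing near 7.15) is replaced by a fresh one-epoch run whose first checkpoint at step 1500 already reaches an eval loss of 6.20. A minimal sketch for pulling the eval curve out of `trainer_state.json` (path relative to a local clone):

```python
# Sketch: extract (step, eval_loss) pairs from a Trainer checkpoint's state file.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]
print(evals)  # e.g. [(1500, 6.199821949005127)]
print(state["best_metric"], state["best_model_checkpoint"])
```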
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6c8cbf8cbcd2df689edcbeb8d33a558e8a91512021cd69e68769dce93442d595
- size 3515
+ oid sha256:8ac315fb6f01a713c35d8bb0bc140e603ae3fc1ed639b8dd43fdc17fa5a664b6
+ size 3451
last-checkpoint/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:699cf7e95aa4b428b6dcb17ea96d0a494fb5a668588cb15688d1dfa322b651bf
- size 637412409
+ oid sha256:890934702d88a0dd6337ebc6b82e6cf5721b9eacf995d6cf26ca8604fbb86a4c
+ size 498863417
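The model binary shrinks from ~637 MB to ~499 MB, consistent with the move from a 1024-hidden, 16-head layout to RoBERTa-base dimensions in float32. A rough back-of-the-envelope check (the formula approximates a RoBERTa encoder from the new config values and deliberately ignores biases, LayerNorms, and the LM head):

```python
# Rough parameter-count estimate from the new config; an approximation, not
# a value read from the checkpoint.
V, H, L, I, P = 50265, 768, 12, 3072, 514
embeddings = V * H + P * H + H        # token + position + token-type embeddings
per_layer = 4 * H * H + 2 * H * I     # q/k/v/out projections + FFN up/down
params = embeddings + L * per_layer   # ~124M parameters
print(params * 4 / 1e6, "MB float32") # ~496 MB, close to the 498,863,417-byte pointer
```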
events.out.tfevents.1675021708.turing.3779.0 → runs/Feb05_19-18-25_turing/1675621116.2988715/events.out.tfevents.1675621116.turing.942264.2 RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:41a3986edb4f5b1ebd47f1d95a924a0276b7a6d51c1a727306d6a9102b0e1261
- size 40
+ oid sha256:64433e0512fffedcc9808dc13c2552a1d0628e378a1393a3e8d280019fade8f2
+ size 5579
events.out.tfevents.1675021876.turing.3907.0 → runs/Feb05_19-18-25_turing/events.out.tfevents.1675621116.turing.942264.1 RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5fae8754d53b80ee4c1b0ee5802b17e0247a6fa53c691fb9842c2931611a4296
- size 40
+ oid sha256:866688278b7caac7e71346cdddf3f728327008fe07a5c38f6e0ae3f7355eaf1d
+ size 4268
runs/Jan29_20-48-28_turing/1675021713.9660718/events.out.tfevents.1675021713.turing.3779.2 DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:c29ec387e33077a1ee7b926d1a50160a6f8df4c22c40857b26082ca101531453
- size 5617
runs/Jan29_20-48-28_turing/events.out.tfevents.1675021713.turing.3779.1 DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:6e68ff533ed530838828a7c4bb350a275f3cb8f453940d9cd9b7e428c1621019
- size 3906
runs/Jan29_20-51-16_turing/1675021882.0657997/events.out.tfevents.1675021882.turing.3907.2 DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:0ca9e89ce53904409eef3f68013ef63b67fb63775898a5b46e336d2772178087
- size 5617
runs/Jan29_20-51-16_turing/events.out.tfevents.1675021882.turing.3907.1 DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:89fd4db54cb44d24148ee660b09cd2d215ff69c4f141b45ea214ca6657e325b5
- size 6475
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "bos_token": "<s>",
+ "cls_token": "<s>",
+ "eos_token": "</s>",
+ "mask_token": {
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "<pad>",
+ "sep_token": "</s>",
+ "unk_token": "<unk>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,17 @@
+ {
+ "add_prefix_space": false,
+ "bos_token": "<s>",
+ "cls_token": "<s>",
+ "eos_token": "</s>",
+ "errors": "replace",
+ "mask_token": "<mask>",
+ "max_len": 512,
+ "model_max_length": 512,
+ "name_or_path": "/home/pcjf/CESGA/works/lmodels/models/large",
+ "pad_token": "<pad>",
+ "sep_token": "</s>",
+ "special_tokens_map_file": null,
+ "tokenizer_class": "RobertaTokenizer",
+ "trim_offsets": true,
+ "unk_token": "<unk>"
+ }
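With `vocab.json`, `merges.txt`, and the tokenizer JSONs now committed at the repo root, the tokenizer loads without external files. A minimal sketch, with `path/to/checkpoint` again a placeholder for a local clone:

```python
# Sketch: load the byte-level BPE tokenizer shipped in this commit.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/to/checkpoint")
print(tok.__class__.__name__)       # typically RobertaTokenizerFast (fast wrapper)
print(tok.model_max_length)         # 512
print(tok("Hola mundo").input_ids)  # ids wrapped in <s> ... </s>
```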
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6c8cbf8cbcd2df689edcbeb8d33a558e8a91512021cd69e68769dce93442d595
- size 3515
+ oid sha256:8ac315fb6f01a713c35d8bb0bc140e603ae3fc1ed639b8dd43fdc17fa5a664b6
+ size 3451
vocab.json CHANGED
The diff for this file is too large to render. See raw diff