inflaton commited on
Commit
f17faf9
1 Parent(s): 631100c

Training in progress, step 5000

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. model.safetensors +1 -1
  2. run-2/checkpoint-2000/model.safetensors +1 -1
  3. run-2/checkpoint-2000/optimizer.pt +1 -1
  4. run-2/checkpoint-2000/rng_state.pth +1 -1
  5. run-2/checkpoint-2000/scheduler.pt +1 -1
  6. run-2/checkpoint-2000/training_args.bin +1 -1
  7. run-2/checkpoint-2500/model.safetensors +1 -1
  8. run-2/checkpoint-2500/optimizer.pt +1 -1
  9. run-2/checkpoint-2500/rng_state.pth +1 -1
  10. run-2/checkpoint-2500/scheduler.pt +1 -1
  11. run-2/checkpoint-2500/training_args.bin +1 -1
  12. run-2/checkpoint-3000/model.safetensors +1 -1
  13. run-2/checkpoint-3000/optimizer.pt +1 -1
  14. run-2/checkpoint-3000/rng_state.pth +1 -1
  15. run-2/checkpoint-3000/scheduler.pt +1 -1
  16. run-2/checkpoint-3000/training_args.bin +1 -1
  17. run-2/checkpoint-3500/model.safetensors +1 -1
  18. run-2/checkpoint-3500/optimizer.pt +1 -1
  19. run-2/checkpoint-3500/rng_state.pth +1 -1
  20. run-2/checkpoint-3500/scheduler.pt +1 -1
  21. run-2/checkpoint-3500/training_args.bin +1 -1
  22. run-2/checkpoint-4000/config.json +26 -0
  23. run-2/checkpoint-4000/model.safetensors +3 -0
  24. run-2/checkpoint-4000/optimizer.pt +3 -0
  25. run-2/checkpoint-4000/rng_state.pth +3 -0
  26. run-2/checkpoint-4000/scheduler.pt +3 -0
  27. run-2/checkpoint-4000/special_tokens_map.json +7 -0
  28. run-2/checkpoint-4000/tokenizer.json +0 -0
  29. run-2/checkpoint-4000/tokenizer_config.json +55 -0
  30. run-2/checkpoint-4000/trainer_state.json +125 -0
  31. run-2/checkpoint-4000/training_args.bin +3 -0
  32. run-2/checkpoint-4000/vocab.txt +0 -0
  33. run-2/checkpoint-4500/config.json +26 -0
  34. run-2/checkpoint-4500/model.safetensors +3 -0
  35. run-2/checkpoint-4500/optimizer.pt +3 -0
  36. run-2/checkpoint-4500/rng_state.pth +3 -0
  37. run-2/checkpoint-4500/scheduler.pt +3 -0
  38. run-2/checkpoint-4500/special_tokens_map.json +7 -0
  39. run-2/checkpoint-4500/tokenizer.json +0 -0
  40. run-2/checkpoint-4500/tokenizer_config.json +55 -0
  41. run-2/checkpoint-4500/trainer_state.json +141 -0
  42. run-2/checkpoint-4500/training_args.bin +3 -0
  43. run-2/checkpoint-4500/vocab.txt +0 -0
  44. run-2/checkpoint-5000/config.json +26 -0
  45. run-2/checkpoint-5000/model.safetensors +3 -0
  46. run-2/checkpoint-5000/optimizer.pt +3 -0
  47. run-2/checkpoint-5000/rng_state.pth +3 -0
  48. run-2/checkpoint-5000/scheduler.pt +3 -0
  49. run-2/checkpoint-5000/special_tokens_map.json +7 -0
  50. run-2/checkpoint-5000/tokenizer.json +0 -0
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c04ced4e95104bc5ec0123541b73b7f8b1f5cb81b734fc7031750ea9106c9345
3
  size 1340618660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55dcf2d90f757876dbb7c4b94eea3ec94f5166a71b3fbe4f0e962072d7107aec
3
  size 1340618660
run-2/checkpoint-2000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80be31897bb33d8733d34cb040a72548acaf95ec030cad91f812ad2d751b03f6
3
  size 1340618660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c937134533537f70741285bafdedc919a45c37f4f3c5cb4a9752ec17f80e655
3
  size 1340618660
run-2/checkpoint-2000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7016afb7575f22f823ba59a2ed54e03975b7460a99333e0965e0b82fab07c37
3
  size 2681472237
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f579ae5cd8cf4a42fb18f7cf72ad5583140a9f3001ca2787f7dfc16fbc518f0
3
  size 2681472237
run-2/checkpoint-2000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b9d5ad379b9f68a63bde5e9feb70202a4bac9f73ede32d42805b751fb116db2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75c227961e4ca5d0f7ba2c4abe0a18977107a0b2c234fd11994cf5a6ecf20ea8
3
  size 14244
run-2/checkpoint-2000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbc2fa08caf53304049c3848b18ec3060455d8ffeb5c9cd19698e31caac544f8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8aa401363c5642a0d7d02cb12727b21c4af1ac34c56e546410d0594de5848ac1
3
  size 1064
run-2/checkpoint-2000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bd2df48f0748d34c1c3fee4978f0787dadcf2710571d4f013f6d6ab8c4ed57f
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c68cfe34dae325096495265befbc04045f52f535badd4f779e9c272d3d0b0f80
3
  size 5048
run-2/checkpoint-2500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5aa625202d359484e27339370cfa0ad63333f2ed449370c6cebf8a7d3192716a
3
  size 1340618660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a4823de534c03cd8365c4d1063243e9562f9ad233f8e5b72c4faf9e5d30e9a9
3
  size 1340618660
run-2/checkpoint-2500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:839ba8a0f9db97f00f54cb996da0768a42e6ee244af22e007566323932b8d145
3
  size 2681472237
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:900bdd243dc7019f81b887d4d33268ba44e786303120f092f21956eaffde22c5
3
  size 2681472237
run-2/checkpoint-2500/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2702ae7042e84a4d25961cb512df6b14a1bf275d9b4501d054d3df0921900717
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59660cfbae90c75aff22fbb936299b2868fa9acddf052cbe56f2d442145f3b8b
3
  size 14244
run-2/checkpoint-2500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a454750792090fbe5cba64cf2aa476ecefd03183473b3e4c55bf7bf4d19d3bb1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc5b41e5527ab73bd9c8d7179f5eb7befb77760a1f96a4dd58ff327d4748a0c9
3
  size 1064
run-2/checkpoint-2500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bd2df48f0748d34c1c3fee4978f0787dadcf2710571d4f013f6d6ab8c4ed57f
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c68cfe34dae325096495265befbc04045f52f535badd4f779e9c272d3d0b0f80
3
  size 5048
run-2/checkpoint-3000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35290cd19848b1e2ee531e859619964f6b551a24d3d8a1c19f6c3956d92955c2
3
  size 1340618660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee681dc84fac79d1d9d643c003b15bd7ccdee1fb48cb870b660bb2ee31378926
3
  size 1340618660
run-2/checkpoint-3000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9ad35e44551783db2d965142529db5fd25716889829f5b013dfd67b6aa3f5a6
3
  size 2681472237
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2782860aba135a87310c45e9585c94228cb176d6a10f199e3b982b07a1496824
3
  size 2681472237
run-2/checkpoint-3000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f0fa2dd4d5b10d8deb66aff62fa136a2da8e05b383756e331dd5cc91ded1e42
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6db288463401544c237747deaf541285079a7fac434ce2c87fca0252ed44b357
3
  size 14244
run-2/checkpoint-3000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6dca56106351aeda5c12694b9b78d952bfb74380030b089006c7bc16ed43590
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6502a7770a12ff2e084bd7551eb23352078f9a6fdfa8b4cb93b3ae1994b447c
3
  size 1064
run-2/checkpoint-3000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bd2df48f0748d34c1c3fee4978f0787dadcf2710571d4f013f6d6ab8c4ed57f
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c68cfe34dae325096495265befbc04045f52f535badd4f779e9c272d3d0b0f80
3
  size 5048
run-2/checkpoint-3500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78bc6d683c21d6a2be3177dc35f3a8880af1d8229aefb6d3ebf403c09fca9416
3
  size 1340618660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f507d33e2ac708b2aebe24a9e2832e2c9ca8ebfb0f277b02a31df18d9f9893b7
3
  size 1340618660
run-2/checkpoint-3500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d152595e28319d02b0a2298ac06e6fb3bc4b4ec789a14c81dff91e923e7fda33
3
  size 2681472237
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4e47b93b237f3238d088c97ebb8c296c7735b81a7bc2954c8902b00452643ff
3
  size 2681472237
run-2/checkpoint-3500/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25c6284cc8af4e58e03f9a13edd9ab9aea771e159ae47465226eaec7dd5cabda
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f4337773f0e3eb3ebd819153d5c9547815b2eb781fb1b3b8acb5f2d3c9910ed
3
  size 14244
run-2/checkpoint-3500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d197f56f518557db46188ed953667fc5c4cb9e88f48deb902f66253413f089cb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b75a1a9f7ae761e7eb2cdacbdb01d35d249e5b0ce9653f5183e3f1e93019ba13
3
  size 1064
run-2/checkpoint-3500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bd2df48f0748d34c1c3fee4978f0787dadcf2710571d4f013f6d6ab8c4ed57f
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c68cfe34dae325096495265befbc04045f52f535badd4f779e9c272d3d0b0f80
3
  size 5048
run-2/checkpoint-4000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-large-uncased",
3
+ "architectures": [
4
+ "BertForMultipleChoice"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.40.1",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
run-2/checkpoint-4000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87d90823cf3527aad67f3666a1a658cabc0dbab731e6147be99e69e2088a5f09
3
+ size 1340618660
run-2/checkpoint-4000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:805007eb8c09117ab5d6bb38560d087face4081479fac7447688249e3bc4dd7a
3
+ size 2681472237
run-2/checkpoint-4000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23cd89f577f0659ddbd8c4a0e9ac86eb2003a5e05352e1a0338dfe9fb951fcfe
3
+ size 14244
run-2/checkpoint-4000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca4d60300b89b032e5e3f23deab19eb2b4115e6ab4686a27751623f6e62aeecd
3
+ size 1064
run-2/checkpoint-4000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-2/checkpoint-4000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-2/checkpoint-4000/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "BertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
run-2/checkpoint-4000/trainer_state.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.361930294906166,
5
+ "eval_steps": 500,
6
+ "global_step": 4000,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.6702412868632708,
13
+ "grad_norm": 17.923147201538086,
14
+ "learning_rate": 1.3001640168623715e-05,
15
+ "loss": 0.5514,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.7683539986610413,
21
+ "eval_loss": 0.46452510356903076,
22
+ "eval_runtime": 8.9706,
23
+ "eval_samples_per_second": 332.531,
24
+ "eval_steps_per_second": 20.846,
25
+ "step": 746
26
+ },
27
+ {
28
+ "epoch": 1.3404825737265416,
29
+ "grad_norm": 19.32151222229004,
30
+ "learning_rate": 1.2067614294440976e-05,
31
+ "loss": 0.3969,
32
+ "step": 1000
33
+ },
34
+ {
35
+ "epoch": 2.0,
36
+ "eval_accuracy": 0.7643312215805054,
37
+ "eval_loss": 0.5671606063842773,
38
+ "eval_runtime": 9.0896,
39
+ "eval_samples_per_second": 328.177,
40
+ "eval_steps_per_second": 20.573,
41
+ "step": 1492
42
+ },
43
+ {
44
+ "epoch": 2.0107238605898123,
45
+ "grad_norm": 17.702476501464844,
46
+ "learning_rate": 1.1133588420258238e-05,
47
+ "loss": 0.2998,
48
+ "step": 1500
49
+ },
50
+ {
51
+ "epoch": 2.680965147453083,
52
+ "grad_norm": 6.516891956329346,
53
+ "learning_rate": 1.0199562546075501e-05,
54
+ "loss": 0.1319,
55
+ "step": 2000
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "eval_accuracy": 0.7770700454711914,
60
+ "eval_loss": 0.7936307191848755,
61
+ "eval_runtime": 9.0031,
62
+ "eval_samples_per_second": 331.332,
63
+ "eval_steps_per_second": 20.771,
64
+ "step": 2238
65
+ },
66
+ {
67
+ "epoch": 3.351206434316354,
68
+ "grad_norm": 0.19944968819618225,
69
+ "learning_rate": 9.265536671892763e-06,
70
+ "loss": 0.0932,
71
+ "step": 2500
72
+ },
73
+ {
74
+ "epoch": 4.0,
75
+ "eval_accuracy": 0.7750586867332458,
76
+ "eval_loss": 1.1848183870315552,
77
+ "eval_runtime": 9.064,
78
+ "eval_samples_per_second": 329.104,
79
+ "eval_steps_per_second": 20.631,
80
+ "step": 2984
81
+ },
82
+ {
83
+ "epoch": 4.021447721179625,
84
+ "grad_norm": 0.03240065276622772,
85
+ "learning_rate": 8.331510797710023e-06,
86
+ "loss": 0.0618,
87
+ "step": 3000
88
+ },
89
+ {
90
+ "epoch": 4.6916890080428955,
91
+ "grad_norm": 23.173229217529297,
92
+ "learning_rate": 7.397484923527286e-06,
93
+ "loss": 0.0314,
94
+ "step": 3500
95
+ },
96
+ {
97
+ "epoch": 5.0,
98
+ "eval_accuracy": 0.7707006335258484,
99
+ "eval_loss": 1.372557520866394,
100
+ "eval_runtime": 9.0505,
101
+ "eval_samples_per_second": 329.594,
102
+ "eval_steps_per_second": 20.662,
103
+ "step": 3730
104
+ },
105
+ {
106
+ "epoch": 5.361930294906166,
107
+ "grad_norm": 0.01009325310587883,
108
+ "learning_rate": 6.463459049344548e-06,
109
+ "loss": 0.0281,
110
+ "step": 4000
111
+ }
112
+ ],
113
+ "logging_steps": 500,
114
+ "max_steps": 7460,
115
+ "num_input_tokens_seen": 0,
116
+ "num_train_epochs": 10,
117
+ "save_steps": 500,
118
+ "total_flos": 9489198869269488.0,
119
+ "train_batch_size": 16,
120
+ "trial_name": null,
121
+ "trial_params": {
122
+ "learning_rate": 1.3935666042806453e-05,
123
+ "per_device_train_batch_size": 16
124
+ }
125
+ }
run-2/checkpoint-4000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c68cfe34dae325096495265befbc04045f52f535badd4f779e9c272d3d0b0f80
3
+ size 5048
run-2/checkpoint-4000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-2/checkpoint-4500/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-large-uncased",
3
+ "architectures": [
4
+ "BertForMultipleChoice"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.40.1",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
run-2/checkpoint-4500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39327235f69c4c3b486cbf8df774ec52ff54ad18f8d7a4fa2bd6cfa2162b02be
3
+ size 1340618660
run-2/checkpoint-4500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9b3c556ad2cfa779d1f7fed254a28a85de15b8ff152c89d60638befc4235b37
3
+ size 2681472237
run-2/checkpoint-4500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:262bdc836bd571e5c7800d3eb88b8556fd1630b1fd434d2a0a61ec4f3d7402bd
3
+ size 14244
run-2/checkpoint-4500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e557c4a78b60251d1a9d05fe3ad142e596b1da5e438c612f3802aea99dcf7618
3
+ size 1064
run-2/checkpoint-4500/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-2/checkpoint-4500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-2/checkpoint-4500/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "BertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
run-2/checkpoint-4500/trainer_state.json ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 6.032171581769437,
5
+ "eval_steps": 500,
6
+ "global_step": 4500,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.6702412868632708,
13
+ "grad_norm": 17.923147201538086,
14
+ "learning_rate": 1.3001640168623715e-05,
15
+ "loss": 0.5514,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.7683539986610413,
21
+ "eval_loss": 0.46452510356903076,
22
+ "eval_runtime": 8.9706,
23
+ "eval_samples_per_second": 332.531,
24
+ "eval_steps_per_second": 20.846,
25
+ "step": 746
26
+ },
27
+ {
28
+ "epoch": 1.3404825737265416,
29
+ "grad_norm": 19.32151222229004,
30
+ "learning_rate": 1.2067614294440976e-05,
31
+ "loss": 0.3969,
32
+ "step": 1000
33
+ },
34
+ {
35
+ "epoch": 2.0,
36
+ "eval_accuracy": 0.7643312215805054,
37
+ "eval_loss": 0.5671606063842773,
38
+ "eval_runtime": 9.0896,
39
+ "eval_samples_per_second": 328.177,
40
+ "eval_steps_per_second": 20.573,
41
+ "step": 1492
42
+ },
43
+ {
44
+ "epoch": 2.0107238605898123,
45
+ "grad_norm": 17.702476501464844,
46
+ "learning_rate": 1.1133588420258238e-05,
47
+ "loss": 0.2998,
48
+ "step": 1500
49
+ },
50
+ {
51
+ "epoch": 2.680965147453083,
52
+ "grad_norm": 6.516891956329346,
53
+ "learning_rate": 1.0199562546075501e-05,
54
+ "loss": 0.1319,
55
+ "step": 2000
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "eval_accuracy": 0.7770700454711914,
60
+ "eval_loss": 0.7936307191848755,
61
+ "eval_runtime": 9.0031,
62
+ "eval_samples_per_second": 331.332,
63
+ "eval_steps_per_second": 20.771,
64
+ "step": 2238
65
+ },
66
+ {
67
+ "epoch": 3.351206434316354,
68
+ "grad_norm": 0.19944968819618225,
69
+ "learning_rate": 9.265536671892763e-06,
70
+ "loss": 0.0932,
71
+ "step": 2500
72
+ },
73
+ {
74
+ "epoch": 4.0,
75
+ "eval_accuracy": 0.7750586867332458,
76
+ "eval_loss": 1.1848183870315552,
77
+ "eval_runtime": 9.064,
78
+ "eval_samples_per_second": 329.104,
79
+ "eval_steps_per_second": 20.631,
80
+ "step": 2984
81
+ },
82
+ {
83
+ "epoch": 4.021447721179625,
84
+ "grad_norm": 0.03240065276622772,
85
+ "learning_rate": 8.331510797710023e-06,
86
+ "loss": 0.0618,
87
+ "step": 3000
88
+ },
89
+ {
90
+ "epoch": 4.6916890080428955,
91
+ "grad_norm": 23.173229217529297,
92
+ "learning_rate": 7.397484923527286e-06,
93
+ "loss": 0.0314,
94
+ "step": 3500
95
+ },
96
+ {
97
+ "epoch": 5.0,
98
+ "eval_accuracy": 0.7707006335258484,
99
+ "eval_loss": 1.372557520866394,
100
+ "eval_runtime": 9.0505,
101
+ "eval_samples_per_second": 329.594,
102
+ "eval_steps_per_second": 20.662,
103
+ "step": 3730
104
+ },
105
+ {
106
+ "epoch": 5.361930294906166,
107
+ "grad_norm": 0.01009325310587883,
108
+ "learning_rate": 6.463459049344548e-06,
109
+ "loss": 0.0281,
110
+ "step": 4000
111
+ },
112
+ {
113
+ "epoch": 6.0,
114
+ "eval_accuracy": 0.777405321598053,
115
+ "eval_loss": 1.31714928150177,
116
+ "eval_runtime": 9.0862,
117
+ "eval_samples_per_second": 328.299,
118
+ "eval_steps_per_second": 20.581,
119
+ "step": 4476
120
+ },
121
+ {
122
+ "epoch": 6.032171581769437,
123
+ "grad_norm": 0.6762986779212952,
124
+ "learning_rate": 5.529433175161809e-06,
125
+ "loss": 0.0202,
126
+ "step": 4500
127
+ }
128
+ ],
129
+ "logging_steps": 500,
130
+ "max_steps": 7460,
131
+ "num_input_tokens_seen": 0,
132
+ "num_train_epochs": 10,
133
+ "save_steps": 500,
134
+ "total_flos": 1.0682602114519764e+16,
135
+ "train_batch_size": 16,
136
+ "trial_name": null,
137
+ "trial_params": {
138
+ "learning_rate": 1.3935666042806453e-05,
139
+ "per_device_train_batch_size": 16
140
+ }
141
+ }
run-2/checkpoint-4500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c68cfe34dae325096495265befbc04045f52f535badd4f779e9c272d3d0b0f80
3
+ size 5048
run-2/checkpoint-4500/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-2/checkpoint-5000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-large-uncased",
3
+ "architectures": [
4
+ "BertForMultipleChoice"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.40.1",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
run-2/checkpoint-5000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55dcf2d90f757876dbb7c4b94eea3ec94f5166a71b3fbe4f0e962072d7107aec
3
+ size 1340618660
run-2/checkpoint-5000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:836c530bbcfaa88331ac4d39d8f145968a7f6cfd955cc1affc09fc971bb92ff1
3
+ size 2681472237
run-2/checkpoint-5000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:350dce5f3fc8b9991f6e63d71d94cae07c7815692d88da1c6e0a2aa12455e53a
3
+ size 14244
run-2/checkpoint-5000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebca24fe91086601b79269e4a7200ab15b4e7ce37c65a3940fdb52ca7436d288
3
+ size 1064
run-2/checkpoint-5000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-2/checkpoint-5000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff