xuancoblab2023 committed on
Commit
a9b7c7c
·
verified ·
1 Parent(s): a37c8f5

Training in progress, epoch 2

Browse files
logs/events.out.tfevents.1711297090.8cc2eef2edb7.2942.4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feaf25b48afd5cec13c72a7309deddfd25015ebc1d48e8d78c23c22c7d99e770
3
- size 5406
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:890180357e2c6c1e9c781ffc2dd5737c5625af93c3736ae37f5f754885bf507d
3
+ size 6136
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3efb3ab4c0e91a15fd6119f8b7e090eafed10aac6dc177239fc7ebfc74974753
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1373d9a02412908ee3ce5bab868f7cbec3faa136e3d1694a1fe4ce56b314f5d
3
  size 17549312
run-4/checkpoint-594/config.json CHANGED
@@ -27,7 +27,7 @@
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
- "transformers_version": "4.38.2",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
 
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
+ "transformers_version": "4.39.1",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
run-4/checkpoint-594/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a9f5899de3a13fc33628c62536f81bcc454491bdf7c8dff370e0c1f67285da1
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1373d9a02412908ee3ce5bab868f7cbec3faa136e3d1694a1fe4ce56b314f5d
3
  size 17549312
run-4/checkpoint-594/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4f225cbcf4be011db2b36b3c5f7f4a441e936b4532a87e4873bb53ef3102373
3
- size 35122746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67818f3594ea0ca3785f4f2954eef44ae6d2d2c5fa96513a50b43a27c67a8d7d
3
+ size 35123898
run-4/checkpoint-594/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df6866d0dff400085edf68783c4718d94e08dcfa8f0d23f9d94ad3e230de2def
3
- size 14054
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07703232e8862ddf942921f960a4672ac4604f89da896da8b4bab92d3ecc94f2
3
+ size 14308
run-4/checkpoint-594/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65d2dccd940bec011857c38259860d9eb722df804caef6710c785d20e0ebb647
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:938ae2beae3d140aca43811cc80ada974d817af03ca88b16ecb307a2a47a970e
3
  size 1064
run-4/checkpoint-594/tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 33,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 31,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
run-4/checkpoint-594/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.500990099009901,
3
- "best_model_checkpoint": "tiny-bert-sst2-distilled/run-4/checkpoint-297",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 594,
@@ -10,49 +10,57 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 1.3486188650131226,
14
- "learning_rate": 0.0008534021470265506,
15
- "loss": 0.5548,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.500990099009901,
21
- "eval_loss": 0.5633774399757385,
22
- "eval_runtime": 55.4108,
23
- "eval_samples_per_second": 9.114,
24
- "eval_steps_per_second": 0.289,
 
 
 
 
25
  "step": 297
26
  },
27
  {
28
  "epoch": 2.0,
29
- "grad_norm": 0.8194136023521423,
30
- "learning_rate": 0.0007467268786482318,
31
- "loss": 0.5553,
32
  "step": 594
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.500990099009901,
37
- "eval_loss": 0.5545336604118347,
38
- "eval_runtime": 54.9924,
39
- "eval_samples_per_second": 9.183,
40
- "eval_steps_per_second": 0.291,
 
 
 
 
41
  "step": 594
42
  }
43
  ],
44
  "logging_steps": 500,
45
- "max_steps": 2673,
46
  "num_input_tokens_seen": 0,
47
- "num_train_epochs": 9,
48
  "save_steps": 500,
49
- "total_flos": 1555686566280.0,
50
  "train_batch_size": 32,
51
  "trial_name": null,
52
  "trial_params": {
53
- "alpha": 0.7480049651920854,
54
- "learning_rate": 0.0009600774154048695,
55
- "num_train_epochs": 9,
56
- "temperature": 2
57
  }
58
  }
 
1
  {
2
+ "best_metric": 0.594059405940594,
3
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-4/checkpoint-594",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 594,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 1.202705979347229,
14
+ "learning_rate": 6.504485936113838e-05,
15
+ "loss": 0.5495,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.5069306930693069,
21
+ "eval_f1": 0.10108303249097472,
22
+ "eval_loss": 0.5468167662620544,
23
+ "eval_mcc": 0.027838098756040194,
24
+ "eval_precision": 0.56,
25
+ "eval_recall": 0.05555555555555555,
26
+ "eval_runtime": 0.9316,
27
+ "eval_samples_per_second": 542.063,
28
+ "eval_steps_per_second": 17.174,
29
  "step": 297
30
  },
31
  {
32
  "epoch": 2.0,
33
+ "grad_norm": 0.8953370451927185,
34
+ "learning_rate": 5.4204049467615325e-05,
35
+ "loss": 0.54,
36
  "step": 594
37
  },
38
  {
39
  "epoch": 2.0,
40
+ "eval_accuracy": 0.594059405940594,
41
+ "eval_f1": 0.5858585858585857,
42
+ "eval_loss": 0.5394836068153381,
43
+ "eval_mcc": 0.18817791261380143,
44
+ "eval_precision": 0.5967078189300411,
45
+ "eval_recall": 0.5753968253968254,
46
+ "eval_runtime": 0.9335,
47
+ "eval_samples_per_second": 540.967,
48
+ "eval_steps_per_second": 17.14,
49
  "step": 594
50
  }
51
  ],
52
  "logging_steps": 500,
53
+ "max_steps": 2079,
54
  "num_input_tokens_seen": 0,
55
+ "num_train_epochs": 7,
56
  "save_steps": 500,
57
+ "total_flos": 1461402531960.0,
58
  "train_batch_size": 32,
59
  "trial_name": null,
60
  "trial_params": {
61
+ "alpha": 0.7404813991868276,
62
+ "learning_rate": 7.588566925466145e-05,
63
+ "num_train_epochs": 7,
64
+ "temperature": 47
65
  }
66
  }
run-4/checkpoint-594/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9362332d8be73fdeb508a202d9b7c32cc7427a6a4b9b0d6128331394ce376267
3
- size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b89f4b12e9eb0ed1644da45c1653800b9447c89374868af92258b08fc1b6045
3
+ size 4920