xuancoblab2023 commited on
Commit
f8523de
·
verified ·
1 Parent(s): 85b9aa8

Training in progress, epoch 2

Browse files
logs/events.out.tfevents.1711265653.73e3a81c01ef.4225.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29328cc7547f41743a4985f8d99faf99f508f44dea209ce91b456a7d804ef40f
3
- size 5407
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de2dbc4a46dedb66ca18a4084f06ef7d814451bb56e6f5ec8f784252448c5cd6
3
+ size 6137
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70b7f7f1a78e0b340b57dfae4e8a71ef78a3dbee8319a311f6096e6cf4233908
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d900e4f6cb14316af78ee923303ad84861c097630cd22630ae781e3dd5558d9
3
  size 17549312
run-0/checkpoint-384/config.json CHANGED
@@ -27,7 +27,7 @@
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
- "transformers_version": "4.38.2",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
 
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
+ "transformers_version": "4.39.1",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
run-0/checkpoint-384/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bce608ff84d676ff00ffe40bd50b97fbc7541a452729c9f63e6157c1aa613a0
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d900e4f6cb14316af78ee923303ad84861c097630cd22630ae781e3dd5558d9
3
  size 17549312
run-0/checkpoint-384/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39a4a9968d3befda2b3b1fc8eb232f4dd5dd4b1439f48ad4b5bd16b2994c1dd9
3
  size 35122746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6262f90831477c0022202b21b95260de760334c7735499127b401b2b7aad5e1f
3
  size 35122746
run-0/checkpoint-384/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45b1fc07ed8b121716021ec87c686f1cd9c3b89c82ea08a6a0792d47a39077c9
3
  size 14054
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2558f72cda987826e5e7caf54cc6282fe335ebecbddfed7bb83f0184d1f54cc
3
  size 14054
run-0/checkpoint-384/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36f2ca0dfd9a3945022ef05581370b90827ced4c3be77e49482b876673eb94ec
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5c14cda532036bd7a3de8ef7f02066ddc1a934accb6239ec8906df7f1daf52f
3
  size 1064
run-0/checkpoint-384/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.8003913894324853,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-384",
4
- "epoch": 4.0,
5
  "eval_steps": 500,
6
  "global_step": 384,
7
  "is_hyper_param_search": true,
@@ -10,93 +10,58 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 1.6740443706512451,
14
- "learning_rate": 0.0001218354408608861,
15
- "loss": 0.4816,
16
- "step": 96
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7514677103718199,
21
- "eval_f1": 0.7519531250000001,
22
- "eval_loss": 0.41538161039352417,
23
- "eval_precision": 0.7504873294346979,
24
- "eval_recall": 0.7534246575342466,
25
- "eval_runtime": 26.5175,
26
- "eval_samples_per_second": 38.541,
27
- "eval_steps_per_second": 1.207,
28
- "step": 96
29
- },
30
- {
31
- "epoch": 2.0,
32
- "grad_norm": 1.7866544723510742,
33
- "learning_rate": 9.137658064566457e-05,
34
- "loss": 0.4182,
35
  "step": 192
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.7798434442270059,
40
- "eval_f1": 0.7817652764306499,
41
- "eval_loss": 0.3980446457862854,
42
- "eval_precision": 0.775,
43
- "eval_recall": 0.7886497064579256,
44
- "eval_runtime": 25.5088,
45
- "eval_samples_per_second": 40.065,
46
- "eval_steps_per_second": 1.254,
47
- "step": 192
48
- },
49
- {
50
- "epoch": 3.0,
51
- "grad_norm": 2.1238555908203125,
52
- "learning_rate": 6.091772043044305e-05,
53
- "loss": 0.4044,
54
- "step": 288
55
- },
56
- {
57
- "epoch": 3.0,
58
- "eval_accuracy": 0.7984344422700587,
59
- "eval_f1": 0.8205574912891985,
60
- "eval_loss": 0.39847832918167114,
61
- "eval_precision": 0.7394034536891679,
62
- "eval_recall": 0.9217221135029354,
63
- "eval_runtime": 25.6502,
64
- "eval_samples_per_second": 39.844,
65
- "eval_steps_per_second": 1.248,
66
- "step": 288
67
- },
68
- {
69
- "epoch": 4.0,
70
- "grad_norm": 2.6613142490386963,
71
- "learning_rate": 3.0458860215221525e-05,
72
- "loss": 0.3971,
73
  "step": 384
74
  },
75
  {
76
- "epoch": 4.0,
77
- "eval_accuracy": 0.8003913894324853,
78
- "eval_f1": 0.799212598425197,
79
- "eval_loss": 0.38756656646728516,
80
- "eval_precision": 0.803960396039604,
81
- "eval_recall": 0.7945205479452054,
82
- "eval_runtime": 26.1487,
83
- "eval_samples_per_second": 39.084,
84
- "eval_steps_per_second": 1.224,
 
85
  "step": 384
86
  }
87
  ],
88
  "logging_steps": 500,
89
- "max_steps": 480,
90
  "num_input_tokens_seen": 0,
91
- "num_train_epochs": 5,
92
  "save_steps": 500,
93
- "total_flos": 942780789120.0,
94
- "train_batch_size": 32,
95
  "trial_name": null,
96
  "trial_params": {
97
- "alpha": 0.679174768290245,
98
- "learning_rate": 0.00015229430107610762,
99
- "num_train_epochs": 5,
100
- "temperature": 27
 
101
  }
102
  }
 
1
  {
2
+ "best_metric": 0.7181996086105675,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-384",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 384,
7
  "is_hyper_param_search": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 1.7846055030822754,
14
+ "learning_rate": 4.610177667546352e-05,
15
+ "loss": 0.4587,
16
+ "step": 192
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.6976516634050881,
21
+ "eval_f1": 0.6419466975666281,
22
+ "eval_loss": 0.4008609354496002,
23
+ "eval_mcc": 0.41595144404027,
24
+ "eval_precision": 0.7869318181818182,
25
+ "eval_recall": 0.5420743639921722,
26
+ "eval_runtime": 66.5688,
27
+ "eval_samples_per_second": 15.353,
28
+ "eval_steps_per_second": 0.481,
 
 
 
 
 
 
29
  "step": 192
30
  },
31
  {
32
  "epoch": 2.0,
33
+ "grad_norm": 6.001947402954102,
34
+ "learning_rate": 4.0979357044856464e-05,
35
+ "loss": 0.4016,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  "step": 384
37
  },
38
  {
39
+ "epoch": 2.0,
40
+ "eval_accuracy": 0.7181996086105675,
41
+ "eval_f1": 0.6778523489932886,
42
+ "eval_loss": 0.3820359408855438,
43
+ "eval_mcc": 0.4507700437564526,
44
+ "eval_precision": 0.7911227154046997,
45
+ "eval_recall": 0.5929549902152642,
46
+ "eval_runtime": 66.6005,
47
+ "eval_samples_per_second": 15.345,
48
+ "eval_steps_per_second": 0.48,
49
  "step": 384
50
  }
51
  ],
52
  "logging_steps": 500,
53
+ "max_steps": 1920,
54
  "num_input_tokens_seen": 0,
55
+ "num_train_epochs": 10,
56
  "save_steps": 500,
57
+ "total_flos": 471390394560.0,
58
+ "train_batch_size": 16,
59
  "trial_name": null,
60
  "trial_params": {
61
+ "alpha": 0.6226480553446316,
62
+ "learning_rate": 5.122419630607058e-05,
63
+ "num_train_epochs": 10,
64
+ "per_device_train_batch_size": 16,
65
+ "temperature": 20
66
  }
67
  }
run-0/checkpoint-384/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1873f3a503b2d5b3f7200baa33f737ba864dd65edb3834d85c5b8e40b6b72f07
3
- size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbca9cde0b156ba25d74ee6bc6ea4c2fa160afa3570e75ccc73231ac4a159c49
3
+ size 4984