HikasaHana committed on
Commit
7a84e3b
·
verified ·
1 Parent(s): 519b0fc

Training in progress, epoch 3

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc2934c19e3f145b2c8af3a46df740321a95d0b384d33395bbea1d2f8f05e99a
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7b41357485e1704d509aa0a1421fd007fb7be5a861dfd0e44630f9fa473d8a3
3
  size 409103316
run-0/checkpoint-1599/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70e29b5f49710d0d17c6aee07e8cc1bc4edaa86a3c66b73d52d1ef1136d10f23
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7b41357485e1704d509aa0a1421fd007fb7be5a861dfd0e44630f9fa473d8a3
3
  size 409103316
run-0/checkpoint-1599/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b04eae6a685408364150ca5d02a35737876cace088d1b38561f5da6ab19d7a30
3
  size 818327802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaad4a07ff4c40b92bd14a50098833d4eb8b48978464d3b4574b5ac088e998eb
3
  size 818327802
run-0/checkpoint-1599/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3a7f77912cd35b35f45f50fd2c3bbb7b15cbe8d92a9601be01ae387622324a8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4424c5078b48aec91480ea9ddc98250169be3ffb90e1ca9a96b04048992a7ca6
3
  size 1064
run-0/checkpoint-1599/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.6369415521621704,
3
- "best_model_checkpoint": "BERT-WMM/run-0/checkpoint-533",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
6
  "global_step": 1599,
@@ -10,65 +10,62 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.94,
13
- "grad_norm": 11.681629180908203,
14
- "learning_rate": 2.5842410540171958e-05,
15
- "loss": 0.7181,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7525821596244131,
21
- "eval_loss": 0.6369415521621704,
22
- "eval_runtime": 1.9836,
23
- "eval_samples_per_second": 1073.792,
24
- "eval_steps_per_second": 67.553,
25
  "step": 533
26
  },
27
  {
28
  "epoch": 1.88,
29
- "grad_norm": 6.492458820343018,
30
- "learning_rate": 2.1053231418568556e-05,
31
- "loss": 0.4571,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.7497652582159624,
37
- "eval_loss": 0.6769182682037354,
38
- "eval_runtime": 2.0259,
39
- "eval_samples_per_second": 1051.385,
40
- "eval_steps_per_second": 66.144,
41
  "step": 1066
42
  },
43
  {
44
  "epoch": 2.81,
45
- "grad_norm": 13.743987083435059,
46
- "learning_rate": 1.626405229696515e-05,
47
- "loss": 0.2889,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 3.0,
52
- "eval_accuracy": 0.7596244131455399,
53
- "eval_loss": 0.8726277351379395,
54
- "eval_runtime": 2.0636,
55
- "eval_samples_per_second": 1032.174,
56
- "eval_steps_per_second": 64.935,
57
  "step": 1599
58
  }
59
  ],
60
  "logging_steps": 500,
61
- "max_steps": 3198,
62
  "num_input_tokens_seen": 0,
63
- "num_train_epochs": 6,
64
  "save_steps": 500,
65
  "total_flos": 507646505902536.0,
66
  "train_batch_size": 16,
67
  "trial_name": null,
68
  "trial_params": {
69
- "learning_rate": 3.063158966177536e-05,
70
- "num_train_epochs": 6,
71
- "per_device_train_batch_size": 16,
72
- "weight_decay": 2.6426532465921284e-05
73
  }
74
  }
 
1
  {
2
+ "best_metric": 0.75532066822052,
3
+ "best_model_checkpoint": "BERT-WMM/run-0/checkpoint-1599",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
6
  "global_step": 1599,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.94,
13
+ "grad_norm": 10.492278099060059,
14
+ "learning_rate": 7.698774119953696e-07,
15
+ "loss": 0.9956,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.6281690140845071,
21
+ "eval_loss": 0.8787437081336975,
22
+ "eval_runtime": 2.0952,
23
+ "eval_samples_per_second": 1016.614,
24
+ "eval_steps_per_second": 63.956,
25
  "step": 533
26
  },
27
  {
28
  "epoch": 1.88,
29
+ "grad_norm": 16.712677001953125,
30
+ "learning_rate": 4.1961471318036977e-07,
31
+ "loss": 0.858,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.6732394366197183,
37
+ "eval_loss": 0.791917085647583,
38
+ "eval_runtime": 2.1109,
39
+ "eval_samples_per_second": 1009.044,
40
+ "eval_steps_per_second": 63.48,
41
  "step": 1066
42
  },
43
  {
44
  "epoch": 2.81,
45
+ "grad_norm": 29.26333999633789,
46
+ "learning_rate": 6.935201436536996e-08,
47
+ "loss": 0.7846,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 3.0,
52
+ "eval_accuracy": 0.6892018779342723,
53
+ "eval_loss": 0.75532066822052,
54
+ "eval_runtime": 2.0969,
55
+ "eval_samples_per_second": 1015.8,
56
+ "eval_steps_per_second": 63.905,
57
  "step": 1599
58
  }
59
  ],
60
  "logging_steps": 500,
61
+ "max_steps": 1599,
62
  "num_input_tokens_seen": 0,
63
+ "num_train_epochs": 3,
64
  "save_steps": 500,
65
  "total_flos": 507646505902536.0,
66
  "train_batch_size": 16,
67
  "trial_name": null,
68
  "trial_params": {
69
+ "learning_rate": 1.1201401108103694e-06
 
 
 
70
  }
71
  }
run-0/checkpoint-1599/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48c24530a27c9d1e90a961cc0645745786b44285899342b0769165200ee8a2b1
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3b32ee6920eea73a8ff94d684bfa22384952a503144760a3a67dfa2dfe3f39f
3
  size 4856
runs/Apr18_16-31-07_544fc269209b/events.out.tfevents.1713457870.544fc269209b.792.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8aea9294fd5b07cc0e83aa6ecc325232e8f25d9c6601fc8a5d6d9e88eee0356f
3
- size 5962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22075c414dff056bcd45a0e4e13a3d05f01c9aa4df3e19fc19dd8b647078a313
3
+ size 6850