HikasaHana committed on
Commit
519b0fc
·
verified ·
1 Parent(s): 550e45f

Training in progress, epoch 2

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78a0904d05584058bc1c93a27fc67433b942291b1b39acfc2f2505e394fa31ac
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc2934c19e3f145b2c8af3a46df740321a95d0b384d33395bbea1d2f8f05e99a
3
  size 409103316
run-0/checkpoint-1066/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7755e65b6162f8636a9241f6923420b193ca30b3a5dca7418b51a2bcc380f86d
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc2934c19e3f145b2c8af3a46df740321a95d0b384d33395bbea1d2f8f05e99a
3
  size 409103316
run-0/checkpoint-1066/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63d963cf3cbd7391c62eb322b08595cc0bd5b707c4533beeae4a064153966de0
3
  size 818327802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6a4292fbcc3bbbd0b7905857012c6060a4fdc03a263336f02ef5ecd3b2c2742
3
  size 818327802
run-0/checkpoint-1066/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fbf55a5bf069992fe6cfbfae4a696b1cd7d93685a9743ad6546c3030830f638
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0531b49646ec50dbec094636163f9366944098e7fa07140b9e91e313783c4ea
3
  size 1064
run-0/checkpoint-1066/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.6369415521621704,
3
- "best_model_checkpoint": "BERT-WMM/run-0/checkpoint-533",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 1066,
@@ -10,49 +10,46 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.94,
13
- "grad_norm": 11.681629180908203,
14
- "learning_rate": 2.5842410540171958e-05,
15
- "loss": 0.7181,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7525821596244131,
21
- "eval_loss": 0.6369415521621704,
22
- "eval_runtime": 1.9836,
23
- "eval_samples_per_second": 1073.792,
24
- "eval_steps_per_second": 67.553,
25
  "step": 533
26
  },
27
  {
28
  "epoch": 1.88,
29
- "grad_norm": 6.492458820343018,
30
- "learning_rate": 2.1053231418568556e-05,
31
- "loss": 0.4571,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.7497652582159624,
37
- "eval_loss": 0.6769182682037354,
38
- "eval_runtime": 2.0259,
39
- "eval_samples_per_second": 1051.385,
40
- "eval_steps_per_second": 66.144,
41
  "step": 1066
42
  }
43
  ],
44
  "logging_steps": 500,
45
- "max_steps": 3198,
46
  "num_input_tokens_seen": 0,
47
- "num_train_epochs": 6,
48
  "save_steps": 500,
49
  "total_flos": 338261076519408.0,
50
  "train_batch_size": 16,
51
  "trial_name": null,
52
  "trial_params": {
53
- "learning_rate": 3.063158966177536e-05,
54
- "num_train_epochs": 6,
55
- "per_device_train_batch_size": 16,
56
- "weight_decay": 2.6426532465921284e-05
57
  }
58
  }
 
1
  {
2
+ "best_metric": 0.791917085647583,
3
+ "best_model_checkpoint": "BERT-WMM/run-0/checkpoint-1066",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 1066,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.94,
13
+ "grad_norm": 10.492278099060059,
14
+ "learning_rate": 7.698774119953696e-07,
15
+ "loss": 0.9956,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.6281690140845071,
21
+ "eval_loss": 0.8787437081336975,
22
+ "eval_runtime": 2.0952,
23
+ "eval_samples_per_second": 1016.614,
24
+ "eval_steps_per_second": 63.956,
25
  "step": 533
26
  },
27
  {
28
  "epoch": 1.88,
29
+ "grad_norm": 16.712677001953125,
30
+ "learning_rate": 4.1961471318036977e-07,
31
+ "loss": 0.858,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.6732394366197183,
37
+ "eval_loss": 0.791917085647583,
38
+ "eval_runtime": 2.1109,
39
+ "eval_samples_per_second": 1009.044,
40
+ "eval_steps_per_second": 63.48,
41
  "step": 1066
42
  }
43
  ],
44
  "logging_steps": 500,
45
+ "max_steps": 1599,
46
  "num_input_tokens_seen": 0,
47
+ "num_train_epochs": 3,
48
  "save_steps": 500,
49
  "total_flos": 338261076519408.0,
50
  "train_batch_size": 16,
51
  "trial_name": null,
52
  "trial_params": {
53
+ "learning_rate": 1.1201401108103694e-06
 
 
 
54
  }
55
  }
run-0/checkpoint-1066/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48c24530a27c9d1e90a961cc0645745786b44285899342b0769165200ee8a2b1
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3b32ee6920eea73a8ff94d684bfa22384952a503144760a3a67dfa2dfe3f39f
3
  size 4856
runs/Apr18_16-31-07_544fc269209b/events.out.tfevents.1713457870.544fc269209b.792.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71e100b62b72129822e6624a876e5247b23379a95320e68e4e423bf875bc6df1
3
- size 5428
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8aea9294fd5b07cc0e83aa6ecc325232e8f25d9c6601fc8a5d6d9e88eee0356f
3
+ size 5962