ChrisZeng committed on
Commit
c64fb81
1 Parent(s): 04f0c9c

Training in progress, epoch 3

Browse files
checkpoint-652/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05b941a9d3d5f0904b9be07c1e13a18bae3fac4b04b65ace1eb83cbbb5b629ee
3
  size 2681485310
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52b188021dbfa241f0a678e35b7af59e11aff7057c3843277ef9933346f641a0
3
  size 2681485310
checkpoint-652/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:743c3cf66ebcaa120ff0414f34b1f2897e119bb20e1c6c2c08e5861da71cdcb0
3
  size 1340743917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8ec99e6ce268c28a5ab518a36acce6ef6019d14ec92ba4b6dfe673399644daf
3
  size 1340743917
checkpoint-652/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a157602c28dafa1a0271775fa975aab22e5621dd748103545f800233df8f5450
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37f37fdf2b898420edca00f388fedb6a726205e8670b9bf7173ceeb8aa3d80ff
3
  size 623
checkpoint-652/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.3068828284740448,
3
  "best_model_checkpoint": "outputs/electra-nli-efl-tweeteval/checkpoint-652",
4
  "epoch": 3.9973231357552583,
5
  "global_step": 652,
@@ -9,71 +9,71 @@
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 9.499999999999999e-07,
13
- "loss": 0.4384,
14
  "step": 163
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_accuracy": 0.7444061962134251,
19
- "eval_f1": 0.7308261375858633,
20
- "eval_loss": 0.39615127444267273,
21
- "eval_runtime": 9.0599,
22
- "eval_samples_per_second": 256.514,
23
- "eval_steps_per_second": 32.119,
24
  "step": 163
25
  },
26
  {
27
  "epoch": 2.0,
28
- "learning_rate": 9e-07,
29
- "loss": 0.3447,
30
  "step": 326
31
  },
32
  {
33
  "epoch": 2.0,
34
- "eval_accuracy": 0.76592082616179,
35
- "eval_f1": 0.7552159046464709,
36
- "eval_loss": 0.3409559428691864,
37
- "eval_runtime": 9.4378,
38
- "eval_samples_per_second": 246.244,
39
- "eval_steps_per_second": 30.833,
40
  "step": 326
41
  },
42
  {
43
  "epoch": 3.0,
44
- "learning_rate": 8.499999999999999e-07,
45
- "loss": 0.3057,
46
  "step": 489
47
  },
48
  {
49
  "epoch": 3.0,
50
- "eval_accuracy": 0.7749569707401033,
51
- "eval_f1": 0.768808341108185,
52
- "eval_loss": 0.32338443398475647,
53
- "eval_runtime": 9.0418,
54
- "eval_samples_per_second": 257.028,
55
- "eval_steps_per_second": 32.184,
56
  "step": 489
57
  },
58
  {
59
  "epoch": 4.0,
60
- "learning_rate": 8e-07,
61
- "loss": 0.287,
62
  "step": 652
63
  },
64
  {
65
  "epoch": 4.0,
66
- "eval_accuracy": 0.7857142857142857,
67
- "eval_f1": 0.7778970154753132,
68
- "eval_loss": 0.3068828284740448,
69
- "eval_runtime": 9.3662,
70
- "eval_samples_per_second": 248.128,
71
- "eval_steps_per_second": 31.069,
72
  "step": 652
73
  }
74
  ],
75
- "max_steps": 3260,
76
- "num_train_epochs": 20,
77
  "total_flos": 1.2184053565654464e+16,
78
  "trial_name": null,
79
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.30892524123191833,
3
  "best_model_checkpoint": "outputs/electra-nli-efl-tweeteval/checkpoint-652",
4
  "epoch": 3.9973231357552583,
5
  "global_step": 652,
 
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 9e-07,
13
+ "loss": 0.439,
14
  "step": 163
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_accuracy": 0.745697074010327,
19
+ "eval_f1": 0.732240056847258,
20
+ "eval_loss": 0.3982622027397156,
21
+ "eval_runtime": 8.9237,
22
+ "eval_samples_per_second": 260.431,
23
+ "eval_steps_per_second": 32.61,
24
  "step": 163
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "learning_rate": 8e-07,
29
+ "loss": 0.3465,
30
  "step": 326
31
  },
32
  {
33
  "epoch": 2.0,
34
+ "eval_accuracy": 0.7620481927710844,
35
+ "eval_f1": 0.750740067157349,
36
+ "eval_loss": 0.3448249399662018,
37
+ "eval_runtime": 9.0059,
38
+ "eval_samples_per_second": 258.053,
39
+ "eval_steps_per_second": 32.312,
40
  "step": 326
41
  },
42
  {
43
  "epoch": 3.0,
44
+ "learning_rate": 7e-07,
45
+ "loss": 0.3089,
46
  "step": 489
47
  },
48
  {
49
  "epoch": 3.0,
50
+ "eval_accuracy": 0.7693631669535284,
51
+ "eval_f1": 0.7633651185887134,
52
+ "eval_loss": 0.3303545117378235,
53
+ "eval_runtime": 9.3737,
54
+ "eval_samples_per_second": 247.927,
55
+ "eval_steps_per_second": 31.044,
56
  "step": 489
57
  },
58
  {
59
  "epoch": 4.0,
60
+ "learning_rate": 6e-07,
61
+ "loss": 0.2916,
62
  "step": 652
63
  },
64
  {
65
  "epoch": 4.0,
66
+ "eval_accuracy": 0.7839931153184165,
67
+ "eval_f1": 0.7736843738845695,
68
+ "eval_loss": 0.30892524123191833,
69
+ "eval_runtime": 9.0737,
70
+ "eval_samples_per_second": 256.125,
71
+ "eval_steps_per_second": 32.071,
72
  "step": 652
73
  }
74
  ],
75
+ "max_steps": 1630,
76
+ "num_train_epochs": 10,
77
  "total_flos": 1.2184053565654464e+16,
78
  "trial_name": null,
79
  "trial_params": null
checkpoint-652/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82126e010e751131ea0bffbc21275298a6bdbc244157fe27eed5dd4b7b0d6e0f
3
  size 3119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd48e93c542d5f8f840918341d20dc98e6dbd60ec7052cf9f5610075d1655eaf
3
  size 3119
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c0974d1d7e3c00ca3ef5b4f10e74487753227c1cf0a5211b14949a292a65de3
3
  size 1340743917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8ec99e6ce268c28a5ab518a36acce6ef6019d14ec92ba4b6dfe673399644daf
3
  size 1340743917