ChrisZeng committed
Commit 53d6ab5
Parent: c64fb81

Training in progress, epoch 5

checkpoint-815/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:e442450e8cd52442bde80cb4c5d1310a90067d38639466cab2419adbc6654816
- size 2681490814
+ oid sha256:6ba418a2ea6881af11d8ac053efc8056bfc5980d994a4c40a76b2a92fbb77a74
+ size 2681485310
checkpoint-815/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:144b4da96d7effe4b95740b14231286e40378d2374cf162d73586ae4d8dcf8af
+ oid sha256:1ad7eafdd205c20d639005fd7eb74c2ba2b92f6c5c9991c07b9a36e6e8341c9b
 size 1340743917
checkpoint-815/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:2c726a4aa63d9bd90a506c784ff97322020e4f71cf490bdb933f60b3449d3305
+ oid sha256:7e348f7046a04b0ce0ec4d209a25aba29b47d9db36083b021e9f718ef89663bf
 size 14503
checkpoint-815/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:ad42bdc4a1db64a469af268c60e8c94de7907ce695dc2ec26a965499c4160890
+ oid sha256:e3f696f5645bd66b26b01012348c1717a369155fa7a4dc912416dd1982b75145
 size 623
checkpoint-815/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "best_metric": 0.30303624272346497,
+  "best_metric": 0.3055116832256317,
   "best_model_checkpoint": "outputs/electra-nli-efl-tweeteval/checkpoint-815",
   "epoch": 4.997323135755258,
   "global_step": 815,
@@ -9,88 +9,88 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "learning_rate": 9.499999999999999e-07,
-      "loss": 0.4384,
+      "learning_rate": 9e-07,
+      "loss": 0.439,
       "step": 163
     },
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.7444061962134251,
-      "eval_f1": 0.7308261375858633,
-      "eval_loss": 0.39615127444267273,
-      "eval_runtime": 9.0599,
-      "eval_samples_per_second": 256.514,
-      "eval_steps_per_second": 32.119,
+      "eval_accuracy": 0.745697074010327,
+      "eval_f1": 0.732240056847258,
+      "eval_loss": 0.3982622027397156,
+      "eval_runtime": 8.9237,
+      "eval_samples_per_second": 260.431,
+      "eval_steps_per_second": 32.61,
       "step": 163
     },
     {
       "epoch": 2.0,
-      "learning_rate": 9e-07,
-      "loss": 0.3447,
+      "learning_rate": 8e-07,
+      "loss": 0.3465,
       "step": 326
     },
     {
       "epoch": 2.0,
-      "eval_accuracy": 0.76592082616179,
-      "eval_f1": 0.7552159046464709,
-      "eval_loss": 0.3409559428691864,
-      "eval_runtime": 9.4378,
-      "eval_samples_per_second": 246.244,
-      "eval_steps_per_second": 30.833,
+      "eval_accuracy": 0.7620481927710844,
+      "eval_f1": 0.750740067157349,
+      "eval_loss": 0.3448249399662018,
+      "eval_runtime": 9.0059,
+      "eval_samples_per_second": 258.053,
+      "eval_steps_per_second": 32.312,
       "step": 326
     },
     {
       "epoch": 3.0,
-      "learning_rate": 8.499999999999999e-07,
-      "loss": 0.3057,
+      "learning_rate": 7e-07,
+      "loss": 0.3089,
       "step": 489
     },
     {
       "epoch": 3.0,
-      "eval_accuracy": 0.7749569707401033,
-      "eval_f1": 0.768808341108185,
-      "eval_loss": 0.32338443398475647,
-      "eval_runtime": 9.0418,
-      "eval_samples_per_second": 257.028,
-      "eval_steps_per_second": 32.184,
+      "eval_accuracy": 0.7693631669535284,
+      "eval_f1": 0.7633651185887134,
+      "eval_loss": 0.3303545117378235,
+      "eval_runtime": 9.3737,
+      "eval_samples_per_second": 247.927,
+      "eval_steps_per_second": 31.044,
       "step": 489
     },
     {
       "epoch": 4.0,
-      "learning_rate": 8e-07,
-      "loss": 0.287,
+      "learning_rate": 6e-07,
+      "loss": 0.2916,
       "step": 652
     },
     {
       "epoch": 4.0,
-      "eval_accuracy": 0.7857142857142857,
-      "eval_f1": 0.7778970154753132,
-      "eval_loss": 0.3068828284740448,
-      "eval_runtime": 9.3662,
-      "eval_samples_per_second": 248.128,
-      "eval_steps_per_second": 31.069,
+      "eval_accuracy": 0.7839931153184165,
+      "eval_f1": 0.7736843738845695,
+      "eval_loss": 0.30892524123191833,
+      "eval_runtime": 9.0737,
+      "eval_samples_per_second": 256.125,
+      "eval_steps_per_second": 32.071,
       "step": 652
     },
     {
       "epoch": 5.0,
-      "learning_rate": 7.5e-07,
-      "loss": 0.2742,
+      "learning_rate": 5e-07,
+      "loss": 0.2826,
       "step": 815
     },
     {
       "epoch": 5.0,
-      "eval_accuracy": 0.7887263339070568,
-      "eval_f1": 0.7821763089027973,
-      "eval_loss": 0.30303624272346497,
-      "eval_runtime": 8.908,
-      "eval_samples_per_second": 260.89,
-      "eval_steps_per_second": 32.667,
+      "eval_accuracy": 0.7839931153184165,
+      "eval_f1": 0.7744514636272033,
+      "eval_loss": 0.3055116832256317,
+      "eval_runtime": 9.1262,
+      "eval_samples_per_second": 254.651,
+      "eval_steps_per_second": 31.886,
       "step": 815
     }
   ],
-  "max_steps": 3260,
-  "num_train_epochs": 20,
-  "total_flos": 1.5229673797195776e+16,
+  "max_steps": 1630,
+  "num_train_epochs": 10,
+  "total_flos": 1.5224431665565056e+16,
   "trial_name": null,
   "trial_params": null
 }
checkpoint-815/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:50dde5eeea306f54118173d342686475ad9209b6c2cac103f7b114d5f582dc36
+ oid sha256:cd48e93c542d5f8f840918341d20dc98e6dbd60ec7052cf9f5610075d1655eaf
 size 3119
checkpoint-978/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:83f545bb9b4ba6c41b7833ebc1143df20d8bba9021ff50abab697bb844f1a988
- size 2681490814
+ oid sha256:68111dae94c48bd2b75beb407d9113814eb65164fbb9ffdf05ec852d6f8f39ea
+ size 2681485310
checkpoint-978/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:1017cd7d6d785c901c6c1b5a7c8a543da183235f0b9932da80f8f3cd0484598b
+ oid sha256:e0655cfc868a392ac2b0850468debd2161b60753b0a1dc8c0e61845d86a8b31a
 size 1340743917
checkpoint-978/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:5e260b8f63ccafccd2ea0aa1da290f571ec2179e4f7f25fb9ca94c888b8be9ea
+ oid sha256:272d406ad5766e7c57c632b59b5d78e1ef70b081b3a7a860b7c3db72089f1cb3
 size 14503
checkpoint-978/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:511c9699b79f5054c4112d6785ff254f1cd47c675bf30666ab6edc5f165de28b
+ oid sha256:569a60982e27ad1d96ed340e6c81707b085c0a19b972788e43e876e51ac67dc1
 size 623
checkpoint-978/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "best_metric": 0.2981628179550171,
+  "best_metric": 0.30407363176345825,
   "best_model_checkpoint": "outputs/electra-nli-efl-tweeteval/checkpoint-978",
   "epoch": 5.997323135755258,
   "global_step": 978,
@@ -9,104 +9,104 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "learning_rate": 9.499999999999999e-07,
-      "loss": 0.4384,
+      "learning_rate": 9e-07,
+      "loss": 0.439,
       "step": 163
     },
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.7444061962134251,
-      "eval_f1": 0.7308261375858633,
-      "eval_loss": 0.39615127444267273,
-      "eval_runtime": 9.0599,
-      "eval_samples_per_second": 256.514,
-      "eval_steps_per_second": 32.119,
+      "eval_accuracy": 0.745697074010327,
+      "eval_f1": 0.732240056847258,
+      "eval_loss": 0.3982622027397156,
+      "eval_runtime": 8.9237,
+      "eval_samples_per_second": 260.431,
+      "eval_steps_per_second": 32.61,
       "step": 163
     },
     {
       "epoch": 2.0,
-      "learning_rate": 9e-07,
-      "loss": 0.3447,
+      "learning_rate": 8e-07,
+      "loss": 0.3465,
       "step": 326
     },
     {
       "epoch": 2.0,
-      "eval_accuracy": 0.76592082616179,
-      "eval_f1": 0.7552159046464709,
-      "eval_loss": 0.3409559428691864,
-      "eval_runtime": 9.4378,
-      "eval_samples_per_second": 246.244,
-      "eval_steps_per_second": 30.833,
+      "eval_accuracy": 0.7620481927710844,
+      "eval_f1": 0.750740067157349,
+      "eval_loss": 0.3448249399662018,
+      "eval_runtime": 9.0059,
+      "eval_samples_per_second": 258.053,
+      "eval_steps_per_second": 32.312,
       "step": 326
     },
     {
       "epoch": 3.0,
-      "learning_rate": 8.499999999999999e-07,
-      "loss": 0.3057,
+      "learning_rate": 7e-07,
+      "loss": 0.3089,
       "step": 489
     },
     {
       "epoch": 3.0,
-      "eval_accuracy": 0.7749569707401033,
-      "eval_f1": 0.768808341108185,
-      "eval_loss": 0.32338443398475647,
-      "eval_runtime": 9.0418,
-      "eval_samples_per_second": 257.028,
-      "eval_steps_per_second": 32.184,
+      "eval_accuracy": 0.7693631669535284,
+      "eval_f1": 0.7633651185887134,
+      "eval_loss": 0.3303545117378235,
+      "eval_runtime": 9.3737,
+      "eval_samples_per_second": 247.927,
+      "eval_steps_per_second": 31.044,
       "step": 489
     },
     {
       "epoch": 4.0,
-      "learning_rate": 8e-07,
-      "loss": 0.287,
+      "learning_rate": 6e-07,
+      "loss": 0.2916,
       "step": 652
     },
     {
       "epoch": 4.0,
-      "eval_accuracy": 0.7857142857142857,
-      "eval_f1": 0.7778970154753132,
-      "eval_loss": 0.3068828284740448,
-      "eval_runtime": 9.3662,
-      "eval_samples_per_second": 248.128,
-      "eval_steps_per_second": 31.069,
+      "eval_accuracy": 0.7839931153184165,
+      "eval_f1": 0.7736843738845695,
+      "eval_loss": 0.30892524123191833,
+      "eval_runtime": 9.0737,
+      "eval_samples_per_second": 256.125,
+      "eval_steps_per_second": 32.071,
       "step": 652
     },
     {
       "epoch": 5.0,
-      "learning_rate": 7.5e-07,
-      "loss": 0.2742,
+      "learning_rate": 5e-07,
+      "loss": 0.2826,
       "step": 815
     },
     {
       "epoch": 5.0,
-      "eval_accuracy": 0.7887263339070568,
-      "eval_f1": 0.7821763089027973,
-      "eval_loss": 0.30303624272346497,
-      "eval_runtime": 8.908,
-      "eval_samples_per_second": 260.89,
-      "eval_steps_per_second": 32.667,
+      "eval_accuracy": 0.7839931153184165,
+      "eval_f1": 0.7744514636272033,
+      "eval_loss": 0.3055116832256317,
+      "eval_runtime": 9.1262,
+      "eval_samples_per_second": 254.651,
+      "eval_steps_per_second": 31.886,
       "step": 815
     },
     {
       "epoch": 6.0,
-      "learning_rate": 7e-07,
-      "loss": 0.2676,
+      "learning_rate": 4e-07,
+      "loss": 0.2747,
       "step": 978
     },
     {
       "epoch": 6.0,
-      "eval_accuracy": 0.7938898450946644,
-      "eval_f1": 0.7850614050415754,
-      "eval_loss": 0.2981628179550171,
-      "eval_runtime": 9.0189,
-      "eval_samples_per_second": 257.681,
-      "eval_steps_per_second": 32.266,
+      "eval_accuracy": 0.7857142857142857,
+      "eval_f1": 0.7771340101366444,
+      "eval_loss": 0.30407363176345825,
+      "eval_runtime": 9.1656,
+      "eval_samples_per_second": 253.557,
+      "eval_steps_per_second": 31.749,
       "step": 978
     }
   ],
-  "max_steps": 3260,
-  "num_train_epochs": 20,
-  "total_flos": 1.8270401372548416e+16,
+  "max_steps": 1630,
+  "num_train_epochs": 10,
+  "total_flos": 1.826859574943117e+16,
   "trial_name": null,
   "trial_params": null
 }
checkpoint-978/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:50dde5eeea306f54118173d342686475ad9209b6c2cac103f7b114d5f582dc36
+ oid sha256:cd48e93c542d5f8f840918341d20dc98e6dbd60ec7052cf9f5610075d1655eaf
 size 3119
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:d8ec99e6ce268c28a5ab518a36acce6ef6019d14ec92ba4b6dfe673399644daf
+ oid sha256:e0655cfc868a392ac2b0850468debd2161b60753b0a1dc8c0e61845d86a8b31a
 size 1340743917
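
Note: every binary file in this commit is stored through Git LFS, so the diff above only rewrites the three-line pointer (spec version, sha256 oid, byte size), not the blob itself. As a rough illustration of what those pointer fields mean, the sketch below parses an LFS v1 pointer and checks a locally downloaded blob against it. The file paths in the example are hypothetical; adjust them to wherever the pointer and blob live on disk.

```python
import hashlib
from pathlib import Path

def read_lfs_pointer(pointer_path: str) -> dict:
    """Parse a Git LFS v1 pointer file ("version", "oid", "size") into a dict."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        if not line.strip():
            continue
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Check a downloaded blob against the sha256 oid and size recorded in its pointer."""
    fields = read_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    blob = Path(blob_path)
    if blob.stat().st_size != expected_size:
        return False

    # Hash the blob in chunks so large checkpoints are not read into memory at once.
    digest = hashlib.sha256()
    with blob.open("rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

if __name__ == "__main__":
    # Hypothetical paths: the pointer as checked into the repo, the blob as downloaded.
    print(verify_blob("checkpoint-815/optimizer.pt", "downloads/optimizer.pt"))
```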