Jeremiah Zhou committed on
Commit
4625b60
1 Parent(s): 81aef98

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 8.0,
3
- "eval_accuracy": 0.7581227436823105,
4
- "eval_loss": 0.6365415453910828,
5
- "eval_runtime": 0.5623,
6
  "eval_samples": 277,
7
- "eval_samples_per_second": 492.657,
8
- "eval_steps_per_second": 62.249,
9
- "train_loss": 0.42912168074876833,
10
- "train_runtime": 173.8183,
11
  "train_samples": 2490,
12
- "train_samples_per_second": 143.253,
13
- "train_steps_per_second": 8.975
14
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.7978339350180506,
4
+ "eval_loss": 0.5446364283561707,
5
+ "eval_runtime": 0.6251,
6
  "eval_samples": 277,
7
+ "eval_samples_per_second": 443.162,
8
+ "eval_steps_per_second": 55.995,
9
+ "train_loss": 0.31504388589125415,
10
+ "train_runtime": 295.0697,
11
  "train_samples": 2490,
12
+ "train_samples_per_second": 84.387,
13
+ "train_steps_per_second": 5.287
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 8.0,
3
- "eval_accuracy": 0.7581227436823105,
4
- "eval_loss": 0.6365415453910828,
5
- "eval_runtime": 0.5623,
6
  "eval_samples": 277,
7
- "eval_samples_per_second": 492.657,
8
- "eval_steps_per_second": 62.249
9
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.7978339350180506,
4
+ "eval_loss": 0.5446364283561707,
5
+ "eval_runtime": 0.6251,
6
  "eval_samples": 277,
7
+ "eval_samples_per_second": 443.162,
8
+ "eval_steps_per_second": 55.995
9
  }
runs/Jun20_21-46-24_pikachu/events.out.tfevents.1655733724.pikachu.2660395.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dce7652753a7417a338897d10bc5af62e211396cdbc60c3fabdcfc6d9e616208
3
+ size 363
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 8.0,
3
- "train_loss": 0.42912168074876833,
4
- "train_runtime": 173.8183,
5
  "train_samples": 2490,
6
- "train_samples_per_second": 143.253,
7
- "train_steps_per_second": 8.975
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.31504388589125415,
4
+ "train_runtime": 295.0697,
5
  "train_samples": 2490,
6
+ "train_samples_per_second": 84.387,
7
+ "train_steps_per_second": 5.287
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.7581227436823105,
3
  "best_model_checkpoint": "./fine-tune/roberta-base/rte/checkpoint-780",
4
- "epoch": 8.0,
5
- "global_step": 1248,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10,100 +10,124 @@
10
  {
11
  "epoch": 1.0,
12
  "eval_accuracy": 0.4729241877256318,
13
- "eval_loss": 0.7072076797485352,
14
- "eval_runtime": 0.4902,
15
- "eval_samples_per_second": 565.102,
16
- "eval_steps_per_second": 71.403,
17
  "step": 156
18
  },
19
  {
20
  "epoch": 2.0,
21
- "eval_accuracy": 0.5270758122743683,
22
- "eval_loss": 0.6958328485488892,
23
- "eval_runtime": 0.689,
24
- "eval_samples_per_second": 402.039,
25
- "eval_steps_per_second": 50.799,
26
  "step": 312
27
  },
28
  {
29
  "epoch": 3.0,
30
- "eval_accuracy": 0.6462093862815884,
31
- "eval_loss": 0.6193148493766785,
32
- "eval_runtime": 0.5121,
33
- "eval_samples_per_second": 540.927,
34
- "eval_steps_per_second": 68.348,
35
  "step": 468
36
  },
37
  {
38
  "epoch": 3.21,
39
- "learning_rate": 1.446111869031378e-05,
40
- "loss": 0.6759,
41
  "step": 500
42
  },
43
  {
44
  "epoch": 4.0,
45
- "eval_accuracy": 0.7075812274368231,
46
- "eval_loss": 0.6046352386474609,
47
- "eval_runtime": 0.5274,
48
- "eval_samples_per_second": 525.205,
49
- "eval_steps_per_second": 66.362,
50
  "step": 624
51
  },
52
  {
53
  "epoch": 5.0,
54
- "eval_accuracy": 0.7581227436823105,
55
- "eval_loss": 0.6365415453910828,
56
- "eval_runtime": 0.681,
57
- "eval_samples_per_second": 406.732,
58
- "eval_steps_per_second": 51.392,
59
  "step": 780
60
  },
61
  {
62
  "epoch": 6.0,
63
  "eval_accuracy": 0.7545126353790613,
64
- "eval_loss": 0.897487223148346,
65
- "eval_runtime": 0.6584,
66
- "eval_samples_per_second": 420.704,
67
- "eval_steps_per_second": 53.157,
68
  "step": 936
69
  },
70
  {
71
  "epoch": 6.41,
72
- "learning_rate": 7.639836289222374e-06,
73
- "loss": 0.3194,
74
  "step": 1000
75
  },
76
  {
77
  "epoch": 7.0,
78
- "eval_accuracy": 0.7581227436823105,
79
- "eval_loss": 1.2031357288360596,
80
- "eval_runtime": 0.5817,
81
- "eval_samples_per_second": 476.173,
82
- "eval_steps_per_second": 60.166,
83
  "step": 1092
84
  },
85
  {
86
  "epoch": 8.0,
87
- "eval_accuracy": 0.7581227436823105,
88
- "eval_loss": 1.2942094802856445,
89
- "eval_runtime": 0.6818,
90
- "eval_samples_per_second": 406.254,
91
- "eval_steps_per_second": 51.332,
92
  "step": 1248
93
  },
94
  {
95
- "epoch": 8.0,
96
- "step": 1248,
97
- "total_flos": 1310293055692800.0,
98
- "train_loss": 0.42912168074876833,
99
- "train_runtime": 173.8183,
100
- "train_samples_per_second": 143.253,
101
- "train_steps_per_second": 8.975
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  }
103
  ],
104
  "max_steps": 1560,
105
  "num_train_epochs": 10,
106
- "total_flos": 1310293055692800.0,
107
  "trial_name": null,
108
  "trial_params": null
109
  }
 
1
  {
2
+ "best_metric": 0.7978339350180506,
3
  "best_model_checkpoint": "./fine-tune/roberta-base/rte/checkpoint-780",
4
+ "epoch": 10.0,
5
+ "global_step": 1560,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10
  {
11
  "epoch": 1.0,
12
  "eval_accuracy": 0.4729241877256318,
13
+ "eval_loss": 0.7023295164108276,
14
+ "eval_runtime": 0.5782,
15
+ "eval_samples_per_second": 479.05,
16
+ "eval_steps_per_second": 60.53,
17
  "step": 156
18
  },
19
  {
20
  "epoch": 2.0,
21
+ "eval_accuracy": 0.6895306859205776,
22
+ "eval_loss": 0.6356249451637268,
23
+ "eval_runtime": 0.6365,
24
+ "eval_samples_per_second": 435.208,
25
+ "eval_steps_per_second": 54.99,
26
  "step": 312
27
  },
28
  {
29
  "epoch": 3.0,
30
+ "eval_accuracy": 0.7617328519855595,
31
+ "eval_loss": 0.5177018046379089,
32
+ "eval_runtime": 0.6111,
33
+ "eval_samples_per_second": 453.318,
34
+ "eval_steps_per_second": 57.278,
35
  "step": 468
36
  },
37
  {
38
  "epoch": 3.21,
39
+ "learning_rate": 7.23055934515689e-06,
40
+ "loss": 0.6131,
41
  "step": 500
42
  },
43
  {
44
  "epoch": 4.0,
45
+ "eval_accuracy": 0.7472924187725631,
46
+ "eval_loss": 0.6237531304359436,
47
+ "eval_runtime": 0.6069,
48
+ "eval_samples_per_second": 456.426,
49
+ "eval_steps_per_second": 57.671,
50
  "step": 624
51
  },
52
  {
53
  "epoch": 5.0,
54
+ "eval_accuracy": 0.7978339350180506,
55
+ "eval_loss": 0.5446364283561707,
56
+ "eval_runtime": 0.6019,
57
+ "eval_samples_per_second": 460.195,
58
+ "eval_steps_per_second": 58.147,
59
  "step": 780
60
  },
61
  {
62
  "epoch": 6.0,
63
  "eval_accuracy": 0.7545126353790613,
64
+ "eval_loss": 0.9697452783584595,
65
+ "eval_runtime": 0.6225,
66
+ "eval_samples_per_second": 444.97,
67
+ "eval_steps_per_second": 56.224,
68
  "step": 936
69
  },
70
  {
71
  "epoch": 6.41,
72
+ "learning_rate": 3.819918144611187e-06,
73
+ "loss": 0.2528,
74
  "step": 1000
75
  },
76
  {
77
  "epoch": 7.0,
78
+ "eval_accuracy": 0.7689530685920578,
79
+ "eval_loss": 1.100365161895752,
80
+ "eval_runtime": 0.6174,
81
+ "eval_samples_per_second": 448.691,
82
+ "eval_steps_per_second": 56.694,
83
  "step": 1092
84
  },
85
  {
86
  "epoch": 8.0,
87
+ "eval_accuracy": 0.7725631768953068,
88
+ "eval_loss": 1.193679928779602,
89
+ "eval_runtime": 0.6461,
90
+ "eval_samples_per_second": 428.72,
91
+ "eval_steps_per_second": 54.17,
92
  "step": 1248
93
  },
94
  {
95
+ "epoch": 9.0,
96
+ "eval_accuracy": 0.7725631768953068,
97
+ "eval_loss": 1.331286072731018,
98
+ "eval_runtime": 0.5642,
99
+ "eval_samples_per_second": 490.964,
100
+ "eval_steps_per_second": 62.035,
101
+ "step": 1404
102
+ },
103
+ {
104
+ "epoch": 9.62,
105
+ "learning_rate": 4.092769440654844e-07,
106
+ "loss": 0.1073,
107
+ "step": 1500
108
+ },
109
+ {
110
+ "epoch": 10.0,
111
+ "eval_accuracy": 0.7725631768953068,
112
+ "eval_loss": 1.353358268737793,
113
+ "eval_runtime": 0.6152,
114
+ "eval_samples_per_second": 450.281,
115
+ "eval_steps_per_second": 56.895,
116
+ "step": 1560
117
+ },
118
+ {
119
+ "epoch": 10.0,
120
+ "step": 1560,
121
+ "total_flos": 1637866319616000.0,
122
+ "train_loss": 0.31504388589125415,
123
+ "train_runtime": 295.0697,
124
+ "train_samples_per_second": 84.387,
125
+ "train_steps_per_second": 5.287
126
  }
127
  ],
128
  "max_steps": 1560,
129
  "num_train_epochs": 10,
130
+ "total_flos": 1637866319616000.0,
131
  "trial_name": null,
132
  "trial_params": null
133
  }