Kotiks committed on
Commit
4388855
1 Parent(s): 4e7cf59

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +30 -30
all_results.json CHANGED
@@ -5,9 +5,9 @@
5
  "eval_runtime": 0.2994,
6
  "eval_samples_per_second": 73.482,
7
  "eval_steps_per_second": 6.68,
8
- "total_flos": 2.348890959163392e+16,
9
- "train_loss": 0.2989959716796875,
10
- "train_runtime": 40.2987,
11
- "train_samples_per_second": 23.45,
12
- "train_steps_per_second": 0.372
13
  }
 
5
  "eval_runtime": 0.2994,
6
  "eval_samples_per_second": 73.482,
7
  "eval_steps_per_second": 6.68,
8
+ "total_flos": 7.322993018578944e+16,
9
+ "train_loss": 0.5733050982157389,
10
+ "train_runtime": 83.7822,
11
+ "train_samples_per_second": 11.279,
12
+ "train_steps_per_second": 0.179
13
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "total_flos": 2.348890959163392e+16,
4
- "train_loss": 0.2989959716796875,
5
- "train_runtime": 40.2987,
6
- "train_samples_per_second": 23.45,
7
- "train_steps_per_second": 0.372
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "total_flos": 7.322993018578944e+16,
4
+ "train_loss": 0.5733050982157389,
5
+ "train_runtime": 83.7822,
6
+ "train_samples_per_second": 11.279,
7
+ "train_steps_per_second": 0.179
8
  }
trainer_state.json CHANGED
@@ -10,70 +10,70 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5454545454545454,
14
- "eval_loss": 1.666408658027649,
15
- "eval_runtime": 0.2367,
16
- "eval_samples_per_second": 92.955,
17
- "eval_steps_per_second": 8.45,
18
  "step": 3
19
  },
20
  {
21
  "epoch": 2.0,
22
  "eval_accuracy": 0.7272727272727273,
23
- "eval_loss": 0.7899118065834045,
24
- "eval_runtime": 0.3107,
25
- "eval_samples_per_second": 70.812,
26
- "eval_steps_per_second": 6.437,
27
  "step": 6
28
  },
29
  {
30
  "epoch": 3.0,
31
  "eval_accuracy": 0.7272727272727273,
32
- "eval_loss": 0.770358145236969,
33
- "eval_runtime": 0.2398,
34
- "eval_samples_per_second": 91.755,
35
- "eval_steps_per_second": 8.341,
36
  "step": 9
37
  },
38
  {
39
  "epoch": 3.33,
40
  "learning_rate": 1.923076923076923e-05,
41
- "loss": 0.3108,
42
  "step": 10
43
  },
44
  {
45
  "epoch": 4.0,
46
- "eval_accuracy": 0.7272727272727273,
47
- "eval_loss": 0.6269326210021973,
48
- "eval_runtime": 0.2432,
49
- "eval_samples_per_second": 90.452,
50
- "eval_steps_per_second": 8.223,
51
  "step": 12
52
  },
53
  {
54
  "epoch": 5.0,
55
- "eval_accuracy": 0.6818181818181818,
56
- "eval_loss": 0.644472062587738,
57
- "eval_runtime": 0.2431,
58
- "eval_samples_per_second": 90.505,
59
- "eval_steps_per_second": 8.228,
60
  "step": 15
61
  },
62
  {
63
  "epoch": 5.0,
64
  "step": 15,
65
- "total_flos": 2.348890959163392e+16,
66
- "train_loss": 0.2989959716796875,
67
- "train_runtime": 40.2987,
68
- "train_samples_per_second": 23.45,
69
- "train_steps_per_second": 0.372
70
  }
71
  ],
72
  "logging_steps": 10,
73
  "max_steps": 15,
74
  "num_train_epochs": 5,
75
  "save_steps": 500,
76
- "total_flos": 2.348890959163392e+16,
77
  "trial_name": null,
78
  "trial_params": null
79
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6818181818181818,
14
+ "eval_loss": 0.62430739402771,
15
+ "eval_runtime": 0.4086,
16
+ "eval_samples_per_second": 53.848,
17
+ "eval_steps_per_second": 4.895,
18
  "step": 3
19
  },
20
  {
21
  "epoch": 2.0,
22
  "eval_accuracy": 0.7272727272727273,
23
+ "eval_loss": 0.5459609031677246,
24
+ "eval_runtime": 0.4897,
25
+ "eval_samples_per_second": 44.927,
26
+ "eval_steps_per_second": 4.084,
27
  "step": 6
28
  },
29
  {
30
  "epoch": 3.0,
31
  "eval_accuracy": 0.7272727272727273,
32
+ "eval_loss": 0.5539888143539429,
33
+ "eval_runtime": 0.3962,
34
+ "eval_samples_per_second": 55.526,
35
+ "eval_steps_per_second": 5.048,
36
  "step": 9
37
  },
38
  {
39
  "epoch": 3.33,
40
  "learning_rate": 1.923076923076923e-05,
41
+ "loss": 0.6502,
42
  "step": 10
43
  },
44
  {
45
  "epoch": 4.0,
46
+ "eval_accuracy": 0.6818181818181818,
47
+ "eval_loss": 0.5746610164642334,
48
+ "eval_runtime": 0.3881,
49
+ "eval_samples_per_second": 56.691,
50
+ "eval_steps_per_second": 5.154,
51
  "step": 12
52
  },
53
  {
54
  "epoch": 5.0,
55
+ "eval_accuracy": 0.5909090909090909,
56
+ "eval_loss": 0.5886295437812805,
57
+ "eval_runtime": 0.391,
58
+ "eval_samples_per_second": 56.271,
59
+ "eval_steps_per_second": 5.116,
60
  "step": 15
61
  },
62
  {
63
  "epoch": 5.0,
64
  "step": 15,
65
+ "total_flos": 7.322993018578944e+16,
66
+ "train_loss": 0.5733050982157389,
67
+ "train_runtime": 83.7822,
68
+ "train_samples_per_second": 11.279,
69
+ "train_steps_per_second": 0.179
70
  }
71
  ],
72
  "logging_steps": 10,
73
  "max_steps": 15,
74
  "num_train_epochs": 5,
75
  "save_steps": 500,
76
+ "total_flos": 7.322993018578944e+16,
77
  "trial_name": null,
78
  "trial_params": null
79
  }