embunna committed on
Commit
15d1d40
·
verified ·
1 Parent(s): d65f589

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 2.6666666666666665,
3
- "eval_accuracy": 0.46875,
4
  "eval_loss": NaN,
5
- "eval_runtime": 0.3392,
6
- "eval_samples_per_second": 283.038,
7
- "eval_steps_per_second": 8.845,
8
- "total_flos": 2.326039595266867e+16,
9
  "train_loss": 0.0,
10
- "train_runtime": 17.8085,
11
- "train_samples_per_second": 145.549,
12
- "train_steps_per_second": 1.011
13
  }
 
1
  {
2
+ "epoch": 2.9411764705882355,
3
+ "eval_accuracy": 0.30833333333333335,
4
  "eval_loss": NaN,
5
+ "eval_runtime": 1.611,
6
+ "eval_samples_per_second": 223.468,
7
+ "eval_steps_per_second": 7.449,
8
+ "total_flos": 9.643372488710554e+16,
9
  "train_loss": 0.0,
10
+ "train_runtime": 96.2766,
11
+ "train_samples_per_second": 100.959,
12
+ "train_steps_per_second": 0.779
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.6666666666666665,
3
- "eval_accuracy": 0.46875,
4
  "eval_loss": NaN,
5
- "eval_runtime": 0.3392,
6
- "eval_samples_per_second": 283.038,
7
- "eval_steps_per_second": 8.845
8
  }
 
1
  {
2
+ "epoch": 2.9411764705882355,
3
+ "eval_accuracy": 0.30833333333333335,
4
  "eval_loss": NaN,
5
+ "eval_runtime": 1.611,
6
+ "eval_samples_per_second": 223.468,
7
+ "eval_steps_per_second": 7.449
8
  }
runs/Jun06_04-32-49_46a18104521a/events.out.tfevents.1717648477.46a18104521a.1419.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29da9b2c62281e71e206cffc2d09b5e58910cc1885112eb069bd399382d19664
3
+ size 405
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.6666666666666665,
3
- "total_flos": 2.326039595266867e+16,
4
  "train_loss": 0.0,
5
- "train_runtime": 17.8085,
6
- "train_samples_per_second": 145.549,
7
- "train_steps_per_second": 1.011
8
  }
 
1
  {
2
+ "epoch": 2.9411764705882355,
3
+ "total_flos": 9.643372488710554e+16,
4
  "train_loss": 0.0,
5
+ "train_runtime": 96.2766,
6
+ "train_samples_per_second": 100.959,
7
+ "train_steps_per_second": 0.779
8
  }
trainer_state.json CHANGED
@@ -1,59 +1,101 @@
1
  {
2
- "best_metric": 0.46875,
3
- "best_model_checkpoint": "resnet-18-please-work/checkpoint-6",
4
- "epoch": 2.6666666666666665,
5
  "eval_steps": 500,
6
- "global_step": 18,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.8888888888888888,
13
- "eval_accuracy": 0.46875,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  "eval_loss": NaN,
15
- "eval_runtime": 0.5279,
16
- "eval_samples_per_second": 181.87,
17
- "eval_steps_per_second": 5.683,
18
- "step": 6
19
  },
20
  {
21
- "epoch": 1.4814814814814814,
22
  "grad_norm": NaN,
23
- "learning_rate": 2.5e-05,
24
  "loss": 0.0,
25
- "step": 10
 
 
 
 
 
 
 
26
  },
27
  {
28
- "epoch": 1.925925925925926,
29
- "eval_accuracy": 0.46875,
 
 
 
 
 
 
 
30
  "eval_loss": NaN,
31
- "eval_runtime": 0.3091,
32
- "eval_samples_per_second": 310.537,
33
- "eval_steps_per_second": 9.704,
34
- "step": 13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  },
36
  {
37
- "epoch": 2.6666666666666665,
38
- "eval_accuracy": 0.46875,
39
  "eval_loss": NaN,
40
- "eval_runtime": 0.2963,
41
- "eval_samples_per_second": 323.973,
42
- "eval_steps_per_second": 10.124,
43
- "step": 18
44
  },
45
  {
46
- "epoch": 2.6666666666666665,
47
- "step": 18,
48
- "total_flos": 2.326039595266867e+16,
49
  "train_loss": 0.0,
50
- "train_runtime": 17.8085,
51
- "train_samples_per_second": 145.549,
52
- "train_steps_per_second": 1.011
53
  }
54
  ],
55
  "logging_steps": 10,
56
- "max_steps": 18,
57
  "num_input_tokens_seen": 0,
58
  "num_train_epochs": 3,
59
  "save_steps": 500,
@@ -69,7 +111,7 @@
69
  "attributes": {}
70
  }
71
  },
72
- "total_flos": 2.326039595266867e+16,
73
  "train_batch_size": 32,
74
  "trial_name": null,
75
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.30833333333333335,
3
+ "best_model_checkpoint": "resnet-18-please-work/checkpoint-25",
4
+ "epoch": 2.9411764705882355,
5
  "eval_steps": 500,
6
+ "global_step": 75,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.39215686274509803,
13
+ "grad_norm": NaN,
14
+ "learning_rate": 4.850746268656717e-05,
15
+ "loss": 0.0,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.7843137254901961,
20
+ "grad_norm": NaN,
21
+ "learning_rate": 4.104477611940299e-05,
22
+ "loss": 0.0,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.9803921568627451,
27
+ "eval_accuracy": 0.30833333333333335,
28
  "eval_loss": NaN,
29
+ "eval_runtime": 2.1345,
30
+ "eval_samples_per_second": 168.656,
31
+ "eval_steps_per_second": 5.622,
32
+ "step": 25
33
  },
34
  {
35
+ "epoch": 1.1764705882352942,
36
  "grad_norm": NaN,
37
+ "learning_rate": 3.358208955223881e-05,
38
  "loss": 0.0,
39
+ "step": 30
40
+ },
41
+ {
42
+ "epoch": 1.5686274509803921,
43
+ "grad_norm": NaN,
44
+ "learning_rate": 2.6119402985074626e-05,
45
+ "loss": 0.0,
46
+ "step": 40
47
  },
48
  {
49
+ "epoch": 1.9607843137254903,
50
+ "grad_norm": NaN,
51
+ "learning_rate": 1.865671641791045e-05,
52
+ "loss": 0.0,
53
+ "step": 50
54
+ },
55
+ {
56
+ "epoch": 2.0,
57
+ "eval_accuracy": 0.30833333333333335,
58
  "eval_loss": NaN,
59
+ "eval_runtime": 1.8559,
60
+ "eval_samples_per_second": 193.978,
61
+ "eval_steps_per_second": 6.466,
62
+ "step": 51
63
+ },
64
+ {
65
+ "epoch": 2.3529411764705883,
66
+ "grad_norm": NaN,
67
+ "learning_rate": 1.119402985074627e-05,
68
+ "loss": 0.0,
69
+ "step": 60
70
+ },
71
+ {
72
+ "epoch": 2.7450980392156863,
73
+ "grad_norm": NaN,
74
+ "learning_rate": 3.7313432835820893e-06,
75
+ "loss": 0.0,
76
+ "step": 70
77
  },
78
  {
79
+ "epoch": 2.9411764705882355,
80
+ "eval_accuracy": 0.30833333333333335,
81
  "eval_loss": NaN,
82
+ "eval_runtime": 1.403,
83
+ "eval_samples_per_second": 256.584,
84
+ "eval_steps_per_second": 8.553,
85
+ "step": 75
86
  },
87
  {
88
+ "epoch": 2.9411764705882355,
89
+ "step": 75,
90
+ "total_flos": 9.643372488710554e+16,
91
  "train_loss": 0.0,
92
+ "train_runtime": 96.2766,
93
+ "train_samples_per_second": 100.959,
94
+ "train_steps_per_second": 0.779
95
  }
96
  ],
97
  "logging_steps": 10,
98
+ "max_steps": 75,
99
  "num_input_tokens_seen": 0,
100
  "num_train_epochs": 3,
101
  "save_steps": 500,
 
111
  "attributes": {}
112
  }
113
  },
114
+ "total_flos": 9.643372488710554e+16,
115
  "train_batch_size": 32,
116
  "trial_name": null,
117
  "trial_params": null