aliciiavs committed on
Commit
5a02696
1 Parent(s): 7a7b09a

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +6 -6
  2. train_results.json +6 -6
  3. trainer_state.json +19 -67
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.93,
3
- "total_flos": 5.498979435916001e+17,
4
- "train_loss": 1.532381147146225,
5
- "train_runtime": 193.437,
6
- "train_samples_per_second": 37.221,
7
- "train_steps_per_second": 0.165
8
  }
 
1
  {
2
+ "epoch": 0.98,
3
+ "total_flos": 1.3886937921591706e+17,
4
+ "train_loss": 1.3266394138336182,
5
+ "train_runtime": 44.5183,
6
+ "train_samples_per_second": 40.433,
7
+ "train_steps_per_second": 0.18
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.93,
3
- "total_flos": 5.498979435916001e+17,
4
- "train_loss": 1.532381147146225,
5
- "train_runtime": 193.437,
6
- "train_samples_per_second": 37.221,
7
- "train_steps_per_second": 0.165
8
  }
 
1
  {
2
+ "epoch": 0.98,
3
+ "total_flos": 1.3886937921591706e+17,
4
+ "train_loss": 1.3266394138336182,
5
+ "train_runtime": 44.5183,
6
+ "train_samples_per_second": 40.433,
7
+ "train_steps_per_second": 0.18
8
  }
trainer_state.json CHANGED
@@ -1,86 +1,38 @@
1
  {
2
- "best_metric": 0.54,
3
- "best_model_checkpoint": "alea/checkpoint-32",
4
- "epoch": 3.9298245614035086,
5
  "eval_steps": 500,
6
- "global_step": 32,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.98,
13
- "eval_accuracy": 0.295,
14
- "eval_loss": 1.5737404823303223,
15
- "eval_runtime": 2.351,
16
- "eval_samples_per_second": 85.069,
17
- "eval_steps_per_second": 2.977,
18
  "step": 8
19
  },
20
  {
21
- "epoch": 1.23,
22
- "grad_norm": 2.310837745666504,
23
- "learning_rate": 7.857142857142858e-06,
24
- "loss": 1.7073,
25
- "step": 10
26
- },
27
- {
28
- "epoch": 1.96,
29
- "eval_accuracy": 0.315,
30
- "eval_loss": 1.5100140571594238,
31
- "eval_runtime": 2.2724,
32
- "eval_samples_per_second": 88.014,
33
- "eval_steps_per_second": 3.081,
34
- "step": 16
35
- },
36
- {
37
- "epoch": 2.46,
38
- "grad_norm": 2.365218162536621,
39
- "learning_rate": 4.2857142857142855e-06,
40
- "loss": 1.5052,
41
- "step": 20
42
- },
43
- {
44
- "epoch": 2.95,
45
- "eval_accuracy": 0.48,
46
- "eval_loss": 1.4322025775909424,
47
- "eval_runtime": 2.2766,
48
- "eval_samples_per_second": 87.851,
49
- "eval_steps_per_second": 3.075,
50
- "step": 24
51
- },
52
- {
53
- "epoch": 3.68,
54
- "grad_norm": 2.047398328781128,
55
- "learning_rate": 7.142857142857143e-07,
56
- "loss": 1.4107,
57
- "step": 30
58
- },
59
- {
60
- "epoch": 3.93,
61
- "eval_accuracy": 0.54,
62
- "eval_loss": 1.3986802101135254,
63
- "eval_runtime": 2.2742,
64
- "eval_samples_per_second": 87.942,
65
- "eval_steps_per_second": 3.078,
66
- "step": 32
67
- },
68
- {
69
- "epoch": 3.93,
70
- "step": 32,
71
- "total_flos": 5.498979435916001e+17,
72
- "train_loss": 1.532381147146225,
73
- "train_runtime": 193.437,
74
- "train_samples_per_second": 37.221,
75
- "train_steps_per_second": 0.165
76
  }
77
  ],
78
  "logging_steps": 10,
79
- "max_steps": 32,
80
  "num_input_tokens_seen": 0,
81
- "num_train_epochs": 4,
82
  "save_steps": 500,
83
- "total_flos": 5.498979435916001e+17,
84
  "train_batch_size": 32,
85
  "trial_name": null,
86
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.645,
3
+ "best_model_checkpoint": "alea/checkpoint-8",
4
+ "epoch": 0.9824561403508771,
5
  "eval_steps": 500,
6
+ "global_step": 8,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.98,
13
+ "eval_accuracy": 0.645,
14
+ "eval_loss": 1.2876272201538086,
15
+ "eval_runtime": 2.2928,
16
+ "eval_samples_per_second": 87.229,
17
+ "eval_steps_per_second": 3.053,
18
  "step": 8
19
  },
20
  {
21
+ "epoch": 0.98,
22
+ "step": 8,
23
+ "total_flos": 1.3886937921591706e+17,
24
+ "train_loss": 1.3266394138336182,
25
+ "train_runtime": 44.5183,
26
+ "train_samples_per_second": 40.433,
27
+ "train_steps_per_second": 0.18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  }
29
  ],
30
  "logging_steps": 10,
31
+ "max_steps": 8,
32
  "num_input_tokens_seen": 0,
33
+ "num_train_epochs": 1,
34
  "save_steps": 500,
35
+ "total_flos": 1.3886937921591706e+17,
36
  "train_batch_size": 32,
37
  "trial_name": null,
38
  "trial_params": null