aliciiavs committed on
Commit
7bcd68c
1 Parent(s): 2d3173c

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,13 @@
1
  {
2
- "epoch": 2.95,
3
- "total_flos": 4.128884221330391e+17,
4
- "train_loss": 1.5558896859486897,
5
- "train_runtime": 188.9978,
6
- "train_samples_per_second": 28.572,
7
- "train_steps_per_second": 0.127
 
 
 
 
 
8
  }
 
1
  {
2
+ "epoch": 3.93,
3
+ "eval_accuracy": 0.865,
4
+ "eval_loss": 1.028597354888916,
5
+ "eval_runtime": 2.3005,
6
+ "eval_samples_per_second": 86.936,
7
+ "eval_steps_per_second": 3.043,
8
+ "total_flos": 5.498979435916001e+17,
9
+ "train_loss": 1.2344711497426033,
10
+ "train_runtime": 175.6186,
11
+ "train_samples_per_second": 40.998,
12
+ "train_steps_per_second": 0.182
13
  }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.93,
3
+ "eval_accuracy": 0.865,
4
+ "eval_loss": 1.028597354888916,
5
+ "eval_runtime": 2.3005,
6
+ "eval_samples_per_second": 86.936,
7
+ "eval_steps_per_second": 3.043
8
+ }
runs/Apr09_20-24-58_4fe294af0ba5/events.out.tfevents.1712694513.4fe294af0ba5.34.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fc9878db27b97fef5d5e2d1eb329937640a4248096bc335340b7b3dc5807005
3
+ size 1039
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.95,
3
- "total_flos": 4.128884221330391e+17,
4
- "train_loss": 1.5558896859486897,
5
- "train_runtime": 188.9978,
6
- "train_samples_per_second": 28.572,
7
- "train_steps_per_second": 0.127
8
  }
 
1
  {
2
+ "epoch": 3.93,
3
+ "total_flos": 5.498979435916001e+17,
4
+ "train_loss": 1.2344711497426033,
5
+ "train_runtime": 175.6186,
6
+ "train_samples_per_second": 40.998,
7
+ "train_steps_per_second": 0.182
8
  }
trainer_state.json CHANGED
@@ -1,70 +1,86 @@
1
  {
2
- "best_metric": 0.435,
3
- "best_model_checkpoint": "final-run/checkpoint-24",
4
- "epoch": 2.9473684210526314,
5
  "eval_steps": 500,
6
- "global_step": 24,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.98,
13
- "eval_accuracy": 0.305,
14
- "eval_loss": 1.5488276481628418,
15
- "eval_runtime": 2.2778,
16
- "eval_samples_per_second": 87.805,
17
- "eval_steps_per_second": 3.073,
18
  "step": 8
19
  },
20
  {
21
  "epoch": 1.23,
22
- "grad_norm": 1.9612401723861694,
23
- "learning_rate": 6.666666666666667e-06,
24
- "loss": 1.6543,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 1.96,
29
- "eval_accuracy": 0.31,
30
- "eval_loss": 1.4750585556030273,
31
- "eval_runtime": 2.273,
32
- "eval_samples_per_second": 87.989,
33
- "eval_steps_per_second": 3.08,
34
  "step": 16
35
  },
36
  {
37
  "epoch": 2.46,
38
- "grad_norm": 2.8159549236297607,
39
- "learning_rate": 1.904761904761905e-06,
40
- "loss": 1.5,
41
  "step": 20
42
  },
43
  {
44
  "epoch": 2.95,
45
- "eval_accuracy": 0.435,
46
- "eval_loss": 1.436577320098877,
47
- "eval_runtime": 2.2765,
48
- "eval_samples_per_second": 87.855,
49
- "eval_steps_per_second": 3.075,
50
  "step": 24
51
  },
52
  {
53
- "epoch": 2.95,
54
- "step": 24,
55
- "total_flos": 4.128884221330391e+17,
56
- "train_loss": 1.5558896859486897,
57
- "train_runtime": 188.9978,
58
- "train_samples_per_second": 28.572,
59
- "train_steps_per_second": 0.127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  }
61
  ],
62
  "logging_steps": 10,
63
- "max_steps": 24,
64
  "num_input_tokens_seen": 0,
65
- "num_train_epochs": 3,
66
  "save_steps": 500,
67
- "total_flos": 4.128884221330391e+17,
68
  "train_batch_size": 32,
69
  "trial_name": null,
70
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.865,
3
+ "best_model_checkpoint": "final-run/checkpoint-32",
4
+ "epoch": 3.9298245614035086,
5
  "eval_steps": 500,
6
+ "global_step": 32,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.98,
13
+ "eval_accuracy": 0.71,
14
+ "eval_loss": 1.295009970664978,
15
+ "eval_runtime": 2.2823,
16
+ "eval_samples_per_second": 87.632,
17
+ "eval_steps_per_second": 3.067,
18
  "step": 8
19
  },
20
  {
21
  "epoch": 1.23,
22
+ "grad_norm": 2.336587429046631,
23
+ "learning_rate": 7.857142857142858e-06,
24
+ "loss": 1.383,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 1.96,
29
+ "eval_accuracy": 0.815,
30
+ "eval_loss": 1.1531453132629395,
31
+ "eval_runtime": 2.2842,
32
+ "eval_samples_per_second": 87.558,
33
+ "eval_steps_per_second": 3.065,
34
  "step": 16
35
  },
36
  {
37
  "epoch": 2.46,
38
+ "grad_norm": 2.422740936279297,
39
+ "learning_rate": 4.2857142857142855e-06,
40
+ "loss": 1.2243,
41
  "step": 20
42
  },
43
  {
44
  "epoch": 2.95,
45
+ "eval_accuracy": 0.86,
46
+ "eval_loss": 1.0636448860168457,
47
+ "eval_runtime": 2.3294,
48
+ "eval_samples_per_second": 85.86,
49
+ "eval_steps_per_second": 3.005,
50
  "step": 24
51
  },
52
  {
53
+ "epoch": 3.68,
54
+ "grad_norm": 2.130319356918335,
55
+ "learning_rate": 7.142857142857143e-07,
56
+ "loss": 1.1189,
57
+ "step": 30
58
+ },
59
+ {
60
+ "epoch": 3.93,
61
+ "eval_accuracy": 0.865,
62
+ "eval_loss": 1.028597354888916,
63
+ "eval_runtime": 2.2993,
64
+ "eval_samples_per_second": 86.984,
65
+ "eval_steps_per_second": 3.044,
66
+ "step": 32
67
+ },
68
+ {
69
+ "epoch": 3.93,
70
+ "step": 32,
71
+ "total_flos": 5.498979435916001e+17,
72
+ "train_loss": 1.2344711497426033,
73
+ "train_runtime": 175.6186,
74
+ "train_samples_per_second": 40.998,
75
+ "train_steps_per_second": 0.182
76
  }
77
  ],
78
  "logging_steps": 10,
79
+ "max_steps": 32,
80
  "num_input_tokens_seen": 0,
81
+ "num_train_epochs": 4,
82
  "save_steps": 500,
83
+ "total_flos": 5.498979435916001e+17,
84
  "train_batch_size": 32,
85
  "trial_name": null,
86
  "trial_params": null