MahmoudIbrahim commited on
Commit
76fd95e
1 Parent(s): d82a0b1

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +84 -21
trainer_state.json CHANGED
@@ -1,58 +1,121 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.004966476285075739,
5
  "eval_steps": 500,
6
- "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0008277460475126231,
13
- "grad_norm": 0.666200578212738,
14
- "learning_rate": 0.00018545454545454545,
15
  "loss": 2.806,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.0016554920950252463,
20
- "grad_norm": 0.7228823304176331,
21
- "learning_rate": 0.0001490909090909091,
22
- "loss": 2.4029,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.0024832381425378696,
27
- "grad_norm": 0.6035403609275818,
28
- "learning_rate": 0.00011272727272727272,
29
- "loss": 2.4103,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.0033109841900504926,
34
- "grad_norm": 0.6814154386520386,
35
- "learning_rate": 7.636363636363637e-05,
36
- "loss": 2.367,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.004138730237563116,
41
- "grad_norm": 0.5607640147209167,
42
- "learning_rate": 4e-05,
43
- "loss": 2.1349,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.004966476285075739,
48
- "grad_norm": 0.6008804440498352,
49
- "learning_rate": 3.636363636363636e-06,
50
- "loss": 2.2581,
51
  "step": 60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  }
53
  ],
54
  "logging_steps": 10,
55
- "max_steps": 60,
56
  "num_input_tokens_seen": 0,
57
  "num_train_epochs": 1,
58
  "save_steps": 500,
@@ -68,7 +131,7 @@
68
  "attributes": {}
69
  }
70
  },
71
- "total_flos": 2.053680226486272e+16,
72
  "train_batch_size": 2,
73
  "trial_name": null,
74
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.012416190712689347,
5
  "eval_steps": 500,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0008277460475126231,
13
+ "grad_norm": 0.6720314025878906,
14
+ "learning_rate": 0.00019448275862068965,
15
  "loss": 2.806,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.0016554920950252463,
20
+ "grad_norm": 0.7282748818397522,
21
+ "learning_rate": 0.00018068965517241382,
22
+ "loss": 2.4015,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.0024832381425378696,
27
+ "grad_norm": 0.6224257349967957,
28
+ "learning_rate": 0.00016689655172413793,
29
+ "loss": 2.4062,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.0033109841900504926,
34
+ "grad_norm": 0.6465514898300171,
35
+ "learning_rate": 0.00015310344827586207,
36
+ "loss": 2.3612,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.004138730237563116,
41
+ "grad_norm": 0.5610107183456421,
42
+ "learning_rate": 0.0001393103448275862,
43
+ "loss": 2.1203,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.004966476285075739,
48
+ "grad_norm": 0.5969945192337036,
49
+ "learning_rate": 0.00012551724137931035,
50
+ "loss": 2.2384,
51
  "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.005794222332588362,
55
+ "grad_norm": 0.6113339066505432,
56
+ "learning_rate": 0.00011172413793103449,
57
+ "loss": 2.3128,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.006621968380100985,
62
+ "grad_norm": 0.8051493167877197,
63
+ "learning_rate": 9.793103448275862e-05,
64
+ "loss": 2.2082,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.007449714427613608,
69
+ "grad_norm": 0.6741610169410706,
70
+ "learning_rate": 8.413793103448277e-05,
71
+ "loss": 2.3156,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.008277460475126232,
76
+ "grad_norm": 0.5629040598869324,
77
+ "learning_rate": 7.03448275862069e-05,
78
+ "loss": 2.1989,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.009105206522638855,
83
+ "grad_norm": 0.7672610282897949,
84
+ "learning_rate": 5.6551724137931037e-05,
85
+ "loss": 2.1749,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.009932952570151478,
90
+ "grad_norm": 0.5322269201278687,
91
+ "learning_rate": 4.275862068965518e-05,
92
+ "loss": 2.171,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.010760698617664101,
97
+ "grad_norm": 0.7353241443634033,
98
+ "learning_rate": 2.8965517241379313e-05,
99
+ "loss": 2.1584,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.011588444665176724,
104
+ "grad_norm": 0.6774106025695801,
105
+ "learning_rate": 1.5172413793103448e-05,
106
+ "loss": 2.2079,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.012416190712689347,
111
+ "grad_norm": 0.5964234471321106,
112
+ "learning_rate": 1.3793103448275862e-06,
113
+ "loss": 2.1165,
114
+ "step": 150
115
  }
116
  ],
117
  "logging_steps": 10,
118
+ "max_steps": 150,
119
  "num_input_tokens_seen": 0,
120
  "num_train_epochs": 1,
121
  "save_steps": 500,
 
131
  "attributes": {}
132
  }
133
  },
134
+ "total_flos": 5.120470461579264e+16,
135
  "train_batch_size": 2,
136
  "trial_name": null,
137
  "trial_params": null