samira1234 commited on
Commit
dbddc7b
1 Parent(s): b0102f0

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +140 -0
trainer_state.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 4.88786658999425,
5
+ "eval_steps": 500,
6
+ "global_step": 8500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.2875215641173088,
13
+ "grad_norm": 423.2573547363281,
14
+ "learning_rate": 4.712478435882691e-05,
15
+ "loss": 17.6035,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.5750431282346176,
20
+ "grad_norm": 9.149832725524902,
21
+ "learning_rate": 4.4249568717653824e-05,
22
+ "loss": 2.5022,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.8625646923519263,
27
+ "grad_norm": 3.246171236038208,
28
+ "learning_rate": 4.137435307648074e-05,
29
+ "loss": 1.3833,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 1.1500862564692351,
34
+ "grad_norm": 5.592076778411865,
35
+ "learning_rate": 3.8499137435307647e-05,
36
+ "loss": 1.0705,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 1.437607820586544,
41
+ "grad_norm": 12.64777660369873,
42
+ "learning_rate": 3.562392179413456e-05,
43
+ "loss": 0.9577,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 1.7251293847038527,
48
+ "grad_norm": 0.7939251065254211,
49
+ "learning_rate": 3.2748706152961475e-05,
50
+ "loss": 0.919,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 2.0126509488211615,
55
+ "grad_norm": 0.4556220769882202,
56
+ "learning_rate": 2.9873490511788386e-05,
57
+ "loss": 0.8873,
58
+ "step": 3500
59
+ },
60
+ {
61
+ "epoch": 2.3001725129384702,
62
+ "grad_norm": 0.45749151706695557,
63
+ "learning_rate": 2.6998274870615297e-05,
64
+ "loss": 0.8553,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 2.587694077055779,
69
+ "grad_norm": 0.6427181363105774,
70
+ "learning_rate": 2.4123059229442212e-05,
71
+ "loss": 0.8427,
72
+ "step": 4500
73
+ },
74
+ {
75
+ "epoch": 2.875215641173088,
76
+ "grad_norm": 0.35242655873298645,
77
+ "learning_rate": 2.124784358826912e-05,
78
+ "loss": 0.8227,
79
+ "step": 5000
80
+ },
81
+ {
82
+ "epoch": 3.1627372052903966,
83
+ "grad_norm": 0.5690515041351318,
84
+ "learning_rate": 1.8372627947096034e-05,
85
+ "loss": 0.808,
86
+ "step": 5500
87
+ },
88
+ {
89
+ "epoch": 3.4502587694077054,
90
+ "grad_norm": 0.46968725323677063,
91
+ "learning_rate": 1.5497412305922945e-05,
92
+ "loss": 0.8198,
93
+ "step": 6000
94
+ },
95
+ {
96
+ "epoch": 3.7377803335250146,
97
+ "grad_norm": 0.4196397662162781,
98
+ "learning_rate": 1.2622196664749858e-05,
99
+ "loss": 0.8029,
100
+ "step": 6500
101
+ },
102
+ {
103
+ "epoch": 4.025301897642323,
104
+ "grad_norm": 0.5571260452270508,
105
+ "learning_rate": 9.746981023576769e-06,
106
+ "loss": 0.8049,
107
+ "step": 7000
108
+ },
109
+ {
110
+ "epoch": 4.312823461759632,
111
+ "grad_norm": 0.5123286247253418,
112
+ "learning_rate": 6.871765382403681e-06,
113
+ "loss": 0.7934,
114
+ "step": 7500
115
+ },
116
+ {
117
+ "epoch": 4.6003450258769405,
118
+ "grad_norm": 0.3241867423057556,
119
+ "learning_rate": 3.9965497412305925e-06,
120
+ "loss": 0.796,
121
+ "step": 8000
122
+ },
123
+ {
124
+ "epoch": 4.88786658999425,
125
+ "grad_norm": 0.4111732542514801,
126
+ "learning_rate": 1.1213341000575044e-06,
127
+ "loss": 0.7851,
128
+ "step": 8500
129
+ }
130
+ ],
131
+ "logging_steps": 500,
132
+ "max_steps": 8695,
133
+ "num_input_tokens_seen": 0,
134
+ "num_train_epochs": 5,
135
+ "save_steps": 500,
136
+ "total_flos": 4492786123407360.0,
137
+ "train_batch_size": 8,
138
+ "trial_name": null,
139
+ "trial_params": null
140
+ }