tdelic committed on
Commit
6364edf
1 Parent(s): e35c556

Upload 8 files

Browse files
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9547b3c3c24ab99cfe3bf2ef74c3a30a59fc3ca5c187df32b4addc62aa5358d4
3
+ size 1204678496
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6dc7442bd684c6a1ebba9eddcf6df77ba509fc48b8712f9d8c1238edf34a4d8
3
+ size 341357212
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eadd77e21733695103b908b1c52f3ddb5ea56e01c7ea5fe2a8b4ceab2fdf11cb
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb65947675bbd89d6fc34b5241617778049bb2d0dc3dbd91bba90144a1950fac
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.05318588730911,
5
+ "eval_steps": 50,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.05,
13
+ "eval_loss": 1.8554456233978271,
14
+ "eval_runtime": 2143.5349,
15
+ "eval_samples_per_second": 2.363,
16
+ "eval_steps_per_second": 0.074,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.11,
21
+ "eval_loss": 1.8419370651245117,
22
+ "eval_runtime": 2143.6574,
23
+ "eval_samples_per_second": 2.363,
24
+ "eval_steps_per_second": 0.074,
25
+ "step": 100
26
+ },
27
+ {
28
+ "epoch": 0.16,
29
+ "eval_loss": 1.832232117652893,
30
+ "eval_runtime": 2143.9117,
31
+ "eval_samples_per_second": 2.363,
32
+ "eval_steps_per_second": 0.074,
33
+ "step": 150
34
+ },
35
+ {
36
+ "epoch": 0.21,
37
+ "eval_loss": 1.8266302347183228,
38
+ "eval_runtime": 2144.0794,
39
+ "eval_samples_per_second": 2.363,
40
+ "eval_steps_per_second": 0.074,
41
+ "step": 200
42
+ },
43
+ {
44
+ "epoch": 0.26,
45
+ "eval_loss": 1.8243311643600464,
46
+ "eval_runtime": 2144.1306,
47
+ "eval_samples_per_second": 2.363,
48
+ "eval_steps_per_second": 0.074,
49
+ "step": 250
50
+ },
51
+ {
52
+ "epoch": 0.32,
53
+ "eval_loss": 1.8222966194152832,
54
+ "eval_runtime": 2144.2111,
55
+ "eval_samples_per_second": 2.363,
56
+ "eval_steps_per_second": 0.074,
57
+ "step": 300
58
+ },
59
+ {
60
+ "epoch": 0.37,
61
+ "eval_loss": 1.8207086324691772,
62
+ "eval_runtime": 2143.8422,
63
+ "eval_samples_per_second": 2.363,
64
+ "eval_steps_per_second": 0.074,
65
+ "step": 350
66
+ },
67
+ {
68
+ "epoch": 0.42,
69
+ "eval_loss": 1.8194689750671387,
70
+ "eval_runtime": 2143.9601,
71
+ "eval_samples_per_second": 2.363,
72
+ "eval_steps_per_second": 0.074,
73
+ "step": 400
74
+ },
75
+ {
76
+ "epoch": 0.47,
77
+ "eval_loss": 1.818217158317566,
78
+ "eval_runtime": 2142.9482,
79
+ "eval_samples_per_second": 2.364,
80
+ "eval_steps_per_second": 0.074,
81
+ "step": 450
82
+ },
83
+ {
84
+ "epoch": 0.53,
85
+ "learning_rate": 1.0050251256281408e-05,
86
+ "loss": 1.8371,
87
+ "step": 500
88
+ },
89
+ {
90
+ "epoch": 0.53,
91
+ "eval_loss": 1.8166958093643188,
92
+ "eval_runtime": 2143.0693,
93
+ "eval_samples_per_second": 2.364,
94
+ "eval_steps_per_second": 0.074,
95
+ "step": 500
96
+ },
97
+ {
98
+ "epoch": 0.58,
99
+ "eval_loss": 1.8156176805496216,
100
+ "eval_runtime": 2143.2912,
101
+ "eval_samples_per_second": 2.364,
102
+ "eval_steps_per_second": 0.074,
103
+ "step": 550
104
+ },
105
+ {
106
+ "epoch": 0.63,
107
+ "eval_loss": 1.8143000602722168,
108
+ "eval_runtime": 2143.0457,
109
+ "eval_samples_per_second": 2.364,
110
+ "eval_steps_per_second": 0.074,
111
+ "step": 600
112
+ },
113
+ {
114
+ "epoch": 0.68,
115
+ "eval_loss": 1.813321590423584,
116
+ "eval_runtime": 2142.6086,
117
+ "eval_samples_per_second": 2.364,
118
+ "eval_steps_per_second": 0.074,
119
+ "step": 650
120
+ },
121
+ {
122
+ "epoch": 0.74,
123
+ "eval_loss": 1.8123859167099,
124
+ "eval_runtime": 2142.7684,
125
+ "eval_samples_per_second": 2.364,
126
+ "eval_steps_per_second": 0.074,
127
+ "step": 700
128
+ },
129
+ {
130
+ "epoch": 0.79,
131
+ "eval_loss": 1.81136155128479,
132
+ "eval_runtime": 2143.3216,
133
+ "eval_samples_per_second": 2.364,
134
+ "eval_steps_per_second": 0.074,
135
+ "step": 750
136
+ },
137
+ {
138
+ "epoch": 0.84,
139
+ "eval_loss": 1.8105697631835938,
140
+ "eval_runtime": 2142.989,
141
+ "eval_samples_per_second": 2.364,
142
+ "eval_steps_per_second": 0.074,
143
+ "step": 800
144
+ },
145
+ {
146
+ "epoch": 0.9,
147
+ "eval_loss": 1.8099677562713623,
148
+ "eval_runtime": 2142.6436,
149
+ "eval_samples_per_second": 2.364,
150
+ "eval_steps_per_second": 0.074,
151
+ "step": 850
152
+ },
153
+ {
154
+ "epoch": 0.95,
155
+ "eval_loss": 1.809475302696228,
156
+ "eval_runtime": 2142.6825,
157
+ "eval_samples_per_second": 2.364,
158
+ "eval_steps_per_second": 0.074,
159
+ "step": 900
160
+ },
161
+ {
162
+ "epoch": 1.0,
163
+ "eval_loss": 1.8091450929641724,
164
+ "eval_runtime": 2142.5605,
165
+ "eval_samples_per_second": 2.364,
166
+ "eval_steps_per_second": 0.074,
167
+ "step": 950
168
+ },
169
+ {
170
+ "epoch": 1.05,
171
+ "learning_rate": 0.0,
172
+ "loss": 1.8001,
173
+ "step": 1000
174
+ },
175
+ {
176
+ "epoch": 1.05,
177
+ "eval_loss": 1.809110403060913,
178
+ "eval_runtime": 2142.8135,
179
+ "eval_samples_per_second": 2.364,
180
+ "eval_steps_per_second": 0.074,
181
+ "step": 1000
182
+ }
183
+ ],
184
+ "logging_steps": 500,
185
+ "max_steps": 1000,
186
+ "num_input_tokens_seen": 0,
187
+ "num_train_epochs": 2,
188
+ "save_steps": 50,
189
+ "total_flos": 7.15390604279808e+17,
190
+ "train_batch_size": 16,
191
+ "trial_name": null,
192
+ "trial_params": null
193
+ }