Commit
·
2f2fdda
1
Parent(s):
62bf125
Training in progress, step 5100
Browse files- adapter_config.json +4 -4
- adapter_model.bin +1 -1
- {checkpoint-4700 → checkpoint-5000/adapter_model}/README.md +0 -0
- {checkpoint-4700 → checkpoint-5000}/adapter_model/adapter_config.json +4 -4
- {checkpoint-4700 → checkpoint-5000/adapter_model}/adapter_model.bin +1 -1
- {checkpoint-4700/adapter_model → checkpoint-5100}/README.md +0 -0
- {checkpoint-4700 → checkpoint-5100}/adapter_config.json +4 -4
- {checkpoint-4700/adapter_model → checkpoint-5100}/adapter_model.bin +1 -1
- {checkpoint-4700 → checkpoint-5100}/optimizer.pt +1 -1
- {checkpoint-4700 → checkpoint-5100}/rng_state_0.pth +1 -1
- {checkpoint-4700 → checkpoint-5100}/rng_state_1.pth +1 -1
- {checkpoint-4700 → checkpoint-5100}/rng_state_10.pth +1 -1
- {checkpoint-4700 → checkpoint-5100}/rng_state_11.pth +1 -1
- {checkpoint-4700 → checkpoint-5100}/rng_state_12.pth +1 -1
- {checkpoint-4700 → checkpoint-5100}/rng_state_13.pth +1 -1
- {checkpoint-4700 → checkpoint-5100}/rng_state_2.pth +1 -1
- {checkpoint-4700 → checkpoint-5100}/rng_state_3.pth +1 -1
- {checkpoint-4700 → checkpoint-5100}/rng_state_4.pth +1 -1
- {checkpoint-4700 → checkpoint-5100}/rng_state_5.pth +1 -1
- {checkpoint-4700 → checkpoint-5100}/rng_state_6.pth +1 -1
- {checkpoint-4700 → checkpoint-5100}/rng_state_7.pth +1 -1
- {checkpoint-4700 → checkpoint-5100}/rng_state_8.pth +1 -1
- {checkpoint-4700 → checkpoint-5100}/rng_state_9.pth +1 -1
- {checkpoint-4700 → checkpoint-5100}/scheduler.pt +1 -1
- {checkpoint-4700 → checkpoint-5100}/trainer_state.json +107 -3
- {checkpoint-4700 → checkpoint-5100}/training_args.bin +1 -1
- training_args.bin +1 -1
adapter_config.json
CHANGED
@@ -14,13 +14,13 @@
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
|
|
|
|
17 |
"k_proj",
|
18 |
-
"
|
19 |
"down_proj",
|
20 |
"gate_proj",
|
21 |
-
"
|
22 |
-
"q_proj",
|
23 |
-
"up_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
+
"v_proj",
|
18 |
+
"up_proj",
|
19 |
"k_proj",
|
20 |
+
"q_proj",
|
21 |
"down_proj",
|
22 |
"gate_proj",
|
23 |
+
"o_proj"
|
|
|
|
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0782e3c048e8ec06b53914deeacadabf30a533ab881541cd5057d49805c57014
|
3 |
size 500897101
|
{checkpoint-4700 → checkpoint-5000/adapter_model}/README.md
RENAMED
File without changes
|
{checkpoint-4700 → checkpoint-5000}/adapter_model/adapter_config.json
RENAMED
@@ -14,13 +14,13 @@
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
|
|
|
|
|
|
17 |
"gate_proj",
|
18 |
"v_proj",
|
19 |
-
"k_proj",
|
20 |
"q_proj",
|
21 |
-
"up_proj"
|
22 |
-
"o_proj",
|
23 |
-
"down_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
+
"k_proj",
|
18 |
+
"o_proj",
|
19 |
+
"down_proj",
|
20 |
"gate_proj",
|
21 |
"v_proj",
|
|
|
22 |
"q_proj",
|
23 |
+
"up_proj"
|
|
|
|
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
{checkpoint-4700 → checkpoint-5000/adapter_model}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:658b9b2dec5afef11956f93a69d1e5899dfaf7ec45314dbb9a4f4fe9a8d341ef
|
3 |
size 500897101
|
{checkpoint-4700/adapter_model → checkpoint-5100}/README.md
RENAMED
File without changes
|
{checkpoint-4700 → checkpoint-5100}/adapter_config.json
RENAMED
@@ -14,13 +14,13 @@
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
-
"gate_proj",
|
18 |
"v_proj",
|
|
|
19 |
"k_proj",
|
20 |
"q_proj",
|
21 |
-
"
|
22 |
-
"
|
23 |
-
"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
|
|
17 |
"v_proj",
|
18 |
+
"up_proj",
|
19 |
"k_proj",
|
20 |
"q_proj",
|
21 |
+
"down_proj",
|
22 |
+
"gate_proj",
|
23 |
+
"o_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
{checkpoint-4700/adapter_model → checkpoint-5100}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0782e3c048e8ec06b53914deeacadabf30a533ab881541cd5057d49805c57014
|
3 |
size 500897101
|
{checkpoint-4700 → checkpoint-5100}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1001752701
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e547facfed70ca3ced4bba623ceed57a68ada7036877bcb0fe8abdc4206a71d
|
3 |
size 1001752701
|
{checkpoint-4700 → checkpoint-5100}/rng_state_0.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93586c7c2f4af77dae57306cefbbba6e501b56255a6e8ab1e51526e93247ec0c
|
3 |
size 27772
|
{checkpoint-4700 → checkpoint-5100}/rng_state_1.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:914cd17a4d5c2393130506ee74b380b7ebf49b994b8e1f7f9a5a5011789dcb9d
|
3 |
size 27772
|
{checkpoint-4700 → checkpoint-5100}/rng_state_10.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f79575017a903227eead6d8ec8c987f6c9f113b02cc3fdca1a05e6dfeb87c263
|
3 |
size 27789
|
{checkpoint-4700 → checkpoint-5100}/rng_state_11.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e862437c6ef81136a36f5dcccf18047c8bf5b5ce5c4cb6a5de4068de8ac98fd4
|
3 |
size 27789
|
{checkpoint-4700 → checkpoint-5100}/rng_state_12.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bd910734bd5ed0209fccf18e21529e6637d89e5d932f3863bdeb79c5a9955a0
|
3 |
size 27789
|
{checkpoint-4700 → checkpoint-5100}/rng_state_13.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd706d0aeecfb525da3ab5daa901fe7638c2e7f5d4cee63cd71b6ca026275bdd
|
3 |
size 27789
|
{checkpoint-4700 → checkpoint-5100}/rng_state_2.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3cc4068be271200f493c6e5933125535d31c00d193a77baaf617e995cb80113
|
3 |
size 27772
|
{checkpoint-4700 → checkpoint-5100}/rng_state_3.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:134ec8578ce099215e557b373586d1334dcba1f9bc3678e21e8a11c8293273b5
|
3 |
size 27772
|
{checkpoint-4700 → checkpoint-5100}/rng_state_4.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2799584e6f3b97fb9250177cae1099c95a1a72f8c924828310d6e9c9f712a0ad
|
3 |
size 27772
|
{checkpoint-4700 → checkpoint-5100}/rng_state_5.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2d851757976cd5436fb4f7e4f6b90e86fd7eb7fd174ef2f06b786fab4c8b687
|
3 |
size 27772
|
{checkpoint-4700 → checkpoint-5100}/rng_state_6.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0944f297f8c989eaaaa8748e1b4a866de5757e645fa13d826211ad5f9bf81798
|
3 |
size 27772
|
{checkpoint-4700 → checkpoint-5100}/rng_state_7.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4847f06e89ac0c33e93f4108e29bba8a6a11b57185752e5a61d1d159db7176b
|
3 |
size 27772
|
{checkpoint-4700 → checkpoint-5100}/rng_state_8.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32e20dcbaf8912d2c4ea76cd78324e384e6e3f340b552198fd77b189a7f80400
|
3 |
size 27772
|
{checkpoint-4700 → checkpoint-5100}/rng_state_9.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6299cbf9ce668897a91d6363b1369a182cddf5e47a03415dbcb196480ea98de3
|
3 |
size 27772
|
{checkpoint-4700 → checkpoint-5100}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04c0158b55de3dc26efbfe9fb78f379b4093417c7720f3a9de19f86082d0caf3
|
3 |
size 627
|
{checkpoint-4700 → checkpoint-5100}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1142,11 +1142,115 @@
|
|
1142 |
"learning_rate": 6.702555108433461e-05,
|
1143 |
"loss": 0.789,
|
1144 |
"step": 4700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1145 |
}
|
1146 |
],
|
1147 |
"max_steps": 7737,
|
1148 |
"num_train_epochs": 3,
|
1149 |
-
"total_flos": 2.
|
1150 |
"trial_name": null,
|
1151 |
"trial_params": null
|
1152 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.9775106630476929,
|
5 |
+
"global_step": 5100,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1142 |
"learning_rate": 6.702555108433461e-05,
|
1143 |
"loss": 0.789,
|
1144 |
"step": 4700
|
1145 |
+
},
|
1146 |
+
{
|
1147 |
+
"epoch": 1.83,
|
1148 |
+
"learning_rate": 6.606768635795574e-05,
|
1149 |
+
"loss": 0.7902,
|
1150 |
+
"step": 4725
|
1151 |
+
},
|
1152 |
+
{
|
1153 |
+
"epoch": 1.84,
|
1154 |
+
"learning_rate": 6.511332727445191e-05,
|
1155 |
+
"loss": 0.7924,
|
1156 |
+
"step": 4750
|
1157 |
+
},
|
1158 |
+
{
|
1159 |
+
"epoch": 1.85,
|
1160 |
+
"learning_rate": 6.416257243134747e-05,
|
1161 |
+
"loss": 0.7957,
|
1162 |
+
"step": 4775
|
1163 |
+
},
|
1164 |
+
{
|
1165 |
+
"epoch": 1.86,
|
1166 |
+
"learning_rate": 6.321552005380256e-05,
|
1167 |
+
"loss": 0.7916,
|
1168 |
+
"step": 4800
|
1169 |
+
},
|
1170 |
+
{
|
1171 |
+
"epoch": 1.87,
|
1172 |
+
"learning_rate": 6.22722679844652e-05,
|
1173 |
+
"loss": 0.7867,
|
1174 |
+
"step": 4825
|
1175 |
+
},
|
1176 |
+
{
|
1177 |
+
"epoch": 1.88,
|
1178 |
+
"learning_rate": 6.133291367336284e-05,
|
1179 |
+
"loss": 0.7944,
|
1180 |
+
"step": 4850
|
1181 |
+
},
|
1182 |
+
{
|
1183 |
+
"epoch": 1.89,
|
1184 |
+
"learning_rate": 6.039755416783457e-05,
|
1185 |
+
"loss": 0.7982,
|
1186 |
+
"step": 4875
|
1187 |
+
},
|
1188 |
+
{
|
1189 |
+
"epoch": 1.9,
|
1190 |
+
"learning_rate": 5.946628610250484e-05,
|
1191 |
+
"loss": 0.7918,
|
1192 |
+
"step": 4900
|
1193 |
+
},
|
1194 |
+
{
|
1195 |
+
"epoch": 1.91,
|
1196 |
+
"learning_rate": 5.853920568929996e-05,
|
1197 |
+
"loss": 0.7921,
|
1198 |
+
"step": 4925
|
1199 |
+
},
|
1200 |
+
{
|
1201 |
+
"epoch": 1.92,
|
1202 |
+
"learning_rate": 5.761640870750799e-05,
|
1203 |
+
"loss": 0.7878,
|
1204 |
+
"step": 4950
|
1205 |
+
},
|
1206 |
+
{
|
1207 |
+
"epoch": 1.93,
|
1208 |
+
"learning_rate": 5.669799049388375e-05,
|
1209 |
+
"loss": 0.7901,
|
1210 |
+
"step": 4975
|
1211 |
+
},
|
1212 |
+
{
|
1213 |
+
"epoch": 1.94,
|
1214 |
+
"learning_rate": 5.578404593279911e-05,
|
1215 |
+
"loss": 0.7858,
|
1216 |
+
"step": 5000
|
1217 |
+
},
|
1218 |
+
{
|
1219 |
+
"epoch": 1.94,
|
1220 |
+
"eval_loss": 0.807844877243042,
|
1221 |
+
"eval_runtime": 59.586,
|
1222 |
+
"eval_samples_per_second": 12.251,
|
1223 |
+
"eval_steps_per_second": 0.889,
|
1224 |
+
"step": 5000
|
1225 |
+
},
|
1226 |
+
{
|
1227 |
+
"epoch": 1.95,
|
1228 |
+
"learning_rate": 5.487466944644033e-05,
|
1229 |
+
"loss": 0.7902,
|
1230 |
+
"step": 5025
|
1231 |
+
},
|
1232 |
+
{
|
1233 |
+
"epoch": 1.96,
|
1234 |
+
"learning_rate": 5.3969954985052996e-05,
|
1235 |
+
"loss": 0.7979,
|
1236 |
+
"step": 5050
|
1237 |
+
},
|
1238 |
+
{
|
1239 |
+
"epoch": 1.97,
|
1240 |
+
"learning_rate": 5.306999601723579e-05,
|
1241 |
+
"loss": 0.7931,
|
1242 |
+
"step": 5075
|
1243 |
+
},
|
1244 |
+
{
|
1245 |
+
"epoch": 1.98,
|
1246 |
+
"learning_rate": 5.21748855202839e-05,
|
1247 |
+
"loss": 0.7868,
|
1248 |
+
"step": 5100
|
1249 |
}
|
1250 |
],
|
1251 |
"max_steps": 7737,
|
1252 |
"num_train_epochs": 3,
|
1253 |
+
"total_flos": 2.1963895120276226e+19,
|
1254 |
"trial_name": null,
|
1255 |
"trial_params": null
|
1256 |
}
|
{checkpoint-4700 → checkpoint-5100}/training_args.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4027
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5cc18faa1420e425c2fed06bfe2dd967461487c15531bd94429b7a3c0c02a49
|
3 |
size 4027
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4027
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5cc18faa1420e425c2fed06bfe2dd967461487c15531bd94429b7a3c0c02a49
|
3 |
size 4027
|