commit files to HF hub
Browse files
- all_results.json +9 -9
- checkpoint-500/optimizer.pt +1 -1
- checkpoint-500/pytorch_model.bin +1 -1
- checkpoint-500/scheduler.pt +1 -1
- checkpoint-500/trainer_state.json +6 -6
- checkpoint-500/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- train_results.json +9 -9
- trainer_state.json +10 -10
- training_args.bin +1 -1
all_results.json
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
{
|
2 |
"epoch": 3.0,
|
3 |
-
"init_mem_cpu_alloc_delta":
|
4 |
-
"init_mem_cpu_peaked_delta":
|
5 |
"init_mem_gpu_alloc_delta": 511148032,
|
6 |
"init_mem_gpu_peaked_delta": 0,
|
7 |
-
"train_mem_cpu_alloc_delta":
|
8 |
-
"train_mem_cpu_peaked_delta":
|
9 |
-
"train_mem_gpu_alloc_delta":
|
10 |
-
"train_mem_gpu_peaked_delta":
|
11 |
-
"train_runtime":
|
12 |
-
"train_samples":
|
13 |
-
"train_samples_per_second": 2.
|
14 |
}
|
|
|
1 |
{
|
2 |
"epoch": 3.0,
|
3 |
+
"init_mem_cpu_alloc_delta": 1023545344,
|
4 |
+
"init_mem_cpu_peaked_delta": 153387008,
|
5 |
"init_mem_gpu_alloc_delta": 511148032,
|
6 |
"init_mem_gpu_peaked_delta": 0,
|
7 |
+
"train_mem_cpu_alloc_delta": 172761088,
|
8 |
+
"train_mem_cpu_peaked_delta": 463097856,
|
9 |
+
"train_mem_gpu_alloc_delta": 1500642816,
|
10 |
+
"train_mem_gpu_peaked_delta": 7988768768,
|
11 |
+
"train_runtime": 305.2911,
|
12 |
+
"train_samples": 577,
|
13 |
+
"train_samples_per_second": 2.84
|
14 |
}
|
checkpoint-500/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 995611287
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7106267482dd7f9dadd0124963cf6596a36f59fb95e965946f9c704ae92e104
|
3 |
size 995611287
|
checkpoint-500/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510408315
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ec4014830229bce6a184219bc21fb2037266b105e6425dc931edbbff4601d72
|
3 |
size 510408315
|
checkpoint-500/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19ffc60afaedadd3abaee5ad20cb4c2a9f27982fbbb5f2db2270705966f592f3
|
3 |
size 559
|
checkpoint-500/trainer_state.json
CHANGED
@@ -1,22 +1,22 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"global_step": 500,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
9 |
"log_history": [
|
10 |
{
|
11 |
-
"epoch": 1.
|
12 |
-
"learning_rate":
|
13 |
-
"loss": 2.
|
14 |
"step": 500
|
15 |
}
|
16 |
],
|
17 |
-
"max_steps":
|
18 |
"num_train_epochs": 3,
|
19 |
-
"total_flos":
|
20 |
"trial_name": null,
|
21 |
"trial_params": null
|
22 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.7301038062283736,
|
5 |
"global_step": 500,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
9 |
"log_history": [
|
10 |
{
|
11 |
+
"epoch": 1.73,
|
12 |
+
"learning_rate": 2.116493656286044e-05,
|
13 |
+
"loss": 2.5834,
|
14 |
"step": 500
|
15 |
}
|
16 |
],
|
17 |
+
"max_steps": 867,
|
18 |
"num_train_epochs": 3,
|
19 |
+
"total_flos": 763793622171648.0,
|
20 |
"trial_name": null,
|
21 |
"trial_params": null
|
22 |
}
|
checkpoint-500/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2351
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1b65e46c37193b61039559f5d3a3c73dbb95ab3dc845c86cc37ed12a94db83c
|
3 |
size 2351
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510408315
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86c3804b42db5dff04ce27b94958556b8aa6c1e6d4bf998f88c71999452a1648
|
3 |
size 510408315
|
train_results.json
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
{
|
2 |
"epoch": 3.0,
|
3 |
-
"init_mem_cpu_alloc_delta":
|
4 |
-
"init_mem_cpu_peaked_delta":
|
5 |
"init_mem_gpu_alloc_delta": 511148032,
|
6 |
"init_mem_gpu_peaked_delta": 0,
|
7 |
-
"train_mem_cpu_alloc_delta":
|
8 |
-
"train_mem_cpu_peaked_delta":
|
9 |
-
"train_mem_gpu_alloc_delta":
|
10 |
-
"train_mem_gpu_peaked_delta":
|
11 |
-
"train_runtime":
|
12 |
-
"train_samples":
|
13 |
-
"train_samples_per_second": 2.
|
14 |
}
|
|
|
1 |
{
|
2 |
"epoch": 3.0,
|
3 |
+
"init_mem_cpu_alloc_delta": 1023545344,
|
4 |
+
"init_mem_cpu_peaked_delta": 153387008,
|
5 |
"init_mem_gpu_alloc_delta": 511148032,
|
6 |
"init_mem_gpu_peaked_delta": 0,
|
7 |
+
"train_mem_cpu_alloc_delta": 172761088,
|
8 |
+
"train_mem_cpu_peaked_delta": 463097856,
|
9 |
+
"train_mem_gpu_alloc_delta": 1500642816,
|
10 |
+
"train_mem_gpu_peaked_delta": 7988768768,
|
11 |
+
"train_runtime": 305.2911,
|
12 |
+
"train_samples": 577,
|
13 |
+
"train_samples_per_second": 2.84
|
14 |
}
|
trainer_state.json
CHANGED
@@ -2,28 +2,28 @@
|
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
"epoch": 3.0,
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
9 |
"log_history": [
|
10 |
{
|
11 |
-
"epoch": 1.
|
12 |
-
"learning_rate":
|
13 |
-
"loss": 2.
|
14 |
"step": 500
|
15 |
},
|
16 |
{
|
17 |
"epoch": 3.0,
|
18 |
-
"step":
|
19 |
-
"total_flos":
|
20 |
-
"train_runtime":
|
21 |
-
"train_samples_per_second": 2.
|
22 |
}
|
23 |
],
|
24 |
-
"max_steps":
|
25 |
"num_train_epochs": 3,
|
26 |
-
"total_flos":
|
27 |
"trial_name": null,
|
28 |
"trial_params": null
|
29 |
}
|
|
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
"epoch": 3.0,
|
5 |
+
"global_step": 867,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
9 |
"log_history": [
|
10 |
{
|
11 |
+
"epoch": 1.73,
|
12 |
+
"learning_rate": 2.116493656286044e-05,
|
13 |
+
"loss": 2.5834,
|
14 |
"step": 500
|
15 |
},
|
16 |
{
|
17 |
"epoch": 3.0,
|
18 |
+
"step": 867,
|
19 |
+
"total_flos": 1323450210189312.0,
|
20 |
+
"train_runtime": 305.2911,
|
21 |
+
"train_samples_per_second": 2.84
|
22 |
}
|
23 |
],
|
24 |
+
"max_steps": 867,
|
25 |
"num_train_epochs": 3,
|
26 |
+
"total_flos": 1323450210189312.0,
|
27 |
"trial_name": null,
|
28 |
"trial_params": null
|
29 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2351
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1b65e46c37193b61039559f5d3a3c73dbb95ab3dc845c86cc37ed12a94db83c
|
3 |
size 2351
|