Upload with huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- checkpoint-1000/config.json +0 -0
- checkpoint-1000/optimizer.pt +3 -0
- checkpoint-1000/preprocessor_config.json +23 -0
- checkpoint-1000/pytorch_model.bin +3 -0
- checkpoint-1000/rng_state.pth +3 -0
- checkpoint-1000/scheduler.pt +3 -0
- checkpoint-1000/trainer_state.json +28 -0
- checkpoint-1000/training_args.bin +3 -0
- checkpoint-1500/config.json +0 -0
- checkpoint-1500/optimizer.pt +3 -0
- checkpoint-1500/preprocessor_config.json +23 -0
- checkpoint-1500/pytorch_model.bin +3 -0
- checkpoint-1500/rng_state.pth +3 -0
- checkpoint-1500/scheduler.pt +3 -0
- checkpoint-1500/trainer_state.json +47 -0
- checkpoint-1500/training_args.bin +3 -0
- checkpoint-2000/config.json +0 -0
- checkpoint-2000/optimizer.pt +3 -0
- checkpoint-2000/preprocessor_config.json +23 -0
- checkpoint-2000/pytorch_model.bin +3 -0
- checkpoint-2000/rng_state.pth +3 -0
- checkpoint-2000/scheduler.pt +3 -0
- checkpoint-2000/trainer_state.json +53 -0
- checkpoint-2000/training_args.bin +3 -0
- checkpoint-2500/config.json +0 -0
- checkpoint-2500/optimizer.pt +3 -0
- checkpoint-2500/preprocessor_config.json +23 -0
- checkpoint-2500/pytorch_model.bin +3 -0
- checkpoint-2500/rng_state.pth +3 -0
- checkpoint-2500/scheduler.pt +3 -0
- checkpoint-2500/trainer_state.json +72 -0
- checkpoint-2500/training_args.bin +3 -0
- checkpoint-3000/config.json +0 -0
- checkpoint-3000/optimizer.pt +3 -0
- checkpoint-3000/preprocessor_config.json +23 -0
- checkpoint-3000/pytorch_model.bin +3 -0
- checkpoint-3000/rng_state.pth +3 -0
- checkpoint-3000/scheduler.pt +3 -0
- checkpoint-3000/trainer_state.json +78 -0
- checkpoint-3000/training_args.bin +3 -0
- checkpoint-500/config.json +0 -0
- checkpoint-500/optimizer.pt +3 -0
- checkpoint-500/preprocessor_config.json +23 -0
- checkpoint-500/pytorch_model.bin +3 -0
- checkpoint-500/rng_state.pth +3 -0
- checkpoint-500/scheduler.pt +3 -0
- checkpoint-500/trainer_state.json +22 -0
- checkpoint-500/training_args.bin +3 -0
- config.json +0 -0
- preprocessor_config.json +23 -0
checkpoint-1000/config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-1000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4929974b07ac5a3b5ccaa387cbbe851f5afd4f9474371e0b3fae1327ee3b01f2
|
3 |
+
size 1803732531
|
checkpoint-1000/preprocessor_config.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"do_rescale": true,
|
4 |
+
"do_resize": true,
|
5 |
+
"feature_extractor_type": "ViTFeatureExtractor",
|
6 |
+
"image_mean": [
|
7 |
+
0.485,
|
8 |
+
0.456,
|
9 |
+
0.406
|
10 |
+
],
|
11 |
+
"image_processor_type": "ViTImageProcessor",
|
12 |
+
"image_std": [
|
13 |
+
0.229,
|
14 |
+
0.224,
|
15 |
+
0.225
|
16 |
+
],
|
17 |
+
"resample": 3,
|
18 |
+
"rescale_factor": 0.00392156862745098,
|
19 |
+
"size": {
|
20 |
+
"height": 224,
|
21 |
+
"width": 224
|
22 |
+
}
|
23 |
+
}
|
checkpoint-1000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7411cb4546010ac49090da446bae9363a3bd4a69e1926a856db2fac2d57e74e
|
3 |
+
size 902384861
|
checkpoint-1000/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d37055c2965a1ab538faf950187b10722ff94c11941c2b5dbdd04a49c2b6fbc
|
3 |
+
size 14575
|
checkpoint-1000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7befb2cc9aaa9c0a9e7ed884d86bfd058f786d8c3cf5b67327739941e8d8e943
|
3 |
+
size 627
|
checkpoint-1000/trainer_state.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9433962264150944,
|
5 |
+
"global_step": 1000,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.47,
|
12 |
+
"learning_rate": 4.213836477987422e-05,
|
13 |
+
"loss": 0.3579,
|
14 |
+
"step": 500
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.94,
|
18 |
+
"learning_rate": 3.4276729559748424e-05,
|
19 |
+
"loss": 0.3025,
|
20 |
+
"step": 1000
|
21 |
+
}
|
22 |
+
],
|
23 |
+
"max_steps": 3180,
|
24 |
+
"num_train_epochs": 3,
|
25 |
+
"total_flos": 7.28227689283584e+17,
|
26 |
+
"trial_name": null,
|
27 |
+
"trial_params": null
|
28 |
+
}
|
checkpoint-1000/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1362775d9d3da70b6a6d1b44a3c31fa208caaf44188f1f739397820f61bec36a
|
3 |
+
size 3579
|
checkpoint-1500/config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-1500/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd4ba48119ef427872eac0a9ce89b82f989f29b14c274d626f9be92bd6519680
|
3 |
+
size 1803732531
|
checkpoint-1500/preprocessor_config.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"do_rescale": true,
|
4 |
+
"do_resize": true,
|
5 |
+
"feature_extractor_type": "ViTFeatureExtractor",
|
6 |
+
"image_mean": [
|
7 |
+
0.485,
|
8 |
+
0.456,
|
9 |
+
0.406
|
10 |
+
],
|
11 |
+
"image_processor_type": "ViTImageProcessor",
|
12 |
+
"image_std": [
|
13 |
+
0.229,
|
14 |
+
0.224,
|
15 |
+
0.225
|
16 |
+
],
|
17 |
+
"resample": 3,
|
18 |
+
"rescale_factor": 0.00392156862745098,
|
19 |
+
"size": {
|
20 |
+
"height": 224,
|
21 |
+
"width": 224
|
22 |
+
}
|
23 |
+
}
|
checkpoint-1500/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8c5daa011f3cdbdaebd3adae671bd77d4229550ad7d1942411f0398c48d1184
|
3 |
+
size 902384861
|
checkpoint-1500/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d3bc7c95917dcdca357f857099956d422e167514e6902d7ed1d28a72dafa493
|
3 |
+
size 14575
|
checkpoint-1500/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ea721b244295b0cd3fb0cd8d77c630a7e046d638c25c9add958e2ba223154a9
|
3 |
+
size 627
|
checkpoint-1500/trainer_state.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.4150943396226414,
|
5 |
+
"global_step": 1500,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.47,
|
12 |
+
"learning_rate": 4.213836477987422e-05,
|
13 |
+
"loss": 0.3579,
|
14 |
+
"step": 500
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.94,
|
18 |
+
"learning_rate": 3.4276729559748424e-05,
|
19 |
+
"loss": 0.3025,
|
20 |
+
"step": 1000
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 1.0,
|
24 |
+
"eval_gen_len": 14.745930644019817,
|
25 |
+
"eval_loss": 0.3904925584793091,
|
26 |
+
"eval_rouge1": 22.6674,
|
27 |
+
"eval_rouge2": 5.4037,
|
28 |
+
"eval_rougeL": 20.2687,
|
29 |
+
"eval_rougeLsum": 20.297,
|
30 |
+
"eval_runtime": 247.0796,
|
31 |
+
"eval_samples_per_second": 5.719,
|
32 |
+
"eval_steps_per_second": 1.433,
|
33 |
+
"step": 1060
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 1.42,
|
37 |
+
"learning_rate": 2.641509433962264e-05,
|
38 |
+
"loss": 0.2978,
|
39 |
+
"step": 1500
|
40 |
+
}
|
41 |
+
],
|
42 |
+
"max_steps": 3180,
|
43 |
+
"num_train_epochs": 3,
|
44 |
+
"total_flos": 1.0919774200807342e+18,
|
45 |
+
"trial_name": null,
|
46 |
+
"trial_params": null
|
47 |
+
}
|
checkpoint-1500/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1362775d9d3da70b6a6d1b44a3c31fa208caaf44188f1f739397820f61bec36a
|
3 |
+
size 3579
|
checkpoint-2000/config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-2000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72fa66f879460e7430267736f5b3c814a2a672fe6d0d67830a591bc73c199d72
|
3 |
+
size 1803732531
|
checkpoint-2000/preprocessor_config.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"do_rescale": true,
|
4 |
+
"do_resize": true,
|
5 |
+
"feature_extractor_type": "ViTFeatureExtractor",
|
6 |
+
"image_mean": [
|
7 |
+
0.485,
|
8 |
+
0.456,
|
9 |
+
0.406
|
10 |
+
],
|
11 |
+
"image_processor_type": "ViTImageProcessor",
|
12 |
+
"image_std": [
|
13 |
+
0.229,
|
14 |
+
0.224,
|
15 |
+
0.225
|
16 |
+
],
|
17 |
+
"resample": 3,
|
18 |
+
"rescale_factor": 0.00392156862745098,
|
19 |
+
"size": {
|
20 |
+
"height": 224,
|
21 |
+
"width": 224
|
22 |
+
}
|
23 |
+
}
|
checkpoint-2000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f18fe066042c1246d8f6d922eec7aaad06a0e00b035fe61340ece609de26162
|
3 |
+
size 902384861
|
checkpoint-2000/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:351c53366ce4a420ab4d6353ba45288c26e3bc635fc544ced1a8c870e142c367
|
3 |
+
size 14575
|
checkpoint-2000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca5a3d7c75cb89008e9763f5745fa429f291a8fe2d71aeda77e213178c066a68
|
3 |
+
size 627
|
checkpoint-2000/trainer_state.json
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.8867924528301887,
|
5 |
+
"global_step": 2000,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.47,
|
12 |
+
"learning_rate": 4.213836477987422e-05,
|
13 |
+
"loss": 0.3579,
|
14 |
+
"step": 500
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.94,
|
18 |
+
"learning_rate": 3.4276729559748424e-05,
|
19 |
+
"loss": 0.3025,
|
20 |
+
"step": 1000
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 1.0,
|
24 |
+
"eval_gen_len": 14.745930644019817,
|
25 |
+
"eval_loss": 0.3904925584793091,
|
26 |
+
"eval_rouge1": 22.6674,
|
27 |
+
"eval_rouge2": 5.4037,
|
28 |
+
"eval_rougeL": 20.2687,
|
29 |
+
"eval_rougeLsum": 20.297,
|
30 |
+
"eval_runtime": 247.0796,
|
31 |
+
"eval_samples_per_second": 5.719,
|
32 |
+
"eval_steps_per_second": 1.433,
|
33 |
+
"step": 1060
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 1.42,
|
37 |
+
"learning_rate": 2.641509433962264e-05,
|
38 |
+
"loss": 0.2978,
|
39 |
+
"step": 1500
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 1.89,
|
43 |
+
"learning_rate": 1.8553459119496856e-05,
|
44 |
+
"loss": 0.2772,
|
45 |
+
"step": 2000
|
46 |
+
}
|
47 |
+
],
|
48 |
+
"max_steps": 3180,
|
49 |
+
"num_train_epochs": 3,
|
50 |
+
"total_flos": 1.4560912647225262e+18,
|
51 |
+
"trial_name": null,
|
52 |
+
"trial_params": null
|
53 |
+
}
|
checkpoint-2000/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1362775d9d3da70b6a6d1b44a3c31fa208caaf44188f1f739397820f61bec36a
|
3 |
+
size 3579
|
checkpoint-2500/config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-2500/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dcf3941bd6d3dad8c50a1e61f46ccdaaaf39c6da8225cc16a6c9d3bb0a01aa5d
|
3 |
+
size 1803732531
|
checkpoint-2500/preprocessor_config.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"do_rescale": true,
|
4 |
+
"do_resize": true,
|
5 |
+
"feature_extractor_type": "ViTFeatureExtractor",
|
6 |
+
"image_mean": [
|
7 |
+
0.485,
|
8 |
+
0.456,
|
9 |
+
0.406
|
10 |
+
],
|
11 |
+
"image_processor_type": "ViTImageProcessor",
|
12 |
+
"image_std": [
|
13 |
+
0.229,
|
14 |
+
0.224,
|
15 |
+
0.225
|
16 |
+
],
|
17 |
+
"resample": 3,
|
18 |
+
"rescale_factor": 0.00392156862745098,
|
19 |
+
"size": {
|
20 |
+
"height": 224,
|
21 |
+
"width": 224
|
22 |
+
}
|
23 |
+
}
|
checkpoint-2500/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f54dbe7c6853f93dc71440a7eaac09f9bba01c420e4fb341c80be16881ce5d5
|
3 |
+
size 902384861
|
checkpoint-2500/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34a82a6889ec7034ed91552505dc1056a7d1c221e29e04219d8c00c1c0dcb131
|
3 |
+
size 14575
|
checkpoint-2500/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:077e40c42304363124a4811883b035236785d28262b3c485fb167fd63ff75acd
|
3 |
+
size 627
|
checkpoint-2500/trainer_state.json
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.358490566037736,
|
5 |
+
"global_step": 2500,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.47,
|
12 |
+
"learning_rate": 4.213836477987422e-05,
|
13 |
+
"loss": 0.3579,
|
14 |
+
"step": 500
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.94,
|
18 |
+
"learning_rate": 3.4276729559748424e-05,
|
19 |
+
"loss": 0.3025,
|
20 |
+
"step": 1000
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 1.0,
|
24 |
+
"eval_gen_len": 14.745930644019817,
|
25 |
+
"eval_loss": 0.3904925584793091,
|
26 |
+
"eval_rouge1": 22.6674,
|
27 |
+
"eval_rouge2": 5.4037,
|
28 |
+
"eval_rougeL": 20.2687,
|
29 |
+
"eval_rougeLsum": 20.297,
|
30 |
+
"eval_runtime": 247.0796,
|
31 |
+
"eval_samples_per_second": 5.719,
|
32 |
+
"eval_steps_per_second": 1.433,
|
33 |
+
"step": 1060
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 1.42,
|
37 |
+
"learning_rate": 2.641509433962264e-05,
|
38 |
+
"loss": 0.2978,
|
39 |
+
"step": 1500
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 1.89,
|
43 |
+
"learning_rate": 1.8553459119496856e-05,
|
44 |
+
"loss": 0.2772,
|
45 |
+
"step": 2000
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 2.0,
|
49 |
+
"eval_gen_len": 11.341825902335456,
|
50 |
+
"eval_loss": 0.37492871284484863,
|
51 |
+
"eval_rouge1": 24.3917,
|
52 |
+
"eval_rouge2": 6.7944,
|
53 |
+
"eval_rougeL": 22.2165,
|
54 |
+
"eval_rougeLsum": 22.244,
|
55 |
+
"eval_runtime": 263.2819,
|
56 |
+
"eval_samples_per_second": 5.367,
|
57 |
+
"eval_steps_per_second": 1.345,
|
58 |
+
"step": 2120
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 2.36,
|
62 |
+
"learning_rate": 1.069182389937107e-05,
|
63 |
+
"loss": 0.2196,
|
64 |
+
"step": 2500
|
65 |
+
}
|
66 |
+
],
|
67 |
+
"max_steps": 3180,
|
68 |
+
"num_train_epochs": 3,
|
69 |
+
"total_flos": 1.8198409955196764e+18,
|
70 |
+
"trial_name": null,
|
71 |
+
"trial_params": null
|
72 |
+
}
|
checkpoint-2500/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1362775d9d3da70b6a6d1b44a3c31fa208caaf44188f1f739397820f61bec36a
|
3 |
+
size 3579
|
checkpoint-3000/config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-3000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eac004071b76afc4e0eab463adf5f874206cefc671a90cc39e1b98a4c38d888f
|
3 |
+
size 1803732531
|
checkpoint-3000/preprocessor_config.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"do_rescale": true,
|
4 |
+
"do_resize": true,
|
5 |
+
"feature_extractor_type": "ViTFeatureExtractor",
|
6 |
+
"image_mean": [
|
7 |
+
0.485,
|
8 |
+
0.456,
|
9 |
+
0.406
|
10 |
+
],
|
11 |
+
"image_processor_type": "ViTImageProcessor",
|
12 |
+
"image_std": [
|
13 |
+
0.229,
|
14 |
+
0.224,
|
15 |
+
0.225
|
16 |
+
],
|
17 |
+
"resample": 3,
|
18 |
+
"rescale_factor": 0.00392156862745098,
|
19 |
+
"size": {
|
20 |
+
"height": 224,
|
21 |
+
"width": 224
|
22 |
+
}
|
23 |
+
}
|
checkpoint-3000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a08bcb0afe0ad7f7f5087b567ed630be8e0848ddbd92527a053cb9e7811024c
|
3 |
+
size 902384861
|
checkpoint-3000/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05f10dd1340049368fe2630d218b60989e5bed15ce5d55a2808430ae6058d980
|
3 |
+
size 14575
|
checkpoint-3000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45ae7449699d5b904cf67d1abdb9982264a2eb53850909ae3a6e806496c60652
|
3 |
+
size 627
|
checkpoint-3000/trainer_state.json
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.830188679245283,
|
5 |
+
"global_step": 3000,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.47,
|
12 |
+
"learning_rate": 4.213836477987422e-05,
|
13 |
+
"loss": 0.3579,
|
14 |
+
"step": 500
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.94,
|
18 |
+
"learning_rate": 3.4276729559748424e-05,
|
19 |
+
"loss": 0.3025,
|
20 |
+
"step": 1000
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 1.0,
|
24 |
+
"eval_gen_len": 14.745930644019817,
|
25 |
+
"eval_loss": 0.3904925584793091,
|
26 |
+
"eval_rouge1": 22.6674,
|
27 |
+
"eval_rouge2": 5.4037,
|
28 |
+
"eval_rougeL": 20.2687,
|
29 |
+
"eval_rougeLsum": 20.297,
|
30 |
+
"eval_runtime": 247.0796,
|
31 |
+
"eval_samples_per_second": 5.719,
|
32 |
+
"eval_steps_per_second": 1.433,
|
33 |
+
"step": 1060
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 1.42,
|
37 |
+
"learning_rate": 2.641509433962264e-05,
|
38 |
+
"loss": 0.2978,
|
39 |
+
"step": 1500
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 1.89,
|
43 |
+
"learning_rate": 1.8553459119496856e-05,
|
44 |
+
"loss": 0.2772,
|
45 |
+
"step": 2000
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 2.0,
|
49 |
+
"eval_gen_len": 11.341825902335456,
|
50 |
+
"eval_loss": 0.37492871284484863,
|
51 |
+
"eval_rouge1": 24.3917,
|
52 |
+
"eval_rouge2": 6.7944,
|
53 |
+
"eval_rougeL": 22.2165,
|
54 |
+
"eval_rougeLsum": 22.244,
|
55 |
+
"eval_runtime": 263.2819,
|
56 |
+
"eval_samples_per_second": 5.367,
|
57 |
+
"eval_steps_per_second": 1.345,
|
58 |
+
"step": 2120
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 2.36,
|
62 |
+
"learning_rate": 1.069182389937107e-05,
|
63 |
+
"loss": 0.2196,
|
64 |
+
"step": 2500
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"epoch": 2.83,
|
68 |
+
"learning_rate": 2.830188679245283e-06,
|
69 |
+
"loss": 0.2016,
|
70 |
+
"step": 3000
|
71 |
+
}
|
72 |
+
],
|
73 |
+
"max_steps": 3180,
|
74 |
+
"num_train_epochs": 3,
|
75 |
+
"total_flos": 2.1839548401614684e+18,
|
76 |
+
"trial_name": null,
|
77 |
+
"trial_params": null
|
78 |
+
}
|
checkpoint-3000/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1362775d9d3da70b6a6d1b44a3c31fa208caaf44188f1f739397820f61bec36a
|
3 |
+
size 3579
|
checkpoint-500/config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-500/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cffc91b03e82bbcdb130485fa8c273666cbba9a5347fb24e8e471d1b54df492
|
3 |
+
size 1803732531
|
checkpoint-500/preprocessor_config.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"do_rescale": true,
|
4 |
+
"do_resize": true,
|
5 |
+
"feature_extractor_type": "ViTFeatureExtractor",
|
6 |
+
"image_mean": [
|
7 |
+
0.485,
|
8 |
+
0.456,
|
9 |
+
0.406
|
10 |
+
],
|
11 |
+
"image_processor_type": "ViTImageProcessor",
|
12 |
+
"image_std": [
|
13 |
+
0.229,
|
14 |
+
0.224,
|
15 |
+
0.225
|
16 |
+
],
|
17 |
+
"resample": 3,
|
18 |
+
"rescale_factor": 0.00392156862745098,
|
19 |
+
"size": {
|
20 |
+
"height": 224,
|
21 |
+
"width": 224
|
22 |
+
}
|
23 |
+
}
|
checkpoint-500/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b9e093e5d1d6e10a7fdb9606d50eb41f2e522fd9c3d8ee1b8160133841eb098
|
3 |
+
size 902384861
|
checkpoint-500/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f7c1b60ca2ffb1ff54f28f53bb842be5e78269fdf2164edb43dfd508963e2c1
|
3 |
+
size 14575
|
checkpoint-500/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b97bfb25a714221bc23f6051d604fb88c167cca5335940d2deb6375e4b531b8f
|
3 |
+
size 627
|
checkpoint-500/trainer_state.json
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.4716981132075472,
|
5 |
+
"global_step": 500,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.47,
|
12 |
+
"learning_rate": 4.213836477987422e-05,
|
13 |
+
"loss": 0.3579,
|
14 |
+
"step": 500
|
15 |
+
}
|
16 |
+
],
|
17 |
+
"max_steps": 3180,
|
18 |
+
"num_train_epochs": 3,
|
19 |
+
"total_flos": 3.64113844641792e+17,
|
20 |
+
"trial_name": null,
|
21 |
+
"trial_params": null
|
22 |
+
}
|
checkpoint-500/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1362775d9d3da70b6a6d1b44a3c31fa208caaf44188f1f739397820f61bec36a
|
3 |
+
size 3579
|
config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
preprocessor_config.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"do_rescale": true,
|
4 |
+
"do_resize": true,
|
5 |
+
"feature_extractor_type": "ViTFeatureExtractor",
|
6 |
+
"image_mean": [
|
7 |
+
0.485,
|
8 |
+
0.456,
|
9 |
+
0.406
|
10 |
+
],
|
11 |
+
"image_processor_type": "ViTImageProcessor",
|
12 |
+
"image_std": [
|
13 |
+
0.229,
|
14 |
+
0.224,
|
15 |
+
0.225
|
16 |
+
],
|
17 |
+
"resample": 3,
|
18 |
+
"rescale_factor": 0.00392156862745098,
|
19 |
+
"size": {
|
20 |
+
"height": 224,
|
21 |
+
"width": 224
|
22 |
+
}
|
23 |
+
}
|