Tianjiao-Yu committed on
Commit
58ac5f7
1 Parent(s): 9e966de

End of training

Browse files
Files changed (22) hide show
  1. README.md +40 -36
  2. all_results.json +6 -6
  3. config.json +30 -22
  4. model.safetensors +2 -2
  5. runs/Feb20_14-50-43_plan.cs.vt.edu/events.out.tfevents.1708469451.plan.cs.vt.edu.3543922.0 +3 -0
  6. runs/Feb20_14-50-43_plan.cs.vt.edu/events.out.tfevents.1708469726.plan.cs.vt.edu.3543922.1 +3 -0
  7. runs/Feb20_14-57-12_plan.cs.vt.edu/events.out.tfevents.1708469850.plan.cs.vt.edu.3569885.0 +3 -0
  8. runs/Feb20_15-18-42_plan.cs.vt.edu/events.out.tfevents.1708471132.plan.cs.vt.edu.3569885.1 +3 -0
  9. runs/Feb20_15-19-55_plan.cs.vt.edu/events.out.tfevents.1708471203.plan.cs.vt.edu.3569885.2 +3 -0
  10. runs/Feb20_15-20-55_plan.cs.vt.edu/events.out.tfevents.1708471262.plan.cs.vt.edu.3569885.3 +3 -0
  11. runs/Feb20_15-21-34_plan.cs.vt.edu/events.out.tfevents.1708471299.plan.cs.vt.edu.3569885.4 +3 -0
  12. runs/Feb20_15-49-17_plan.cs.vt.edu/events.out.tfevents.1708472959.plan.cs.vt.edu.3697382.0 +3 -0
  13. runs/Feb20_15-52-57_plan.cs.vt.edu/events.out.tfevents.1708473184.plan.cs.vt.edu.3697382.1 +3 -0
  14. runs/Feb20_15-54-37_plan.cs.vt.edu/events.out.tfevents.1708473283.plan.cs.vt.edu.3697382.2 +3 -0
  15. runs/Feb20_16-41-48_plan.cs.vt.edu/events.out.tfevents.1708476123.plan.cs.vt.edu.3697382.3 +3 -0
  16. runs/Feb20_16-42-29_plan.cs.vt.edu/events.out.tfevents.1708476167.plan.cs.vt.edu.3697382.4 +3 -0
  17. runs/Feb20_16-49-13_plan.cs.vt.edu/events.out.tfevents.1708476554.plan.cs.vt.edu.3697382.5 +3 -0
  18. runs/Feb20_16-49-46_plan.cs.vt.edu/events.out.tfevents.1708476591.plan.cs.vt.edu.3697382.6 +3 -0
  19. runs/Feb20_16-49-46_plan.cs.vt.edu/events.out.tfevents.1708477852.plan.cs.vt.edu.3697382.7 +3 -0
  20. test_results.json +6 -6
  21. trainer_state.json +418 -316
  22. training_args.bin +1 -1
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  tags:
3
  - generated_from_trainer
4
  metrics:
@@ -13,10 +15,10 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # videomae-large
15
 
16
- This model was trained from scratch on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.0993
19
- - Accuracy: 0.9742
20
 
21
  ## Model description
22
 
@@ -36,48 +38,50 @@ More information needed
36
 
37
  The following hyperparameters were used during training:
38
  - learning_rate: 5e-05
39
- - train_batch_size: 32
40
- - eval_batch_size: 32
41
  - seed: 42
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
  - lr_scheduler_type: linear
44
  - lr_scheduler_warmup_ratio: 0.1
45
- - training_steps: 300
46
 
47
  ### Training results
48
 
49
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
50
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
51
- | 2.3203 | 0.03 | 10 | 2.1994 | 0.1571 |
52
- | 1.9795 | 1.03 | 20 | 1.7835 | 0.3429 |
53
- | 1.0467 | 2.03 | 30 | 0.7311 | 0.6571 |
54
- | 0.301 | 3.03 | 40 | 0.2195 | 0.9429 |
55
- | 0.1061 | 4.03 | 50 | 0.1529 | 0.9143 |
56
- | 0.0499 | 5.03 | 60 | 0.0826 | 0.9857 |
57
- | 0.079 | 6.03 | 70 | 0.0534 | 0.9857 |
58
- | 0.0487 | 7.03 | 80 | 0.0299 | 0.9857 |
59
- | 0.0217 | 8.03 | 90 | 0.3283 | 0.9 |
60
- | 0.0387 | 9.03 | 100 | 0.0268 | 0.9857 |
61
- | 0.0252 | 10.03 | 110 | 0.0386 | 0.9857 |
62
- | 0.0324 | 11.03 | 120 | 0.3067 | 0.9 |
63
- | 0.0022 | 12.03 | 130 | 0.0131 | 1.0 |
64
- | 0.0115 | 13.03 | 140 | 0.0889 | 0.9857 |
65
- | 0.0225 | 14.03 | 150 | 0.0091 | 1.0 |
66
- | 0.0012 | 15.03 | 160 | 0.0081 | 1.0 |
67
- | 0.001 | 16.03 | 170 | 0.0103 | 1.0 |
68
- | 0.0255 | 17.03 | 180 | 0.0113 | 1.0 |
69
- | 0.0016 | 18.03 | 190 | 0.0252 | 0.9857 |
70
- | 0.0039 | 19.03 | 200 | 0.0177 | 0.9857 |
71
- | 0.0007 | 20.03 | 210 | 0.0017 | 1.0 |
72
- | 0.0006 | 21.03 | 220 | 0.0013 | 1.0 |
73
- | 0.0006 | 22.03 | 230 | 0.0012 | 1.0 |
74
- | 0.0007 | 23.03 | 240 | 0.0011 | 1.0 |
75
- | 0.0005 | 24.03 | 250 | 0.0011 | 1.0 |
76
- | 0.0005 | 25.03 | 260 | 0.0011 | 1.0 |
77
- | 0.0005 | 26.03 | 270 | 0.0011 | 1.0 |
78
- | 0.0005 | 27.03 | 280 | 0.0011 | 1.0 |
79
- | 0.0005 | 28.03 | 290 | 0.0011 | 1.0 |
80
- | 0.0005 | 29.03 | 300 | 0.0011 | 1.0 |
 
 
81
 
82
 
83
  ### Framework versions
 
1
  ---
2
+ license: cc-by-nc-4.0
3
+ base_model: MCG-NJU/videomae-large-finetuned-kinetics
4
  tags:
5
  - generated_from_trainer
6
  metrics:
 
15
 
16
  # videomae-large
17
 
18
+ This model is a fine-tuned version of [MCG-NJU/videomae-large-finetuned-kinetics](https://huggingface.co/MCG-NJU/videomae-large-finetuned-kinetics) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 1.5042
21
+ - Accuracy: 0.4286
22
 
23
  ## Model description
24
 
 
38
 
39
  The following hyperparameters were used during training:
40
  - learning_rate: 5e-05
41
+ - train_batch_size: 16
42
+ - eval_batch_size: 16
43
  - seed: 42
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
  - lr_scheduler_warmup_ratio: 0.1
47
+ - training_steps: 220
48
 
49
  ### Training results
50
 
51
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
53
+ | 2.6619 | 0.03 | 7 | 2.7017 | 0.0 |
54
+ | 2.6232 | 1.03 | 14 | 2.6628 | 0.0 |
55
+ | 2.381 | 2.03 | 21 | 2.5798 | 0.1667 |
56
+ | 2.2215 | 3.03 | 28 | 2.4757 | 0.1667 |
57
+ | 1.7389 | 4.03 | 35 | 2.3636 | 0.2333 |
58
+ | 1.3366 | 5.03 | 42 | 2.2424 | 0.3 |
59
+ | 1.1946 | 6.03 | 49 | 2.1675 | 0.3 |
60
+ | 0.6809 | 7.03 | 56 | 2.0548 | 0.3667 |
61
+ | 0.5255 | 8.03 | 63 | 2.0410 | 0.4 |
62
+ | 0.3285 | 9.03 | 70 | 1.9539 | 0.4 |
63
+ | 0.2849 | 10.03 | 77 | 1.8536 | 0.4667 |
64
+ | 0.1832 | 11.03 | 84 | 1.8293 | 0.4333 |
65
+ | 0.1307 | 12.03 | 91 | 1.8200 | 0.4 |
66
+ | 0.0901 | 13.03 | 98 | 1.8355 | 0.4 |
67
+ | 0.0636 | 14.03 | 105 | 1.8201 | 0.4333 |
68
+ | 0.0413 | 15.03 | 112 | 1.7750 | 0.4667 |
69
+ | 0.0427 | 16.03 | 119 | 1.7460 | 0.5333 |
70
+ | 0.0254 | 17.03 | 126 | 1.7804 | 0.5333 |
71
+ | 0.0203 | 18.03 | 133 | 1.8869 | 0.4333 |
72
+ | 0.0174 | 19.03 | 140 | 1.7741 | 0.5667 |
73
+ | 0.0154 | 20.03 | 147 | 1.7401 | 0.5333 |
74
+ | 0.0136 | 21.03 | 154 | 1.7672 | 0.5 |
75
+ | 0.0116 | 22.03 | 161 | 1.7793 | 0.5333 |
76
+ | 0.0123 | 23.03 | 168 | 1.8018 | 0.4667 |
77
+ | 0.0102 | 24.03 | 175 | 1.8024 | 0.5 |
78
+ | 0.0103 | 25.03 | 182 | 1.8058 | 0.5 |
79
+ | 0.0089 | 26.03 | 189 | 1.8106 | 0.5 |
80
+ | 0.0088 | 27.03 | 196 | 1.8029 | 0.5 |
81
+ | 0.0092 | 28.03 | 203 | 1.7961 | 0.5 |
82
+ | 0.0083 | 29.03 | 210 | 1.7940 | 0.5 |
83
+ | 0.0099 | 30.03 | 217 | 1.7922 | 0.5 |
84
+ | 0.0085 | 31.01 | 220 | 1.7920 | 0.5 |
85
 
86
 
87
  ### Framework versions
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 29.03,
3
- "eval_accuracy": 0.9741935483870968,
4
- "eval_loss": 0.09928672015666962,
5
- "eval_runtime": 13.0498,
6
- "eval_samples_per_second": 11.878,
7
- "eval_steps_per_second": 0.383
8
  }
 
1
  {
2
+ "epoch": 31.01,
3
+ "eval_accuracy": 0.42857142857142855,
4
+ "eval_loss": 1.504156231880188,
5
+ "eval_runtime": 2.2938,
6
+ "eval_samples_per_second": 6.104,
7
+ "eval_steps_per_second": 0.436
8
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "MCG-NJU/videomae-large",
3
  "architectures": [
4
  "VideoMAEForVideoClassification"
5
  ],
@@ -12,31 +12,39 @@
12
  "hidden_dropout_prob": 0.0,
13
  "hidden_size": 1024,
14
  "id2label": {
15
- "0": "ApplyEyeMakeup",
16
- "1": "ApplyLipstick",
17
- "2": "Archery",
18
- "3": "BabyCrawling",
19
- "4": "BalanceBeam",
20
- "5": "BandMarching",
21
- "6": "BaseballPitch",
22
- "7": "Basketball",
23
- "8": "BasketballDunk",
24
- "9": "BenchPress"
 
 
 
 
25
  },
26
  "image_size": 224,
27
  "initializer_range": 0.02,
28
  "intermediate_size": 4096,
29
  "label2id": {
30
- "ApplyEyeMakeup": 0,
31
- "ApplyLipstick": 1,
32
- "Archery": 2,
33
- "BabyCrawling": 3,
34
- "BalanceBeam": 4,
35
- "BandMarching": 5,
36
- "BaseballPitch": 6,
37
- "Basketball": 7,
38
- "BasketballDunk": 8,
39
- "BenchPress": 9
 
 
 
 
40
  },
41
  "layer_norm_eps": 1e-12,
42
  "model_type": "videomae",
@@ -51,5 +59,5 @@
51
  "torch_dtype": "float32",
52
  "transformers_version": "4.37.2",
53
  "tubelet_size": 2,
54
- "use_mean_pooling": false
55
  }
 
1
  {
2
+ "_name_or_path": "MCG-NJU/videomae-large-finetuned-kinetics",
3
  "architectures": [
4
  "VideoMAEForVideoClassification"
5
  ],
 
12
  "hidden_dropout_prob": 0.0,
13
  "hidden_size": 1024,
14
  "id2label": {
15
+ "0": "climb",
16
+ "1": "crawl",
17
+ "2": "grasp",
18
+ "3": "hiding",
19
+ "4": "jump",
20
+ "5": "pick up",
21
+ "6": "pull",
22
+ "7": "push",
23
+ "8": "put down",
24
+ "9": "roll",
25
+ "10": "running",
26
+ "11": "slide",
27
+ "12": "walking",
28
+ "13": "zibaroon"
29
  },
30
  "image_size": 224,
31
  "initializer_range": 0.02,
32
  "intermediate_size": 4096,
33
  "label2id": {
34
+ "climb": 0,
35
+ "crawl": 1,
36
+ "grasp": 2,
37
+ "hiding": 3,
38
+ "jump": 4,
39
+ "pick up": 5,
40
+ "pull": 6,
41
+ "push": 7,
42
+ "put down": 8,
43
+ "roll": 9,
44
+ "running": 10,
45
+ "slide": 11,
46
+ "walking": 12,
47
+ "zibaroon": 13
48
  },
49
  "layer_norm_eps": 1e-12,
50
  "model_type": "videomae",
 
59
  "torch_dtype": "float32",
60
  "transformers_version": "4.37.2",
61
  "tubelet_size": 2,
62
+ "use_mean_pooling": true
63
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8f784d5e79a9b4421e39993c18671489940fd7d36ff370e75ffa6cc83706838
3
- size 1215529056
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6699c065fd187ca3510c6f854530949fe1a0fb8ca23b46262e967099572e5b07
3
+ size 1215545408
runs/Feb20_14-50-43_plan.cs.vt.edu/events.out.tfevents.1708469451.plan.cs.vt.edu.3543922.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43c0590daa12c802f4fbd7a642eff30ad07498cf04a73e68ad0e4cdc43b9b090
3
+ size 7974
runs/Feb20_14-50-43_plan.cs.vt.edu/events.out.tfevents.1708469726.plan.cs.vt.edu.3543922.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:640213db4fe4008ca9a6d4c1d31af408cabc120fa3ebfbf3c4a76e8c5d1dd81e
3
+ size 405
runs/Feb20_14-57-12_plan.cs.vt.edu/events.out.tfevents.1708469850.plan.cs.vt.edu.3569885.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1913e300bd7725c4aee313c4e56326cb9d4a41615ec5efbe09b0da162d18b2f1
3
+ size 17240
runs/Feb20_15-18-42_plan.cs.vt.edu/events.out.tfevents.1708471132.plan.cs.vt.edu.3569885.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc38f52209a4f52c900bd7c44160fcbff23ddbff1e26b0ed556fc2d5fddf1677
3
+ size 5098
runs/Feb20_15-19-55_plan.cs.vt.edu/events.out.tfevents.1708471203.plan.cs.vt.edu.3569885.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca35aa06a66c9f289a90569ddff96b03dad1b6b22a6420d6f240b3b1d611fd1c
3
+ size 5098
runs/Feb20_15-20-55_plan.cs.vt.edu/events.out.tfevents.1708471262.plan.cs.vt.edu.3569885.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dcc59d8afec032a73bdb632bc6e6f9c61d89c3a486377592935e9fe4883539d
3
+ size 5038
runs/Feb20_15-21-34_plan.cs.vt.edu/events.out.tfevents.1708471299.plan.cs.vt.edu.3569885.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e05a10eb291cc84cc4f4f52a7416f14c55b419548fcaa661a215f09e5eef7812
3
+ size 20198
runs/Feb20_15-49-17_plan.cs.vt.edu/events.out.tfevents.1708472959.plan.cs.vt.edu.3697382.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10957846cb7f2c0af0a8d18d5d4ab869cbf591c8ffdf139c94245aedd7c9890e
3
+ size 5067
runs/Feb20_15-52-57_plan.cs.vt.edu/events.out.tfevents.1708473184.plan.cs.vt.edu.3697382.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81f27ba231d80a65a24706a979eb5aa9d852449ce9f2006225e11f27555e1a08
3
+ size 5116
runs/Feb20_15-54-37_plan.cs.vt.edu/events.out.tfevents.1708473283.plan.cs.vt.edu.3697382.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d56fd9a2804f59e4f63066b24e75f61b1fe8357335f9676c8c70844feab690a4
3
+ size 22531
runs/Feb20_16-41-48_plan.cs.vt.edu/events.out.tfevents.1708476123.plan.cs.vt.edu.3697382.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d56bfc0cf7cf6a99eeb5ad550b898600384890e08744f468c2f7bc6b7f16236d
3
+ size 346
runs/Feb20_16-42-29_plan.cs.vt.edu/events.out.tfevents.1708476167.plan.cs.vt.edu.3697382.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0634219e6e9c583cb9b2d9ba2dab584bb8ab664f632d8c3d77360468e86b8f07
3
+ size 15172
runs/Feb20_16-49-13_plan.cs.vt.edu/events.out.tfevents.1708476554.plan.cs.vt.edu.3697382.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d2268931e4164e0e77e63e1c142777578c78de76043e1a2b93ba2742272cb3a
3
+ size 5068
runs/Feb20_16-49-46_plan.cs.vt.edu/events.out.tfevents.1708476591.plan.cs.vt.edu.3697382.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f046b4a37f5f59784fd5f956981984eb5c3f9442da4a05ba47ea603f6be6f774
3
+ size 22532
runs/Feb20_16-49-46_plan.cs.vt.edu/events.out.tfevents.1708477852.plan.cs.vt.edu.3697382.7 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58f713a0ad5eb7c555067fef62196245e31bb4a68d52309b425b41bddbf23130
3
+ size 734
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 29.03,
3
- "eval_accuracy": 0.9741935483870968,
4
- "eval_loss": 0.09928672015666962,
5
- "eval_runtime": 13.0498,
6
- "eval_samples_per_second": 11.878,
7
- "eval_steps_per_second": 0.383
8
  }
 
1
  {
2
+ "epoch": 31.01,
3
+ "eval_accuracy": 0.42857142857142855,
4
+ "eval_loss": 1.504156231880188,
5
+ "eval_runtime": 2.2938,
6
+ "eval_samples_per_second": 6.104,
7
+ "eval_steps_per_second": 0.436
8
  }
trainer_state.json CHANGED
@@ -1,498 +1,600 @@
1
  {
2
- "best_metric": 1.0,
3
- "best_model_checkpoint": "MCG-NJU/videomae-large/checkpoint-130",
4
- "epoch": 29.033333333333335,
5
  "eval_steps": 500,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.03,
13
- "learning_rate": 1.6666666666666667e-05,
14
- "loss": 2.3203,
15
- "step": 10
16
  },
17
  {
18
  "epoch": 0.03,
19
- "eval_accuracy": 0.15714285714285714,
20
- "eval_loss": 2.199398994445801,
21
- "eval_runtime": 5.6646,
22
- "eval_samples_per_second": 12.357,
23
- "eval_steps_per_second": 0.53,
 
 
 
 
 
 
24
  "step": 10
25
  },
26
  {
27
  "epoch": 1.03,
28
- "learning_rate": 3.3333333333333335e-05,
29
- "loss": 1.9795,
30
- "step": 20
 
 
 
31
  },
32
  {
33
- "epoch": 1.03,
34
- "eval_accuracy": 0.34285714285714286,
35
- "eval_loss": 1.7835056781768799,
36
- "eval_runtime": 6.2612,
37
- "eval_samples_per_second": 11.18,
38
- "eval_steps_per_second": 0.479,
39
- "step": 20
40
  },
41
  {
42
  "epoch": 2.03,
43
- "learning_rate": 5e-05,
44
- "loss": 1.0467,
45
- "step": 30
46
  },
47
  {
48
  "epoch": 2.03,
49
- "eval_accuracy": 0.6571428571428571,
50
- "eval_loss": 0.7310971617698669,
51
- "eval_runtime": 6.125,
52
- "eval_samples_per_second": 11.429,
53
- "eval_steps_per_second": 0.49,
54
- "step": 30
55
  },
56
  {
57
- "epoch": 3.03,
58
- "learning_rate": 4.814814814814815e-05,
59
- "loss": 0.301,
60
- "step": 40
61
  },
62
  {
63
  "epoch": 3.03,
64
- "eval_accuracy": 0.9428571428571428,
65
- "eval_loss": 0.21951383352279663,
66
- "eval_runtime": 5.975,
67
- "eval_samples_per_second": 11.715,
68
- "eval_steps_per_second": 0.502,
69
- "step": 40
 
 
 
 
 
 
70
  },
71
  {
72
  "epoch": 4.03,
73
- "learning_rate": 4.62962962962963e-05,
74
- "loss": 0.1061,
75
- "step": 50
76
  },
77
  {
78
  "epoch": 4.03,
79
- "eval_accuracy": 0.9142857142857143,
80
- "eval_loss": 0.15290319919586182,
81
- "eval_runtime": 6.6932,
82
- "eval_samples_per_second": 10.458,
83
- "eval_steps_per_second": 0.448,
84
- "step": 50
85
  },
86
  {
87
- "epoch": 5.03,
88
- "learning_rate": 4.4444444444444447e-05,
89
- "loss": 0.0499,
90
- "step": 60
91
  },
92
  {
93
  "epoch": 5.03,
94
- "eval_accuracy": 0.9857142857142858,
95
- "eval_loss": 0.08257948607206345,
96
- "eval_runtime": 6.8638,
97
- "eval_samples_per_second": 10.198,
98
- "eval_steps_per_second": 0.437,
99
- "step": 60
100
  },
101
  {
102
- "epoch": 6.03,
103
- "learning_rate": 4.259259259259259e-05,
104
- "loss": 0.079,
105
- "step": 70
106
  },
107
  {
108
  "epoch": 6.03,
109
- "eval_accuracy": 0.9857142857142858,
110
- "eval_loss": 0.05339507758617401,
111
- "eval_runtime": 5.6428,
112
- "eval_samples_per_second": 12.405,
113
- "eval_steps_per_second": 0.532,
114
- "step": 70
 
 
 
 
 
 
115
  },
116
  {
117
  "epoch": 7.03,
118
- "learning_rate": 4.074074074074074e-05,
119
- "loss": 0.0487,
120
- "step": 80
121
  },
122
  {
123
  "epoch": 7.03,
124
- "eval_accuracy": 0.9857142857142858,
125
- "eval_loss": 0.02986798621714115,
126
- "eval_runtime": 5.2266,
127
- "eval_samples_per_second": 13.393,
128
- "eval_steps_per_second": 0.574,
129
- "step": 80
130
  },
131
  {
132
- "epoch": 8.03,
133
- "learning_rate": 3.888888888888889e-05,
134
- "loss": 0.0217,
135
- "step": 90
136
  },
137
  {
138
  "epoch": 8.03,
139
- "eval_accuracy": 0.9,
140
- "eval_loss": 0.32826170325279236,
141
- "eval_runtime": 6.6113,
142
- "eval_samples_per_second": 10.588,
143
- "eval_steps_per_second": 0.454,
144
- "step": 90
 
 
 
 
 
 
145
  },
146
  {
147
  "epoch": 9.03,
148
- "learning_rate": 3.7037037037037037e-05,
149
- "loss": 0.0387,
150
- "step": 100
151
  },
152
  {
153
  "epoch": 9.03,
154
- "eval_accuracy": 0.9857142857142858,
155
- "eval_loss": 0.026779260486364365,
156
- "eval_runtime": 6.3515,
157
- "eval_samples_per_second": 11.021,
158
- "eval_steps_per_second": 0.472,
159
- "step": 100
160
  },
161
  {
162
- "epoch": 10.03,
163
- "learning_rate": 3.518518518518519e-05,
164
- "loss": 0.0252,
165
- "step": 110
166
  },
167
  {
168
  "epoch": 10.03,
169
- "eval_accuracy": 0.9857142857142858,
170
- "eval_loss": 0.03859327733516693,
171
- "eval_runtime": 6.9316,
172
- "eval_samples_per_second": 10.099,
173
- "eval_steps_per_second": 0.433,
174
- "step": 110
175
  },
176
  {
177
- "epoch": 11.03,
178
- "learning_rate": 3.3333333333333335e-05,
179
- "loss": 0.0324,
180
- "step": 120
181
  },
182
  {
183
  "epoch": 11.03,
184
- "eval_accuracy": 0.9,
185
- "eval_loss": 0.3067415654659271,
186
- "eval_runtime": 5.7803,
187
- "eval_samples_per_second": 12.11,
188
- "eval_steps_per_second": 0.519,
189
- "step": 120
 
 
 
 
 
 
190
  },
191
  {
192
  "epoch": 12.03,
193
- "learning_rate": 3.148148148148148e-05,
194
- "loss": 0.0022,
195
- "step": 130
196
  },
197
  {
198
  "epoch": 12.03,
199
- "eval_accuracy": 1.0,
200
- "eval_loss": 0.013092391192913055,
201
- "eval_runtime": 5.2039,
202
- "eval_samples_per_second": 13.451,
203
- "eval_steps_per_second": 0.576,
204
- "step": 130
205
  },
206
  {
207
- "epoch": 13.03,
208
- "learning_rate": 2.962962962962963e-05,
209
- "loss": 0.0115,
210
- "step": 140
211
  },
212
  {
213
  "epoch": 13.03,
214
- "eval_accuracy": 0.9857142857142858,
215
- "eval_loss": 0.08892710506916046,
216
- "eval_runtime": 4.8696,
217
- "eval_samples_per_second": 14.375,
218
- "eval_steps_per_second": 0.616,
219
- "step": 140
 
 
 
 
 
 
220
  },
221
  {
222
  "epoch": 14.03,
223
- "learning_rate": 2.777777777777778e-05,
224
- "loss": 0.0225,
225
- "step": 150
226
  },
227
  {
228
  "epoch": 14.03,
229
- "eval_accuracy": 1.0,
230
- "eval_loss": 0.009070915170013905,
231
- "eval_runtime": 6.0016,
232
- "eval_samples_per_second": 11.663,
233
- "eval_steps_per_second": 0.5,
234
- "step": 150
235
  },
236
  {
237
- "epoch": 15.03,
238
- "learning_rate": 2.5925925925925925e-05,
239
- "loss": 0.0012,
240
- "step": 160
241
  },
242
  {
243
  "epoch": 15.03,
244
- "eval_accuracy": 1.0,
245
- "eval_loss": 0.008068457245826721,
246
- "eval_runtime": 5.6404,
247
- "eval_samples_per_second": 12.411,
248
- "eval_steps_per_second": 0.532,
249
- "step": 160
250
  },
251
  {
252
- "epoch": 16.03,
253
- "learning_rate": 2.4074074074074074e-05,
254
- "loss": 0.001,
255
- "step": 170
256
  },
257
  {
258
  "epoch": 16.03,
259
- "eval_accuracy": 1.0,
260
- "eval_loss": 0.010284548625349998,
261
- "eval_runtime": 5.7893,
262
- "eval_samples_per_second": 12.091,
263
- "eval_steps_per_second": 0.518,
264
- "step": 170
 
 
 
 
 
 
265
  },
266
  {
267
  "epoch": 17.03,
268
- "learning_rate": 2.2222222222222223e-05,
269
- "loss": 0.0255,
270
- "step": 180
271
  },
272
  {
273
  "epoch": 17.03,
274
- "eval_accuracy": 1.0,
275
- "eval_loss": 0.01131558045744896,
276
- "eval_runtime": 6.4736,
277
- "eval_samples_per_second": 10.813,
278
- "eval_steps_per_second": 0.463,
279
- "step": 180
280
  },
281
  {
282
- "epoch": 18.03,
283
- "learning_rate": 2.037037037037037e-05,
284
- "loss": 0.0016,
285
- "step": 190
286
  },
287
  {
288
  "epoch": 18.03,
289
- "eval_accuracy": 0.9857142857142858,
290
- "eval_loss": 0.025160381570458412,
291
- "eval_runtime": 5.7118,
292
- "eval_samples_per_second": 12.255,
293
- "eval_steps_per_second": 0.525,
294
- "step": 190
 
 
 
 
 
 
295
  },
296
  {
297
  "epoch": 19.03,
298
- "learning_rate": 1.8518518518518518e-05,
299
- "loss": 0.0039,
300
- "step": 200
301
  },
302
  {
303
  "epoch": 19.03,
304
- "eval_accuracy": 0.9857142857142858,
305
- "eval_loss": 0.017666514962911606,
306
- "eval_runtime": 5.7688,
307
- "eval_samples_per_second": 12.134,
308
- "eval_steps_per_second": 0.52,
309
- "step": 200
310
  },
311
  {
312
- "epoch": 20.03,
313
- "learning_rate": 1.6666666666666667e-05,
314
- "loss": 0.0007,
315
- "step": 210
316
  },
317
  {
318
  "epoch": 20.03,
319
- "eval_accuracy": 1.0,
320
- "eval_loss": 0.0016855127178132534,
321
- "eval_runtime": 5.9439,
322
- "eval_samples_per_second": 11.777,
323
- "eval_steps_per_second": 0.505,
324
- "step": 210
325
  },
326
  {
327
- "epoch": 21.03,
328
- "learning_rate": 1.4814814814814815e-05,
329
- "loss": 0.0006,
330
- "step": 220
331
  },
332
  {
333
  "epoch": 21.03,
334
- "eval_accuracy": 1.0,
335
- "eval_loss": 0.00132262974511832,
336
- "eval_runtime": 6.2152,
337
- "eval_samples_per_second": 11.263,
338
- "eval_steps_per_second": 0.483,
339
- "step": 220
 
 
 
 
 
 
340
  },
341
  {
342
  "epoch": 22.03,
343
- "learning_rate": 1.2962962962962962e-05,
344
- "loss": 0.0006,
345
- "step": 230
346
  },
347
  {
348
  "epoch": 22.03,
349
- "eval_accuracy": 1.0,
350
- "eval_loss": 0.0012219419004395604,
351
- "eval_runtime": 5.788,
352
- "eval_samples_per_second": 12.094,
353
- "eval_steps_per_second": 0.518,
354
- "step": 230
355
  },
356
  {
357
- "epoch": 23.03,
358
- "learning_rate": 1.1111111111111112e-05,
359
- "loss": 0.0007,
360
- "step": 240
361
  },
362
  {
363
  "epoch": 23.03,
364
- "eval_accuracy": 1.0,
365
- "eval_loss": 0.001058029243722558,
366
- "eval_runtime": 6.1445,
367
- "eval_samples_per_second": 11.392,
368
- "eval_steps_per_second": 0.488,
369
- "step": 240
 
 
 
 
 
 
370
  },
371
  {
372
  "epoch": 24.03,
373
- "learning_rate": 9.259259259259259e-06,
374
- "loss": 0.0005,
375
- "step": 250
376
  },
377
  {
378
  "epoch": 24.03,
379
- "eval_accuracy": 1.0,
380
- "eval_loss": 0.0010857696179300547,
381
- "eval_runtime": 6.974,
382
- "eval_samples_per_second": 10.037,
383
- "eval_steps_per_second": 0.43,
384
- "step": 250
385
  },
386
  {
387
- "epoch": 25.03,
388
- "learning_rate": 7.4074074074074075e-06,
389
- "loss": 0.0005,
390
- "step": 260
391
  },
392
  {
393
  "epoch": 25.03,
394
- "eval_accuracy": 1.0,
395
- "eval_loss": 0.0010956026380881667,
396
- "eval_runtime": 5.6397,
397
- "eval_samples_per_second": 12.412,
398
- "eval_steps_per_second": 0.532,
399
- "step": 260
400
  },
401
  {
402
- "epoch": 26.03,
403
- "learning_rate": 5.555555555555556e-06,
404
- "loss": 0.0005,
405
- "step": 270
406
  },
407
  {
408
  "epoch": 26.03,
409
- "eval_accuracy": 1.0,
410
- "eval_loss": 0.0010995334014296532,
411
- "eval_runtime": 5.1379,
412
- "eval_samples_per_second": 13.624,
413
- "eval_steps_per_second": 0.584,
414
- "step": 270
 
 
 
 
 
 
415
  },
416
  {
417
  "epoch": 27.03,
418
- "learning_rate": 3.7037037037037037e-06,
419
- "loss": 0.0005,
420
- "step": 280
421
  },
422
  {
423
  "epoch": 27.03,
424
- "eval_accuracy": 1.0,
425
- "eval_loss": 0.0010933494195342064,
426
- "eval_runtime": 5.5747,
427
- "eval_samples_per_second": 12.557,
428
- "eval_steps_per_second": 0.538,
429
- "step": 280
430
  },
431
  {
432
- "epoch": 28.03,
433
- "learning_rate": 1.8518518518518519e-06,
434
- "loss": 0.0005,
435
- "step": 290
436
  },
437
  {
438
  "epoch": 28.03,
439
- "eval_accuracy": 1.0,
440
- "eval_loss": 0.0010898270411416888,
441
- "eval_runtime": 5.3602,
442
- "eval_samples_per_second": 13.059,
443
- "eval_steps_per_second": 0.56,
444
- "step": 290
445
  },
446
  {
447
- "epoch": 29.03,
448
- "learning_rate": 0.0,
449
- "loss": 0.0005,
450
- "step": 300
451
  },
452
  {
453
  "epoch": 29.03,
454
- "eval_accuracy": 1.0,
455
- "eval_loss": 0.0010924884118139744,
456
- "eval_runtime": 6.1154,
457
- "eval_samples_per_second": 11.447,
458
- "eval_steps_per_second": 0.491,
459
- "step": 300
460
  },
461
  {
462
  "epoch": 29.03,
463
- "step": 300,
464
- "total_flos": 3.952030329844531e+19,
465
- "train_loss": 0.2041329901261876,
466
- "train_runtime": 1416.8695,
467
- "train_samples_per_second": 6.776,
468
- "train_steps_per_second": 0.212
469
  },
470
  {
471
- "epoch": 29.03,
472
- "eval_accuracy": 0.9741935483870968,
473
- "eval_loss": 0.09911247342824936,
474
- "eval_runtime": 16.8335,
475
- "eval_samples_per_second": 9.208,
476
- "eval_steps_per_second": 0.297,
477
- "step": 300
478
  },
479
  {
480
- "epoch": 29.03,
481
- "eval_accuracy": 0.9741935483870968,
482
- "eval_loss": 0.09928672015666962,
483
- "eval_runtime": 13.0498,
484
- "eval_samples_per_second": 11.878,
485
- "eval_steps_per_second": 0.383,
486
- "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
  }
488
  ],
489
- "logging_steps": 10,
490
- "max_steps": 300,
491
  "num_input_tokens_seen": 0,
492
  "num_train_epochs": 9223372036854775807,
493
  "save_steps": 500,
494
- "total_flos": 3.952030329844531e+19,
495
- "train_batch_size": 32,
496
  "trial_name": null,
497
  "trial_params": null
498
  }
 
1
  {
2
+ "best_metric": 0.5666666666666667,
3
+ "best_model_checkpoint": "MCG-NJU/videomae-large/checkpoint-140",
4
+ "epoch": 31.013636363636362,
5
  "eval_steps": 500,
6
+ "global_step": 220,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.02,
13
+ "learning_rate": 1.1363636363636365e-05,
14
+ "loss": 2.6619,
15
+ "step": 5
16
  },
17
  {
18
  "epoch": 0.03,
19
+ "eval_accuracy": 0.0,
20
+ "eval_loss": 2.7016749382019043,
21
+ "eval_runtime": 5.569,
22
+ "eval_samples_per_second": 5.387,
23
+ "eval_steps_per_second": 0.359,
24
+ "step": 7
25
+ },
26
+ {
27
+ "epoch": 1.01,
28
+ "learning_rate": 2.272727272727273e-05,
29
+ "loss": 2.6232,
30
  "step": 10
31
  },
32
  {
33
  "epoch": 1.03,
34
+ "eval_accuracy": 0.0,
35
+ "eval_loss": 2.6628258228302,
36
+ "eval_runtime": 5.363,
37
+ "eval_samples_per_second": 5.594,
38
+ "eval_steps_per_second": 0.373,
39
+ "step": 14
40
  },
41
  {
42
+ "epoch": 2.0,
43
+ "learning_rate": 3.409090909090909e-05,
44
+ "loss": 2.5419,
45
+ "step": 15
 
 
 
46
  },
47
  {
48
  "epoch": 2.03,
49
+ "learning_rate": 4.545454545454546e-05,
50
+ "loss": 2.381,
51
+ "step": 20
52
  },
53
  {
54
  "epoch": 2.03,
55
+ "eval_accuracy": 0.16666666666666666,
56
+ "eval_loss": 2.5797576904296875,
57
+ "eval_runtime": 5.6099,
58
+ "eval_samples_per_second": 5.348,
59
+ "eval_steps_per_second": 0.357,
60
+ "step": 21
61
  },
62
  {
63
+ "epoch": 3.02,
64
+ "learning_rate": 4.9242424242424245e-05,
65
+ "loss": 2.2215,
66
+ "step": 25
67
  },
68
  {
69
  "epoch": 3.03,
70
+ "eval_accuracy": 0.16666666666666666,
71
+ "eval_loss": 2.4757392406463623,
72
+ "eval_runtime": 5.5008,
73
+ "eval_samples_per_second": 5.454,
74
+ "eval_steps_per_second": 0.364,
75
+ "step": 28
76
+ },
77
+ {
78
+ "epoch": 4.01,
79
+ "learning_rate": 4.797979797979798e-05,
80
+ "loss": 1.8738,
81
+ "step": 30
82
  },
83
  {
84
  "epoch": 4.03,
85
+ "learning_rate": 4.671717171717172e-05,
86
+ "loss": 1.7389,
87
+ "step": 35
88
  },
89
  {
90
  "epoch": 4.03,
91
+ "eval_accuracy": 0.23333333333333334,
92
+ "eval_loss": 2.363579511642456,
93
+ "eval_runtime": 6.4613,
94
+ "eval_samples_per_second": 4.643,
95
+ "eval_steps_per_second": 0.31,
96
+ "step": 35
97
  },
98
  {
99
+ "epoch": 5.02,
100
+ "learning_rate": 4.545454545454546e-05,
101
+ "loss": 1.3366,
102
+ "step": 40
103
  },
104
  {
105
  "epoch": 5.03,
106
+ "eval_accuracy": 0.3,
107
+ "eval_loss": 2.2424137592315674,
108
+ "eval_runtime": 5.7626,
109
+ "eval_samples_per_second": 5.206,
110
+ "eval_steps_per_second": 0.347,
111
+ "step": 42
112
  },
113
  {
114
+ "epoch": 6.01,
115
+ "learning_rate": 4.41919191919192e-05,
116
+ "loss": 1.1946,
117
+ "step": 45
118
  },
119
  {
120
  "epoch": 6.03,
121
+ "eval_accuracy": 0.3,
122
+ "eval_loss": 2.167524814605713,
123
+ "eval_runtime": 5.8137,
124
+ "eval_samples_per_second": 5.16,
125
+ "eval_steps_per_second": 0.344,
126
+ "step": 49
127
+ },
128
+ {
129
+ "epoch": 7.0,
130
+ "learning_rate": 4.292929292929293e-05,
131
+ "loss": 0.8832,
132
+ "step": 50
133
  },
134
  {
135
  "epoch": 7.03,
136
+ "learning_rate": 4.166666666666667e-05,
137
+ "loss": 0.6809,
138
+ "step": 55
139
  },
140
  {
141
  "epoch": 7.03,
142
+ "eval_accuracy": 0.36666666666666664,
143
+ "eval_loss": 2.0548174381256104,
144
+ "eval_runtime": 5.6032,
145
+ "eval_samples_per_second": 5.354,
146
+ "eval_steps_per_second": 0.357,
147
+ "step": 56
148
  },
149
  {
150
+ "epoch": 8.02,
151
+ "learning_rate": 4.0404040404040405e-05,
152
+ "loss": 0.5255,
153
+ "step": 60
154
  },
155
  {
156
  "epoch": 8.03,
157
+ "eval_accuracy": 0.4,
158
+ "eval_loss": 2.0410492420196533,
159
+ "eval_runtime": 5.8803,
160
+ "eval_samples_per_second": 5.102,
161
+ "eval_steps_per_second": 0.34,
162
+ "step": 63
163
+ },
164
+ {
165
+ "epoch": 9.01,
166
+ "learning_rate": 3.9141414141414145e-05,
167
+ "loss": 0.4759,
168
+ "step": 65
169
  },
170
  {
171
  "epoch": 9.03,
172
+ "learning_rate": 3.787878787878788e-05,
173
+ "loss": 0.3285,
174
+ "step": 70
175
  },
176
  {
177
  "epoch": 9.03,
178
+ "eval_accuracy": 0.4,
179
+ "eval_loss": 1.9539462327957153,
180
+ "eval_runtime": 6.0204,
181
+ "eval_samples_per_second": 4.983,
182
+ "eval_steps_per_second": 0.332,
183
+ "step": 70
184
  },
185
  {
186
+ "epoch": 10.02,
187
+ "learning_rate": 3.661616161616162e-05,
188
+ "loss": 0.2849,
189
+ "step": 75
190
  },
191
  {
192
  "epoch": 10.03,
193
+ "eval_accuracy": 0.4666666666666667,
194
+ "eval_loss": 1.8536347150802612,
195
+ "eval_runtime": 5.2379,
196
+ "eval_samples_per_second": 5.727,
197
+ "eval_steps_per_second": 0.382,
198
+ "step": 77
199
  },
200
  {
201
+ "epoch": 11.01,
202
+ "learning_rate": 3.535353535353535e-05,
203
+ "loss": 0.1832,
204
+ "step": 80
205
  },
206
  {
207
  "epoch": 11.03,
208
+ "eval_accuracy": 0.43333333333333335,
209
+ "eval_loss": 1.8293204307556152,
210
+ "eval_runtime": 5.7575,
211
+ "eval_samples_per_second": 5.211,
212
+ "eval_steps_per_second": 0.347,
213
+ "step": 84
214
+ },
215
+ {
216
+ "epoch": 12.0,
217
+ "learning_rate": 3.409090909090909e-05,
218
+ "loss": 0.1485,
219
+ "step": 85
220
  },
221
  {
222
  "epoch": 12.03,
223
+ "learning_rate": 3.282828282828283e-05,
224
+ "loss": 0.1307,
225
+ "step": 90
226
  },
227
  {
228
  "epoch": 12.03,
229
+ "eval_accuracy": 0.4,
230
+ "eval_loss": 1.8200174570083618,
231
+ "eval_runtime": 5.6546,
232
+ "eval_samples_per_second": 5.305,
233
+ "eval_steps_per_second": 0.354,
234
+ "step": 91
235
  },
236
  {
237
+ "epoch": 13.02,
238
+ "learning_rate": 3.1565656565656566e-05,
239
+ "loss": 0.0901,
240
+ "step": 95
241
  },
242
  {
243
  "epoch": 13.03,
244
+ "eval_accuracy": 0.4,
245
+ "eval_loss": 1.8354666233062744,
246
+ "eval_runtime": 5.7638,
247
+ "eval_samples_per_second": 5.205,
248
+ "eval_steps_per_second": 0.347,
249
+ "step": 98
250
+ },
251
+ {
252
+ "epoch": 14.01,
253
+ "learning_rate": 3.0303030303030306e-05,
254
+ "loss": 0.0757,
255
+ "step": 100
256
  },
257
  {
258
  "epoch": 14.03,
259
+ "learning_rate": 2.904040404040404e-05,
260
+ "loss": 0.0636,
261
+ "step": 105
262
  },
263
  {
264
  "epoch": 14.03,
265
+ "eval_accuracy": 0.43333333333333335,
266
+ "eval_loss": 1.8200985193252563,
267
+ "eval_runtime": 5.0989,
268
+ "eval_samples_per_second": 5.884,
269
+ "eval_steps_per_second": 0.392,
270
+ "step": 105
271
  },
272
  {
273
+ "epoch": 15.02,
274
+ "learning_rate": 2.777777777777778e-05,
275
+ "loss": 0.0413,
276
+ "step": 110
277
  },
278
  {
279
  "epoch": 15.03,
280
+ "eval_accuracy": 0.4666666666666667,
281
+ "eval_loss": 1.7749541997909546,
282
+ "eval_runtime": 5.2291,
283
+ "eval_samples_per_second": 5.737,
284
+ "eval_steps_per_second": 0.382,
285
+ "step": 112
286
  },
287
  {
288
+ "epoch": 16.01,
289
+ "learning_rate": 2.6515151515151516e-05,
290
+ "loss": 0.0427,
291
+ "step": 115
292
  },
293
  {
294
  "epoch": 16.03,
295
+ "eval_accuracy": 0.5333333333333333,
296
+ "eval_loss": 1.745997667312622,
297
+ "eval_runtime": 5.2765,
298
+ "eval_samples_per_second": 5.686,
299
+ "eval_steps_per_second": 0.379,
300
+ "step": 119
301
+ },
302
+ {
303
+ "epoch": 17.0,
304
+ "learning_rate": 2.5252525252525256e-05,
305
+ "loss": 0.0369,
306
+ "step": 120
307
  },
308
  {
309
  "epoch": 17.03,
310
+ "learning_rate": 2.398989898989899e-05,
311
+ "loss": 0.0254,
312
+ "step": 125
313
  },
314
  {
315
  "epoch": 17.03,
316
+ "eval_accuracy": 0.5333333333333333,
317
+ "eval_loss": 1.7804018259048462,
318
+ "eval_runtime": 5.5848,
319
+ "eval_samples_per_second": 5.372,
320
+ "eval_steps_per_second": 0.358,
321
+ "step": 126
322
  },
323
  {
324
+ "epoch": 18.02,
325
+ "learning_rate": 2.272727272727273e-05,
326
+ "loss": 0.0203,
327
+ "step": 130
328
  },
329
  {
330
  "epoch": 18.03,
331
+ "eval_accuracy": 0.43333333333333335,
332
+ "eval_loss": 1.8868685960769653,
333
+ "eval_runtime": 5.6579,
334
+ "eval_samples_per_second": 5.302,
335
+ "eval_steps_per_second": 0.353,
336
+ "step": 133
337
+ },
338
+ {
339
+ "epoch": 19.01,
340
+ "learning_rate": 2.1464646464646466e-05,
341
+ "loss": 0.0231,
342
+ "step": 135
343
  },
344
  {
345
  "epoch": 19.03,
346
+ "learning_rate": 2.0202020202020203e-05,
347
+ "loss": 0.0174,
348
+ "step": 140
349
  },
350
  {
351
  "epoch": 19.03,
352
+ "eval_accuracy": 0.5666666666666667,
353
+ "eval_loss": 1.7740839719772339,
354
+ "eval_runtime": 5.4112,
355
+ "eval_samples_per_second": 5.544,
356
+ "eval_steps_per_second": 0.37,
357
+ "step": 140
358
  },
359
  {
360
+ "epoch": 20.02,
361
+ "learning_rate": 1.893939393939394e-05,
362
+ "loss": 0.0154,
363
+ "step": 145
364
  },
365
  {
366
  "epoch": 20.03,
367
+ "eval_accuracy": 0.5333333333333333,
368
+ "eval_loss": 1.7400553226470947,
369
+ "eval_runtime": 5.673,
370
+ "eval_samples_per_second": 5.288,
371
+ "eval_steps_per_second": 0.353,
372
+ "step": 147
373
  },
374
  {
375
+ "epoch": 21.01,
376
+ "learning_rate": 1.7676767676767676e-05,
377
+ "loss": 0.0136,
378
+ "step": 150
379
  },
380
  {
381
  "epoch": 21.03,
382
+ "eval_accuracy": 0.5,
383
+ "eval_loss": 1.7672396898269653,
384
+ "eval_runtime": 5.5489,
385
+ "eval_samples_per_second": 5.406,
386
+ "eval_steps_per_second": 0.36,
387
+ "step": 154
388
+ },
389
+ {
390
+ "epoch": 22.0,
391
+ "learning_rate": 1.6414141414141416e-05,
392
+ "loss": 0.0123,
393
+ "step": 155
394
  },
395
  {
396
  "epoch": 22.03,
397
+ "learning_rate": 1.5151515151515153e-05,
398
+ "loss": 0.0116,
399
+ "step": 160
400
  },
401
  {
402
  "epoch": 22.03,
403
+ "eval_accuracy": 0.5333333333333333,
404
+ "eval_loss": 1.7792834043502808,
405
+ "eval_runtime": 5.7051,
406
+ "eval_samples_per_second": 5.258,
407
+ "eval_steps_per_second": 0.351,
408
+ "step": 161
409
  },
410
  {
411
+ "epoch": 23.02,
412
+ "learning_rate": 1.388888888888889e-05,
413
+ "loss": 0.0123,
414
+ "step": 165
415
  },
416
  {
417
  "epoch": 23.03,
418
+ "eval_accuracy": 0.4666666666666667,
419
+ "eval_loss": 1.8018161058425903,
420
+ "eval_runtime": 5.5773,
421
+ "eval_samples_per_second": 5.379,
422
+ "eval_steps_per_second": 0.359,
423
+ "step": 168
424
+ },
425
+ {
426
+ "epoch": 24.01,
427
+ "learning_rate": 1.2626262626262628e-05,
428
+ "loss": 0.0093,
429
+ "step": 170
430
  },
431
  {
432
  "epoch": 24.03,
433
+ "learning_rate": 1.1363636363636365e-05,
434
+ "loss": 0.0102,
435
+ "step": 175
436
  },
437
  {
438
  "epoch": 24.03,
439
+ "eval_accuracy": 0.5,
440
+ "eval_loss": 1.8023875951766968,
441
+ "eval_runtime": 5.4661,
442
+ "eval_samples_per_second": 5.488,
443
+ "eval_steps_per_second": 0.366,
444
+ "step": 175
445
  },
446
  {
447
+ "epoch": 25.02,
448
+ "learning_rate": 1.0101010101010101e-05,
449
+ "loss": 0.0103,
450
+ "step": 180
451
  },
452
  {
453
  "epoch": 25.03,
454
+ "eval_accuracy": 0.5,
455
+ "eval_loss": 1.8057912588119507,
456
+ "eval_runtime": 5.5758,
457
+ "eval_samples_per_second": 5.38,
458
+ "eval_steps_per_second": 0.359,
459
+ "step": 182
460
  },
461
  {
462
+ "epoch": 26.01,
463
+ "learning_rate": 8.838383838383838e-06,
464
+ "loss": 0.0089,
465
+ "step": 185
466
  },
467
  {
468
  "epoch": 26.03,
469
+ "eval_accuracy": 0.5,
470
+ "eval_loss": 1.810552954673767,
471
+ "eval_runtime": 5.6149,
472
+ "eval_samples_per_second": 5.343,
473
+ "eval_steps_per_second": 0.356,
474
+ "step": 189
475
+ },
476
+ {
477
+ "epoch": 27.0,
478
+ "learning_rate": 7.5757575757575764e-06,
479
+ "loss": 0.009,
480
+ "step": 190
481
  },
482
  {
483
  "epoch": 27.03,
484
+ "learning_rate": 6.313131313131314e-06,
485
+ "loss": 0.0088,
486
+ "step": 195
487
  },
488
  {
489
  "epoch": 27.03,
490
+ "eval_accuracy": 0.5,
491
+ "eval_loss": 1.8028618097305298,
492
+ "eval_runtime": 5.3559,
493
+ "eval_samples_per_second": 5.601,
494
+ "eval_steps_per_second": 0.373,
495
+ "step": 196
496
  },
497
  {
498
+ "epoch": 28.02,
499
+ "learning_rate": 5.050505050505051e-06,
500
+ "loss": 0.0092,
501
+ "step": 200
502
  },
503
  {
504
  "epoch": 28.03,
505
+ "eval_accuracy": 0.5,
506
+ "eval_loss": 1.7960565090179443,
507
+ "eval_runtime": 5.2538,
508
+ "eval_samples_per_second": 5.71,
509
+ "eval_steps_per_second": 0.381,
510
+ "step": 203
511
  },
512
  {
513
+ "epoch": 29.01,
514
+ "learning_rate": 3.7878787878787882e-06,
515
+ "loss": 0.0082,
516
+ "step": 205
517
  },
518
  {
519
  "epoch": 29.03,
520
+ "learning_rate": 2.5252525252525253e-06,
521
+ "loss": 0.0083,
522
+ "step": 210
 
 
 
523
  },
524
  {
525
  "epoch": 29.03,
526
+ "eval_accuracy": 0.5,
527
+ "eval_loss": 1.7939893007278442,
528
+ "eval_runtime": 5.0992,
529
+ "eval_samples_per_second": 5.883,
530
+ "eval_steps_per_second": 0.392,
531
+ "step": 210
532
  },
533
  {
534
+ "epoch": 30.02,
535
+ "learning_rate": 1.2626262626262627e-06,
536
+ "loss": 0.0099,
537
+ "step": 215
 
 
 
538
  },
539
  {
540
+ "epoch": 30.03,
541
+ "eval_accuracy": 0.5,
542
+ "eval_loss": 1.7922049760818481,
543
+ "eval_runtime": 5.3556,
544
+ "eval_samples_per_second": 5.602,
545
+ "eval_steps_per_second": 0.373,
546
+ "step": 217
547
+ },
548
+ {
549
+ "epoch": 31.01,
550
+ "learning_rate": 0.0,
551
+ "loss": 0.0085,
552
+ "step": 220
553
+ },
554
+ {
555
+ "epoch": 31.01,
556
+ "eval_accuracy": 0.5,
557
+ "eval_loss": 1.7919764518737793,
558
+ "eval_runtime": 5.1737,
559
+ "eval_samples_per_second": 5.799,
560
+ "eval_steps_per_second": 0.387,
561
+ "step": 220
562
+ },
563
+ {
564
+ "epoch": 31.01,
565
+ "step": 220,
566
+ "total_flos": 1.5320910961010737e+19,
567
+ "train_loss": 0.5185655888847329,
568
+ "train_runtime": 1238.2272,
569
+ "train_samples_per_second": 2.843,
570
+ "train_steps_per_second": 0.178
571
+ },
572
+ {
573
+ "epoch": 31.01,
574
+ "eval_accuracy": 0.42857142857142855,
575
+ "eval_loss": 1.504156231880188,
576
+ "eval_runtime": 2.3481,
577
+ "eval_samples_per_second": 5.962,
578
+ "eval_steps_per_second": 0.426,
579
+ "step": 220
580
+ },
581
+ {
582
+ "epoch": 31.01,
583
+ "eval_accuracy": 0.42857142857142855,
584
+ "eval_loss": 1.504156231880188,
585
+ "eval_runtime": 2.2938,
586
+ "eval_samples_per_second": 6.104,
587
+ "eval_steps_per_second": 0.436,
588
+ "step": 220
589
  }
590
  ],
591
+ "logging_steps": 5,
592
+ "max_steps": 220,
593
  "num_input_tokens_seen": 0,
594
  "num_train_epochs": 9223372036854775807,
595
  "save_steps": 500,
596
+ "total_flos": 1.5320910961010737e+19,
597
+ "train_batch_size": 16,
598
  "trial_name": null,
599
  "trial_params": null
600
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3454bc7aa261c0482f31475bec6bc2ac80d03b78757742a3642beabe3f48f8f2
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c4ef8e9fe8d9207ef60f4ef95e43a78337eeb8ead430b1b5a800c027fd54c2d
3
  size 4728