Markk committed on
Commit
08396ef
1 Parent(s): 734bf93

Upload v2 model

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +17 -65
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bd3f6a9c08e12f0995b8987b6aa0be12f6f347b899e266e2a788f00e75afe7d
3
  size 10524831541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beb1dfe9e3fb3e4565d46305a1c359d55c6d170b2ea69871ebfc0920f690f10a
3
  size 10524831541
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2eca4b63e00c0cf643a0b4cbbeda9874195025489d5dd449f7db2b9e80ab0db0
3
  size 5363104725
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:209f413dc67f384b46885e98571da206fc6c4457c89d15a2ff44ca826f01b17d
3
  size 5363104725
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b77d9a5a81ac8b69e0670f5933e23ce7468b78385324a6bbb8b86fd0ba74fb6b
3
  size 15597
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:369852fda86a4e5f67261d035b21273f6fcf461fb59c7bbda54eb04e9cbed42f
3
  size 15597
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b77c67ca3fb0d7234d39f939d0791074eda83a0f90bb8b136d320c3a473fa62
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93dcbb27405421069b5f31003e3f224646e375ff4c9d7ba4b0d265614f660d0f
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35d421c455531e443ef68bd5042b7f963d2347c483d8dfcb6aeb3c14fd4ba202
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4ad8ded642fb7c5cc9603be3aae650f1ddaee94d88bc96085f01683245bac67
3
  size 627
trainer_state.json CHANGED
@@ -1,88 +1,40 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.875739644970414,
5
- "global_step": 6000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.74,
12
- "learning_rate": 4.96e-05,
13
- "loss": 1.8134,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 1.48,
18
- "learning_rate": 4.996328645447816e-05,
19
- "loss": 1.3742,
20
  "step": 1000
21
  },
22
  {
23
- "epoch": 2.22,
24
- "learning_rate": 4.992627683197632e-05,
25
- "loss": 1.122,
26
  "step": 1500
27
  },
28
  {
29
- "epoch": 2.96,
30
- "learning_rate": 4.988934122871947e-05,
31
- "loss": 0.8645,
32
  "step": 2000
33
- },
34
- {
35
- "epoch": 3.7,
36
- "learning_rate": 4.9852553663952626e-05,
37
- "loss": 0.6174,
38
- "step": 2500
39
- },
40
- {
41
- "epoch": 4.44,
42
- "learning_rate": 4.9815618060695786e-05,
43
- "loss": 0.5355,
44
- "step": 3000
45
- },
46
- {
47
- "epoch": 5.18,
48
- "learning_rate": 4.977868245743893e-05,
49
- "loss": 0.4336,
50
- "step": 3500
51
- },
52
- {
53
- "epoch": 5.92,
54
- "learning_rate": 4.974167283493709e-05,
55
- "loss": 0.3695,
56
- "step": 4000
57
- },
58
- {
59
- "epoch": 6.66,
60
- "learning_rate": 4.970466321243524e-05,
61
- "loss": 0.3013,
62
- "step": 4500
63
- },
64
- {
65
- "epoch": 7.4,
66
- "learning_rate": 4.966772760917839e-05,
67
- "loss": 0.2954,
68
- "step": 5000
69
- },
70
- {
71
- "epoch": 8.14,
72
- "learning_rate": 4.963071798667654e-05,
73
- "loss": 0.2812,
74
- "step": 5500
75
- },
76
- {
77
- "epoch": 8.88,
78
- "learning_rate": 4.959370836417469e-05,
79
- "loss": 0.2718,
80
- "step": 6000
81
  }
82
  ],
83
- "max_steps": 676000,
84
- "num_train_epochs": 1000,
85
- "total_flos": 1.2118642683671347e+17,
86
  "trial_name": null,
87
  "trial_params": null
88
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.760546642899584,
5
+ "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.19,
12
+ "learning_rate": 1.9920000000000002e-05,
13
+ "loss": 1.8071,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 2.38,
18
+ "learning_rate": 3.9920000000000004e-05,
19
+ "loss": 1.2711,
20
  "step": 1000
21
  },
22
  {
23
+ "epoch": 3.57,
24
+ "learning_rate": 3.951414634146342e-05,
25
+ "loss": 0.9311,
26
  "step": 1500
27
  },
28
  {
29
+ "epoch": 4.76,
30
+ "learning_rate": 3.9026341463414634e-05,
31
+ "loss": 0.6448,
32
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  }
34
  ],
35
+ "max_steps": 42000,
36
+ "num_train_epochs": 100,
37
+ "total_flos": 1.4237038007161651e+17,
38
  "trial_name": null,
39
  "trial_params": null
40
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9949c173097804108f0625268b24c63f3f1c70d9342dfee2164c0e5dc8f16ef3
3
  size 3259
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79a1524a3e0cf9ed521d405e1d37b687d354f40a4093feacd6350515bf4392c1
3
  size 3259