mgh6 committed
Commit 6fa80b4
1 Parent(s): d8661f7

Training in progress, step 1000, checkpoint

last-checkpoint/config.json CHANGED
@@ -3,12 +3,12 @@
   "architectures": [
     "EsmForMaskedLM"
   ],
-  "attention_probs_dropout_prob": 0.0,
+  "attention_probs_dropout_prob": 0.01,
   "classifier_dropout": null,
   "emb_layer_norm_before": false,
   "esmfold_config": null,
   "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.0,
+  "hidden_dropout_prob": 0.01,
   "hidden_size": 480,
   "initializer_range": 0.02,
   "intermediate_size": 1920,
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83ce41a972129fb0b1551fb265aaeb45dcb8ef48ed776e2a9693ee89609dc74c
+oid sha256:54523ee02a2389771e25d39b96df920a04fddbc3f888f4e4a1dc10c14a900999
 size 136000488
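The large files in this commit are stored as Git LFS pointers, which record only the SHA-256 and byte size of the real object. A hypothetical helper (not part of the repo) for checking a locally downloaded file against the new pointer shown above:

# Verify a downloaded LFS object against its pointer (oid + size).
import hashlib
import os

def verify_lfs_object(path, expected_oid, expected_size):
    """Return True if the file at `path` matches the pointer's oid and size."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values taken from the new pointer in this commit.
print(verify_lfs_object(
    "last-checkpoint/model.safetensors",
    "54523ee02a2389771e25d39b96df920a04fddbc3f888f4e4a1dc10c14a900999",
    136000488,
))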
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd3a95f8d68a6d3fb84e9ede2ee7c143d1c5afcd1d769a243d8ca98d8f33aa79
+oid sha256:f9c915286f9878c2b1a2926e419498689b5e3e4a19b9adea6de4edafe20fa82d
 size 268176506
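optimizer.pt is the optimizer state dict saved by the Trainer alongside the model weights, which is why it is roughly twice the size of model.safetensors. A small sketch (an assumption about local use, not code from this repo) for peeking at it:

# Inspect the optimizer checkpoint saved with this step-1000 checkpoint.
import torch

opt_state = torch.load(
    "last-checkpoint/optimizer.pt", map_location="cpu", weights_only=False
)
print(opt_state.keys())                     # typically dict_keys(['state', 'param_groups'])
print(opt_state["param_groups"][0]["lr"])   # learning rate at this step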
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef9e4df5ee72c5c3f44486d5dca78bea54cda14da3592af33eedfde3caef4154
-size 15006
+oid sha256:61a36eda00bdae5afbc5bdeebfcbdace8a0365966fa57c020bed29406600ec49
+size 14942
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f5ca222bf68a3564a804857106a49feb87b18dba9a0be2082fc90028354b2ec2
+oid sha256:a1c11d4e32c92ddc1610815e78968750931b353aa09e9bb0384d11e35312cefd
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,138 +1,19 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.335782247380465,
+  "epoch": 0.24087679152113695,
   "eval_steps": 500,
-  "global_step": 18000,
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.24087679152113695,
-      "grad_norm": 0.5820891857147217,
+      "grad_norm": 0.5082331299781799,
       "learning_rate": 0.000991969806472336,
-      "loss": 1.5141,
+      "loss": 1.4852,
       "step": 1000
-    },
-    {
-      "epoch": 0.4817535830422739,
-      "grad_norm": 0.4833473861217499,
-      "learning_rate": 0.0009839396129446719,
-      "loss": 1.3712,
-      "step": 2000
-    },
-    {
-      "epoch": 0.7226303745634108,
-      "grad_norm": 0.49527350068092346,
-      "learning_rate": 0.0009759094194170079,
-      "loss": 1.3169,
-      "step": 3000
-    },
-    {
-      "epoch": 0.9635071660845478,
-      "grad_norm": 0.5308498740196228,
-      "learning_rate": 0.000967879225889344,
-      "loss": 1.2857,
-      "step": 4000
-    },
-    {
-      "epoch": 1.2043839576056847,
-      "grad_norm": 0.46151238679885864,
-      "learning_rate": 0.00095984903236168,
-      "loss": 1.2532,
-      "step": 5000
-    },
-    {
-      "epoch": 1.4452607491268217,
-      "grad_norm": 0.4826602339744568,
-      "learning_rate": 0.0009518188388340159,
-      "loss": 1.2336,
-      "step": 6000
-    },
-    {
-      "epoch": 1.6861375406479586,
-      "grad_norm": 0.5108029246330261,
-      "learning_rate": 0.0009437886453063518,
-      "loss": 1.2128,
-      "step": 7000
-    },
-    {
-      "epoch": 1.9270143321690956,
-      "grad_norm": 0.4216555655002594,
-      "learning_rate": 0.0009357584517786879,
-      "loss": 1.1934,
-      "step": 8000
-    },
-    {
-      "epoch": 2.1678911236902323,
-      "grad_norm": 0.5326149463653564,
-      "learning_rate": 0.0009277282582510239,
-      "loss": 1.176,
-      "step": 9000
-    },
-    {
-      "epoch": 2.4087679152113695,
-      "grad_norm": 0.4811168313026428,
-      "learning_rate": 0.0009196980647233599,
-      "loss": 1.1599,
-      "step": 10000
-    },
-    {
-      "epoch": 2.649644706732506,
-      "grad_norm": 0.5096309781074524,
-      "learning_rate": 0.0009116678711956958,
-      "loss": 1.1506,
-      "step": 11000
-    },
-    {
-      "epoch": 2.8905214982536434,
-      "grad_norm": 0.479825496673584,
-      "learning_rate": 0.0009036376776680318,
-      "loss": 1.1389,
-      "step": 12000
-    },
-    {
-      "epoch": 3.13139828977478,
-      "grad_norm": 0.4735005795955658,
-      "learning_rate": 0.0008956074841403679,
-      "loss": 1.1269,
-      "step": 13000
-    },
-    {
-      "epoch": 3.3722750812959172,
-      "grad_norm": 0.5051562190055847,
-      "learning_rate": 0.0008875772906127038,
-      "loss": 1.116,
-      "step": 14000
-    },
-    {
-      "epoch": 3.613151872817054,
-      "grad_norm": 0.48939937353134155,
-      "learning_rate": 0.0008795470970850398,
-      "loss": 1.11,
-      "step": 15000
-    },
-    {
-      "epoch": 3.854028664338191,
-      "grad_norm": 0.43153703212738037,
-      "learning_rate": 0.0008715169035573758,
-      "loss": 1.0959,
-      "step": 16000
-    },
-    {
-      "epoch": 4.094905455859328,
-      "grad_norm": 0.4543096125125885,
-      "learning_rate": 0.0008634867100297117,
-      "loss": 1.0903,
-      "step": 17000
-    },
-    {
-      "epoch": 4.335782247380465,
-      "grad_norm": 0.4869837164878845,
-      "learning_rate": 0.0008554565165020477,
-      "loss": 1.0827,
-      "step": 18000
     }
   ],
   "logging_steps": 1000,
@@ -152,7 +33,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.8006571361081754e+17,
+  "total_flos": 1.0003876480548864e+16,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:774cac0098b0da6548d23bcab6e1119da0b3be368b10412f62ce5bbf50ef0802
+oid sha256:0e18ec5730a38ec86822609eef175321578f30afb709c8cd04e2b0522a0e28c7
 size 5112
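training_args.bin is the pickled TrainingArguments object saved by the Trainer. A cautious sketch for inspecting it locally; the explicit weights_only=False is needed on recent PyTorch versions because the file is a full Python pickle, so only load checkpoints you trust.

# Inspect the training arguments stored with this checkpoint.
import torch

args = torch.load("last-checkpoint/training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.save_steps)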