JonasGeiping committed
Commit a61dd96 (1 parent: f1830f0)

Upload arch_budget_hours_24.json with huggingface_hub

Files changed (1): arch_budget_hours_24.json (+55 -0)
arch_budget_hours_24.json ADDED
{
  "architectures": [
    "ScriptableCrammedBERT"
  ],
  "num_transformer_layers": 16,
  "hidden_size": 768,
  "intermed_size": 3072,
  "hidden_dropout_prob": 0.1,
  "norm": "LayerNorm",
  "norm_eps": 1e-12,
  "norm_scheme": "pre",
  "nonlin": "GELUglu",
  "tie_weights": true,
  "decoder_bias": false,
  "sparse_prediction": 0.25,
  "loss": "cross-entropy",
  "objective_layout": "MLM",
  "embedding": {
    "vocab_size": null,
    "pos_embedding": "scaled-sinusoidal",
    "dropout_prob": 0.1,
    "pad_token_id": 0,
    "max_seq_length": 128,
    "embedding_dim": 768,
    "normalization": true,
    "stable_low_precision": false
  },
  "attention": {
    "type": "self-attention",
    "causal_attention": false,
    "num_attention_heads": 12,
    "dropout_prob": 0.1,
    "skip_output_projection": false,
    "qkv_bias": false,
    "rotary_embedding": false,
    "seq_op_in_fp32": false,
    "sequence_op": "torch-softmax"
  },
  "init": {
    "type": "normal",
    "std": 0.02
  },
  "ffn_layer_frequency": 1,
  "skip_head_transform": true,
  "use_bias": false,
  "final_norm": true,
  "num_labels": null,
  "classification_head": {
    "pooler": "avg",
    "include_ff_layer": true,
    "head_dim": 1024,
    "nonlin": "Tanh",
    "classifier_dropout": 0.1
  }
}
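
As a minimal sketch of how this uploaded config could be fetched and inspected, the Python snippet below uses the huggingface_hub client. The repo_id is an assumption (the commit page does not name the repository); substitute the repository this commit actually belongs to.

import json

from huggingface_hub import hf_hub_download

# Download the uploaded architecture config from the Hub.
# repo_id is a placeholder assumption, not confirmed by this commit page.
config_path = hf_hub_download(
    repo_id="JonasGeiping/crammed-bert",  # assumed repo id; replace as needed
    filename="arch_budget_hours_24.json",
)

with open(config_path) as f:
    arch = json.load(f)

# Inspect a few of the architecture fields defined above.
print(arch["architectures"])                     # ["ScriptableCrammedBERT"]
print(arch["num_transformer_layers"])            # 16
print(arch["attention"]["num_attention_heads"])  # 12

Note that fields such as "nonlin": "GELUglu" and "sparse_prediction" are interpreted by the crammed-BERT modeling code rather than by a stock transformers config, so this file is meant to be consumed alongside that custom model implementation.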