{
  "_name_or_path": "/usr/local/google/home/vishvak/Projects/datamux-pretraining-root/checkpoints/release_checkpoints/bert/base/gaussian_hadamard_index/qqp/10_gaussian_hadamard_index_5e-05_1_0_3_100000_128_4/checkpoint-100000",
  "architectures": [
    "MuxedBertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "binary_hadamard_epsilon": 0.0,
  "demuxing_variant": "index",
  "finetuning_task": "qqp",
  "gaussian_hadamard_norm": 1.0,
  "gradient_checkpointing": false,
  "head_temperature": 100.0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "hierarchical_softmax_buckets": 100,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "learn_muxing": 0,
  "legacy_demuxing": false,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "muxing_variant": "gaussian_hadamard",
  "num_attention_heads": 12,
  "num_hidden_demux_layers": 3,
  "num_hidden_layers": 12,
  "num_instances": 10,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "retrieval_loss_coeff": 0.0,
  "retrieval_percentage": 1.0,
  "task_loss_coeff": 1.0,
  "transformers_version": "4.4.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "use_hierarchical_softmax": false,
  "vocab_size": 30522
}