{
  "architectures": [
    "SMAForSSL"
  ],
  "attention_dropout_prob": 0.0,
  "cross_attention_widening_factor": 1,
  "cross_eval_noising_args": null,
  "cross_train_noising_args": [
    [
      "RandomlySelectedCrossAttentionMasking",
      {
        "exclude_seen_reconstruction": true,
        "head_aggregation": "random_mix",
        "mask_self": true,
        "masking_ratio": 0.2,
        "num_per_query": 3,
        "select_initial_ratio": 1.0,
        "varying_length": true
      }
    ]
  ],
  "decoder_attention_channels": 128,
  "decoder_heads": 1,
  "decoder_latent_channels": 128,
  "decoder_type": "cross_attention",
  "dense_use_bias": true,
  "drop_path_rate": 0.0,
  "embedded_channels": 128,
  "encoder_cross_attention_channels": 128,
  "encoder_type": "self_attention",
  "final_project": true,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "initializer_range": 0.02,
  "input_channels": 1,
  "input_type": "continuous",
  "latent_channels": 128,
  "layer_norm_eps": 1e-12,
  "layernorm_eps": 1e-12,
  "loss_fn": "mse",
  "max_position_embeddings": 28,
  "model_type": "sma",
  "num_blocks": 1,
  "num_cross_attention_heads": 8,
  "num_discrete_tokens": 262,
  "num_latents": 128,
  "num_outputs": 2048,
  "num_self_attends_per_block": 4,
  "num_self_attention_heads": 8,
  "output_channels": 262,
  "pe_initializer_range": 0.02,
  "post_decoder_layers": null,
  "project_after_concat": true,
  "qk_channels": 128,
  "self_attention_widening_factor": 1,
  "share_decoder_queries": true,
  "share_embedding_weights": true,
  "teacher_args": {
    "auxiliary_loss_fn": "mse",
    "auxiliary_loss_weight": 1.0,
    "ema_args": {
      "ema_decay_end": 0.0,
      "ema_decay_start": 0.0
    },
    "eval_transform_args": [
      [
        "RandomlySelectedCrossAttentionMasking",
        {
          "exclude_seen_reconstruction": true,
          "head_aggregation": "random_mix",
          "mask_self": true,
          "masking_ratio": 0.2,
          "num_per_query": 3,
          "select_initial_ratio": 1.0,
          "varying_length": true
        }
      ]
    ],
    "mask_replace": 3,
    "num_layer_target_avg": null,
    "reconstruction_decoder_args": {
      "num_heads": 8,
      "num_outputs": 28,
      "output_channels": 1,
      "qk_channels": 128,
      "query_num_channels": 128,
      "share_decoder_queries": true,
      "share_embedding_weights": true,
      "use_query_residual": true,
      "v_channels": 128
    },
    "reconstruction_loss_fn": "mse",
    "reconstruction_loss_weight": 1.0,
    "reconstruction_weighted_loss": false,
    "target_normalization_fn": "layernorm",
    "train_transform_args": null
  },
  "teacher_name": "ReconstructionTeacher",
  "torch_dtype": "float32",
  "transformers_version": "4.26.0.dev0",
  "use_decoder": false,
  "use_position_embeddings": true,
  "use_query_residual": true,
  "v_channels": 128
}
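
For quick inspection, the configuration above can be read back like any JSON file. Below is a minimal sketch in Python, assuming the file has been saved locally as config.json; note that the "sma" model type is not part of the stock transformers library, so instantiating the full SMAForSSL model would additionally require the accompanying custom model code.

import json

# Load the configuration shown above (assumed to be saved as ./config.json).
with open("config.json") as f:
    cfg = json.load(f)

# Encoder geometry: 128 latent vectors of width 128, one block of
# 4 self-attention layers with 8 heads each.
print(cfg["num_latents"], cfg["latent_channels"])
print(cfg["num_blocks"], cfg["num_self_attends_per_block"], cfg["num_self_attention_heads"])

# Self-supervised training setup: a ReconstructionTeacher with MSE losses
# and a random cross-attention masking transform applied during training.
print(cfg["teacher_name"], cfg["teacher_args"]["reconstruction_loss_fn"])
print(cfg["cross_train_noising_args"][0][0])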