{
  "cfg": {
    "amp": false,
    "chunk_size": -1,
    "flip_aug": false,
    "long_term": {
      "buffer_tokens": 2000,
      "count_usage": true,
      "max_mem_frames": 10,
      "max_num_tokens": 10000,
      "min_mem_frames": 5,
      "num_prototypes": 128
    },
    "max_internal_size": -1,
    "max_mem_frames": 5,
    "mem_every": 5,
    "model": {
      "aux_loss": {
        "query": {
          "enabled": true,
          "weight": 0.01
        },
        "sensory": {
          "enabled": true,
          "weight": 0.01
        }
      },
      "embed_dim": 256,
      "key_dim": 64,
      "mask_decoder": {
        "up_dims": [
          256,
          128,
          128,
          64,
          16
        ]
      },
      "mask_encoder": {
        "final_dim": 256,
        "type": "resnet18"
      },
      "object_summarizer": {
        "add_pe": true,
        "embed_dim": "${model.object_transformer.embed_dim}",
        "num_summaries": "${model.object_transformer.num_queries}"
      },
      "object_transformer": {
        "embed_dim": "${model.embed_dim}",
        "ff_dim": 2048,
        "num_blocks": 3,
        "num_heads": 8,
        "num_queries": 16,
        "pixel_self_attention": {
          "add_pe_to_qkv": [
            true,
            true,
            false
          ]
        },
        "query_self_attention": {
          "add_pe_to_qkv": [
            true,
            true,
            false
          ]
        },
        "read_from_memory": {
          "add_pe_to_qkv": [
            true,
            true,
            false
          ]
        },
        "read_from_past": {
          "add_pe_to_qkv": [
            true,
            true,
            false
          ]
        },
        "read_from_pixel": {
          "add_pe_to_qkv": [
            true,
            true,
            false
          ],
          "input_add_pe": false,
          "input_norm": false
        },
        "read_from_query": {
          "add_pe_to_qkv": [
            true,
            true,
            false
          ],
          "output_norm": false
        }
      },
      "pixel_dim": 256,
      "pixel_encoder": {
        "ms_dims": [
          1024,
          512,
          256,
          64,
          3
        ],
        "type": "resnet50"
      },
      "pixel_mean": [
        0.485,
        0.456,
        0.406
      ],
      "pixel_pe_scale": 32,
      "pixel_pe_temperature": 128,
      "pixel_std": [
        0.229,
        0.224,
        0.225
      ],
      "pretrained_resnet": false,
      "sensory_dim": 256,
      "value_dim": 256
    },
    "output_dir": null,
    "save_all": true,
    "save_aux": false,
    "save_scores": false,
    "stagger_updates": 5,
    "top_k": 30,
    "use_all_masks": false,
    "use_long_term": false,
    "visualize": false,
    "weights": "pretrained_models/matanyone.pth"
  },
  "single_object": true
}