_target_: src.models.anomaly_clip_module.AnomalyCLIPModule

optimizer:
  _target_: torch.optim.AdamW
  _partial_: true
  lr: 0.001
  weight_decay: 0.2
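# Note (illustrative, not part of this config): with `_partial_: true`,
# hydra.utils.instantiate returns a functools.partial rather than a built
# optimizer, so the module can bind its own parameters later, e.g.:
#   optimizer = hydra.utils.instantiate(cfg.optimizer)(params=self.parameters())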

scheduler:
  _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
  _partial_: true
  mode: min
  factor: 0.1
  patience: 5
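# The scheduler partial is bound to the optimizer the same way; note that
# PyTorch Lightning requires ReduceLROnPlateau to be returned together with a
# monitored metric (the metric name below is an assumption):
#   scheduler = hydra.utils.instantiate(cfg.scheduler)(optimizer=optimizer)
#   return {"optimizer": optimizer,
#           "lr_scheduler": {"scheduler": scheduler, "monitor": "val/loss"}}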

# scheduler:
#   _target_: src.models.components.scheduler.WarmupCosineAnnealingLR
#   _partial_: true
#   warmup_epochs: 10
#   total_epoch: 50

net:
  _target_: src.models.components.sparc.SPARC
  arch: ViT-L/14@336px
  image_size: 336
  temperature: 0.07                     # softmax temperature for the similarity logits
  feature_map_idx: [5, 11, 17, 23]      # indices of ViT transformer blocks to tap (alternatives: [0, 12, 23], [6, 12, 18])
  share_weight: true                    # whether the adapter shares weights for different feature maps
  prompt_learner:
    _target_: src.models.components.coop.PromptEncoder
    _partial_: true
    tokenizer:
      _target_: src.models.components.clip.simple_tokenizer.SimpleTokenizer
    context_length: 77                    # default 77 for OpenAI CLIP
    truncate: false
    class_names: ${prompt.class_names}
    prompt_normal: ${prompt.template.normal}
    prompt_abnormal: ${prompt.template.abnormal}
    prompt_templates: ${prompt.template.templates}
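  # The ${prompt.*} entries are OmegaConf interpolations, resolved when Hydra
  # composes the full config from a separate `prompt` config group (the group
  # layout is assumed from the lightning-hydra-template convention).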
  adapter:
    _target_: torch.nn.Linear
    in_features: 1024                   # CLIP ViT feature dim, default 1024 for OpenAI CLIP ViT-L/14
    out_features: 1024
    bias: false
    # _target_: src.models.components.adapter.BasicLayer
    # _partial_: true
    # input_resolution: [24, 24]          # (image_size - kernel_size) / stride + 1, e.g. 24 = (336 - 14) / 14 + 1
    # window_size: 12
    # depth: 1                            # if depth < 2, there is no window shift
    # num_heads: 8
    # hidden_features: null               # set null, same as nn.Linear
    # cpb_dim: 64
    # value_only: true
    # drop: 0.0
    # attn_drop: 0.1
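    # Presumed application of the adapter (an assumption, not stated in this
    # file): each tapped feature map of shape [B, N, 1024] is passed through
    # the projection above; with share_weight: true a single instance serves
    # all entries of feature_map_idx, otherwise one copy is made per index.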
  fusion:
    _target_: src.models.components.cross_modal.DotProductFusion
  embedding_dim: 768                    # CLIP fusion feature dim, default 768; only used when non-null
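# Everything under `net` is built recursively by Hydra, e.g. (illustrative;
# `cfg.model` assumes this file lives in the model config group):
#   module = hydra.utils.instantiate(cfg.model)   # AnomalyCLIPModule with SPARC net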

loss:
  cross_entropy:
    _target_: torch.nn.CrossEntropyLoss
  focal:
    _target_: src.models.components.loss.FocalLoss
  dice:
    _target_: src.models.components.loss.BinaryDiceLoss
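# How these losses are combined is decided inside AnomalyCLIPModule, not here;
# a common pattern for this kind of model (an assumption) is cross-entropy on
# image-level logits plus focal + dice on the pixel-level anomaly map.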

k_shot: false

filter: true

enable_validation: false

compile: false                          # whether to torch.compile the model (assumed lightning-hydra-template semantics)
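# Usage sketch (script path assumed from the lightning-hydra-template layout):
# any leaf above can be overridden from the Hydra CLI, e.g.
#   python src/train.py model.optimizer.lr=3e-4 model.net.share_weight=false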