# Source: sowa / SOWA / configs / model / sparc_hfwa.yaml
# Hosting-page header kept for provenance: "zongxiang's picture",
# "Upload 116 files", "7fe0374 verified".
# Hydra-style `_target_`: dotted import path of the class this config builds.
_target_: src.models.anomaly_clip_module.AnomalyCLIPModule
# Optimizer settings. `_partial_: true` asks Hydra for a functools.partial, so
# the remaining constructor arguments are supplied later by the consumer
# (presumably the model parameters - confirm in the target module).
optimizer:
  _target_: torch.optim.AdamW
  _partial_: true
  lr: 0.001
  weight_decay: 0.2
# Learning-rate scheduler, also built as a partial (`_partial_: true`).
scheduler:
  _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
  _partial_: true
  mode: min  # act when the monitored quantity stops decreasing
  factor: 0.1  # multiplier applied to the learning rate on each reduction
  patience: 5  # epochs without improvement tolerated before reducing

# Alternative scheduler, currently disabled:
# scheduler:
#   _target_: src.models.components.scheduler.WarmupCosineAnnealingLR
#   _partial_: true
#   warmup_epochs: 10
#   total_epoch: 50
# Network definition.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been flattened. Confirm each key's level against the
# constructors named by the `_target_` entries.
net:
  _target_: src.models.components.sparc.SPARC
  arch: ViT-L/14@336px
  image_size: 336
  temperature: 0.07  # softmax
  # Index of resnetblock in ViT.
  # Other values listed by the author: [0, 12, 23], [6, 12, 18].
  feature_map_idx: [5, 11, 17, 23]

  prompt_learner:
    _target_: src.models.components.coop.AnomalyPromptLearner
    _partial_: true
    tokenizer:
      _target_: src.models.components.clip.simple_tokenizer.SimpleTokenizer
    prompt_length: 12  # length of learnable prompts
    context_length: 77  # default 77 for openai clip
    truncate: false
    class_names: ["object"]
    # class_names: ${prompt.class_names}
    # state_template: ${prompt.state_template}
    state_template:
      normal: ["{}"]
      anomaly: ["damaged {}"]

  text_encoder:
    _target_: src.models.components.text_encoder.TextMapEncoder
    _partial_: true

  adapter:
    _target_: src.models.components.adapter.BasicLayer
    _partial_: true
    # (image_size - kernel_size) / stride + 1, e.g. 24 = (336 - 14) / 14 + 1
    input_resolution: [24, 24]
    window_size: 12
    depth: 1  # if depth < 2, there is no window shift
    num_heads: 8
    hidden_features: null  # set null, same as nn.Linear
    cpb_dim: 64
    value_only: true
    drop: 0.0
    attn_drop: 0.1

  fusion:
    _target_: src.models.components.cross_modal.DotProductFusion

  # clip fusion feature dim, default 768, only effective for non null
  # NOTE(review): placed at the `net` level as a sibling of `fusion`; if it is
  # an argument of the fusion class instead, indent it under `fusion`.
  embedding_dim: 768
# Loss terms. Each entry names the class to instantiate via `_target_`; how
# the terms are combined is decided by the consuming code, not by this file.
loss:
  cross_entropy:
    _target_: torch.nn.CrossEntropyLoss
  focal:
    _target_: src.models.components.loss.FocalLoss
  dice:
    _target_: src.models.components.loss.BinaryDiceLoss
# Remaining scalar options of this config. What each one controls is decided
# by the consuming code, which is not visible in this file.
# NOTE(review): presumably toggles a few-shot mode - confirm.
k_shot: false
# NOTE(review): `filter` is not explained anywhere in this file - confirm its meaning.
filter: true
# NOTE(review): presumably enables a validation pass during training - confirm.
enable_validation: false
# NOTE(review): presumably requests torch.compile on the model - confirm.
compile: false