fffiloni committed on
Commit
26fdcb7
1 Parent(s): 2338cb6

Delete configs

Browse files
configs/xdecoder/svlp_focalt_lang.yaml DELETED
@@ -1,110 +0,0 @@
1
- # --------------------------------------------------------
2
- # X-Decoder -- Generalized Decoding for Pixel, Image, and Language
3
- # Copyright (c) 2022 Microsoft
4
- # Licensed under The MIT License [see LICENSE for details]
5
- # Written by Xueyan Zou (xueyan@cs.wisc.edu)
6
- # --------------------------------------------------------
7
-
8
- ##################
9
- # Task settings
10
- ##################
11
- VERBOSE: true
12
- MODEL:
13
- NAME: xdecoder_model
14
- HEAD: xdecoder_head
15
- DIM_PROJ: 512
16
- BACKBONE_DIM: 768
17
- TEXT:
18
- ARCH: vlpencoder
19
- NAME: transformer
20
- TOKENIZER: clip
21
- CONTEXT_LENGTH: 77 # 77
22
- WIDTH: 512
23
- HEADS: 8
24
- LAYERS: 12 # 6
25
- AUTOGRESSIVE: True
26
- BACKBONE:
27
- NAME: focal_dw
28
- PRETRAINED: ''
29
- LOAD_PRETRAINED: false
30
- FOCAL:
31
- PRETRAIN_IMG_SIZE: 224
32
- PATCH_SIZE: 4
33
- EMBED_DIM: 96
34
- DEPTHS: [2, 2, 6, 2]
35
- FOCAL_LEVELS: [3, 3, 3, 3]
36
- FOCAL_WINDOWS: [3, 3, 3, 3]
37
- DROP_PATH_RATE: 0.3
38
- MLP_RATIO: 4.0
39
- DROP_RATE: 0.0
40
- PATCH_NORM: True
41
- USE_CONV_EMBED: True
42
- SCALING_MODULATOR: True
43
- USE_CHECKPOINT: False
44
- USE_POSTLN: true
45
- USE_POSTLN_IN_MODULATION: false
46
- USE_LAYERSCALE: True
47
- OUT_FEATURES: ["res2", "res3", "res4", "res5"]
48
- OUT_INDICES: [0, 1, 2, 3]
49
- ENCODER:
50
- NAME: transformer_encoder_fpn
51
- IGNORE_VALUE: 255
52
- NUM_CLASSES: 133
53
- LOSS_WEIGHT: 1.0
54
- CONVS_DIM: 512
55
- MASK_DIM: 512
56
- NORM: "GN"
57
- IN_FEATURES: ["res2", "res3", "res4", "res5"]
58
- DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"]
59
- COMMON_STRIDE: 4
60
- TRANSFORMER_ENC_LAYERS: 6
61
- DECODER:
62
- NAME: xdecoder
63
- TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder"
64
- MASK: True
65
- GROUNDING:
66
- ENABLED: True
67
- MAX_LEN: 5
68
- TEXT_WEIGHT: 2.0
69
- CLASS_WEIGHT: 0.5
70
- DETECTION: False
71
- CAPTION:
72
- ENABLED: True
73
- PHRASE_PROB: 0.0
74
- SIM_THRES: 0.95
75
- CAPTIONING:
76
- ENABLED: True
77
- STEP: 50
78
- RETRIEVAL:
79
- ENABLED: True
80
- DIM_IMG: 768
81
- ENSEMBLE: True
82
- HIDDEN_DIM: 512
83
- NUM_OBJECT_QUERIES: 101
84
- NHEADS: 8
85
- DROPOUT: 0.0
86
- DIM_FEEDFORWARD: 2048
87
- PRE_NORM: False
88
- ENFORCE_INPUT_PROJ: False
89
- SIZE_DIVISIBILITY: 32
90
- TRAIN_NUM_POINTS: 12544
91
- OVERSAMPLE_RATIO: 3.0
92
- IMPORTANCE_SAMPLE_RATIO: 0.75
93
- DEC_LAYERS: 10 # 9 decoder layers, plus one extra for the loss on the learnable query
94
- TOP_GROUNDING_LAYERS: 3
95
- TOP_CAPTION_LAYERS: 3
96
- TOP_CAPTIONING_LAYERS: 3
97
- TOP_RETRIEVAL_LAYERS: 3
98
- TOP_OPENIMAGE_LAYERS: 10
99
- TEST:
100
- SEMANTIC_ON: True
101
- INSTANCE_ON: True
102
- PANOPTIC_ON: True
103
- OVERLAP_THRESHOLD: 0.8
104
- OBJECT_MASK_THRESHOLD: 0.4
105
- SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE: false
106
- DETECTIONS_PER_IMAGE: 100
107
-
108
- INPUT:
109
- PIXEL_MEAN: [123.675, 116.280, 103.530]
110
- PIXEL_STD: [58.395, 57.120, 57.375]