jinlinyi commited on
Commit
19658e6
1 Parent(s): 22539eb
.gitattributes CHANGED
@@ -32,3 +32,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
# Demo images and model checkpoints are tracked through Git LFS.
assets/imgs/cityscape.jpg filter=lfs diff=lfs merge=lfs -text
assets/imgs/i4.png filter=lfs diff=lfs merge=lfs -text
models/cvpr2023.pth filter=lfs diff=lfs merge=lfs -text
models/paramnet_gsv_rpfpp.pth filter=lfs diff=lfs merge=lfs -text
models/paramnet_gsv_rpf.pth filter=lfs diff=lfs merge=lfs -text
assets/imgs/907px-Vincent_van_Gogh_-_De_slaapkamer_-_Google_Art_Project.jpg filter=lfs diff=lfs merge=lfs -text
assets/imgs/AdobeStock_286429091.jpeg filter=lfs diff=lfs merge=lfs -text
assets/imgs/AdobeStock_331358641.jpeg filter=lfs diff=lfs merge=lfs -text
# In an attributes file a trailing-slash directory pattern does not apply to
# the files inside that directory; "**" is required to cover everything
# under assets/imgs/.
assets/imgs/** filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
# The PerspectiveFields package is installed from GitHub at start-up using a
# token taken from the environment; a missing GITHUB_TOKEN raises KeyError.
token = os.environ["GITHUB_TOKEN"]
# In an https:// URL the host and repository path are separated by "/".
# The "host:owner/repo" form is scp-style SSH syntax and is rejected when it
# appears inside an https URL (the text after ":" is parsed as a port).
os.system(f"pip install git+https://jinlinyi:{token}@github.com/jinlinyi/PerspectiveFields.git@dev")
5
+
6
+ import gradio as gr
7
+ import cv2
8
+ import copy
9
+ import torch
10
+ from PIL import Image, ImageDraw
11
+ from glob import glob
12
+ import numpy as np
13
+ import os.path as osp
14
+ from detectron2.config import get_cfg
15
+ from detectron2.data.detection_utils import read_image
16
+ from perspective2d.utils.predictor import VisualizationDemo
17
+ import perspective2d.modeling # noqa
18
+ from perspective2d.config import get_perspective2d_cfg_defaults
19
+ from perspective2d.utils import draw_from_r_p_f_cx_cy
20
+
21
+
22
+
23
+
24
# Static text handed to gr.Interface: page title, HTML shown above the demo
# (description) and HTML shown below it (article).
title = "Perspective Fields Demo"

description = """
<p style="text-align: center">
<a href="https://jinlinyi.github.io/PerspectiveFields/" target="_blank">Project Page</a> |
<a href="https://arxiv.org/abs/2212.03239" target="_blank">Paper</a> |
<a href="https://github.com/jinlinyi/PerspectiveFields" target="_blank">Code</a> |
<a href="https://www.youtube.com/watch?v=sN5B_ZvMva8&themeRefresh=1" target="_blank">Video</a>
</p>
<h2>Gradio Demo</h2>
<p>Try our Gradio demo for Perspective Fields for single image camera calibration. You can click on one of the provided examples or upload your own image.</p>
<h3>Available Models:</h3>
<ol>
<li><strong>PersNet-360Cities:</strong> PerspectiveNet trained on the 360Cities dataset. This model predicts perspective fields and is designed to be robust and generalize well to both indoor and outdoor images.</li>
<li><strong>PersNet_Paramnet-GSV-uncentered:</strong> A combination of PerspectiveNet and ParamNet trained on the Google Street View (GSV) dataset. This model predicts camera Roll, Pitch, and Field of View (FoV), as well as the Principal Point location.</li>
<li><strong>PersNet_Paramnet-GSV-centered:</strong> PerspectiveNet+ParamNet trained on the GSV dataset. This model assumes the principal point is at the center of the image and predicts camera Roll, Pitch, and FoV.</li>
</ol>
"""


# The link text is the paper title; it is singular ("Calibration").
article = """
<p style='text-align: center'><a href='https://arxiv.org/abs/2212.03239' target='_blank'>Perspective Fields for Single Image Camera Calibration</a> | <a href='https://github.com/jinlinyi/PerspectiveFields' target='_blank'>Github Repo</a></p>
"""
47
+
48
def setup_cfg(args):
    """Build one frozen config per '#'-separated entry of ``args['config_file']``.

    ``args['opts']`` is a flat override list that must contain the key
    'MODEL.WEIGHTS' immediately followed by a '#'-separated string of weight
    paths. The i-th weight path is paired with the i-th config path; empty
    config entries are skipped but still occupy their position.

    Returns:
        list: the frozen config objects, in config-file order.
    """
    config_paths = args['config_file'].split('#')
    # Position of the value that follows the 'MODEL.WEIGHTS' key.
    weights_slot = args['opts'].index('MODEL.WEIGHTS') + 1
    weight_paths = args['opts'][weights_slot].split('#')

    frozen_cfgs = []
    for position, config_path in enumerate(config_paths):
        if not config_path:
            continue
        # Each config gets its own copy of the overrides with a single
        # weight path substituted for the '#'-joined string.
        per_model_opts = copy.deepcopy(args['opts'])
        per_model_opts[weights_slot] = weight_paths[position]

        cfg = get_cfg()
        get_perspective2d_cfg_defaults(cfg)
        cfg.merge_from_file(config_path)
        cfg.merge_from_list(per_model_opts)
        cfg.freeze()
        frozen_cfgs.append(cfg)
    return frozen_cfgs
64
+
65
def resize_fix_aspect_ratio(img, field, target_width=None, target_height=None):
    """Resize an image and its 'up'/'lati' maps while keeping the aspect ratio.

    Args:
        img: Image array; ``img.shape[0]`` is read as the height and
            ``img.shape[1]`` as the width.
        field: Dict of maps. Only the 'up' and 'lati' entries are resized;
            each must expose ``.numpy()``. The dict is updated in place.
        target_width: Desired output width, or None to derive it from
            ``target_height``.
        target_height: Desired output height, or None to derive it from
            ``target_width``.

    When both targets are given, the larger of the two scale factors is used
    and the other dimension is recomputed from it.

    Returns:
        tuple: ``(img, field)`` with the resized image and the same dict.

    Raises:
        TypeError: if neither ``target_width`` nor ``target_height`` is given.
    """
    if target_width is None and target_height is None:
        raise TypeError(
            "resize_fix_aspect_ratio() requires target_width and/or target_height"
        )

    height = img.shape[0]
    width = img.shape[1]
    if target_height is None:
        factor = target_width / width
        target_height = int(height * factor)
    elif target_width is None:
        # Height-only mode: derive the width here instead of comparing
        # against target_width / width, which fails when target_width is None.
        factor = target_height / height
        target_width = int(width * factor)
    else:
        factor = max(target_width / width, target_height / height)
        if factor == target_width / width:
            target_height = int(height * factor)
        else:
            target_width = int(width * factor)

    img = cv2.resize(img, (target_width, target_height))
    for key in ('up', 'lati'):
        if key not in field:
            continue
        tmp = field[key].numpy()
        # 3-D maps are moved to channel-last for cv2.resize and back again
        # afterwards.
        transpose = len(tmp.shape) == 3
        if transpose:
            tmp = tmp.transpose(1, 2, 0)
        tmp = cv2.resize(tmp, (target_width, target_height))
        if transpose:
            tmp = tmp.transpose(2, 0, 1)
        field[key] = torch.tensor(tmp)
    return img, field
92
+
93
+
94
def inference(img, model_type):
    """Run the chosen model on an image and render its prediction.

    Args:
        img: Input image array; its last axis is reversed before prediction
            (RGB -> BGR).
        model_type: Key into ``model_zoo`` selecting the model to run.

    Returns:
        tuple: ``(PIL.Image, str)`` with the rendered visualization and a
        text summary of the predicted camera parameters ("Not Implemented"
        for models whose zoo entry has ``'param'`` set to False).
    """
    zoo_entry = model_zoo[model_type]
    predictor = VisualizationDemo(cfg_list=setup_cfg(zoo_entry))

    bgr = img[..., ::-1]  # rgb->bgr
    pred = predictor.run_on_image(bgr)
    maps = {
        'up': pred['pred_gravity_original'].cpu().detach(),
        'lati': pred['pred_latitude_original'].cpu().detach(),
    }
    # Visualize at a fixed width of 640 pixels.
    bgr, maps = resize_fix_aspect_ratio(bgr, maps, 640)

    if not zoo_entry['param']:
        rendered = predictor.draw(
            image=bgr,
            latimap=maps['lati'],
            gravity=maps['up'],
            latimap_format=pred['pred_latitude_original_mode'],
        ).get_image()
        return Image.fromarray(rendered), "Not Implemented"

    # Fill in outputs the model did not produce: fall back to 'pred_vfov'
    # for the field of view and to a zero principal-point offset.
    if 'pred_general_vfov' not in pred:
        pred['pred_general_vfov'] = pred['pred_vfov']
    for offset_key in ('pred_rel_cx', 'pred_rel_cy'):
        if offset_key not in pred:
            pred[offset_key] = torch.FloatTensor([0])

    roll = pred['pred_roll'].cpu().item()
    pitch = pred['pred_pitch'].cpu().item()
    vfov = pred['pred_general_vfov'].cpu().item()
    rel_cx = pred['pred_rel_cx'].cpu().item()
    rel_cy = pred['pred_rel_cy'].cpu().item()

    summary = f"roll {roll:.2f}\npitch {pitch:.2f}\nfov {vfov:.2f}\n"
    summary += f"principal point {rel_cx:.2f} {rel_cy:.2f}"

    # The drawing helper is given angles in radians ('rad') and the image
    # with its channel order flipped back.
    rendered = draw_from_r_p_f_cx_cy(
        bgr[:, :, ::-1],
        *np.radians([roll, pitch, vfov]),
        rel_cx,
        rel_cy,
        'rad',
        up_color=(0, 1, 0),
    )
    return Image.fromarray(rendered), summary
143
+
144
# Gradio example rows: one single-element list per file under assets/imgs
# whose extension ends in "g" (e.g. .jpg, .jpeg, .png). glob() returns
# matches in arbitrary order, so sort them to keep the gallery order stable.
examples = [[img_name] for img_name in sorted(glob('assets/imgs/*.*g'))]
print(examples)
148
+
149
+ model_zoo = {
150
+ 'PersNet-360Cities': {
151
+ 'weights': ['https://www.dropbox.com/s/czqrepqe7x70b7y/cvpr2023.pth'],
152
+ 'opts': ['MODEL.WEIGHTS', 'models/cvpr2023.pth'],
153
+ 'config_file': 'models/cvpr2023.yaml',
154
+ 'param': False,
155
+ },
156
+ 'PersNet_Paramnet-GSV-uncentered': {
157
+ 'weights': ['https://www.dropbox.com/s/ufdadxigewakzlz/paramnet_gsv_rpfpp.pth'],
158
+ 'opts': ['MODEL.WEIGHTS', 'models/paramnet_gsv_rpfpp.pth'],
159
+ 'config_file': 'models/paramnet_gsv_rpfpp.yaml',
160
+ 'param': True,
161
+ },
162
+ # trained on GSV dataset, predicts Perspective Fields + camera parameters (roll, pitch, fov), assuming centered principal point
163
+ 'PersNet_Paramnet-GSV-centered': {
164
+ 'weights': ['https://www.dropbox.com/s/g6xwbgnkggapyeu/paramnet_gsv_rpf.pth'],
165
+ 'opts': ['MODEL.WEIGHTS', 'models/paramnet_gsv_rpf.pth'],
166
+ 'config_file': 'models/paramnet_gsv_rpf.yaml',
167
+ 'param': True,
168
+ },
169
+ }
170
# Fetch every checkpoint that is not already present under models/.
# Iterate the zoo itself (the loop variable cannot be used before it is
# bound), and treat 'weights' as the list of URLs that it is.
for model_id in model_zoo:
    for url in model_zoo[model_id]['weights']:
        if not os.path.exists(os.path.join('models', url.split('/')[-1])):
            os.system(f"wget -P models/ {url}")
174
+
175
# Assemble the Gradio UI (image input + model selector -> rendered image +
# parameter text) and start serving it with a public share link.
info = "Select model\n"
_model_names = list(model_zoo.keys())
_model_picker = gr.Radio(
    _model_names,
    # Pre-select the alphabetically first model name.
    value=sorted(_model_names)[0],
    label="Model",
    info=info,
)
_outputs = [
    gr.Image(label='Perspective Fields'),
    gr.Textbox(label='Pred Camera Parameters'),
]
gr.Interface(
    fn=inference,
    inputs=["image", _model_picker],
    outputs=_outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
).launch(share=True)
assets/imgs/907px-Vincent_van_Gogh_-_De_slaapkamer_-_Google_Art_Project.jpg ADDED

Git LFS Details

  • SHA256: ae7438d983c5d9e101e152d69aa7406062008eb3dad62838ff54582341bc5e03
  • Pointer size: 131 Bytes
  • Size of remote file: 238 kB
assets/imgs/AdobeStock_286429091.jpeg ADDED

Git LFS Details

  • SHA256: ffabb0b4ec9cff8008b56a8da336a142d02f078df9d44985cd02cbcb4a31ea0c
  • Pointer size: 132 Bytes
  • Size of remote file: 2.87 MB
assets/imgs/AdobeStock_331358641.jpeg ADDED

Git LFS Details

  • SHA256: c0c351c7354bd2bed17279f910434d84142d4eb249228da10d5cec79de8141e8
  • Pointer size: 132 Bytes
  • Size of remote file: 1.36 MB
assets/imgs/cityscape.jpg ADDED

Git LFS Details

  • SHA256: b94997a7fd59f80d41f302fee151ab31ef8bda84d7ecbb811c72d07e93a0831d
  • Pointer size: 131 Bytes
  • Size of remote file: 292 kB
assets/imgs/i4.png ADDED

Git LFS Details

  • SHA256: 9ed83a9912a073326766736b8869cb5bdfee6b7539995bc55719a7517f73e755
  • Pointer size: 132 Bytes
  • Size of remote file: 1.3 MB
models/cvpr2023.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7107c87663d08cd45159f5c36913bba4c5e42c6d424d065c7e662481e88bab4
3
+ size 307275893
models/cvpr2023.yaml ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ AUGMENTATION: true
5
+ AUGMENTATION_FUN: default
6
+ AUGMENTATION_TYPE: geometry
7
+ FILTER_EMPTY_ANNOTATIONS: true
8
+ NUM_WORKERS: 8
9
+ REPEAT_THRESHOLD: 0.0
10
+ RESIZE:
11
+ - 320
12
+ - 320
13
+ SAMPLER_TRAIN: TrainingSampler
14
+ DATASETS:
15
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
16
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
17
+ PROPOSAL_FILES_TEST: []
18
+ PROPOSAL_FILES_TRAIN: []
19
+ TEST:
20
+ - stanford2d3d_test
21
+ - tartanair_test
22
+ TRAIN:
23
+ - cities360_train
24
+ - rgbdpano_train
25
+ DEBUG_ON: false
26
+ GLOBAL:
27
+ HACK: 1.0
28
+ INPUT:
29
+ CROP:
30
+ ENABLED: false
31
+ SIZE:
32
+ - 0.9
33
+ - 0.9
34
+ TYPE: relative_range
35
+ FORMAT: BGR
36
+ MASK_FORMAT: polygon
37
+ MAX_SIZE_TEST: 1333
38
+ MAX_SIZE_TRAIN: 1333
39
+ MIN_SIZE_TEST: 800
40
+ MIN_SIZE_TRAIN:
41
+ - 800
42
+ MIN_SIZE_TRAIN_SAMPLING: choice
43
+ ONLINE_CROP: false
44
+ RANDOM_FLIP: horizontal
45
+ MODEL:
46
+ ANCHOR_GENERATOR:
47
+ ANGLES:
48
+ - - -90
49
+ - 0
50
+ - 90
51
+ ASPECT_RATIOS:
52
+ - - 0.5
53
+ - 1.0
54
+ - 2.0
55
+ NAME: DefaultAnchorGenerator
56
+ OFFSET: 0.0
57
+ SIZES:
58
+ - - 32
59
+ - 64
60
+ - 128
61
+ - 256
62
+ - 512
63
+ BACKBONE:
64
+ FREEZE_AT: 2
65
+ NAME: build_mit_backbone
66
+ CENTER_ON: false
67
+ DEVICE: cuda
68
+ FPN:
69
+ FUSE_TYPE: sum
70
+ IN_FEATURES: []
71
+ NORM: ''
72
+ OUT_CHANNELS: 256
73
+ FPN_CENTER_HEAD:
74
+ COMMON_STRIDE: 4
75
+ CONVS_DIM: 128
76
+ IGNORE_VALUE: 360
77
+ IN_FEATURES:
78
+ - p2
79
+ - p3
80
+ - p4
81
+ - p5
82
+ LOSS_WEIGHT: 1.0
83
+ NAME: CenterFPNHead
84
+ NORM: GN
85
+ NUM_CLASSES: 30
86
+ FPN_GRAVITY_HEAD:
87
+ COMMON_STRIDE: 4
88
+ CONVS_DIM: 128
89
+ IGNORE_VALUE: 360
90
+ IN_FEATURES:
91
+ - p2
92
+ - p3
93
+ - p4
94
+ - p5
95
+ LOSS_WEIGHT: 1.0
96
+ NAME: GravityFPNHead
97
+ NORM: GN
98
+ NUM_CLASSES: 361
99
+ FPN_HEADS:
100
+ NAME: StandardFPNHeads
101
+ FPN_LATITUDE_HEAD:
102
+ COMMON_STRIDE: 4
103
+ CONVS_DIM: 128
104
+ IGNORE_VALUE: -1
105
+ IN_FEATURES:
106
+ - p2
107
+ - p3
108
+ - p4
109
+ - p5
110
+ LOSS_WEIGHT: 1.0
111
+ NAME: LatitudeFPNHead
112
+ NORM: GN
113
+ NUM_CLASSES: 9
114
+ FREEZE: []
115
+ GRAVITY_DECODER:
116
+ IGNORE_VALUE: 72
117
+ LOSS_TYPE: classification
118
+ LOSS_WEIGHT: 1.0
119
+ NAME: GravityDecoder
120
+ NUM_CLASSES: 73
121
+ GRAVITY_ON: true
122
+ HEIGHT_DECODER:
123
+ LOSS_WEIGHT: 1.0
124
+ NAME: HeightDecoder
125
+ HEIGHT_ON: false
126
+ KEYPOINT_ON: false
127
+ LATITUDE_DECODER:
128
+ IGNORE_VALUE: -1
129
+ LOSS_TYPE: classification
130
+ LOSS_WEIGHT: 1.0
131
+ NAME: LatitudeDecoder
132
+ NUM_CLASSES: 180
133
+ LATITUDE_ON: true
134
+ LOAD_PROPOSALS: false
135
+ MASK_ON: false
136
+ META_ARCHITECTURE: PersFormer
137
+ PANOPTIC_FPN:
138
+ COMBINE:
139
+ ENABLED: true
140
+ INSTANCES_CONFIDENCE_THRESH: 0.5
141
+ OVERLAP_THRESH: 0.5
142
+ STUFF_AREA_LIMIT: 4096
143
+ INSTANCE_LOSS_WEIGHT: 1.0
144
+ PARAM_DECODER:
145
+ DEBUG_LAT: false
146
+ DEBUG_UP: false
147
+ INPUT_SIZE: 320
148
+ LOSS_TYPE: regression
149
+ LOSS_WEIGHT: 1.0
150
+ NAME: ParamNet
151
+ PREDICT_PARAMS:
152
+ - roll
153
+ - pitch
154
+ - rel_focal
155
+ - rel_cx
156
+ - rel_cy
157
+ SYNTHETIC_PRETRAIN: false
158
+ PERSFORMER_HEADS:
159
+ NAME: StandardPersformerHeads
160
+ PIXEL_MEAN:
161
+ - 103.53
162
+ - 116.28
163
+ - 123.675
164
+ PIXEL_STD:
165
+ - 1.0
166
+ - 1.0
167
+ - 1.0
168
+ PROPOSAL_GENERATOR:
169
+ MIN_SIZE: 0
170
+ NAME: RPN
171
+ RECOVER_PP: false
172
+ RECOVER_RPF: false
173
+ RESNETS:
174
+ DEFORM_MODULATED: false
175
+ DEFORM_NUM_GROUPS: 1
176
+ DEFORM_ON_PER_STAGE:
177
+ - false
178
+ - false
179
+ - false
180
+ - false
181
+ DEPTH: 50
182
+ NORM: FrozenBN
183
+ NUM_GROUPS: 1
184
+ OUT_FEATURES:
185
+ - res4
186
+ RES2_OUT_CHANNELS: 256
187
+ RES5_DILATION: 1
188
+ STEM_OUT_CHANNELS: 64
189
+ STRIDE_IN_1X1: true
190
+ WIDTH_PER_GROUP: 64
191
+ RETINANET:
192
+ BBOX_REG_LOSS_TYPE: smooth_l1
193
+ BBOX_REG_WEIGHTS: &id002
194
+ - 1.0
195
+ - 1.0
196
+ - 1.0
197
+ - 1.0
198
+ FOCAL_LOSS_ALPHA: 0.25
199
+ FOCAL_LOSS_GAMMA: 2.0
200
+ IN_FEATURES:
201
+ - p3
202
+ - p4
203
+ - p5
204
+ - p6
205
+ - p7
206
+ IOU_LABELS:
207
+ - 0
208
+ - -1
209
+ - 1
210
+ IOU_THRESHOLDS:
211
+ - 0.4
212
+ - 0.5
213
+ NMS_THRESH_TEST: 0.5
214
+ NORM: ''
215
+ NUM_CLASSES: 80
216
+ NUM_CONVS: 4
217
+ PRIOR_PROB: 0.01
218
+ SCORE_THRESH_TEST: 0.05
219
+ SMOOTH_L1_LOSS_BETA: 0.1
220
+ TOPK_CANDIDATES_TEST: 1000
221
+ ROI_BOX_CASCADE_HEAD:
222
+ BBOX_REG_WEIGHTS:
223
+ - &id001
224
+ - 10.0
225
+ - 10.0
226
+ - 5.0
227
+ - 5.0
228
+ - - 20.0
229
+ - 20.0
230
+ - 10.0
231
+ - 10.0
232
+ - - 30.0
233
+ - 30.0
234
+ - 15.0
235
+ - 15.0
236
+ IOUS:
237
+ - 0.5
238
+ - 0.6
239
+ - 0.7
240
+ ROI_BOX_HEAD:
241
+ BBOX_REG_LOSS_TYPE: smooth_l1
242
+ BBOX_REG_LOSS_WEIGHT: 1.0
243
+ BBOX_REG_WEIGHTS: *id001
244
+ CLS_AGNOSTIC_BBOX_REG: false
245
+ CONV_DIM: 256
246
+ FC_DIM: 1024
247
+ NAME: ''
248
+ NORM: ''
249
+ NUM_CONV: 0
250
+ NUM_FC: 0
251
+ POOLER_RESOLUTION: 14
252
+ POOLER_SAMPLING_RATIO: 0
253
+ POOLER_TYPE: ROIAlignV2
254
+ SMOOTH_L1_BETA: 0.0
255
+ TRAIN_ON_PRED_BOXES: false
256
+ ROI_HEADS:
257
+ BATCH_SIZE_PER_IMAGE: 512
258
+ IN_FEATURES:
259
+ - res4
260
+ IOU_LABELS:
261
+ - 0
262
+ - 1
263
+ IOU_THRESHOLDS:
264
+ - 0.5
265
+ NAME: Res5ROIHeads
266
+ NMS_THRESH_TEST: 0.5
267
+ NUM_CLASSES: 80
268
+ POSITIVE_FRACTION: 0.25
269
+ PROPOSAL_APPEND_GT: true
270
+ SCORE_THRESH_TEST: 0.05
271
+ ROI_KEYPOINT_HEAD:
272
+ CONV_DIMS:
273
+ - 512
274
+ - 512
275
+ - 512
276
+ - 512
277
+ - 512
278
+ - 512
279
+ - 512
280
+ - 512
281
+ LOSS_WEIGHT: 1.0
282
+ MIN_KEYPOINTS_PER_IMAGE: 1
283
+ NAME: KRCNNConvDeconvUpsampleHead
284
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
285
+ NUM_KEYPOINTS: 17
286
+ POOLER_RESOLUTION: 14
287
+ POOLER_SAMPLING_RATIO: 0
288
+ POOLER_TYPE: ROIAlignV2
289
+ ROI_MASK_HEAD:
290
+ CLS_AGNOSTIC_MASK: false
291
+ CONV_DIM: 256
292
+ NAME: MaskRCNNConvUpsampleHead
293
+ NORM: ''
294
+ NUM_CONV: 0
295
+ POOLER_RESOLUTION: 14
296
+ POOLER_SAMPLING_RATIO: 0
297
+ POOLER_TYPE: ROIAlignV2
298
+ RPN:
299
+ BATCH_SIZE_PER_IMAGE: 256
300
+ BBOX_REG_LOSS_TYPE: smooth_l1
301
+ BBOX_REG_LOSS_WEIGHT: 1.0
302
+ BBOX_REG_WEIGHTS: *id002
303
+ BOUNDARY_THRESH: -1
304
+ CONV_DIMS:
305
+ - -1
306
+ HEAD_NAME: StandardRPNHead
307
+ IN_FEATURES:
308
+ - res4
309
+ IOU_LABELS:
310
+ - 0
311
+ - -1
312
+ - 1
313
+ IOU_THRESHOLDS:
314
+ - 0.3
315
+ - 0.7
316
+ LOSS_WEIGHT: 1.0
317
+ NMS_THRESH: 0.7
318
+ POSITIVE_FRACTION: 0.5
319
+ POST_NMS_TOPK_TEST: 1000
320
+ POST_NMS_TOPK_TRAIN: 2000
321
+ PRE_NMS_TOPK_TEST: 6000
322
+ PRE_NMS_TOPK_TRAIN: 12000
323
+ SMOOTH_L1_BETA: 0.0
324
+ SEM_SEG_HEAD:
325
+ COMMON_STRIDE: 4
326
+ CONVS_DIM: 128
327
+ IGNORE_VALUE: 255
328
+ IN_FEATURES:
329
+ - p2
330
+ - p3
331
+ - p4
332
+ - p5
333
+ LOSS_WEIGHT: 1.0
334
+ NAME: SemSegFPNHead
335
+ NORM: GN
336
+ NUM_CLASSES: 54
337
+ WEIGHTS: init_model_weights/ade_pretrained.pth
338
+ OUTPUT_DIR: /home/jinlinyi/exp/densefield/e08_persformer/e15_latiup_mixed_v3
339
+ OVERFIT_ON: false
340
+ SEED: -1
341
+ SOLVER:
342
+ AMP:
343
+ ENABLED: false
344
+ BASE_LR: 0.001
345
+ BIAS_LR_FACTOR: 1.0
346
+ CHECKPOINT_PERIOD: 2000
347
+ CLIP_GRADIENTS:
348
+ CLIP_TYPE: value
349
+ CLIP_VALUE: 1.0
350
+ ENABLED: false
351
+ NORM_TYPE: 2.0
352
+ GAMMA: 0.1
353
+ IMS_PER_BATCH: 64
354
+ LR_SCHEDULER_NAME: WarmupMultiStepLR
355
+ MAX_ITER: 90000
356
+ MOMENTUM: 0.9
357
+ NESTEROV: false
358
+ REFERENCE_WORLD_SIZE: 0
359
+ STEPS:
360
+ - 40000
361
+ - 60000
362
+ WARMUP_FACTOR: 0.001
363
+ WARMUP_ITERS: 1000
364
+ WARMUP_METHOD: linear
365
+ WEIGHT_DECAY: 0.0001
366
+ WEIGHT_DECAY_BIAS: null
367
+ WEIGHT_DECAY_NORM: 0.0
368
+ TEST:
369
+ AUG:
370
+ ENABLED: false
371
+ FLIP: true
372
+ MAX_SIZE: 4000
373
+ MIN_SIZES:
374
+ - 400
375
+ - 500
376
+ - 600
377
+ - 700
378
+ - 800
379
+ - 900
380
+ - 1000
381
+ - 1100
382
+ - 1200
383
+ DETECTIONS_PER_IMAGE: 100
384
+ EVAL_PERIOD: 2000
385
+ EXPECTED_RESULTS: []
386
+ KEYPOINT_OKS_SIGMAS: []
387
+ PRECISE_BN:
388
+ ENABLED: false
389
+ NUM_ITER: 200
390
+ VERSION: 2
391
+ VIS_PERIOD: 200
models/paramnet_gsv_rpf.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3965db8523f573f8037b43696cd08d1e63a498218e28a4ee7ebd50b373bbd56d
3
+ size 418607881
models/paramnet_gsv_rpf.yaml ADDED
@@ -0,0 +1,377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ AUGMENTATION: true
5
+ AUGMENTATION_TYPE: geometry
6
+ FILTER_EMPTY_ANNOTATIONS: true
7
+ NUM_WORKERS: 8
8
+ REPEAT_THRESHOLD: 0.0
9
+ RESIZE:
10
+ - 320
11
+ - 320
12
+ SAMPLER_TRAIN: TrainingSampler
13
+ DATASETS:
14
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
15
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
16
+ PROPOSAL_FILES_TEST: []
17
+ PROPOSAL_FILES_TRAIN: []
18
+ TEST:
19
+ - gsv_val
20
+ - gsv_test
21
+ TRAIN:
22
+ - gsv_train
23
+ GLOBAL:
24
+ HACK: 1.0
25
+ INPUT:
26
+ CROP:
27
+ ENABLED: false
28
+ SIZE:
29
+ - 0.9
30
+ - 0.9
31
+ TYPE: relative_range
32
+ FORMAT: BGR
33
+ MASK_FORMAT: polygon
34
+ MAX_SIZE_TEST: 1333
35
+ MAX_SIZE_TRAIN: 1333
36
+ MIN_SIZE_TEST: 800
37
+ MIN_SIZE_TRAIN:
38
+ - 800
39
+ MIN_SIZE_TRAIN_SAMPLING: choice
40
+ ONLINE_CROP: false
41
+ RANDOM_FLIP: horizontal
42
+ MODEL:
43
+ ANCHOR_GENERATOR:
44
+ ANGLES:
45
+ - - -90
46
+ - 0
47
+ - 90
48
+ ASPECT_RATIOS:
49
+ - - 0.5
50
+ - 1.0
51
+ - 2.0
52
+ NAME: DefaultAnchorGenerator
53
+ OFFSET: 0.0
54
+ SIZES:
55
+ - - 32
56
+ - 64
57
+ - 128
58
+ - 256
59
+ - 512
60
+ BACKBONE:
61
+ FREEZE_AT: 2
62
+ NAME: build_mit_backbone
63
+ CENTER_ON: false
64
+ DEVICE: cuda
65
+ FPN:
66
+ FUSE_TYPE: sum
67
+ IN_FEATURES: []
68
+ NORM: ''
69
+ OUT_CHANNELS: 256
70
+ FPN_CENTER_HEAD:
71
+ COMMON_STRIDE: 4
72
+ CONVS_DIM: 128
73
+ IGNORE_VALUE: 360
74
+ IN_FEATURES:
75
+ - p2
76
+ - p3
77
+ - p4
78
+ - p5
79
+ LOSS_WEIGHT: 1.0
80
+ NAME: CenterFPNHead
81
+ NORM: GN
82
+ NUM_CLASSES: 30
83
+ FPN_GRAVITY_HEAD:
84
+ COMMON_STRIDE: 4
85
+ CONVS_DIM: 128
86
+ IGNORE_VALUE: 360
87
+ IN_FEATURES:
88
+ - p2
89
+ - p3
90
+ - p4
91
+ - p5
92
+ LOSS_WEIGHT: 1.0
93
+ NAME: GravityFPNHead
94
+ NORM: GN
95
+ NUM_CLASSES: 361
96
+ FPN_HEADS:
97
+ NAME: StandardFPNHeads
98
+ FPN_LATITUDE_HEAD:
99
+ COMMON_STRIDE: 4
100
+ CONVS_DIM: 128
101
+ IGNORE_VALUE: -1
102
+ IN_FEATURES:
103
+ - p2
104
+ - p3
105
+ - p4
106
+ - p5
107
+ LOSS_WEIGHT: 1.0
108
+ NAME: LatitudeFPNHead
109
+ NORM: GN
110
+ NUM_CLASSES: 9
111
+ FREEZE: []
112
+ GRAVITY_DECODER:
113
+ IGNORE_VALUE: 72
114
+ LOSS_TYPE: regression
115
+ LOSS_WEIGHT: 1.0
116
+ NAME: GravityDecoder
117
+ NUM_CLASSES: 73
118
+ GRAVITY_ON: true
119
+ HEIGHT_DECODER:
120
+ LOSS_WEIGHT: 1.0
121
+ NAME: HeightDecoder
122
+ HEIGHT_ON: false
123
+ KEYPOINT_ON: false
124
+ LATITUDE_DECODER:
125
+ IGNORE_VALUE: -1
126
+ LOSS_TYPE: regression
127
+ LOSS_WEIGHT: 1.0
128
+ NAME: LatitudeDecoder
129
+ NUM_CLASSES: 1
130
+ LATITUDE_ON: true
131
+ LOAD_PROPOSALS: false
132
+ MASK_ON: false
133
+ META_ARCHITECTURE: PersFormer
134
+ PANOPTIC_FPN:
135
+ COMBINE:
136
+ ENABLED: true
137
+ INSTANCES_CONFIDENCE_THRESH: 0.5
138
+ OVERLAP_THRESH: 0.5
139
+ STUFF_AREA_LIMIT: 4096
140
+ INSTANCE_LOSS_WEIGHT: 1.0
141
+ PARAM_DECODER:
142
+ LOSS_TYPE: regression
143
+ LOSS_WEIGHT: 1.0
144
+ NAME: ParamNet
145
+ PERSFORMER_HEADS:
146
+ NAME: StandardPersformerHeads
147
+ PIXEL_MEAN:
148
+ - 103.53
149
+ - 116.28
150
+ - 123.675
151
+ PIXEL_STD:
152
+ - 1.0
153
+ - 1.0
154
+ - 1.0
155
+ PROPOSAL_GENERATOR:
156
+ MIN_SIZE: 0
157
+ NAME: RPN
158
+ RECOVER_PP: false
159
+ RECOVER_RPF: true
160
+ RESNETS:
161
+ DEFORM_MODULATED: false
162
+ DEFORM_NUM_GROUPS: 1
163
+ DEFORM_ON_PER_STAGE:
164
+ - false
165
+ - false
166
+ - false
167
+ - false
168
+ DEPTH: 50
169
+ NORM: FrozenBN
170
+ NUM_GROUPS: 1
171
+ OUT_FEATURES:
172
+ - res4
173
+ RES2_OUT_CHANNELS: 256
174
+ RES5_DILATION: 1
175
+ STEM_OUT_CHANNELS: 64
176
+ STRIDE_IN_1X1: true
177
+ WIDTH_PER_GROUP: 64
178
+ RETINANET:
179
+ BBOX_REG_LOSS_TYPE: smooth_l1
180
+ BBOX_REG_WEIGHTS: &id002
181
+ - 1.0
182
+ - 1.0
183
+ - 1.0
184
+ - 1.0
185
+ FOCAL_LOSS_ALPHA: 0.25
186
+ FOCAL_LOSS_GAMMA: 2.0
187
+ IN_FEATURES:
188
+ - p3
189
+ - p4
190
+ - p5
191
+ - p6
192
+ - p7
193
+ IOU_LABELS:
194
+ - 0
195
+ - -1
196
+ - 1
197
+ IOU_THRESHOLDS:
198
+ - 0.4
199
+ - 0.5
200
+ NMS_THRESH_TEST: 0.5
201
+ NORM: ''
202
+ NUM_CLASSES: 80
203
+ NUM_CONVS: 4
204
+ PRIOR_PROB: 0.01
205
+ SCORE_THRESH_TEST: 0.05
206
+ SMOOTH_L1_LOSS_BETA: 0.1
207
+ TOPK_CANDIDATES_TEST: 1000
208
+ ROI_BOX_CASCADE_HEAD:
209
+ BBOX_REG_WEIGHTS:
210
+ - &id001
211
+ - 10.0
212
+ - 10.0
213
+ - 5.0
214
+ - 5.0
215
+ - - 20.0
216
+ - 20.0
217
+ - 10.0
218
+ - 10.0
219
+ - - 30.0
220
+ - 30.0
221
+ - 15.0
222
+ - 15.0
223
+ IOUS:
224
+ - 0.5
225
+ - 0.6
226
+ - 0.7
227
+ ROI_BOX_HEAD:
228
+ BBOX_REG_LOSS_TYPE: smooth_l1
229
+ BBOX_REG_LOSS_WEIGHT: 1.0
230
+ BBOX_REG_WEIGHTS: *id001
231
+ CLS_AGNOSTIC_BBOX_REG: false
232
+ CONV_DIM: 256
233
+ FC_DIM: 1024
234
+ NAME: ''
235
+ NORM: ''
236
+ NUM_CONV: 0
237
+ NUM_FC: 0
238
+ POOLER_RESOLUTION: 14
239
+ POOLER_SAMPLING_RATIO: 0
240
+ POOLER_TYPE: ROIAlignV2
241
+ SMOOTH_L1_BETA: 0.0
242
+ TRAIN_ON_PRED_BOXES: false
243
+ ROI_HEADS:
244
+ BATCH_SIZE_PER_IMAGE: 512
245
+ IN_FEATURES:
246
+ - res4
247
+ IOU_LABELS:
248
+ - 0
249
+ - 1
250
+ IOU_THRESHOLDS:
251
+ - 0.5
252
+ NAME: Res5ROIHeads
253
+ NMS_THRESH_TEST: 0.5
254
+ NUM_CLASSES: 80
255
+ POSITIVE_FRACTION: 0.25
256
+ PROPOSAL_APPEND_GT: true
257
+ SCORE_THRESH_TEST: 0.05
258
+ ROI_KEYPOINT_HEAD:
259
+ CONV_DIMS:
260
+ - 512
261
+ - 512
262
+ - 512
263
+ - 512
264
+ - 512
265
+ - 512
266
+ - 512
267
+ - 512
268
+ LOSS_WEIGHT: 1.0
269
+ MIN_KEYPOINTS_PER_IMAGE: 1
270
+ NAME: KRCNNConvDeconvUpsampleHead
271
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
272
+ NUM_KEYPOINTS: 17
273
+ POOLER_RESOLUTION: 14
274
+ POOLER_SAMPLING_RATIO: 0
275
+ POOLER_TYPE: ROIAlignV2
276
+ ROI_MASK_HEAD:
277
+ CLS_AGNOSTIC_MASK: false
278
+ CONV_DIM: 256
279
+ NAME: MaskRCNNConvUpsampleHead
280
+ NORM: ''
281
+ NUM_CONV: 0
282
+ POOLER_RESOLUTION: 14
283
+ POOLER_SAMPLING_RATIO: 0
284
+ POOLER_TYPE: ROIAlignV2
285
+ RPN:
286
+ BATCH_SIZE_PER_IMAGE: 256
287
+ BBOX_REG_LOSS_TYPE: smooth_l1
288
+ BBOX_REG_LOSS_WEIGHT: 1.0
289
+ BBOX_REG_WEIGHTS: *id002
290
+ BOUNDARY_THRESH: -1
291
+ CONV_DIMS:
292
+ - -1
293
+ HEAD_NAME: StandardRPNHead
294
+ IN_FEATURES:
295
+ - res4
296
+ IOU_LABELS:
297
+ - 0
298
+ - -1
299
+ - 1
300
+ IOU_THRESHOLDS:
301
+ - 0.3
302
+ - 0.7
303
+ LOSS_WEIGHT: 1.0
304
+ NMS_THRESH: 0.7
305
+ POSITIVE_FRACTION: 0.5
306
+ POST_NMS_TOPK_TEST: 1000
307
+ POST_NMS_TOPK_TRAIN: 2000
308
+ PRE_NMS_TOPK_TEST: 6000
309
+ PRE_NMS_TOPK_TRAIN: 12000
310
+ SMOOTH_L1_BETA: 0.0
311
+ SEM_SEG_HEAD:
312
+ COMMON_STRIDE: 4
313
+ CONVS_DIM: 128
314
+ IGNORE_VALUE: 255
315
+ IN_FEATURES:
316
+ - p2
317
+ - p3
318
+ - p4
319
+ - p5
320
+ LOSS_WEIGHT: 1.0
321
+ NAME: SemSegFPNHead
322
+ NORM: GN
323
+ NUM_CLASSES: 54
324
+ WEIGHTS: /home/jinlinyi/exp/densefield/e32_gsv_regress_ade/e04-lr-3-rebalanced/model_final.pth
325
+ OUTPUT_DIR: /home/jinlinyi/exp/densefield/e33_recover_rpf/all-adepretrained-lr-2
326
+ SEED: -1
327
+ SOLVER:
328
+ AMP:
329
+ ENABLED: false
330
+ BASE_LR: 0.01
331
+ BIAS_LR_FACTOR: 1.0
332
+ CHECKPOINT_PERIOD: 500
333
+ CLIP_GRADIENTS:
334
+ CLIP_TYPE: value
335
+ CLIP_VALUE: 1.0
336
+ ENABLED: false
337
+ NORM_TYPE: 2.0
338
+ GAMMA: 0.1
339
+ IMS_PER_BATCH: 64
340
+ LR_SCHEDULER_NAME: WarmupMultiStepLR
341
+ MAX_ITER: 90000
342
+ MOMENTUM: 0.9
343
+ NESTEROV: false
344
+ REFERENCE_WORLD_SIZE: 0
345
+ STEPS:
346
+ - 40000
347
+ - 60000
348
+ WARMUP_FACTOR: 0.001
349
+ WARMUP_ITERS: 1000
350
+ WARMUP_METHOD: linear
351
+ WEIGHT_DECAY: 0.0001
352
+ WEIGHT_DECAY_BIAS: null
353
+ WEIGHT_DECAY_NORM: 0.0
354
+ TEST:
355
+ AUG:
356
+ ENABLED: false
357
+ FLIP: true
358
+ MAX_SIZE: 4000
359
+ MIN_SIZES:
360
+ - 400
361
+ - 500
362
+ - 600
363
+ - 700
364
+ - 800
365
+ - 900
366
+ - 1000
367
+ - 1100
368
+ - 1200
369
+ DETECTIONS_PER_IMAGE: 100
370
+ EVAL_PERIOD: 500
371
+ EXPECTED_RESULTS: []
372
+ KEYPOINT_OKS_SIGMAS: []
373
+ PRECISE_BN:
374
+ ENABLED: false
375
+ NUM_ITER: 200
376
+ VERSION: 2
377
+ VIS_PERIOD: 500
models/paramnet_gsv_rpfpp.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f00aae81c9e0247b96ae9d6dad13dc2395b3078f3b4229dc19cfdd6a13b94ce
3
+ size 418607881
models/paramnet_gsv_rpfpp.yaml ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ AUGMENTATION: true
5
+ AUGMENTATION_FUN: uniform_vfov_crop_resize
6
+ AUGMENTATION_TYPE: geometry
7
+ FILTER_EMPTY_ANNOTATIONS: true
8
+ NUM_WORKERS: 8
9
+ REPEAT_THRESHOLD: 0.0
10
+ RESIZE:
11
+ - 320
12
+ - 320
13
+ SAMPLER_TRAIN: TrainingSampler
14
+ DATASETS:
15
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
16
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
17
+ PROPOSAL_FILES_TEST: []
18
+ PROPOSAL_FILES_TRAIN: []
19
+ TEST:
20
+ - gsv_test_crop_uniform
21
+ TRAIN:
22
+ - gsv_train
23
+ DEBUG_ON: false
24
+ GLOBAL:
25
+ HACK: 1.0
26
+ INPUT:
27
+ CROP:
28
+ ENABLED: false
29
+ SIZE:
30
+ - 0.9
31
+ - 0.9
32
+ TYPE: relative_range
33
+ FORMAT: BGR
34
+ MASK_FORMAT: polygon
35
+ MAX_SIZE_TEST: 1333
36
+ MAX_SIZE_TRAIN: 1333
37
+ MIN_SIZE_TEST: 800
38
+ MIN_SIZE_TRAIN:
39
+ - 800
40
+ MIN_SIZE_TRAIN_SAMPLING: choice
41
+ ONLINE_CROP: false
42
+ RANDOM_FLIP: horizontal
43
+ MODEL:
44
+ ANCHOR_GENERATOR:
45
+ ANGLES:
46
+ - - -90
47
+ - 0
48
+ - 90
49
+ ASPECT_RATIOS:
50
+ - - 0.5
51
+ - 1.0
52
+ - 2.0
53
+ NAME: DefaultAnchorGenerator
54
+ OFFSET: 0.0
55
+ SIZES:
56
+ - - 32
57
+ - 64
58
+ - 128
59
+ - 256
60
+ - 512
61
+ BACKBONE:
62
+ FREEZE_AT: 2
63
+ NAME: build_mit_backbone
64
+ CENTER_ON: false
65
+ DEVICE: cuda
66
+ FPN:
67
+ FUSE_TYPE: sum
68
+ IN_FEATURES: []
69
+ NORM: ''
70
+ OUT_CHANNELS: 256
71
+ FPN_CENTER_HEAD:
72
+ COMMON_STRIDE: 4
73
+ CONVS_DIM: 128
74
+ IGNORE_VALUE: 360
75
+ IN_FEATURES:
76
+ - p2
77
+ - p3
78
+ - p4
79
+ - p5
80
+ LOSS_WEIGHT: 1.0
81
+ NAME: CenterFPNHead
82
+ NORM: GN
83
+ NUM_CLASSES: 30
84
+ FPN_GRAVITY_HEAD:
85
+ COMMON_STRIDE: 4
86
+ CONVS_DIM: 128
87
+ IGNORE_VALUE: 360
88
+ IN_FEATURES:
89
+ - p2
90
+ - p3
91
+ - p4
92
+ - p5
93
+ LOSS_WEIGHT: 1.0
94
+ NAME: GravityFPNHead
95
+ NORM: GN
96
+ NUM_CLASSES: 361
97
+ FPN_HEADS:
98
+ NAME: StandardFPNHeads
99
+ FPN_LATITUDE_HEAD:
100
+ COMMON_STRIDE: 4
101
+ CONVS_DIM: 128
102
+ IGNORE_VALUE: -1
103
+ IN_FEATURES:
104
+ - p2
105
+ - p3
106
+ - p4
107
+ - p5
108
+ LOSS_WEIGHT: 1.0
109
+ NAME: LatitudeFPNHead
110
+ NORM: GN
111
+ NUM_CLASSES: 9
112
+ FREEZE: []
113
+ GRAVITY_DECODER:
114
+ IGNORE_VALUE: 72
115
+ LOSS_TYPE: regression
116
+ LOSS_WEIGHT: 1.0
117
+ NAME: GravityDecoder
118
+ NUM_CLASSES: 73
119
+ GRAVITY_ON: true
120
+ HEIGHT_DECODER:
121
+ LOSS_WEIGHT: 1.0
122
+ NAME: HeightDecoder
123
+ HEIGHT_ON: false
124
+ KEYPOINT_ON: false
125
+ LATITUDE_DECODER:
126
+ IGNORE_VALUE: -1
127
+ LOSS_TYPE: regression
128
+ LOSS_WEIGHT: 1.0
129
+ NAME: LatitudeDecoder
130
+ NUM_CLASSES: 1
131
+ LATITUDE_ON: true
132
+ LOAD_PROPOSALS: false
133
+ MASK_ON: false
134
+ META_ARCHITECTURE: PersFormer
135
+ PANOPTIC_FPN:
136
+ COMBINE:
137
+ ENABLED: true
138
+ INSTANCES_CONFIDENCE_THRESH: 0.5
139
+ OVERLAP_THRESH: 0.5
140
+ STUFF_AREA_LIMIT: 4096
141
+ INSTANCE_LOSS_WEIGHT: 1.0
142
+ PARAM_DECODER:
143
+ DEBUG_LAT: false
144
+ DEBUG_UP: false
145
+ INPUT_SIZE: 64
146
+ LOSS_TYPE: regression
147
+ LOSS_WEIGHT: 0.1
148
+ NAME: ParamNetConvNextRegress
149
+ PREDICT_PARAMS:
150
+ - roll
151
+ - pitch
152
+ - general_vfov
153
+ - rel_cx
154
+ - rel_cy
155
+ SYNTHETIC_PRETRAIN: false
156
+ PERSFORMER_HEADS:
157
+ NAME: StandardPersformerHeads
158
+ PIXEL_MEAN:
159
+ - 103.53
160
+ - 116.28
161
+ - 123.675
162
+ PIXEL_STD:
163
+ - 1.0
164
+ - 1.0
165
+ - 1.0
166
+ PROPOSAL_GENERATOR:
167
+ MIN_SIZE: 0
168
+ NAME: RPN
169
+ RECOVER_PP: true
170
+ RECOVER_RPF: true
171
+ RESNETS:
172
+ DEFORM_MODULATED: false
173
+ DEFORM_NUM_GROUPS: 1
174
+ DEFORM_ON_PER_STAGE:
175
+ - false
176
+ - false
177
+ - false
178
+ - false
179
+ DEPTH: 50
180
+ NORM: FrozenBN
181
+ NUM_GROUPS: 1
182
+ OUT_FEATURES:
183
+ - res4
184
+ RES2_OUT_CHANNELS: 256
185
+ RES5_DILATION: 1
186
+ STEM_OUT_CHANNELS: 64
187
+ STRIDE_IN_1X1: true
188
+ WIDTH_PER_GROUP: 64
189
+ RETINANET:
190
+ BBOX_REG_LOSS_TYPE: smooth_l1
191
+ BBOX_REG_WEIGHTS: &id002
192
+ - 1.0
193
+ - 1.0
194
+ - 1.0
195
+ - 1.0
196
+ FOCAL_LOSS_ALPHA: 0.25
197
+ FOCAL_LOSS_GAMMA: 2.0
198
+ IN_FEATURES:
199
+ - p3
200
+ - p4
201
+ - p5
202
+ - p6
203
+ - p7
204
+ IOU_LABELS:
205
+ - 0
206
+ - -1
207
+ - 1
208
+ IOU_THRESHOLDS:
209
+ - 0.4
210
+ - 0.5
211
+ NMS_THRESH_TEST: 0.5
212
+ NORM: ''
213
+ NUM_CLASSES: 80
214
+ NUM_CONVS: 4
215
+ PRIOR_PROB: 0.01
216
+ SCORE_THRESH_TEST: 0.05
217
+ SMOOTH_L1_LOSS_BETA: 0.1
218
+ TOPK_CANDIDATES_TEST: 1000
219
+ ROI_BOX_CASCADE_HEAD:
220
+ BBOX_REG_WEIGHTS:
221
+ - &id001
222
+ - 10.0
223
+ - 10.0
224
+ - 5.0
225
+ - 5.0
226
+ - - 20.0
227
+ - 20.0
228
+ - 10.0
229
+ - 10.0
230
+ - - 30.0
231
+ - 30.0
232
+ - 15.0
233
+ - 15.0
234
+ IOUS:
235
+ - 0.5
236
+ - 0.6
237
+ - 0.7
238
+ ROI_BOX_HEAD:
239
+ BBOX_REG_LOSS_TYPE: smooth_l1
240
+ BBOX_REG_LOSS_WEIGHT: 1.0
241
+ BBOX_REG_WEIGHTS: *id001
242
+ CLS_AGNOSTIC_BBOX_REG: false
243
+ CONV_DIM: 256
244
+ FC_DIM: 1024
245
+ NAME: ''
246
+ NORM: ''
247
+ NUM_CONV: 0
248
+ NUM_FC: 0
249
+ POOLER_RESOLUTION: 14
250
+ POOLER_SAMPLING_RATIO: 0
251
+ POOLER_TYPE: ROIAlignV2
252
+ SMOOTH_L1_BETA: 0.0
253
+ TRAIN_ON_PRED_BOXES: false
254
+ ROI_HEADS:
255
+ BATCH_SIZE_PER_IMAGE: 512
256
+ IN_FEATURES:
257
+ - res4
258
+ IOU_LABELS:
259
+ - 0
260
+ - 1
261
+ IOU_THRESHOLDS:
262
+ - 0.5
263
+ NAME: Res5ROIHeads
264
+ NMS_THRESH_TEST: 0.5
265
+ NUM_CLASSES: 80
266
+ POSITIVE_FRACTION: 0.25
267
+ PROPOSAL_APPEND_GT: true
268
+ SCORE_THRESH_TEST: 0.05
269
+ ROI_KEYPOINT_HEAD:
270
+ CONV_DIMS:
271
+ - 512
272
+ - 512
273
+ - 512
274
+ - 512
275
+ - 512
276
+ - 512
277
+ - 512
278
+ - 512
279
+ LOSS_WEIGHT: 1.0
280
+ MIN_KEYPOINTS_PER_IMAGE: 1
281
+ NAME: KRCNNConvDeconvUpsampleHead
282
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
283
+ NUM_KEYPOINTS: 17
284
+ POOLER_RESOLUTION: 14
285
+ POOLER_SAMPLING_RATIO: 0
286
+ POOLER_TYPE: ROIAlignV2
287
+ ROI_MASK_HEAD:
288
+ CLS_AGNOSTIC_MASK: false
289
+ CONV_DIM: 256
290
+ NAME: MaskRCNNConvUpsampleHead
291
+ NORM: ''
292
+ NUM_CONV: 0
293
+ POOLER_RESOLUTION: 14
294
+ POOLER_SAMPLING_RATIO: 0
295
+ POOLER_TYPE: ROIAlignV2
296
+ RPN:
297
+ BATCH_SIZE_PER_IMAGE: 256
298
+ BBOX_REG_LOSS_TYPE: smooth_l1
299
+ BBOX_REG_LOSS_WEIGHT: 1.0
300
+ BBOX_REG_WEIGHTS: *id002
301
+ BOUNDARY_THRESH: -1
302
+ CONV_DIMS:
303
+ - -1
304
+ HEAD_NAME: StandardRPNHead
305
+ IN_FEATURES:
306
+ - res4
307
+ IOU_LABELS:
308
+ - 0
309
+ - -1
310
+ - 1
311
+ IOU_THRESHOLDS:
312
+ - 0.3
313
+ - 0.7
314
+ LOSS_WEIGHT: 1.0
315
+ NMS_THRESH: 0.7
316
+ POSITIVE_FRACTION: 0.5
317
+ POST_NMS_TOPK_TEST: 1000
318
+ POST_NMS_TOPK_TRAIN: 2000
319
+ PRE_NMS_TOPK_TEST: 6000
320
+ PRE_NMS_TOPK_TRAIN: 12000
321
+ SMOOTH_L1_BETA: 0.0
322
+ SEM_SEG_HEAD:
323
+ COMMON_STRIDE: 4
324
+ CONVS_DIM: 128
325
+ IGNORE_VALUE: 255
326
+ IN_FEATURES:
327
+ - p2
328
+ - p3
329
+ - p4
330
+ - p5
331
+ LOSS_WEIGHT: 1.0
332
+ NAME: SemSegFPNHead
333
+ NORM: GN
334
+ NUM_CLASSES: 54
335
+ WEIGHTS: /home/jinlinyi/exp/densefield/e32_gsv_regress_ade/e04-lr-3-rebalanced/model_final.pth
336
+ OUTPUT_DIR: /home/jinlinyi/exp/densefield/e36_recover_rpfpp_v3_small/regression-all-lr2-rebalanced
337
+ OVERFIT_ON: false
338
+ SEED: -1
339
+ SOLVER:
340
+ AMP:
341
+ ENABLED: false
342
+ BASE_LR: 0.01
343
+ BIAS_LR_FACTOR: 1.0
344
+ CHECKPOINT_PERIOD: 500
345
+ CLIP_GRADIENTS:
346
+ CLIP_TYPE: value
347
+ CLIP_VALUE: 1.0
348
+ ENABLED: false
349
+ NORM_TYPE: 2.0
350
+ GAMMA: 0.1
351
+ IMS_PER_BATCH: 64
352
+ LR_SCHEDULER_NAME: WarmupCosineLR
353
+ MAX_ITER: 90000
354
+ MOMENTUM: 0.9
355
+ NESTEROV: false
356
+ REFERENCE_WORLD_SIZE: 0
357
+ STEPS:
358
+ - 40000
359
+ - 60000
360
+ WARMUP_FACTOR: 0.001
361
+ WARMUP_ITERS: 1000
362
+ WARMUP_METHOD: linear
363
+ WEIGHT_DECAY: 0.0001
364
+ WEIGHT_DECAY_BIAS: null
365
+ WEIGHT_DECAY_NORM: 0.0
366
+ TEST:
367
+ AUG:
368
+ ENABLED: false
369
+ FLIP: true
370
+ MAX_SIZE: 4000
371
+ MIN_SIZES:
372
+ - 400
373
+ - 500
374
+ - 600
375
+ - 700
376
+ - 800
377
+ - 900
378
+ - 1000
379
+ - 1100
380
+ - 1200
381
+ DETECTIONS_PER_IMAGE: 100
382
+ EVAL_PERIOD: 500
383
+ EXPECTED_RESULTS: []
384
+ KEYPOINT_OKS_SIGMAS: []
385
+ PRECISE_BN:
386
+ ENABLED: false
387
+ NUM_ITER: 200
388
+ VERSION: 2
389
+ VIS_PERIOD: 500
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cu113
2
+ torch==1.10.0+cu113
3
+ torchvision==0.11.1+cu113