Add config from convert_rt_detr_original_pytorch_checkpoint_to_pytorch.py
Browse files
- config.json +35 -12
config.json
CHANGED
@@ -10,16 +10,6 @@
|
|
10 |
],
|
11 |
"attention_dropout": 0.0,
|
12 |
"auxiliary_loss": true,
|
13 |
-
"backbone": "resnet18d",
|
14 |
-
"backbone_config": null,
|
15 |
-
"backbone_kwargs": {
|
16 |
-
"features_only": true,
|
17 |
-
"out_indices": [
|
18 |
-
2,
|
19 |
-
3,
|
20 |
-
4
|
21 |
-
]
|
22 |
-
},
|
23 |
"batch_norm_eps": 1e-05,
|
24 |
"box_noise_scale": 1.0,
|
25 |
"d_model": 256,
|
@@ -33,8 +23,17 @@
|
|
33 |
],
|
34 |
"decoder_layers": 3,
|
35 |
"decoder_n_points": 4,
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
"disable_custom_kernels": true,
|
|
|
|
|
37 |
"dropout": 0.0,
|
|
|
38 |
"encode_proj_layers": [
|
39 |
2
|
40 |
],
|
@@ -57,7 +56,14 @@
|
|
57 |
],
|
58 |
"focal_loss_alpha": 0.75,
|
59 |
"focal_loss_gamma": 2.0,
|
|
|
60 |
"hidden_expansion": 0.5,
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
"id2label": {
|
62 |
"0": "person",
|
63 |
"1": "bicycle",
|
@@ -226,6 +232,7 @@
|
|
226 |
},
|
227 |
"label_noise_ratio": 0.5,
|
228 |
"layer_norm_eps": 1e-05,
|
|
|
229 |
"learn_initial_query": false,
|
230 |
"matcher_alpha": 0.25,
|
231 |
"matcher_bbox_cost": 5.0,
|
@@ -234,15 +241,31 @@
|
|
234 |
"matcher_giou_cost": 2.0,
|
235 |
"model_type": "rt_detr",
|
236 |
"normalize_before": false,
|
|
|
237 |
"num_denoising": 100,
|
238 |
"num_feature_levels": 3,
|
239 |
"num_queries": 300,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
240 |
"positional_encoding_temperature": 10000,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
"torch_dtype": "float32",
|
242 |
"transformers_version": "4.42.0.dev0",
|
243 |
"use_focal_loss": true,
|
244 |
-
"use_pretrained_backbone": true,
|
245 |
-
"use_timm_backbone": true,
|
246 |
"weight_loss_bbox": 5.0,
|
247 |
"weight_loss_giou": 2.0,
|
248 |
"weight_loss_vfl": 1.0,
|
|
|
10 |
],
|
11 |
"attention_dropout": 0.0,
|
12 |
"auxiliary_loss": true,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
"batch_norm_eps": 1e-05,
|
14 |
"box_noise_scale": 1.0,
|
15 |
"d_model": 256,
|
|
|
23 |
],
|
24 |
"decoder_layers": 3,
|
25 |
"decoder_n_points": 4,
|
26 |
+
"depths": [
|
27 |
+
2,
|
28 |
+
2,
|
29 |
+
2,
|
30 |
+
2
|
31 |
+
],
|
32 |
"disable_custom_kernels": true,
|
33 |
+
"downsample_in_bottleneck": false,
|
34 |
+
"downsample_in_first_stage": false,
|
35 |
"dropout": 0.0,
|
36 |
+
"embedding_size": 64,
|
37 |
"encode_proj_layers": [
|
38 |
2
|
39 |
],
|
|
|
56 |
],
|
57 |
"focal_loss_alpha": 0.75,
|
58 |
"focal_loss_gamma": 2.0,
|
59 |
+
"hidden_act": "relu",
|
60 |
"hidden_expansion": 0.5,
|
61 |
+
"hidden_sizes": [
|
62 |
+
64,
|
63 |
+
128,
|
64 |
+
256,
|
65 |
+
512
|
66 |
+
],
|
67 |
"id2label": {
|
68 |
"0": "person",
|
69 |
"1": "bicycle",
|
|
|
232 |
},
|
233 |
"label_noise_ratio": 0.5,
|
234 |
"layer_norm_eps": 1e-05,
|
235 |
+
"layer_type": "basic",
|
236 |
"learn_initial_query": false,
|
237 |
"matcher_alpha": 0.25,
|
238 |
"matcher_bbox_cost": 5.0,
|
|
|
241 |
"matcher_giou_cost": 2.0,
|
242 |
"model_type": "rt_detr",
|
243 |
"normalize_before": false,
|
244 |
+
"num_channels": 3,
|
245 |
"num_denoising": 100,
|
246 |
"num_feature_levels": 3,
|
247 |
"num_queries": 300,
|
248 |
+
"out_features": [
|
249 |
+
"stage2",
|
250 |
+
"stage3",
|
251 |
+
"stage4"
|
252 |
+
],
|
253 |
+
"out_indices": [
|
254 |
+
2,
|
255 |
+
3,
|
256 |
+
4
|
257 |
+
],
|
258 |
"positional_encoding_temperature": 10000,
|
259 |
+
"stage_names": [
|
260 |
+
"stem",
|
261 |
+
"stage1",
|
262 |
+
"stage2",
|
263 |
+
"stage3",
|
264 |
+
"stage4"
|
265 |
+
],
|
266 |
"torch_dtype": "float32",
|
267 |
"transformers_version": "4.42.0.dev0",
|
268 |
"use_focal_loss": true,
|
|
|
|
|
269 |
"weight_loss_bbox": 5.0,
|
270 |
"weight_loss_giou": 2.0,
|
271 |
"weight_loss_vfl": 1.0,
|