PekingU
/

rtdetr_r18vd

@@ -10,16 +10,6 @@
   ],
   "attention_dropout": 0.0,
   "auxiliary_loss": true,
-  "backbone": "resnet18d",
-  "backbone_config": null,
-  "backbone_kwargs": {
-    "features_only": true,
-    "out_indices": [
-      2,
-      3,
-      4
-    ]
-  },
   "batch_norm_eps": 1e-05,
   "box_noise_scale": 1.0,
   "d_model": 256,
@@ -33,8 +23,17 @@
   ],
   "decoder_layers": 3,
   "decoder_n_points": 4,
   "disable_custom_kernels": true,
   "dropout": 0.0,
   "encode_proj_layers": [
     2
   ],
@@ -57,7 +56,14 @@
   ],
   "focal_loss_alpha": 0.75,
   "focal_loss_gamma": 2.0,
   "hidden_expansion": 0.5,
   "id2label": {
     "0": "person",
     "1": "bicycle",
@@ -226,6 +232,7 @@
   },
   "label_noise_ratio": 0.5,
   "layer_norm_eps": 1e-05,
   "learn_initial_query": false,
   "matcher_alpha": 0.25,
   "matcher_bbox_cost": 5.0,
@@ -234,15 +241,31 @@
   "matcher_giou_cost": 2.0,
   "model_type": "rt_detr",
   "normalize_before": false,
   "num_denoising": 100,
   "num_feature_levels": 3,
   "num_queries": 300,
   "positional_encoding_temperature": 10000,
   "torch_dtype": "float32",
   "transformers_version": "4.42.0.dev0",
   "use_focal_loss": true,
-  "use_pretrained_backbone": true,
-  "use_timm_backbone": true,
   "weight_loss_bbox": 5.0,
   "weight_loss_giou": 2.0,
   "weight_loss_vfl": 1.0,

   ],
   "attention_dropout": 0.0,
   "auxiliary_loss": true,
   "batch_norm_eps": 1e-05,
   "box_noise_scale": 1.0,
   "d_model": 256,
   ],
   "decoder_layers": 3,
   "decoder_n_points": 4,
+  "depths": [
+    2,
+    2,
+    2,
+    2
+  ],
   "disable_custom_kernels": true,
+  "downsample_in_bottleneck": false,
+  "downsample_in_first_stage": false,
   "dropout": 0.0,
+  "embedding_size": 64,
   "encode_proj_layers": [
     2
   ],
   ],
   "focal_loss_alpha": 0.75,
   "focal_loss_gamma": 2.0,
+  "hidden_act": "relu",
   "hidden_expansion": 0.5,
+  "hidden_sizes": [
+    64,
+    128,
+    256,
+    512
+  ],
   "id2label": {
     "0": "person",
     "1": "bicycle",
   },
   "label_noise_ratio": 0.5,
   "layer_norm_eps": 1e-05,
+  "layer_type": "basic",
   "learn_initial_query": false,
   "matcher_alpha": 0.25,
   "matcher_bbox_cost": 5.0,
   "matcher_giou_cost": 2.0,
   "model_type": "rt_detr",
   "normalize_before": false,
+  "num_channels": 3,
   "num_denoising": 100,
   "num_feature_levels": 3,
   "num_queries": 300,
+  "out_features": [
+    "stage2",
+    "stage3",
+    "stage4"
+  ],
+  "out_indices": [
+    2,
+    3,
+    4
+  ],
   "positional_encoding_temperature": 10000,
+  "stage_names": [
+    "stem",
+    "stage1",
+    "stage2",
+    "stage3",
+    "stage4"
+  ],
   "torch_dtype": "float32",
   "transformers_version": "4.42.0.dev0",
   "use_focal_loss": true,
   "weight_loss_bbox": 5.0,
   "weight_loss_giou": 2.0,
   "weight_loss_vfl": 1.0,