sbchoi committed on
Commit
54d1b51
1 Parent(s): 6449f7d

Add config from convert_rt_detr_original_pytorch_checkpoint_to_pytorch.py

Browse files
Files changed (1) hide show
  1. config.json +36 -13
config.json CHANGED
@@ -10,16 +10,6 @@
10
  ],
11
  "attention_dropout": 0.0,
12
  "auxiliary_loss": true,
13
- "backbone": "resnet50d",
14
- "backbone_config": null,
15
- "backbone_kwargs": {
16
- "features_only": true,
17
- "out_indices": [
18
- 2,
19
- 3,
20
- 4
21
- ]
22
- },
23
  "batch_norm_eps": 1e-05,
24
  "box_noise_scale": 1.0,
25
  "d_model": 256,
@@ -33,8 +23,17 @@
33
  ],
34
  "decoder_layers": 6,
35
  "decoder_n_points": 4,
 
 
 
 
 
 
36
  "disable_custom_kernels": true,
 
 
37
  "dropout": 0.0,
 
38
  "encode_proj_layers": [
39
  2
40
  ],
@@ -57,7 +56,14 @@
57
  ],
58
  "focal_loss_alpha": 0.75,
59
  "focal_loss_gamma": 2.0,
 
60
  "hidden_expansion": 1.0,
 
 
 
 
 
 
61
  "id2label": {
62
  "0": "person",
63
  "1": "bicycle",
@@ -226,6 +232,7 @@
226
  },
227
  "label_noise_ratio": 0.5,
228
  "layer_norm_eps": 1e-05,
 
229
  "learn_initial_query": false,
230
  "matcher_alpha": 0.25,
231
  "matcher_bbox_cost": 5.0,
@@ -234,15 +241,31 @@
234
  "matcher_giou_cost": 2.0,
235
  "model_type": "rt_detr",
236
  "normalize_before": false,
 
237
  "num_denoising": 100,
238
  "num_feature_levels": 3,
239
  "num_queries": 300,
 
 
 
 
 
 
 
 
 
 
240
  "positional_encoding_temperature": 10000,
 
 
 
 
 
 
 
241
  "torch_dtype": "float32",
242
- "transformers_version": "4.41.0.dev0",
243
  "use_focal_loss": true,
244
- "use_pretrained_backbone": true,
245
- "use_timm_backbone": true,
246
  "weight_loss_bbox": 5.0,
247
  "weight_loss_giou": 2.0,
248
  "weight_loss_vfl": 1.0,
 
10
  ],
11
  "attention_dropout": 0.0,
12
  "auxiliary_loss": true,
 
 
 
 
 
 
 
 
 
 
13
  "batch_norm_eps": 1e-05,
14
  "box_noise_scale": 1.0,
15
  "d_model": 256,
 
23
  ],
24
  "decoder_layers": 6,
25
  "decoder_n_points": 4,
26
+ "depths": [
27
+ 3,
28
+ 4,
29
+ 6,
30
+ 3
31
+ ],
32
  "disable_custom_kernels": true,
33
+ "downsample_in_bottleneck": false,
34
+ "downsample_in_first_stage": false,
35
  "dropout": 0.0,
36
+ "embedding_size": 64,
37
  "encode_proj_layers": [
38
  2
39
  ],
 
56
  ],
57
  "focal_loss_alpha": 0.75,
58
  "focal_loss_gamma": 2.0,
59
+ "hidden_act": "relu",
60
  "hidden_expansion": 1.0,
61
+ "hidden_sizes": [
62
+ 256,
63
+ 512,
64
+ 1024,
65
+ 2048
66
+ ],
67
  "id2label": {
68
  "0": "person",
69
  "1": "bicycle",
 
232
  },
233
  "label_noise_ratio": 0.5,
234
  "layer_norm_eps": 1e-05,
235
+ "layer_type": "bottleneck",
236
  "learn_initial_query": false,
237
  "matcher_alpha": 0.25,
238
  "matcher_bbox_cost": 5.0,
 
241
  "matcher_giou_cost": 2.0,
242
  "model_type": "rt_detr",
243
  "normalize_before": false,
244
+ "num_channels": 3,
245
  "num_denoising": 100,
246
  "num_feature_levels": 3,
247
  "num_queries": 300,
248
+ "out_features": [
249
+ "stage2",
250
+ "stage3",
251
+ "stage4"
252
+ ],
253
+ "out_indices": [
254
+ 2,
255
+ 3,
256
+ 4
257
+ ],
258
  "positional_encoding_temperature": 10000,
259
+ "stage_names": [
260
+ "stem",
261
+ "stage1",
262
+ "stage2",
263
+ "stage3",
264
+ "stage4"
265
+ ],
266
  "torch_dtype": "float32",
267
+ "transformers_version": "4.42.0.dev0",
268
  "use_focal_loss": true,
 
 
269
  "weight_loss_bbox": 5.0,
270
  "weight_loss_giou": 2.0,
271
  "weight_loss_vfl": 1.0,