nguyenminh4099 commited on
Commit
a816101
·
verified ·
1 Parent(s): ce60298

Upload folder using huggingface_hub

Browse files
Files changed (32) hide show
  1. .gitattributes +2 -0
  2. 20260315_224809/bboxes_3d/examples/val_1/38a28a3aaf2647f2a8c0e90e31267bf8.png +3 -0
  3. 20260315_224809/bboxes_3d/examples/val_2/38a28a3aaf2647f2a8c0e90e31267bf8.png +3 -0
  4. 20260315_224809/bboxes_3d/metrics_details.json +0 -0
  5. 20260315_224809/bboxes_3d/metrics_summary.json +198 -0
  6. 20260315_224809/bboxes_3d/plots/barrier_pr.pdf +0 -0
  7. 20260315_224809/bboxes_3d/plots/barrier_tp.pdf +0 -0
  8. 20260315_224809/bboxes_3d/plots/bicycle_pr.pdf +0 -0
  9. 20260315_224809/bboxes_3d/plots/bicycle_tp.pdf +0 -0
  10. 20260315_224809/bboxes_3d/plots/bus_pr.pdf +0 -0
  11. 20260315_224809/bboxes_3d/plots/bus_tp.pdf +0 -0
  12. 20260315_224809/bboxes_3d/plots/car_pr.pdf +0 -0
  13. 20260315_224809/bboxes_3d/plots/car_tp.pdf +0 -0
  14. 20260315_224809/bboxes_3d/plots/construction_vehicle_pr.pdf +0 -0
  15. 20260315_224809/bboxes_3d/plots/construction_vehicle_tp.pdf +0 -0
  16. 20260315_224809/bboxes_3d/plots/dist_pr_0.5.pdf +0 -0
  17. 20260315_224809/bboxes_3d/plots/dist_pr_1.0.pdf +0 -0
  18. 20260315_224809/bboxes_3d/plots/dist_pr_2.0.pdf +0 -0
  19. 20260315_224809/bboxes_3d/plots/dist_pr_4.0.pdf +0 -0
  20. 20260315_224809/bboxes_3d/plots/motorcycle_pr.pdf +0 -0
  21. 20260315_224809/bboxes_3d/plots/motorcycle_tp.pdf +0 -0
  22. 20260315_224809/bboxes_3d/plots/pedestrian_pr.pdf +0 -0
  23. 20260315_224809/bboxes_3d/plots/pedestrian_tp.pdf +0 -0
  24. 20260315_224809/bboxes_3d/plots/summary.pdf +0 -0
  25. 20260315_224809/bboxes_3d/plots/traffic_cone_pr.pdf +0 -0
  26. 20260315_224809/bboxes_3d/plots/traffic_cone_tp.pdf +0 -0
  27. 20260315_224809/bboxes_3d/plots/trailer_pr.pdf +0 -0
  28. 20260315_224809/bboxes_3d/plots/trailer_tp.pdf +0 -0
  29. 20260315_224809/bboxes_3d/plots/truck_pr.pdf +0 -0
  30. 20260315_224809/bboxes_3d/plots/truck_tp.pdf +0 -0
  31. 20260315_224809/bboxes_3d/results_nusc.json +0 -0
  32. 20260315_224809/logs.log +1834 -0
.gitattributes CHANGED
@@ -35,3 +35,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  20260314_223104/bboxes_3d/examples/val_1/38a28a3aaf2647f2a8c0e90e31267bf8.png filter=lfs diff=lfs merge=lfs -text
37
  20260314_223104/bboxes_3d/examples/val_2/38a28a3aaf2647f2a8c0e90e31267bf8.png filter=lfs diff=lfs merge=lfs -text
 
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  20260314_223104/bboxes_3d/examples/val_1/38a28a3aaf2647f2a8c0e90e31267bf8.png filter=lfs diff=lfs merge=lfs -text
37
  20260314_223104/bboxes_3d/examples/val_2/38a28a3aaf2647f2a8c0e90e31267bf8.png filter=lfs diff=lfs merge=lfs -text
38
+ 20260315_224809/bboxes_3d/examples/val_1/38a28a3aaf2647f2a8c0e90e31267bf8.png filter=lfs diff=lfs merge=lfs -text
39
+ 20260315_224809/bboxes_3d/examples/val_2/38a28a3aaf2647f2a8c0e90e31267bf8.png filter=lfs diff=lfs merge=lfs -text
20260315_224809/bboxes_3d/examples/val_1/38a28a3aaf2647f2a8c0e90e31267bf8.png ADDED

Git LFS Details

  • SHA256: 15e3a35220de2dea4496370a7ad999d2f74cd0e0396f2551202b8c3fe5c7671e
  • Pointer size: 131 Bytes
  • Size of remote file: 138 kB
20260315_224809/bboxes_3d/examples/val_2/38a28a3aaf2647f2a8c0e90e31267bf8.png ADDED

Git LFS Details

  • SHA256: 15e3a35220de2dea4496370a7ad999d2f74cd0e0396f2551202b8c3fe5c7671e
  • Pointer size: 131 Bytes
  • Size of remote file: 138 kB
20260315_224809/bboxes_3d/metrics_details.json ADDED
The diff for this file is too large to render. See raw diff
 
20260315_224809/bboxes_3d/metrics_summary.json ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "label_aps": {
3
+ "car": {
4
+ "0.5": 0.0,
5
+ "1.0": 0.0,
6
+ "2.0": 0.0,
7
+ "4.0": 0.009927908302700019
8
+ },
9
+ "truck": {
10
+ "0.5": 0.0,
11
+ "1.0": 0.0,
12
+ "2.0": 0.0,
13
+ "4.0": 0.0
14
+ },
15
+ "bus": {
16
+ "0.5": 0.0,
17
+ "1.0": 0.0,
18
+ "2.0": 0.0,
19
+ "4.0": 0.0
20
+ },
21
+ "trailer": {
22
+ "0.5": 0.0,
23
+ "1.0": 0.0,
24
+ "2.0": 0.0,
25
+ "4.0": 0.0
26
+ },
27
+ "construction_vehicle": {
28
+ "0.5": 0.0,
29
+ "1.0": 0.0,
30
+ "2.0": 0.0,
31
+ "4.0": 0.0
32
+ },
33
+ "pedestrian": {
34
+ "0.5": 0.0,
35
+ "1.0": 0.0,
36
+ "2.0": 0.0,
37
+ "4.0": 0.0
38
+ },
39
+ "motorcycle": {
40
+ "0.5": 0.0,
41
+ "1.0": 0.0,
42
+ "2.0": 0.0,
43
+ "4.0": 0.0
44
+ },
45
+ "bicycle": {
46
+ "0.5": 0.0,
47
+ "1.0": 0.0,
48
+ "2.0": 0.0,
49
+ "4.0": 0.0
50
+ },
51
+ "traffic_cone": {
52
+ "0.5": 0.0,
53
+ "1.0": 0.0,
54
+ "2.0": 0.0,
55
+ "4.0": 0.0
56
+ },
57
+ "barrier": {
58
+ "0.5": 0.0,
59
+ "1.0": 0.0,
60
+ "2.0": 0.0,
61
+ "4.0": 0.0
62
+ }
63
+ },
64
+ "mean_dist_aps": {
65
+ "car": 0.0024819770756750047,
66
+ "truck": 0.0,
67
+ "bus": 0.0,
68
+ "trailer": 0.0,
69
+ "construction_vehicle": 0.0,
70
+ "pedestrian": 0.0,
71
+ "motorcycle": 0.0,
72
+ "bicycle": 0.0,
73
+ "traffic_cone": 0.0,
74
+ "barrier": 0.0
75
+ },
76
+ "mean_ap": 0.0002481977075675005,
77
+ "label_tp_errors": {
78
+ "car": {
79
+ "trans_err": 1.1822891487922438,
80
+ "scale_err": 0.9073979925542556,
81
+ "orient_err": 1.4475431464436141,
82
+ "vel_err": 0.25779051477943044,
83
+ "attr_err": 0.4033333333333333
84
+ },
85
+ "truck": {
86
+ "trans_err": 1.0,
87
+ "scale_err": 1.0,
88
+ "orient_err": 1.0,
89
+ "vel_err": 1.0,
90
+ "attr_err": 1.0
91
+ },
92
+ "bus": {
93
+ "trans_err": 1.0,
94
+ "scale_err": 1.0,
95
+ "orient_err": 1.0,
96
+ "vel_err": 1.0,
97
+ "attr_err": 1.0
98
+ },
99
+ "trailer": {
100
+ "trans_err": 1.0,
101
+ "scale_err": 1.0,
102
+ "orient_err": 1.0,
103
+ "vel_err": 1.0,
104
+ "attr_err": 1.0
105
+ },
106
+ "construction_vehicle": {
107
+ "trans_err": 1.0,
108
+ "scale_err": 1.0,
109
+ "orient_err": 1.0,
110
+ "vel_err": 1.0,
111
+ "attr_err": 1.0
112
+ },
113
+ "pedestrian": {
114
+ "trans_err": 1.0,
115
+ "scale_err": 1.0,
116
+ "orient_err": 1.0,
117
+ "vel_err": 1.0,
118
+ "attr_err": 1.0
119
+ },
120
+ "motorcycle": {
121
+ "trans_err": 1.0,
122
+ "scale_err": 1.0,
123
+ "orient_err": 1.0,
124
+ "vel_err": 1.0,
125
+ "attr_err": 1.0
126
+ },
127
+ "bicycle": {
128
+ "trans_err": 1.0,
129
+ "scale_err": 1.0,
130
+ "orient_err": 1.0,
131
+ "vel_err": 1.0,
132
+ "attr_err": 1.0
133
+ },
134
+ "traffic_cone": {
135
+ "trans_err": 1.0,
136
+ "scale_err": 1.0,
137
+ "orient_err": NaN,
138
+ "vel_err": NaN,
139
+ "attr_err": NaN
140
+ },
141
+ "barrier": {
142
+ "trans_err": 1.0,
143
+ "scale_err": 1.0,
144
+ "orient_err": 1.0,
145
+ "vel_err": NaN,
146
+ "attr_err": NaN
147
+ }
148
+ },
149
+ "tp_errors": {
150
+ "trans_err": 1.0182289148792243,
151
+ "scale_err": 0.9907397992554255,
152
+ "orient_err": 1.0497270162715127,
153
+ "vel_err": 0.9072238143474288,
154
+ "attr_err": 0.9254166666666667
155
+ },
156
+ "tp_scores": {
157
+ "trans_err": 0.0,
158
+ "scale_err": 0.009260200744574454,
159
+ "orient_err": 0.0,
160
+ "vel_err": 0.09277618565257117,
161
+ "attr_err": 0.07458333333333333
162
+ },
163
+ "nd_score": 0.017786070826831646,
164
+ "eval_time": 0.11112022399902344,
165
+ "cfg": {
166
+ "class_range": {
167
+ "car": 50,
168
+ "truck": 50,
169
+ "bus": 50,
170
+ "trailer": 50,
171
+ "construction_vehicle": 50,
172
+ "pedestrian": 40,
173
+ "motorcycle": 40,
174
+ "bicycle": 40,
175
+ "traffic_cone": 30,
176
+ "barrier": 30
177
+ },
178
+ "dist_fcn": "center_distance",
179
+ "dist_ths": [
180
+ 0.5,
181
+ 1.0,
182
+ 2.0,
183
+ 4.0
184
+ ],
185
+ "dist_th_tp": 2.0,
186
+ "min_recall": 0.1,
187
+ "min_precision": 0.1,
188
+ "max_boxes_per_sample": 500,
189
+ "mean_ap_weight": 5
190
+ },
191
+ "meta": {
192
+ "use_camera": true,
193
+ "use_lidar": false,
194
+ "use_radar": false,
195
+ "use_map": false,
196
+ "use_external": false
197
+ }
198
+ }
20260315_224809/bboxes_3d/plots/barrier_pr.pdf ADDED
Binary file (12 kB). View file
 
20260315_224809/bboxes_3d/plots/barrier_tp.pdf ADDED
Binary file (12.5 kB). View file
 
20260315_224809/bboxes_3d/plots/bicycle_pr.pdf ADDED
Binary file (12.2 kB). View file
 
20260315_224809/bboxes_3d/plots/bicycle_tp.pdf ADDED
Binary file (12.5 kB). View file
 
20260315_224809/bboxes_3d/plots/bus_pr.pdf ADDED
Binary file (12.3 kB). View file
 
20260315_224809/bboxes_3d/plots/bus_tp.pdf ADDED
Binary file (12.6 kB). View file
 
20260315_224809/bboxes_3d/plots/car_pr.pdf ADDED
Binary file (12.5 kB). View file
 
20260315_224809/bboxes_3d/plots/car_tp.pdf ADDED
Binary file (15.3 kB). View file
 
20260315_224809/bboxes_3d/plots/construction_vehicle_pr.pdf ADDED
Binary file (12.4 kB). View file
 
20260315_224809/bboxes_3d/plots/construction_vehicle_tp.pdf ADDED
Binary file (12.6 kB). View file
 
20260315_224809/bboxes_3d/plots/dist_pr_0.5.pdf ADDED
Binary file (14.7 kB). View file
 
20260315_224809/bboxes_3d/plots/dist_pr_1.0.pdf ADDED
Binary file (14.7 kB). View file
 
20260315_224809/bboxes_3d/plots/dist_pr_2.0.pdf ADDED
Binary file (14.9 kB). View file
 
20260315_224809/bboxes_3d/plots/dist_pr_4.0.pdf ADDED
Binary file (15 kB). View file
 
20260315_224809/bboxes_3d/plots/motorcycle_pr.pdf ADDED
Binary file (12.1 kB). View file
 
20260315_224809/bboxes_3d/plots/motorcycle_tp.pdf ADDED
Binary file (12.4 kB). View file
 
20260315_224809/bboxes_3d/plots/pedestrian_pr.pdf ADDED
Binary file (12 kB). View file
 
20260315_224809/bboxes_3d/plots/pedestrian_tp.pdf ADDED
Binary file (12.5 kB). View file
 
20260315_224809/bboxes_3d/plots/summary.pdf ADDED
Binary file (31.7 kB). View file
 
20260315_224809/bboxes_3d/plots/traffic_cone_pr.pdf ADDED
Binary file (12.4 kB). View file
 
20260315_224809/bboxes_3d/plots/traffic_cone_tp.pdf ADDED
Binary file (12.7 kB). View file
 
20260315_224809/bboxes_3d/plots/trailer_pr.pdf ADDED
Binary file (11.8 kB). View file
 
20260315_224809/bboxes_3d/plots/trailer_tp.pdf ADDED
Binary file (11.9 kB). View file
 
20260315_224809/bboxes_3d/plots/truck_pr.pdf ADDED
Binary file (12.3 kB). View file
 
20260315_224809/bboxes_3d/plots/truck_tp.pdf ADDED
Binary file (12.4 kB). View file
 
20260315_224809/bboxes_3d/results_nusc.json ADDED
The diff for this file is too large to render. See raw diff
 
20260315_224809/logs.log ADDED
@@ -0,0 +1,1834 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026/03/15 22:48:11 - bevformer - INFO -
2
+ ------------------------------------------------------------
3
+ System environment:
4
+ sys.platform: darwin
5
+ Python: 3.10.0 (default, Oct 17 2021, 11:56:26) [Clang 13.0.0 ]
6
+ CUDA available: False
7
+ numpy_random_seed: 321
8
+ GCC: Apple clang version 15.0.0 (clang-1500.3.9.4)
9
+ PyTorch: 2.10.0
10
+ PyTorch compiling details: PyTorch built with:
11
+ - GCC 4.2
12
+ - C++ Version: 201703
13
+ - clang 15.0.0
14
+ - OpenMP 201811
15
+ - LAPACK is enabled (usually provided by MKL)
16
+ - NNPACK is enabled
17
+ - CPU capability usage: DEFAULT
18
+ - Build settings: BLAS_INFO=accelerate, BUILD_TYPE=Release, COMMIT_SHA=449b1768410104d3ed79d3bcfe4ba1d65c7f22c0, CXX_COMPILER=/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_PYTORCH_QNNPACK -DAT_BUILD_ARM_VEC256_WITH_SLEEF -DUSE_XNNPACK -DUSE_PYTORCH_METAL_EXPORT -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DUSE_COREML_DELEGATE -O2 -fPIC -DC10_NODEPRECATED -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=braced-scalar-init -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wvla-extension -Wsuggest-override -Wnewline-eof -Winconsistent-missing-override -Winconsistent-missing-destructor-override -Wno-pass-failed -Wno-error=old-style-cast -Wconstant-conversion -Qunused-arguments -faligned-new -fno-math-errno -fno-trapping-math -Werror=format -DUSE_MPS -Wno-missing-braces, LAPACK_INFO=accelerate, TORCH_VERSION=2.10.0, USE_CUDA=OFF, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EIGEN_FOR_BLAS=ON, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=OFF, USE_MKLDNN=OFF, USE_MPI=OFF, USE_NCCL=OFF, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, USE_XCCL=OFF, USE_XPU=OFF,
19
+
20
+ TorchVision: 0.25.0
21
+ OpenCV: 4.13.0
22
+ MMEngine: 0.8.5
23
+
24
+ Runtime environment:
25
+ dist_cfg: {'backend': 'nccl'}
26
+ seed: 321
27
+ Distributed launcher: none
28
+ Distributed training: False
29
+ GPU number: 1
30
+ ------------------------------------------------------------
31
+
32
+ 2026/03/15 22:48:11 - bevformer - INFO - Set random seed to 321, deterministic: False
33
+ 2026/03/15 22:48:11 - bevformer - INFO - Building model
34
+ 2026/03/15 22:48:12 - bevformer - INFO - Model architecture:
35
+ BEVFormerDetector(
36
+ (data_preprocessor): BaseDataPreprocessor()
37
+ (pts_bbox_head): BEVFormerHead(
38
+ (loss_cls): FocalLoss()
39
+ (loss_bbox): L1Loss()
40
+ (loss_iou): GIoULoss()
41
+ (transformer): PerceptionTransformer(
42
+ (encoder): BEVFormerEncoder(
43
+ (layers): ModuleList(
44
+ (0-2): 3 x BEVFormerLayer(
45
+ (attentions): ModuleList(
46
+ (0): TemporalSelfAttention(
47
+ (sampling_offsets): Linear(in_features=512, out_features=128, bias=True)
48
+ (attention_weights): Linear(in_features=512, out_features=64, bias=True)
49
+ (value_proj): Linear(in_features=256, out_features=256, bias=True)
50
+ (output_proj): Linear(in_features=256, out_features=256, bias=True)
51
+ (dropout): Dropout(p=0.1, inplace=False)
52
+ )
53
+ (1): SpatialCrossAttention(
54
+ (deformable_attention): MSDeformableAttention3D(
55
+ (sampling_offsets): Linear(in_features=256, out_features=128, bias=True)
56
+ (attention_weights): Linear(in_features=256, out_features=64, bias=True)
57
+ (value_proj): Linear(in_features=256, out_features=256, bias=True)
58
+ )
59
+ (output_proj): Linear(in_features=256, out_features=256, bias=True)
60
+ (dropout): Dropout(p=0.1, inplace=False)
61
+ )
62
+ )
63
+ (ffns): ModuleList(
64
+ (0): FFN(
65
+ (layers): Sequential(
66
+ (0): Sequential(
67
+ (0): Linear(in_features=256, out_features=512, bias=True)
68
+ (1): ReLU(inplace=True)
69
+ (2): Dropout(p=0.1, inplace=False)
70
+ )
71
+ (1): Linear(in_features=512, out_features=256, bias=True)
72
+ (2): Dropout(p=0.1, inplace=False)
73
+ )
74
+ (dropout_layer): Identity()
75
+ (gamma2): Identity()
76
+ )
77
+ )
78
+ (norms): ModuleList(
79
+ (0-2): 3 x LayerNorm((256,), eps=1e-05, elementwise_affine=True)
80
+ )
81
+ )
82
+ )
83
+ )
84
+ (decoder): DetectionTransformerDecoder(
85
+ (layers): ModuleList(
86
+ (0-5): 6 x DetrTransformerDecoderLayer(
87
+ (attentions): ModuleList(
88
+ (0): MultiheadAttention(
89
+ (q_proj): Linear(in_features=256, out_features=256, bias=True)
90
+ (k_proj): Linear(in_features=256, out_features=256, bias=True)
91
+ (v_proj): Linear(in_features=256, out_features=256, bias=True)
92
+ (output_proj): Linear(in_features=256, out_features=256, bias=True)
93
+ (dropout): Dropout(p=0.1, inplace=False)
94
+ )
95
+ (1): CustomMSDeformableAttention(
96
+ (sampling_offsets): Linear(in_features=256, out_features=64, bias=True)
97
+ (attention_weights): Linear(in_features=256, out_features=32, bias=True)
98
+ (value_proj): Linear(in_features=256, out_features=256, bias=True)
99
+ (output_proj): Linear(in_features=256, out_features=256, bias=True)
100
+ (dropout): Dropout(p=0.1, inplace=False)
101
+ )
102
+ )
103
+ (ffns): ModuleList(
104
+ (0): FFN(
105
+ (layers): Sequential(
106
+ (0): Sequential(
107
+ (0): Linear(in_features=256, out_features=512, bias=True)
108
+ (1): ReLU(inplace=True)
109
+ (2): Dropout(p=0.1, inplace=False)
110
+ )
111
+ (1): Linear(in_features=512, out_features=256, bias=True)
112
+ (2): Dropout(p=0.1, inplace=False)
113
+ )
114
+ (dropout_layer): Identity()
115
+ (gamma2): Identity()
116
+ )
117
+ )
118
+ (norms): ModuleList(
119
+ (0-2): 3 x LayerNorm((256,), eps=1e-05, elementwise_affine=True)
120
+ )
121
+ )
122
+ )
123
+ )
124
+ (reference_points): Linear(in_features=256, out_features=3, bias=True)
125
+ (can_bus_mlp): Sequential(
126
+ (0): Linear(in_features=18, out_features=128, bias=True)
127
+ (1): ReLU(inplace=True)
128
+ (2): Linear(in_features=128, out_features=256, bias=True)
129
+ (3): ReLU(inplace=True)
130
+ (norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
131
+ )
132
+ )
133
+ (cls_branches): ModuleList(
134
+ (0-5): 6 x Sequential(
135
+ (0): Linear(in_features=256, out_features=256, bias=True)
136
+ (1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
137
+ (2): ReLU(inplace=True)
138
+ (3): Linear(in_features=256, out_features=10, bias=True)
139
+ )
140
+ )
141
+ (reg_branches): ModuleList(
142
+ (0-5): 6 x Sequential(
143
+ (0): Linear(in_features=256, out_features=256, bias=True)
144
+ (1): ReLU()
145
+ (2): Linear(in_features=256, out_features=256, bias=True)
146
+ (3): ReLU()
147
+ (4): Linear(in_features=256, out_features=10, bias=True)
148
+ )
149
+ )
150
+ (bev_embedding): Embedding(2500, 256)
151
+ (object_query_embedding): Embedding(900, 512)
152
+ (positional_encoding): LearnedPositionalEncoding(num_feats=128, row_num_embed=50, col_num_embed=50)
153
+ )
154
+ (img_backbone): ResNet(
155
+ (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
156
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
157
+ (relu): ReLU(inplace=True)
158
+ (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
159
+ (layer1): ResLayer(
160
+ (0): Bottleneck(
161
+ (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
162
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
163
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
164
+ (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
165
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
166
+ (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
167
+ (relu): ReLU(inplace=True)
168
+ (downsample): Sequential(
169
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
170
+ (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
171
+ )
172
+ )
173
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
174
+ (1): Bottleneck(
175
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
176
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
177
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
178
+ (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
179
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
180
+ (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
181
+ (relu): ReLU(inplace=True)
182
+ )
183
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
184
+ (2): Bottleneck(
185
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
186
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
187
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
188
+ (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
189
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
190
+ (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
191
+ (relu): ReLU(inplace=True)
192
+ )
193
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
194
+ )
195
+ (layer2): ResLayer(
196
+ (0): Bottleneck(
197
+ (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
198
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
199
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
200
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
201
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
202
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
203
+ (relu): ReLU(inplace=True)
204
+ (downsample): Sequential(
205
+ (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
206
+ (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
207
+ )
208
+ )
209
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
210
+ (1): Bottleneck(
211
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
212
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
213
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
214
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
215
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
216
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
217
+ (relu): ReLU(inplace=True)
218
+ )
219
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
220
+ (2): Bottleneck(
221
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
222
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
223
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
224
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
225
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
226
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
227
+ (relu): ReLU(inplace=True)
228
+ )
229
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
230
+ (3): Bottleneck(
231
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
232
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
233
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
234
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
235
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
236
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
237
+ (relu): ReLU(inplace=True)
238
+ )
239
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
240
+ )
241
+ (layer3): ResLayer(
242
+ (0): Bottleneck(
243
+ (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
244
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
245
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
246
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
247
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
248
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
249
+ (relu): ReLU(inplace=True)
250
+ (downsample): Sequential(
251
+ (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
252
+ (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
253
+ )
254
+ )
255
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
256
+ (1): Bottleneck(
257
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
258
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
259
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
260
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
261
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
262
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
263
+ (relu): ReLU(inplace=True)
264
+ )
265
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
266
+ (2): Bottleneck(
267
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
268
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
269
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
270
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
271
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
272
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
273
+ (relu): ReLU(inplace=True)
274
+ )
275
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
276
+ (3): Bottleneck(
277
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
278
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
279
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
280
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
281
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
282
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
283
+ (relu): ReLU(inplace=True)
284
+ )
285
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
286
+ (4): Bottleneck(
287
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
288
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
289
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
290
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
291
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
292
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
293
+ (relu): ReLU(inplace=True)
294
+ )
295
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
296
+ (5): Bottleneck(
297
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
298
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
299
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
300
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
301
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
302
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
303
+ (relu): ReLU(inplace=True)
304
+ )
305
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
306
+ )
307
+ (layer4): ResLayer(
308
+ (0): Bottleneck(
309
+ (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
310
+ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
311
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
312
+ (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
313
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
314
+ (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
315
+ (relu): ReLU(inplace=True)
316
+ (downsample): Sequential(
317
+ (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
318
+ (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
319
+ )
320
+ )
321
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
322
+ (1): Bottleneck(
323
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
324
+ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
325
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
326
+ (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
327
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
328
+ (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
329
+ (relu): ReLU(inplace=True)
330
+ )
331
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
332
+ (2): Bottleneck(
333
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
334
+ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
335
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
336
+ (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
337
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
338
+ (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
339
+ (relu): ReLU(inplace=True)
340
+ )
341
+ init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
342
+ )
343
+ )
344
+ init_cfg=[{'type': 'Kaiming', 'layer': 'Conv2d'}, {'type': 'Constant', 'val': 1, 'layer': ['_BatchNorm', 'GroupNorm']}]
345
+ (img_neck): FPN(
346
+ (lateral_convs): ModuleList(
347
+ (0): ConvModule(
348
+ (conv): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
349
+ )
350
+ )
351
+ (fpn_convs): ModuleList(
352
+ (0): ConvModule(
353
+ (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
354
+ )
355
+ )
356
+ )
357
+ init_cfg={'type': 'Xavier', 'layer': 'Conv2d', 'distribution': 'uniform'}
358
+ (grid_mask): GridMask()
359
+ )
360
+ 2026/03/15 22:48:12 - bevformer - INFO - Wrapping model
361
+ 2026/03/15 22:48:12 - bevformer - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.
362
+ 2026/03/15 22:48:12 - bevformer - INFO - Hooks will be executed in the following order:
363
+ before_run:
364
+ (VERY_HIGH ) RuntimeInfoHook
365
+ (BELOW_NORMAL) LoggerHook
366
+ (VERY_LOW ) CheckpointHookV2
367
+ (VERY_LOW ) CheckpointUploader
368
+ (VERY_LOW ) CheckpointResumer
369
+ --------------------
370
+ before_train:
371
+ (VERY_HIGH ) RuntimeInfoHook
372
+ (NORMAL ) IterTimerHook
373
+ (VERY_LOW ) CheckpointHookV2
374
+ (VERY_LOW ) CheckpointUploader
375
+ (VERY_LOW ) CheckpointResumer
376
+ --------------------
377
+ before_train_epoch:
378
+ (VERY_HIGH ) RuntimeInfoHook
379
+ (NORMAL ) IterTimerHook
380
+ (NORMAL ) DistSamplerSeedHook
381
+ --------------------
382
+ before_train_iter:
383
+ (VERY_HIGH ) RuntimeInfoHook
384
+ (NORMAL ) IterTimerHook
385
+ --------------------
386
+ after_train_iter:
387
+ (VERY_HIGH ) RuntimeInfoHook
388
+ (NORMAL ) IterTimerHook
389
+ (BELOW_NORMAL) LoggerHook
390
+ (LOW ) ParamSchedulerHook
391
+ (VERY_LOW ) CheckpointHookV2
392
+ (VERY_LOW ) CheckpointUploader
393
+ --------------------
394
+ after_train_epoch:
395
+ (NORMAL ) IterTimerHook
396
+ (LOW ) ParamSchedulerHook
397
+ (VERY_LOW ) CheckpointHookV2
398
+ (VERY_LOW ) CheckpointUploader
399
+ --------------------
400
+ before_val:
401
+ (VERY_HIGH ) RuntimeInfoHook
402
+ (VERY_LOW ) CheckpointHookV2
403
+ (VERY_LOW ) CheckpointResumer
404
+ --------------------
405
+ before_val_epoch:
406
+ (NORMAL ) IterTimerHook
407
+ --------------------
408
+ before_val_iter:
409
+ (NORMAL ) IterTimerHook
410
+ --------------------
411
+ after_val_iter:
412
+ (NORMAL ) IterTimerHook
413
+ (BELOW_NORMAL) LoggerHook
414
+ --------------------
415
+ after_val_epoch:
416
+ (VERY_HIGH ) RuntimeInfoHook
417
+ (NORMAL ) IterTimerHook
418
+ (BELOW_NORMAL) LoggerHook
419
+ (LOW ) ParamSchedulerHook
420
+ (VERY_LOW ) CheckpointHookV2
421
+ (VERY_LOW ) CheckpointUploader
422
+ --------------------
423
+ after_val:
424
+ (VERY_HIGH ) RuntimeInfoHook
425
+ --------------------
426
+ after_train:
427
+ (VERY_HIGH ) RuntimeInfoHook
428
+ (VERY_LOW ) CheckpointHookV2
429
+ (VERY_LOW ) CheckpointUploader
430
+ --------------------
431
+ before_test:
432
+ (VERY_HIGH ) RuntimeInfoHook
433
+ --------------------
434
+ before_test_epoch:
435
+ (NORMAL ) IterTimerHook
436
+ --------------------
437
+ before_test_iter:
438
+ (NORMAL ) IterTimerHook
439
+ --------------------
440
+ after_test_iter:
441
+ (NORMAL ) IterTimerHook
442
+ (BELOW_NORMAL) LoggerHook
443
+ --------------------
444
+ after_test_epoch:
445
+ (VERY_HIGH ) RuntimeInfoHook
446
+ (NORMAL ) IterTimerHook
447
+ (BELOW_NORMAL) LoggerHook
448
+ --------------------
449
+ after_test:
450
+ (VERY_HIGH ) RuntimeInfoHook
451
+ --------------------
452
+ after_run:
453
+ (BELOW_NORMAL) LoggerHook
454
+ --------------------
455
+ 2026/03/15 22:48:12 - bevformer - INFO - Config:
456
+ _dim_ = 256
457
+ _ffn_dim_ = 512
458
+ _num_levels_ = 1
459
+ _pos_dim_ = 128
460
+ auto_scale_lr = dict(base_batch_size=16, enable=False)
461
+ bev_h_ = 50
462
+ bev_w_ = 50
463
+ by_epoch = False
464
+ class_names = [
465
+ 'car',
466
+ 'truck',
467
+ 'construction_vehicle',
468
+ 'bus',
469
+ 'trailer',
470
+ 'barrier',
471
+ 'motorcycle',
472
+ 'bicycle',
473
+ 'pedestrian',
474
+ 'traffic_cone',
475
+ ]
476
+ custom_hooks = [
477
+ dict(
478
+ by_epoch=False,
479
+ clean_local=False,
480
+ interval=1,
481
+ repo_id='5421Project',
482
+ type='CheckpointUploader'),
483
+ dict(repo_id='5421Project', resume_type='last', type='CheckpointResumer'),
484
+ ]
485
+ data = dict(
486
+ nonshuffler_sampler=dict(type='DistributedSampler'),
487
+ samples_per_gpu=1,
488
+ shuffler_sampler=dict(type='DistributedGroupSampler'),
489
+ test=dict(
490
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
491
+ bev_size=(
492
+ 50,
493
+ 50,
494
+ ),
495
+ classes=[
496
+ 'car',
497
+ 'truck',
498
+ 'construction_vehicle',
499
+ 'bus',
500
+ 'trailer',
501
+ 'barrier',
502
+ 'motorcycle',
503
+ 'bicycle',
504
+ 'pedestrian',
505
+ 'traffic_cone',
506
+ ],
507
+ data_root='data/nuscenes/v1.0-mini/',
508
+ frame=[
509
+ -3,
510
+ -2,
511
+ -1,
512
+ ],
513
+ modality=dict(
514
+ use_camera=True,
515
+ use_external=False,
516
+ use_lidar=False,
517
+ use_map=False,
518
+ use_radar=False),
519
+ pipeline=[
520
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
521
+ dict(
522
+ mean=[
523
+ 123.675,
524
+ 116.28,
525
+ 103.53,
526
+ ],
527
+ std=[
528
+ 58.395,
529
+ 57.12,
530
+ 57.375,
531
+ ],
532
+ to_rgb=True,
533
+ type='NormalizeMultiviewImage'),
534
+ dict(
535
+ flip=False,
536
+ img_scale=(
537
+ 800,
538
+ 450,
539
+ ),
540
+ pts_scale_ratio=[
541
+ 1.0,
542
+ ],
543
+ transforms=[
544
+ dict(
545
+ scales=[
546
+ 0.5,
547
+ ], type='RandomScaleImageMultiViewImage'),
548
+ dict(size_divisor=32, type='PadMultiViewImage'),
549
+ dict(
550
+ class_names=[
551
+ 'car',
552
+ 'truck',
553
+ 'construction_vehicle',
554
+ 'bus',
555
+ 'trailer',
556
+ 'barrier',
557
+ 'motorcycle',
558
+ 'bicycle',
559
+ 'pedestrian',
560
+ 'traffic_cone',
561
+ ],
562
+ type='CustomDefaultFormatBundle3D'),
563
+ dict(keys=[
564
+ 'img',
565
+ ], type='CustomCollect3D'),
566
+ ],
567
+ type='MultiScaleFlipAug3D'),
568
+ ],
569
+ test_mode=True,
570
+ type='CustomNuScenesDataset'),
571
+ train=dict(
572
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_train.pkl',
573
+ bev_size=(
574
+ 50,
575
+ 50,
576
+ ),
577
+ box_type_3d='LiDAR',
578
+ classes=[
579
+ 'car',
580
+ 'truck',
581
+ 'construction_vehicle',
582
+ 'bus',
583
+ 'trailer',
584
+ 'barrier',
585
+ 'motorcycle',
586
+ 'bicycle',
587
+ 'pedestrian',
588
+ 'traffic_cone',
589
+ ],
590
+ data_root='data/nuscenes/v1.0-mini/',
591
+ modality=dict(
592
+ use_camera=True,
593
+ use_external=False,
594
+ use_lidar=False,
595
+ use_map=False,
596
+ use_radar=False),
597
+ pipeline=[
598
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
599
+ dict(
600
+ type='LoadAnnotations3D',
601
+ with_bbox_3d=True,
602
+ with_label_3d=True),
603
+ dict(
604
+ point_cloud_range=[
605
+ -51.2,
606
+ -51.2,
607
+ -5.0,
608
+ 51.2,
609
+ 51.2,
610
+ 3.0,
611
+ ],
612
+ type='ObjectRangeFilter'),
613
+ dict(
614
+ classes=[
615
+ 'car',
616
+ 'truck',
617
+ 'construction_vehicle',
618
+ 'bus',
619
+ 'trailer',
620
+ 'barrier',
621
+ 'motorcycle',
622
+ 'bicycle',
623
+ 'pedestrian',
624
+ 'traffic_cone',
625
+ ],
626
+ type='ObjectNameFilter'),
627
+ dict(type='PhotoMetricDistortionMultiViewImage'),
628
+ dict(
629
+ mean=[
630
+ 123.675,
631
+ 116.28,
632
+ 103.53,
633
+ ],
634
+ std=[
635
+ 58.395,
636
+ 57.12,
637
+ 57.375,
638
+ ],
639
+ to_rgb=True,
640
+ type='NormalizeMultiviewImage'),
641
+ dict(scales=[
642
+ 0.5,
643
+ ], type='RandomScaleImageMultiViewImage'),
644
+ dict(size_divisor=32, type='PadMultiViewImage'),
645
+ dict(
646
+ class_names=[
647
+ 'car',
648
+ 'truck',
649
+ 'construction_vehicle',
650
+ 'bus',
651
+ 'trailer',
652
+ 'barrier',
653
+ 'motorcycle',
654
+ 'bicycle',
655
+ 'pedestrian',
656
+ 'traffic_cone',
657
+ ],
658
+ type='CustomDefaultFormatBundle3D'),
659
+ dict(
660
+ keys=[
661
+ 'gt_bboxes_3d',
662
+ 'gt_labels_3d',
663
+ 'img',
664
+ ],
665
+ type='CustomCollect3D'),
666
+ dict(type='TypeConverter'),
667
+ ],
668
+ queue_length=4,
669
+ test_mode=False,
670
+ type='CustomNuScenesDataset',
671
+ use_valid_flag=True),
672
+ val=dict(
673
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
674
+ bev_size=(
675
+ 50,
676
+ 50,
677
+ ),
678
+ classes=[
679
+ 'car',
680
+ 'truck',
681
+ 'construction_vehicle',
682
+ 'bus',
683
+ 'trailer',
684
+ 'barrier',
685
+ 'motorcycle',
686
+ 'bicycle',
687
+ 'pedestrian',
688
+ 'traffic_cone',
689
+ ],
690
+ data_root='data/nuscenes/v1.0-mini/',
691
+ frame=(),
692
+ frames=[
693
+ -3,
694
+ -2,
695
+ -1,
696
+ ],
697
+ modality=dict(
698
+ use_camera=True,
699
+ use_external=False,
700
+ use_lidar=False,
701
+ use_map=False,
702
+ use_radar=False),
703
+ pipeline=[
704
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
705
+ dict(
706
+ mean=[
707
+ 123.675,
708
+ 116.28,
709
+ 103.53,
710
+ ],
711
+ std=[
712
+ 58.395,
713
+ 57.12,
714
+ 57.375,
715
+ ],
716
+ to_rgb=True,
717
+ type='NormalizeMultiviewImage'),
718
+ dict(
719
+ flip=False,
720
+ img_scale=(
721
+ 800,
722
+ 450,
723
+ ),
724
+ pts_scale_ratio=[
725
+ 1.0,
726
+ ],
727
+ transforms=[
728
+ dict(
729
+ scales=[
730
+ 0.5,
731
+ ], type='RandomScaleImageMultiViewImage'),
732
+ dict(size_divisor=32, type='PadMultiViewImage'),
733
+ dict(
734
+ class_names=[
735
+ 'car',
736
+ 'truck',
737
+ 'construction_vehicle',
738
+ 'bus',
739
+ 'trailer',
740
+ 'barrier',
741
+ 'motorcycle',
742
+ 'bicycle',
743
+ 'pedestrian',
744
+ 'traffic_cone',
745
+ ],
746
+ type='CustomDefaultFormatBundle3D'),
747
+ dict(keys=[
748
+ 'img',
749
+ ], type='CustomCollect3D'),
750
+ ],
751
+ type='MultiScaleFlipAug3D'),
752
+ ],
753
+ samples_per_gpu=1,
754
+ test_mode=True,
755
+ type='CustomNuScenesDataset'),
756
+ workers_per_gpu=4)
757
+ data_root = 'data/nuscenes/v1.0-mini/'
758
+ dataset_type = 'CustomNuScenesDataset'
759
+ decoder = dict(
760
+ num_layers=6,
761
+ return_intermediate=True,
762
+ transformerlayers=dict(
763
+ attn_cfgs=[
764
+ dict(
765
+ dropout=0.1,
766
+ embed_dims=256,
767
+ num_heads=8,
768
+ type='MultiheadAttention'),
769
+ dict(
770
+ embed_dims=256,
771
+ num_levels=1,
772
+ type='CustomMSDeformableAttention'),
773
+ ],
774
+ ffn_cfgs=dict(
775
+ feedforward_channels=512, ffn_drop=0.1, num_fcs=2, type='FFN'),
776
+ operation_order=(
777
+ 'self_attn',
778
+ 'norm',
779
+ 'cross_attn',
780
+ 'norm',
781
+ 'ffn',
782
+ 'norm',
783
+ ),
784
+ type='DetrTransformerDecoderLayer'),
785
+ type='DetectionTransformerDecoder')
786
+ default_hooks = dict(
787
+ checkpoint=dict(
788
+ by_epoch=False,
789
+ interval=1,
790
+ max_keep_ckpts=1,
791
+ save_best=[
792
+ 'loss',
793
+ 'mAP',
794
+ 'NDS',
795
+ ],
796
+ type='CheckpointHookV2'),
797
+ logger=dict(
798
+ interval=1,
799
+ interval_exp_name=1000,
800
+ log_metric_by_epoch=False,
801
+ type='LoggerHook'),
802
+ param_scheduler=dict(type='ParamSchedulerHook'),
803
+ runtime_info=dict(type='RuntimeInfoHook'),
804
+ sampler_seed=dict(type='DistSamplerSeedHook'),
805
+ timer=dict(type='IterTimerHook'))
806
+ encoder = dict(
807
+ num_layers=3,
808
+ num_points_in_pillar=8,
809
+ pc_range=[
810
+ -51.2,
811
+ -51.2,
812
+ -5.0,
813
+ 51.2,
814
+ 51.2,
815
+ 3.0,
816
+ ],
817
+ return_intermediate=False,
818
+ transformerlayers=dict(
819
+ attn_cfgs=[
820
+ dict(embed_dims=256, num_levels=1, type='TemporalSelfAttention'),
821
+ dict(
822
+ deformable_attention=dict(
823
+ embed_dims=256,
824
+ num_levels=1,
825
+ num_points=8,
826
+ type='MSDeformableAttention3D'),
827
+ embed_dims=256,
828
+ pc_range=[
829
+ -51.2,
830
+ -51.2,
831
+ -5.0,
832
+ 51.2,
833
+ 51.2,
834
+ 3.0,
835
+ ],
836
+ type='SpatialCrossAttention'),
837
+ ],
838
+ ffn_cfgs=dict(
839
+ feedforward_channels=512, ffn_drop=0.1, num_fcs=2, type='FFN'),
840
+ operation_order=(
841
+ 'self_attn',
842
+ 'norm',
843
+ 'cross_attn',
844
+ 'norm',
845
+ 'ffn',
846
+ 'norm',
847
+ ),
848
+ type='BEVFormerLayer'),
849
+ type='BEVFormerEncoder')
850
+ env_cfg = dict(dist_cfg=dict(backend='nccl'))
851
+ experiment_name = 'debug'
852
+ file_client_args = dict(backend='disk')
853
+ frames = [
854
+ -3,
855
+ -2,
856
+ -1,
857
+ ]
858
+ gpu_ids = range(0, 1)
859
+ img_norm_cfg = dict(
860
+ mean=[
861
+ 123.675,
862
+ 116.28,
863
+ 103.53,
864
+ ],
865
+ std=[
866
+ 58.395,
867
+ 57.12,
868
+ 57.375,
869
+ ],
870
+ to_rgb=True)
871
+ input_modality = dict(
872
+ use_camera=True,
873
+ use_external=False,
874
+ use_lidar=False,
875
+ use_map=False,
876
+ use_radar=False)
877
+ interval = 1
878
+ launcher = 'none'
879
+ load_from = None
880
+ log_interval = 1
881
+ log_processor = dict(window_size=20)
882
+ lr_config = dict(
883
+ min_lr_ratio=0.001,
884
+ policy='CosineAnnealing',
885
+ warmup='linear',
886
+ warmup_iters=500,
887
+ warmup_ratio=0.3333333333333333)
888
+ max_epochs = 5
889
+ max_iters = 2
890
+ model = dict(
891
+ img_backbone=dict(
892
+ depth=50,
893
+ frozen_stages=1,
894
+ norm_cfg=dict(requires_grad=False, type='BN'),
895
+ norm_eval=True,
896
+ num_stages=4,
897
+ out_indices=(3, ),
898
+ style='pytorch',
899
+ type='ResNet'),
900
+ img_neck=dict(
901
+ add_extra_convs='on_output',
902
+ in_channels=[
903
+ 2048,
904
+ ],
905
+ num_outs=1,
906
+ out_channels=256,
907
+ relu_before_extra_convs=True,
908
+ start_level=0,
909
+ type='FPN'),
910
+ pretrained=dict(img='torchvision://resnet50'),
911
+ pts_bbox_head=dict(
912
+ as_two_stage=False,
913
+ bbox_coder=dict(
914
+ max_num=300,
915
+ num_classes=10,
916
+ pc_range=[
917
+ -51.2,
918
+ -51.2,
919
+ -5.0,
920
+ 51.2,
921
+ 51.2,
922
+ 3.0,
923
+ ],
924
+ post_center_range=[
925
+ -61.2,
926
+ -61.2,
927
+ -10.0,
928
+ 61.2,
929
+ 61.2,
930
+ 10.0,
931
+ ],
932
+ type='NMSFreeCoder',
933
+ voxel_size=[
934
+ 0.2,
935
+ 0.2,
936
+ 8,
937
+ ]),
938
+ bev_h=50,
939
+ bev_w=50,
940
+ in_channels=256,
941
+ loss_bbox=dict(loss_weight=0.5, type='L1Loss'),
942
+ loss_cls=dict(
943
+ alpha=0.25,
944
+ gamma=2.0,
945
+ loss_weight=2.0,
946
+ type='FocalLoss',
947
+ use_sigmoid=True),
948
+ loss_iou=dict(loss_weight=0.25, type='GIoULoss'),
949
+ num_classes=10,
950
+ num_query=900,
951
+ positional_encoding=dict(
952
+ col_num_embed=50,
953
+ num_feats=128,
954
+ row_num_embed=50,
955
+ type='LearnedPositionalEncoding'),
956
+ sync_cls_avg_factor=True,
957
+ transformer=dict(
958
+ decoder=dict(
959
+ num_layers=6,
960
+ return_intermediate=True,
961
+ transformerlayers=dict(
962
+ attn_cfgs=[
963
+ dict(
964
+ dropout=0.1,
965
+ embed_dims=256,
966
+ num_heads=8,
967
+ type='MultiheadAttention'),
968
+ dict(
969
+ embed_dims=256,
970
+ num_levels=1,
971
+ type='CustomMSDeformableAttention'),
972
+ ],
973
+ ffn_cfgs=dict(
974
+ feedforward_channels=512,
975
+ ffn_drop=0.1,
976
+ num_fcs=2,
977
+ type='FFN'),
978
+ operation_order=(
979
+ 'self_attn',
980
+ 'norm',
981
+ 'cross_attn',
982
+ 'norm',
983
+ 'ffn',
984
+ 'norm',
985
+ ),
986
+ type='DetrTransformerDecoderLayer'),
987
+ type='DetectionTransformerDecoder'),
988
+ embed_dims=256,
989
+ encoder=dict(
990
+ num_layers=3,
991
+ num_points_in_pillar=8,
992
+ pc_range=[
993
+ -51.2,
994
+ -51.2,
995
+ -5.0,
996
+ 51.2,
997
+ 51.2,
998
+ 3.0,
999
+ ],
1000
+ return_intermediate=False,
1001
+ transformerlayers=dict(
1002
+ attn_cfgs=[
1003
+ dict(
1004
+ embed_dims=256,
1005
+ num_levels=1,
1006
+ type='TemporalSelfAttention'),
1007
+ dict(
1008
+ deformable_attention=dict(
1009
+ embed_dims=256,
1010
+ num_levels=1,
1011
+ num_points=8,
1012
+ type='MSDeformableAttention3D'),
1013
+ embed_dims=256,
1014
+ pc_range=[
1015
+ -51.2,
1016
+ -51.2,
1017
+ -5.0,
1018
+ 51.2,
1019
+ 51.2,
1020
+ 3.0,
1021
+ ],
1022
+ type='SpatialCrossAttention'),
1023
+ ],
1024
+ ffn_cfgs=dict(
1025
+ feedforward_channels=512,
1026
+ ffn_drop=0.1,
1027
+ num_fcs=2,
1028
+ type='FFN'),
1029
+ operation_order=(
1030
+ 'self_attn',
1031
+ 'norm',
1032
+ 'cross_attn',
1033
+ 'norm',
1034
+ 'ffn',
1035
+ 'norm',
1036
+ ),
1037
+ type='BEVFormerLayer'),
1038
+ type='BEVFormerEncoder'),
1039
+ num_cams=6,
1040
+ num_feature_levels=1,
1041
+ rotate_prev_bev=True,
1042
+ type='PerceptionTransformer',
1043
+ use_can_bus=True,
1044
+ use_shift=True),
1045
+ type='BEVFormerHead',
1046
+ with_box_refine=True),
1047
+ train_cfg=dict(
1048
+ pts=dict(
1049
+ assigner=dict(
1050
+ cls_cost=dict(type='FocalCost', weight=2.0),
1051
+ iou_cost=dict(type='SmoothL1Cost', weight=0.25),
1052
+ pc_range=[
1053
+ -51.2,
1054
+ -51.2,
1055
+ -5.0,
1056
+ 51.2,
1057
+ 51.2,
1058
+ 3.0,
1059
+ ],
1060
+ reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
1061
+ type='HungarianAssigner3D'),
1062
+ grid_size=[
1063
+ 512,
1064
+ 512,
1065
+ 1,
1066
+ ],
1067
+ out_size_factor=4,
1068
+ point_cloud_range=[
1069
+ -51.2,
1070
+ -51.2,
1071
+ -5.0,
1072
+ 51.2,
1073
+ 51.2,
1074
+ 3.0,
1075
+ ],
1076
+ voxel_size=[
1077
+ 0.2,
1078
+ 0.2,
1079
+ 8,
1080
+ ])),
1081
+ type='BEVFormerDetector',
1082
+ use_grid_mask=True,
1083
+ video_test_mode=True)
1084
+ optim_wrapper = dict(
1085
+ optimizer=dict(lr=0.0001, type='AdamW', weight_decay=0.01),
1086
+ type='OptimWrapper')
1087
+ optimizer = dict(lr=0.0001, type='AdamW', weight_decay=0.01)
1088
+ param_scheduler = dict(
1089
+ milestones=[
1090
+ 1,
1091
+ 2,
1092
+ ], type='MultiStepLR')
1093
+ point_cloud_range = [
1094
+ -51.2,
1095
+ -51.2,
1096
+ -5.0,
1097
+ 51.2,
1098
+ 51.2,
1099
+ 3.0,
1100
+ ]
1101
+ pts_bbox_head = dict(
1102
+ as_two_stage=False,
1103
+ bbox_coder=dict(
1104
+ max_num=300,
1105
+ num_classes=10,
1106
+ pc_range=[
1107
+ -51.2,
1108
+ -51.2,
1109
+ -5.0,
1110
+ 51.2,
1111
+ 51.2,
1112
+ 3.0,
1113
+ ],
1114
+ post_center_range=[
1115
+ -61.2,
1116
+ -61.2,
1117
+ -10.0,
1118
+ 61.2,
1119
+ 61.2,
1120
+ 10.0,
1121
+ ],
1122
+ type='NMSFreeCoder',
1123
+ voxel_size=[
1124
+ 0.2,
1125
+ 0.2,
1126
+ 8,
1127
+ ]),
1128
+ bev_h=50,
1129
+ bev_w=50,
1130
+ in_channels=256,
1131
+ loss_bbox=dict(loss_weight=0.5, type='L1Loss'),
1132
+ loss_cls=dict(
1133
+ alpha=0.25,
1134
+ gamma=2.0,
1135
+ loss_weight=2.0,
1136
+ type='FocalLoss',
1137
+ use_sigmoid=True),
1138
+ loss_iou=dict(loss_weight=0.25, type='GIoULoss'),
1139
+ num_classes=10,
1140
+ num_query=900,
1141
+ positional_encoding=dict(
1142
+ col_num_embed=50,
1143
+ num_feats=128,
1144
+ row_num_embed=50,
1145
+ type='LearnedPositionalEncoding'),
1146
+ sync_cls_avg_factor=True,
1147
+ transformer=dict(
1148
+ decoder=dict(
1149
+ num_layers=6,
1150
+ return_intermediate=True,
1151
+ transformerlayers=dict(
1152
+ attn_cfgs=[
1153
+ dict(
1154
+ dropout=0.1,
1155
+ embed_dims=256,
1156
+ num_heads=8,
1157
+ type='MultiheadAttention'),
1158
+ dict(
1159
+ embed_dims=256,
1160
+ num_levels=1,
1161
+ type='CustomMSDeformableAttention'),
1162
+ ],
1163
+ ffn_cfgs=dict(
1164
+ feedforward_channels=512,
1165
+ ffn_drop=0.1,
1166
+ num_fcs=2,
1167
+ type='FFN'),
1168
+ operation_order=(
1169
+ 'self_attn',
1170
+ 'norm',
1171
+ 'cross_attn',
1172
+ 'norm',
1173
+ 'ffn',
1174
+ 'norm',
1175
+ ),
1176
+ type='DetrTransformerDecoderLayer'),
1177
+ type='DetectionTransformerDecoder'),
1178
+ embed_dims=256,
1179
+ encoder=dict(
1180
+ num_layers=3,
1181
+ num_points_in_pillar=8,
1182
+ pc_range=[
1183
+ -51.2,
1184
+ -51.2,
1185
+ -5.0,
1186
+ 51.2,
1187
+ 51.2,
1188
+ 3.0,
1189
+ ],
1190
+ return_intermediate=False,
1191
+ transformerlayers=dict(
1192
+ attn_cfgs=[
1193
+ dict(
1194
+ embed_dims=256,
1195
+ num_levels=1,
1196
+ type='TemporalSelfAttention'),
1197
+ dict(
1198
+ deformable_attention=dict(
1199
+ embed_dims=256,
1200
+ num_levels=1,
1201
+ num_points=8,
1202
+ type='MSDeformableAttention3D'),
1203
+ embed_dims=256,
1204
+ pc_range=[
1205
+ -51.2,
1206
+ -51.2,
1207
+ -5.0,
1208
+ 51.2,
1209
+ 51.2,
1210
+ 3.0,
1211
+ ],
1212
+ type='SpatialCrossAttention'),
1213
+ ],
1214
+ ffn_cfgs=dict(
1215
+ feedforward_channels=512,
1216
+ ffn_drop=0.1,
1217
+ num_fcs=2,
1218
+ type='FFN'),
1219
+ operation_order=(
1220
+ 'self_attn',
1221
+ 'norm',
1222
+ 'cross_attn',
1223
+ 'norm',
1224
+ 'ffn',
1225
+ 'norm',
1226
+ ),
1227
+ type='BEVFormerLayer'),
1228
+ type='BEVFormerEncoder'),
1229
+ num_cams=6,
1230
+ num_feature_levels=1,
1231
+ rotate_prev_bev=True,
1232
+ type='PerceptionTransformer',
1233
+ use_can_bus=True,
1234
+ use_shift=True),
1235
+ type='BEVFormerHead',
1236
+ with_box_refine=True)
1237
+ queue_length = 4
1238
+ resume = False
1239
+ scales = [
1240
+ 0.5,
1241
+ ]
1242
+ test_cfg = dict(max_iters=1)
1243
+ test_dataloader = dict(
1244
+ batch_size=1,
1245
+ collate_fn=dict(type='test_collate'),
1246
+ dataset=dict(
1247
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
1248
+ bev_size=(
1249
+ 50,
1250
+ 50,
1251
+ ),
1252
+ classes=[
1253
+ 'car',
1254
+ 'truck',
1255
+ 'construction_vehicle',
1256
+ 'bus',
1257
+ 'trailer',
1258
+ 'barrier',
1259
+ 'motorcycle',
1260
+ 'bicycle',
1261
+ 'pedestrian',
1262
+ 'traffic_cone',
1263
+ ],
1264
+ data_root='data/nuscenes/v1.0-mini/',
1265
+ frame=[
1266
+ -3,
1267
+ -2,
1268
+ -1,
1269
+ ],
1270
+ modality=dict(
1271
+ use_camera=True,
1272
+ use_external=False,
1273
+ use_lidar=False,
1274
+ use_map=False,
1275
+ use_radar=False),
1276
+ pipeline=[
1277
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
1278
+ dict(
1279
+ mean=[
1280
+ 123.675,
1281
+ 116.28,
1282
+ 103.53,
1283
+ ],
1284
+ std=[
1285
+ 58.395,
1286
+ 57.12,
1287
+ 57.375,
1288
+ ],
1289
+ to_rgb=True,
1290
+ type='NormalizeMultiviewImage'),
1291
+ dict(
1292
+ flip=False,
1293
+ img_scale=(
1294
+ 800,
1295
+ 450,
1296
+ ),
1297
+ pts_scale_ratio=[
1298
+ 1.0,
1299
+ ],
1300
+ transforms=[
1301
+ dict(
1302
+ scales=[
1303
+ 0.5,
1304
+ ], type='RandomScaleImageMultiViewImage'),
1305
+ dict(size_divisor=32, type='PadMultiViewImage'),
1306
+ dict(
1307
+ class_names=[
1308
+ 'car',
1309
+ 'truck',
1310
+ 'construction_vehicle',
1311
+ 'bus',
1312
+ 'trailer',
1313
+ 'barrier',
1314
+ 'motorcycle',
1315
+ 'bicycle',
1316
+ 'pedestrian',
1317
+ 'traffic_cone',
1318
+ ],
1319
+ type='CustomDefaultFormatBundle3D'),
1320
+ dict(keys=[
1321
+ 'img',
1322
+ ], type='CustomCollect3D'),
1323
+ ],
1324
+ type='MultiScaleFlipAug3D'),
1325
+ ],
1326
+ test_mode=True,
1327
+ type='CustomNuScenesDataset'),
1328
+ num_workers=0,
1329
+ sampler=dict(shuffle=True, type='DefaultSampler'))
1330
+ test_evaluator = dict(metrics=[
1331
+ dict(
1332
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
1333
+ data_root='data/nuscenes/v1.0-mini/',
1334
+ type='src.NuScenesMetric',
1335
+ version='v1.0-mini'),
1336
+ ])
1337
+ test_max_iters = 1
1338
+ test_pipeline = [
1339
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
1340
+ dict(
1341
+ mean=[
1342
+ 123.675,
1343
+ 116.28,
1344
+ 103.53,
1345
+ ],
1346
+ std=[
1347
+ 58.395,
1348
+ 57.12,
1349
+ 57.375,
1350
+ ],
1351
+ to_rgb=True,
1352
+ type='NormalizeMultiviewImage'),
1353
+ dict(
1354
+ flip=False,
1355
+ img_scale=(
1356
+ 800,
1357
+ 450,
1358
+ ),
1359
+ pts_scale_ratio=[
1360
+ 1.0,
1361
+ ],
1362
+ transforms=[
1363
+ dict(scales=[
1364
+ 0.5,
1365
+ ], type='RandomScaleImageMultiViewImage'),
1366
+ dict(size_divisor=32, type='PadMultiViewImage'),
1367
+ dict(
1368
+ class_names=[
1369
+ 'car',
1370
+ 'truck',
1371
+ 'construction_vehicle',
1372
+ 'bus',
1373
+ 'trailer',
1374
+ 'barrier',
1375
+ 'motorcycle',
1376
+ 'bicycle',
1377
+ 'pedestrian',
1378
+ 'traffic_cone',
1379
+ ],
1380
+ type='CustomDefaultFormatBundle3D'),
1381
+ dict(keys=[
1382
+ 'img',
1383
+ ], type='CustomCollect3D'),
1384
+ ],
1385
+ type='MultiScaleFlipAug3D'),
1386
+ ]
1387
+ train_cfg = dict(by_epoch=False, max_epochs=5, max_iters=2, val_interval=1)
1388
+ train_dataloader = dict(
1389
+ batch_size=1,
1390
+ collate_fn=dict(type='train_collate'),
1391
+ dataset=dict(
1392
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_train.pkl',
1393
+ bev_size=(
1394
+ 50,
1395
+ 50,
1396
+ ),
1397
+ box_type_3d='LiDAR',
1398
+ classes=[
1399
+ 'car',
1400
+ 'truck',
1401
+ 'construction_vehicle',
1402
+ 'bus',
1403
+ 'trailer',
1404
+ 'barrier',
1405
+ 'motorcycle',
1406
+ 'bicycle',
1407
+ 'pedestrian',
1408
+ 'traffic_cone',
1409
+ ],
1410
+ data_root='data/nuscenes/v1.0-mini/',
1411
+ modality=dict(
1412
+ use_camera=True,
1413
+ use_external=False,
1414
+ use_lidar=False,
1415
+ use_map=False,
1416
+ use_radar=False),
1417
+ pipeline=[
1418
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
1419
+ dict(
1420
+ type='LoadAnnotations3D',
1421
+ with_bbox_3d=True,
1422
+ with_label_3d=True),
1423
+ dict(
1424
+ point_cloud_range=[
1425
+ -51.2,
1426
+ -51.2,
1427
+ -5.0,
1428
+ 51.2,
1429
+ 51.2,
1430
+ 3.0,
1431
+ ],
1432
+ type='ObjectRangeFilter'),
1433
+ dict(
1434
+ classes=[
1435
+ 'car',
1436
+ 'truck',
1437
+ 'construction_vehicle',
1438
+ 'bus',
1439
+ 'trailer',
1440
+ 'barrier',
1441
+ 'motorcycle',
1442
+ 'bicycle',
1443
+ 'pedestrian',
1444
+ 'traffic_cone',
1445
+ ],
1446
+ type='ObjectNameFilter'),
1447
+ dict(type='PhotoMetricDistortionMultiViewImage'),
1448
+ dict(
1449
+ mean=[
1450
+ 123.675,
1451
+ 116.28,
1452
+ 103.53,
1453
+ ],
1454
+ std=[
1455
+ 58.395,
1456
+ 57.12,
1457
+ 57.375,
1458
+ ],
1459
+ to_rgb=True,
1460
+ type='NormalizeMultiviewImage'),
1461
+ dict(scales=[
1462
+ 0.5,
1463
+ ], type='RandomScaleImageMultiViewImage'),
1464
+ dict(size_divisor=32, type='PadMultiViewImage'),
1465
+ dict(
1466
+ class_names=[
1467
+ 'car',
1468
+ 'truck',
1469
+ 'construction_vehicle',
1470
+ 'bus',
1471
+ 'trailer',
1472
+ 'barrier',
1473
+ 'motorcycle',
1474
+ 'bicycle',
1475
+ 'pedestrian',
1476
+ 'traffic_cone',
1477
+ ],
1478
+ type='CustomDefaultFormatBundle3D'),
1479
+ dict(
1480
+ keys=[
1481
+ 'gt_bboxes_3d',
1482
+ 'gt_labels_3d',
1483
+ 'img',
1484
+ ],
1485
+ type='CustomCollect3D'),
1486
+ dict(type='TypeConverter'),
1487
+ ],
1488
+ queue_length=4,
1489
+ test_mode=False,
1490
+ type='CustomNuScenesDataset',
1491
+ use_valid_flag=True),
1492
+ num_workers=0,
1493
+ sampler=dict(shuffle=True, type='DefaultSampler'))
1494
+ train_pipeline = [
1495
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
1496
+ dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
1497
+ dict(
1498
+ point_cloud_range=[
1499
+ -51.2,
1500
+ -51.2,
1501
+ -5.0,
1502
+ 51.2,
1503
+ 51.2,
1504
+ 3.0,
1505
+ ],
1506
+ type='ObjectRangeFilter'),
1507
+ dict(
1508
+ classes=[
1509
+ 'car',
1510
+ 'truck',
1511
+ 'construction_vehicle',
1512
+ 'bus',
1513
+ 'trailer',
1514
+ 'barrier',
1515
+ 'motorcycle',
1516
+ 'bicycle',
1517
+ 'pedestrian',
1518
+ 'traffic_cone',
1519
+ ],
1520
+ type='ObjectNameFilter'),
1521
+ dict(type='PhotoMetricDistortionMultiViewImage'),
1522
+ dict(
1523
+ mean=[
1524
+ 123.675,
1525
+ 116.28,
1526
+ 103.53,
1527
+ ],
1528
+ std=[
1529
+ 58.395,
1530
+ 57.12,
1531
+ 57.375,
1532
+ ],
1533
+ to_rgb=True,
1534
+ type='NormalizeMultiviewImage'),
1535
+ dict(scales=[
1536
+ 0.5,
1537
+ ], type='RandomScaleImageMultiViewImage'),
1538
+ dict(size_divisor=32, type='PadMultiViewImage'),
1539
+ dict(
1540
+ class_names=[
1541
+ 'car',
1542
+ 'truck',
1543
+ 'construction_vehicle',
1544
+ 'bus',
1545
+ 'trailer',
1546
+ 'barrier',
1547
+ 'motorcycle',
1548
+ 'bicycle',
1549
+ 'pedestrian',
1550
+ 'traffic_cone',
1551
+ ],
1552
+ type='CustomDefaultFormatBundle3D'),
1553
+ dict(
1554
+ keys=[
1555
+ 'gt_bboxes_3d',
1556
+ 'gt_labels_3d',
1557
+ 'img',
1558
+ ], type='CustomCollect3D'),
1559
+ dict(type='TypeConverter'),
1560
+ ]
1561
+ transformer = dict(
1562
+ decoder=dict(
1563
+ num_layers=6,
1564
+ return_intermediate=True,
1565
+ transformerlayers=dict(
1566
+ attn_cfgs=[
1567
+ dict(
1568
+ dropout=0.1,
1569
+ embed_dims=256,
1570
+ num_heads=8,
1571
+ type='MultiheadAttention'),
1572
+ dict(
1573
+ embed_dims=256,
1574
+ num_levels=1,
1575
+ type='CustomMSDeformableAttention'),
1576
+ ],
1577
+ ffn_cfgs=dict(
1578
+ feedforward_channels=512, ffn_drop=0.1, num_fcs=2, type='FFN'),
1579
+ operation_order=(
1580
+ 'self_attn',
1581
+ 'norm',
1582
+ 'cross_attn',
1583
+ 'norm',
1584
+ 'ffn',
1585
+ 'norm',
1586
+ ),
1587
+ type='DetrTransformerDecoderLayer'),
1588
+ type='DetectionTransformerDecoder'),
1589
+ embed_dims=256,
1590
+ encoder=dict(
1591
+ num_layers=3,
1592
+ num_points_in_pillar=8,
1593
+ pc_range=[
1594
+ -51.2,
1595
+ -51.2,
1596
+ -5.0,
1597
+ 51.2,
1598
+ 51.2,
1599
+ 3.0,
1600
+ ],
1601
+ return_intermediate=False,
1602
+ transformerlayers=dict(
1603
+ attn_cfgs=[
1604
+ dict(
1605
+ embed_dims=256, num_levels=1,
1606
+ type='TemporalSelfAttention'),
1607
+ dict(
1608
+ deformable_attention=dict(
1609
+ embed_dims=256,
1610
+ num_levels=1,
1611
+ num_points=8,
1612
+ type='MSDeformableAttention3D'),
1613
+ embed_dims=256,
1614
+ pc_range=[
1615
+ -51.2,
1616
+ -51.2,
1617
+ -5.0,
1618
+ 51.2,
1619
+ 51.2,
1620
+ 3.0,
1621
+ ],
1622
+ type='SpatialCrossAttention'),
1623
+ ],
1624
+ ffn_cfgs=dict(
1625
+ feedforward_channels=512, ffn_drop=0.1, num_fcs=2, type='FFN'),
1626
+ operation_order=(
1627
+ 'self_attn',
1628
+ 'norm',
1629
+ 'cross_attn',
1630
+ 'norm',
1631
+ 'ffn',
1632
+ 'norm',
1633
+ ),
1634
+ type='BEVFormerLayer'),
1635
+ type='BEVFormerEncoder'),
1636
+ num_cams=6,
1637
+ num_feature_levels=1,
1638
+ rotate_prev_bev=True,
1639
+ type='PerceptionTransformer',
1640
+ use_can_bus=True,
1641
+ use_shift=True)
1642
+ val_cfg = dict(max_iters=1)
1643
+ val_dataloader = dict(
1644
+ batch_size=1,
1645
+ collate_fn=dict(type='test_collate'),
1646
+ dataset=dict(
1647
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
1648
+ bev_size=(
1649
+ 50,
1650
+ 50,
1651
+ ),
1652
+ classes=[
1653
+ 'car',
1654
+ 'truck',
1655
+ 'construction_vehicle',
1656
+ 'bus',
1657
+ 'trailer',
1658
+ 'barrier',
1659
+ 'motorcycle',
1660
+ 'bicycle',
1661
+ 'pedestrian',
1662
+ 'traffic_cone',
1663
+ ],
1664
+ data_root='data/nuscenes/v1.0-mini/',
1665
+ frame=(),
1666
+ frames=[
1667
+ -3,
1668
+ -2,
1669
+ -1,
1670
+ ],
1671
+ modality=dict(
1672
+ use_camera=True,
1673
+ use_external=False,
1674
+ use_lidar=False,
1675
+ use_map=False,
1676
+ use_radar=False),
1677
+ pipeline=[
1678
+ dict(to_float32=True, type='LoadMultiViewImageFromFiles'),
1679
+ dict(
1680
+ mean=[
1681
+ 123.675,
1682
+ 116.28,
1683
+ 103.53,
1684
+ ],
1685
+ std=[
1686
+ 58.395,
1687
+ 57.12,
1688
+ 57.375,
1689
+ ],
1690
+ to_rgb=True,
1691
+ type='NormalizeMultiviewImage'),
1692
+ dict(
1693
+ flip=False,
1694
+ img_scale=(
1695
+ 800,
1696
+ 450,
1697
+ ),
1698
+ pts_scale_ratio=[
1699
+ 1.0,
1700
+ ],
1701
+ transforms=[
1702
+ dict(
1703
+ scales=[
1704
+ 0.5,
1705
+ ], type='RandomScaleImageMultiViewImage'),
1706
+ dict(size_divisor=32, type='PadMultiViewImage'),
1707
+ dict(
1708
+ class_names=[
1709
+ 'car',
1710
+ 'truck',
1711
+ 'construction_vehicle',
1712
+ 'bus',
1713
+ 'trailer',
1714
+ 'barrier',
1715
+ 'motorcycle',
1716
+ 'bicycle',
1717
+ 'pedestrian',
1718
+ 'traffic_cone',
1719
+ ],
1720
+ type='CustomDefaultFormatBundle3D'),
1721
+ dict(keys=[
1722
+ 'img',
1723
+ ], type='CustomCollect3D'),
1724
+ ],
1725
+ type='MultiScaleFlipAug3D'),
1726
+ ],
1727
+ samples_per_gpu=1,
1728
+ test_mode=True,
1729
+ type='CustomNuScenesDataset'),
1730
+ num_workers=0,
1731
+ sampler=dict(shuffle=True, type='DefaultSampler'))
1732
+ val_evaluator = dict(metrics=[
1733
+ dict(
1734
+ ann_file='data/nuscenes/v1.0-mini/nuscenes_infos_temporal_val.pkl',
1735
+ classes=[
1736
+ 'car',
1737
+ 'truck',
1738
+ 'construction_vehicle',
1739
+ 'bus',
1740
+ 'trailer',
1741
+ 'barrier',
1742
+ 'motorcycle',
1743
+ 'bicycle',
1744
+ 'pedestrian',
1745
+ 'traffic_cone',
1746
+ ],
1747
+ data_root='data/nuscenes/v1.0-mini/',
1748
+ jsonfile_prefix='results',
1749
+ modality=dict(
1750
+ use_camera=True,
1751
+ use_external=False,
1752
+ use_lidar=False,
1753
+ use_map=False,
1754
+ use_radar=False),
1755
+ plot_every_run=True,
1756
+ plot_examples=1,
1757
+ type='src.NuScenesMetric',
1758
+ version='v1.0-mini'),
1759
+ ])
1760
+ val_interval = 1
1761
+ val_max_iters = 1
1762
+ version = 'v1.0-mini'
1763
+ visualizer = dict(
1764
+ type='Visualizer',
1765
+ vis_backends=[
1766
+ dict(type='LocalVisBackend'),
1767
+ dict(type='TensorboardVisBackend'),
1768
+ ])
1769
+ voxel_size = [
1770
+ 0.2,
1771
+ 0.2,
1772
+ 8,
1773
+ ]
1774
+ work_dir = 'experiment'
1775
+
1776
+ 2026/03/15 22:48:13 - bevformer - INFO - See full config in 'experiment/debug/bevformer_tiny_test.py'.
1777
+ 2026/03/15 22:48:15 - bevformer - INFO - Checkpoints will be saved to 'experiment/debug' after every 1 steps.
1778
+ 2026/03/15 22:48:15 - bevformer - INFO - Initialize best checkpoints by train phase.
1779
+ 2026/03/15 22:48:15 - bevformer - INFO - Set best path for 'loss' None.
1780
+ 2026/03/15 22:48:15 - bevformer - INFO - Set best path for 'mAP' None.
1781
+ 2026/03/15 22:48:15 - bevformer - INFO - Set best path for 'NDS' None.
1782
+ 2026/03/15 22:48:15 - bevformer - INFO - The best checkpoints will be saved to 'experiment/debug' based on ['loss', 'mAP', 'NDS'] with rules ['less', 'greater', 'greater'] after every 1 steps.
1783
+ 2026/03/15 22:48:15 - bevformer - INFO - Keep maximum 1 checkpoints in local.
1784
+ 2026/03/15 22:48:15 - bevformer - INFO - Checkpoints will be pushed to repo 'https://huggingface.co/5421Project/debug' after every 1 steps.
1785
+ 2026/03/15 22:48:39 - bevformer - INFO - Epoch(train) [1][ 1/323] lr: 1.0000e-04 eta: 0:00:24 time: 24.1245 data_time: 1.0970 loss: 58.3901 loss_cls: 2.3029 loss_bbox: 7.7319 d0.loss_cls: 2.2785 d0.loss_bbox: 7.3663 d1.loss_cls: 2.2856 d1.loss_bbox: 7.4108 d2.loss_cls: 2.2401 d2.loss_bbox: 7.3151 d3.loss_cls: 2.2908 d3.loss_bbox: 7.3724 d4.loss_cls: 2.3867 d4.loss_bbox: 7.4091
1786
+ 2026/03/15 22:48:39 - bevformer - INFO - Saving checkpoint at 1 iterations
1787
+ 2026/03/15 22:48:40 - bevformer - INFO - Saving best checkpoints...
1788
+ 2026/03/15 22:48:40 - bevformer - INFO - Set 'best_score_loss' to +/-inf as it is not in message hub.
1789
+ 2026/03/15 22:48:40 - bevformer - INFO - [loss]: Best score: inf, current score: 58.39009094238281
1790
+ 2026/03/15 22:48:41 - bevformer - INFO - The best checkpoint with 58.3901 loss at 1 iter is saved to 'best_loss_iter_1.pth'.
1791
+ 2026/03/15 22:48:41 - bevformer - INFO - Resaving checkpoint at 1 iter...
1792
+ 2026/03/15 22:48:42 - bevformer - INFO - Pushing checkpoint at 1 steps...
1793
+ 2026/03/15 22:48:52 - bevformer - INFO - Pushed last checkpoint 'experiment/debug/iter_1.pth' to repo
1794
+ 2026/03/15 22:48:56 - bevformer - INFO - Pushed best checkpoint 'best_loss_iter_1.pth' of [loss]...
1795
+ 2026/03/15 22:48:59 - bevformer - INFO - Epoch(val) [0][ 1/81] eta: 0:02:55 time: 2.1911 data_time: 0.6630
1796
+ 2026/03/15 22:49:10 - bevformer - INFO - Epoch(val) [0][81/81] NDS: 0.0185 mAP: 0.0002 data_time: 0.6630 time: 2.1911
1797
+ 2026/03/15 22:49:10 - bevformer - INFO - Save best checkpoints after val epoch.
1798
+ 2026/03/15 22:49:10 - bevformer - INFO - Saving best checkpoints...
1799
+ 2026/03/15 22:49:10 - bevformer - INFO - Set 'best_score_mAP' to +/-inf as it is not in message hub.
1800
+ 2026/03/15 22:49:10 - bevformer - INFO - [mAP]: Best score: -inf, current score: 0.00023853881585173065
1801
+ 2026/03/15 22:49:10 - bevformer - INFO - The best checkpoint with 0.0002 mAP at 1 iter is saved to 'best_mAP_iter_1.pth'.
1802
+ 2026/03/15 22:49:10 - bevformer - INFO - Set 'best_score_NDS' to +/-inf as it is not in message hub.
1803
+ 2026/03/15 22:49:10 - bevformer - INFO - [NDS]: Best score: -inf, current score: 0.018478272642075605
1804
+ 2026/03/15 22:49:11 - bevformer - INFO - The best checkpoint with 0.0185 NDS at 1 iter is saved to 'best_NDS_iter_1.pth'.
1805
+ 2026/03/15 22:49:11 - bevformer - INFO - Resaving checkpoint at 1 iter...
1806
+ 2026/03/15 22:49:14 - bevformer - INFO - Pushed best checkpoint 'best_mAP_iter_1.pth' of [mAP]...
1807
+ 2026/03/15 22:49:16 - bevformer - INFO - Pushed best checkpoint 'best_NDS_iter_1.pth' of [NDS]...
1808
+ 2026/03/15 22:49:40 - bevformer - INFO - Epoch(train) [1][ 2/323] lr: 1.0000e-04 eta: 0:00:00 time: 32.7943 data_time: 10.6893 loss: 54.7058 loss_cls: 2.2897 loss_bbox: 7.1520 d0.loss_cls: 2.2585 d0.loss_bbox: 6.7714 d1.loss_cls: 2.2267 d1.loss_bbox: 6.7523 d2.loss_cls: 2.1501 d2.loss_bbox: 6.7822 d3.loss_cls: 2.2182 d3.loss_bbox: 6.8959 d4.loss_cls: 2.2799 d4.loss_bbox: 6.9290
1809
+ 2026/03/15 22:49:40 - bevformer - INFO - Saving checkpoint at 2 iterations
1810
+ 2026/03/15 22:49:42 - bevformer - INFO - Saving best checkpoints...
1811
+ 2026/03/15 22:49:42 - bevformer - INFO - Got best score ['loss'] from message hub
1812
+ 2026/03/15 22:49:42 - bevformer - INFO - [loss]: Best score: 58.39009094238281, current score: 51.02156066894531
1813
+ 2026/03/15 22:49:42 - bevformer - INFO - The previous best checkpoint 'experiment/debug/best_loss_iter_1.pth' is removed
1814
+ 2026/03/15 22:49:42 - bevformer - INFO - The best checkpoint with 51.0216 loss at 2 iter is saved to 'best_loss_iter_2.pth'.
1815
+ 2026/03/15 22:49:42 - bevformer - INFO - Resaving checkpoint at 2 iter...
1816
+ 2026/03/15 22:49:44 - bevformer - INFO - Pushing checkpoint at 2 steps...
1817
+ 2026/03/15 22:49:47 - bevformer - INFO - Pushed last checkpoint 'experiment/debug/iter_2.pth' to repo
1818
+ 2026/03/15 22:49:48 - bevformer - INFO - Removed 'iter_1.pth' from repo
1819
+ 2026/03/15 22:49:53 - bevformer - INFO - Pushed best checkpoint 'best_loss_iter_2.pth' of [loss]...
1820
+ 2026/03/15 22:49:55 - bevformer - INFO - Removed 'best_loss_iter_1.pth' from repo
1821
+ 2026/03/15 22:50:00 - bevformer - INFO - Epoch(val) [0][ 1/81] eta: 0:03:12 time: 2.3014 data_time: 0.7300
1822
+ 2026/03/15 22:50:11 - bevformer - INFO - Epoch(val) [0][81/81] NDS: 0.0178 mAP: 0.0002 data_time: 0.7300 time: 2.3014
1823
+ 2026/03/15 22:50:11 - bevformer - INFO - Save best checkpoints after val epoch.
1824
+ 2026/03/15 22:50:11 - bevformer - INFO - Saving best checkpoints...
1825
+ 2026/03/15 22:50:11 - bevformer - INFO - Got best score ['mAP'] from message hub
1826
+ 2026/03/15 22:50:11 - bevformer - INFO - [mAP]: Best score: 0.00023853881585173065, current score: 0.0002481977075675005
1827
+ 2026/03/15 22:50:11 - bevformer - INFO - The previous best checkpoint 'experiment/debug/best_mAP_iter_1.pth' is removed
1828
+ 2026/03/15 22:50:12 - bevformer - INFO - The best checkpoint with 0.0002 mAP at 2 iter is saved to 'best_mAP_iter_2.pth'.
1829
+ 2026/03/15 22:50:12 - bevformer - INFO - Got best score ['NDS'] from message hub
1830
+ 2026/03/15 22:50:12 - bevformer - INFO - [NDS]: Best score: 0.018478272642075605, current score: 0.017786070826831646
1831
+ 2026/03/15 22:50:12 - bevformer - INFO - Resaving checkpoint at 2 iter...
1832
+ 2026/03/15 22:50:15 - bevformer - INFO - Pushed best checkpoint 'best_mAP_iter_2.pth' of [mAP]...
1833
+ 2026/03/15 22:50:16 - bevformer - INFO - Removed 'best_mAP_iter_1.pth' from repo
1834
+ 2026/03/15 22:50:17 - bevformer - INFO - Pushing visualizing data and safetensors to repo after training...