Yisen-Feng commited on
Commit
51a9675
·
verified ·
1 Parent(s): f44c7c5

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/config.txt +228 -0
  2. ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/eval_results.txt +110 -0
  3. ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/EVAL_GPU_0_all_losses_cls_loss/events.out.tfevents.1729874043.autodl-container-b3ec4da47b-bc5fbea1.663163.4 +3 -0
  4. ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/EVAL_GPU_0_all_losses_reg_loss/events.out.tfevents.1729874043.autodl-container-b3ec4da47b-bc5fbea1.663163.5 +3 -0
  5. ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/EVAL_GPU_0_all_losses_vtm_loss/events.out.tfevents.1729874043.autodl-container-b3ec4da47b-bc5fbea1.663163.6 +3 -0
  6. ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/events.out.tfevents.1729871205.autodl-container-b3ec4da47b-bc5fbea1.663163.0 +3 -0
  7. ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/events.out.tfevents.1729871205.autodl-container-b3ec4da47b-bc5fbea1.663164.0 +3 -0
  8. ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/train_GPU_0_all_losses_cls_loss/events.out.tfevents.1729871427.autodl-container-b3ec4da47b-bc5fbea1.663163.1 +3 -0
  9. ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/train_GPU_0_all_losses_reg_loss/events.out.tfevents.1729871427.autodl-container-b3ec4da47b-bc5fbea1.663163.2 +3 -0
  10. ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/train_GPU_0_all_losses_vtm_loss/events.out.tfevents.1729871427.autodl-container-b3ec4da47b-bc5fbea1.663163.3 +3 -0
  11. ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/model_3_21.966959215281364.pth.tar +3 -0
  12. ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/nlq_predictions_epoch_val_top10_3.json +0 -0
  13. ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/nlq_predictions_epoch_val_top10_3_noscore.json +0 -0
  14. ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/run.sh +1 -0
  15. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/config.txt +226 -0
  16. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/eval_results.txt +44 -0
  17. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/EVAL_GPU_0_all_losses_cls_loss/events.out.tfevents.1728637028.autodl-container-b3ec4da47b-bc5fbea1.33573.4 +3 -0
  18. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/EVAL_GPU_0_all_losses_reg_loss/events.out.tfevents.1728637028.autodl-container-b3ec4da47b-bc5fbea1.33573.5 +3 -0
  19. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/EVAL_GPU_0_all_losses_vtm_loss/events.out.tfevents.1728637028.autodl-container-b3ec4da47b-bc5fbea1.33573.6 +3 -0
  20. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/events.out.tfevents.1728632235.autodl-container-b3ec4da47b-bc5fbea1.31554.0 +3 -0
  21. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/events.out.tfevents.1728632235.autodl-container-b3ec4da47b-bc5fbea1.31555.0 +3 -0
  22. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/events.out.tfevents.1728632793.autodl-container-b3ec4da47b-bc5fbea1.33573.0 +3 -0
  23. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/events.out.tfevents.1728632793.autodl-container-b3ec4da47b-bc5fbea1.33574.0 +3 -0
  24. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/train_GPU_0_all_losses_cls_loss/events.out.tfevents.1728633033.autodl-container-b3ec4da47b-bc5fbea1.33573.1 +3 -0
  25. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/train_GPU_0_all_losses_reg_loss/events.out.tfevents.1728633033.autodl-container-b3ec4da47b-bc5fbea1.33573.2 +3 -0
  26. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/train_GPU_0_all_losses_vtm_loss/events.out.tfevents.1728633033.autodl-container-b3ec4da47b-bc5fbea1.33573.3 +3 -0
  27. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/model_2_26.834358523725836.pth.tar +3 -0
  28. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/nlq_predictions_epoch_val_top10_2.json +0 -0
  29. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/nlq_predictions_epoch_val_top10_2_noscore.json +0 -0
  30. ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/run.sh +1 -0
  31. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/config.txt +223 -0
  32. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/eval_results.txt +56 -0
  33. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/events.out.tfevents.1726904924.autodl-container-b3ec4da47b-bc5fbea1.458057.0 +3 -0
  34. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/events.out.tfevents.1726904924.autodl-container-b3ec4da47b-bc5fbea1.458059.0 +3 -0
  35. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/events.out.tfevents.1726904924.autodl-container-b3ec4da47b-bc5fbea1.458060.0 +3 -0
  36. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/events.out.tfevents.1726906438.autodl-container-b3ec4da47b-bc5fbea1.460310.0 +3 -0
  37. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/events.out.tfevents.1726906439.autodl-container-b3ec4da47b-bc5fbea1.460307.0 +3 -0
  38. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/events.out.tfevents.1726906439.autodl-container-b3ec4da47b-bc5fbea1.460308.0 +3 -0
  39. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/events.out.tfevents.1726906439.autodl-container-b3ec4da47b-bc5fbea1.460309.0 +3 -0
  40. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/train_GPU_0_all_losses_cls_loss/events.out.tfevents.1726906838.autodl-container-b3ec4da47b-bc5fbea1.460307.1 +3 -0
  41. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/train_GPU_0_all_losses_reg_loss/events.out.tfevents.1726906838.autodl-container-b3ec4da47b-bc5fbea1.460307.2 +3 -0
  42. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/train_GPU_0_all_losses_vtm_loss/events.out.tfevents.1726906838.autodl-container-b3ec4da47b-bc5fbea1.460307.3 +3 -0
  43. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/nlq_predictions_epoch_val_top10_6.json +0 -0
  44. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/nlq_predictions_epoch_val_top10_6_Bayesian.json +0 -0
  45. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/nlq_predictions_epoch_val_top10_6_noscore.json +0 -0
  46. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/nlq_predictions_epoch_val_top10_6_noscore_Bayesian.json +0 -0
  47. goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/run.sh +1 -0
  48. pretrain_naq/egovlp/model_5_pretrain_egovlp.pth.tar +3 -0
  49. pretrain_naq/internvideo/model_7_pretrain.pth.tar +3 -0
  50. tacos/c3d/scratch/tacos_c3d_glove_weight1_5e-5_objectmambafinetune150/config.txt +228 -0
ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/config.txt ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {'dataset': {'classname_feat_concat': 'only',
2
+ 'classname_feat_dir': '/root/autodl-tmp/data/ego4d/nlq/classname-clip-base/a_photo_of.pt',
3
+ 'default_fps': 30,
4
+ 'downsample_rate': 1,
5
+ 'enable_temporal_jittering': False,
6
+ 'feat_stride': 16.0,
7
+ 'fix_video_frames': 0,
8
+ 'input_txt_dim': 512,
9
+ 'input_vid_dim': 256,
10
+ 'json_file': 'ego4d_data/nlq_v1/ego4d_nlq_val_v1_lemma.jsonl',
11
+ 'lavila_caption_dir': '/root/autodl-tmp/data/ego4d/nlq/lavila/narration',
12
+ 'max_seq_len': 2560,
13
+ 'num_classes': 1,
14
+ 'num_frames': 16.0,
15
+ 'object_feat_dir': '/root/autodl-tmp/data/ego4d/nlq/co-detr/class-score0.6-minnum10-lmdb',
16
+ 'object_feat_type': 'class-score',
17
+ 'text_feat_dir': '/root/autodl-tmp/data/ego4d/nlq/SnAG/nlq_v1_clip_token_features',
18
+ 'train_jsonl_file': 'ego4d_data/nlq_v1/ego4d_nlq_train_v1_lemma_clean.jsonl',
19
+ 'val_jsonl_file': 'ego4d_data/nlq_v1/ego4d_nlq_val_v1_lemma.jsonl',
20
+ 'val_text_feat_dir': '/root/autodl-tmp/data/ego4d/nlq/SnAG/nlq_v1_clip_token_features',
21
+ 'video_feat_dir': '/root/autodl-tmp/data/ego4d/nlq/egovlp_lmdb'},
22
+ 'dataset_name': 'ego4d_multitask',
23
+ 'devices': 'cuda:0',
24
+ 'init_rand_seed': 12345678,
25
+ 'loader': {'batch_size': 4, 'num_workers': 4},
26
+ 'model': {'backbone_arch': [2, 4, 4, 0, 6],
27
+ 'backbone_type': 'ObjectMambaTransformer',
28
+ 'embd_dim': 512,
29
+ 'embd_kernel_size': 3,
30
+ 'embd_with_ln': True,
31
+ 'fpn_dim': 512,
32
+ 'fpn_start_level': 0,
33
+ 'fpn_type': 'identity',
34
+ 'fpn_with_ln': True,
35
+ 'generator': {'generator_type': 'point'},
36
+ 'head_dim': 512,
37
+ 'head_kernel_size': 3,
38
+ 'head_num_layers': 3,
39
+ 'head_with_ln': True,
40
+ 'input_txt_dim': 512,
41
+ 'input_vid_dim': 256,
42
+ 'max_buffer_len_factor': 4.0,
43
+ 'max_query': 230,
44
+ 'max_seq_len': 2560,
45
+ 'max_shot_num': 1800,
46
+ 'multiscale_encoder_cfg': [{'layer_cfg': {'mha_win_size': 9,
47
+ 'n_ds_strides': [2, 2],
48
+ 'n_embd': 256,
49
+ 'n_head': 4,
50
+ 'path_pdrop': 0.1},
51
+ 'layer_num': 6,
52
+ 'layer_type': 'TransformerBlock'},
53
+ {'layer_cfg': {'in_channels': [256,
54
+ 256,
55
+ 256,
56
+ 256,
57
+ 256,
58
+ 256,
59
+ 256],
60
+ 'out_channel': 256},
61
+ 'layer_num': 1,
62
+ 'layer_type': 'FPNLayernorm'}],
63
+ 'n_head': 4,
64
+ 'n_mha_win_size': 9,
65
+ 'nlq_heads_cfg': {'center_sample_radius': 1.5,
66
+ 'cls_head_cfg': {'empty_cls': [],
67
+ 'feat_dim': 256,
68
+ 'input_dim': 256,
69
+ 'kernel_size': 3,
70
+ 'num_classes': 1,
71
+ 'num_layers': 3,
72
+ 'prior_prob': 0.01,
73
+ 'with_ln': True},
74
+ 'duration_thresh': 0.001,
75
+ 'iou_threshold': 0.1,
76
+ 'loss_normalizer': 200,
77
+ 'loss_normalizer_momentum': 0.9,
78
+ 'max_seg_num': 5,
79
+ 'min_score': 0.001,
80
+ 'pre_nms_thresh': 0.001,
81
+ 'pre_nms_topk': 2000,
82
+ 'reg_head_cfg': {'feat_dim': 256,
83
+ 'fpn_levels': 7,
84
+ 'input_dim': 256,
85
+ 'kernel_size': 3,
86
+ 'num_layers': 3,
87
+ 'with_ln': True},
88
+ 'reg_loss_weight': 1.0,
89
+ 'train_label_smoothing': 0.1},
90
+ 'num_classes': 1,
91
+ 'obj_encoder_cfg': [{'layer_cfg': {'act': 'relu',
92
+ 'n_in': 512,
93
+ 'n_out': 256,
94
+ 'num_layer': 2},
95
+ 'layer_num': 1,
96
+ 'layer_type': 'MaskedConv1DLayer'},
97
+ {'layer_cfg': {'n_embd': 256, 'path_pdrop': 0.1},
98
+ 'layer_num': 4,
99
+ 'layer_type': 'ObjectEncoderBlock'}],
100
+ 'object_dim': 512,
101
+ 'object_use_cross_model': True,
102
+ 'object_win_size': 1,
103
+ 'regression_range': [[0, 4],
104
+ [2, 8],
105
+ [4, 16],
106
+ [8, 32],
107
+ [16, 64],
108
+ [32, 128],
109
+ [64, 10000]],
110
+ 'scale_factor': 2,
111
+ 'tasks': ['NLQ', 'VTM'],
112
+ 'test_cfg': {'duration_thresh': 0.001,
113
+ 'ext_score_file': None,
114
+ 'iou_threshold': 0.1,
115
+ 'max_seg_num': 5,
116
+ 'min_score': 0.001,
117
+ 'multiclass_nms': True,
118
+ 'nms_method': 'soft',
119
+ 'nms_sigma': 0.75,
120
+ 'pre_nms_thresh': 0.001,
121
+ 'pre_nms_topk': 2000,
122
+ 'test_num': 1,
123
+ 'test_start_epoch': 2,
124
+ 'voting_thresh': 0.9},
125
+ 'text_encoder_cfg': [{'layer_cfg': {'act': 'relu',
126
+ 'n_in': 512,
127
+ 'n_out': 256,
128
+ 'num_layer': 2},
129
+ 'layer_num': 1,
130
+ 'layer_type': 'MaskedConv1DLayer'},
131
+ {'layer_cfg': {'n_embd': 256,
132
+ 'n_head': 4,
133
+ 'path_pdrop': 0.1},
134
+ 'layer_num': 4,
135
+ 'layer_type': 'TransformerBlock'}],
136
+ 'train_cfg': {'box_loss_weight': 5.0,
137
+ 'center_sample': 'radius',
138
+ 'center_sample_radius': 1.5,
139
+ 'clip_grad_l2norm': 1.0,
140
+ 'cls_prior_prob': 0.01,
141
+ 'dropout': 0.0,
142
+ 'droppath': 0.1,
143
+ 'head_empty_cls': [],
144
+ 'init_loss_norm': 200,
145
+ 'iou_loss_weight': 1.0,
146
+ 'label_smoothing': 0.1,
147
+ 'loss_weight': 1.0,
148
+ 'mamba_arch': ['bimamba1', 'mlp', 'obj'],
149
+ 'num_decoder_layer': 6},
150
+ 'use_abs_pe': True,
151
+ 'use_lmha_in_fpn': True,
152
+ 'use_rel_pe': False,
153
+ 'video_encoder_cfg': [{'layer_cfg': {'act': 'relu',
154
+ 'kernel_size': 3,
155
+ 'n_hidden': 256,
156
+ 'n_in': 256,
157
+ 'n_out': 256,
158
+ 'num_layer': 2},
159
+ 'layer_num': 1,
160
+ 'layer_type': 'MaskedConv1DLayer'},
161
+ {'layer_cfg': {'mamba_arch': ['bimamba1',
162
+ 'mlp',
163
+ 'obj'],
164
+ 'n_embd': 256,
165
+ 'n_head': 4,
166
+ 'path_pdrop': 0.1},
167
+ 'layer_num': 4,
168
+ 'layer_type': 'ObjectMambaBlock'}],
169
+ 'vtm_heads_cfg': {'loss_weight': 1.5,
170
+ 'multiscale': False,
171
+ 'shot_aggregator_cfg': {'layer_cfg': {'cross_mixer_cfg': {'block_cfg': {'n_embd': 256,
172
+ 'n_head': 4},
173
+ 'block_type': 'MaskedMHA'},
174
+ 'num_layer': 1,
175
+ 'path_pdrop': 0.1,
176
+ 'query_num': 5,
177
+ 'self_mixer_cfg': {'block_cfg': {'n_embd': 256,
178
+ 'n_head': 4},
179
+ 'block_type': 'MaskedMHCA'}},
180
+ 'layer_type': 'QFormerLayer'},
181
+ 'similarity_head_cfg': {'layer_cfg': {'con_dim': 256,
182
+ 'x_dim': 256,
183
+ 'y_dim': 256},
184
+ 'layer_type': 'Cosine'},
185
+ 'soft_label': False}},
186
+ 'model_name': 'MultiTaskArch',
187
+ 'opt': {'backbone_lr_weight': 1,
188
+ 'epochs': 6,
189
+ 'learning_rate': 0.0004,
190
+ 'momentum': 0.9,
191
+ 'schedule_gamma': 0.1,
192
+ 'schedule_steps': [],
193
+ 'schedule_type': 'cosine',
194
+ 'type': 'AdamW',
195
+ 'warmup': True,
196
+ 'warmup_epochs': 4,
197
+ 'weight_decay': 0.05},
198
+ 'output_folder': '/root/autodl-tmp/model/GroundNLQ/ckpt/',
199
+ 'test_cfg': {'duration_thresh': 0.001,
200
+ 'ext_score_file': None,
201
+ 'iou_threshold': 0.1,
202
+ 'max_seg_num': 5,
203
+ 'min_score': 0.001,
204
+ 'multiclass_nms': True,
205
+ 'nms_method': 'soft',
206
+ 'nms_sigma': 0.75,
207
+ 'pre_nms_thresh': 0.001,
208
+ 'pre_nms_topk': 2000,
209
+ 'test_num': 1,
210
+ 'test_start_epoch': 2,
211
+ 'voting_thresh': 0.9},
212
+ 'track': 'goal_step',
213
+ 'train_cfg': {'box_loss_weight': 5.0,
214
+ 'center_sample': 'radius',
215
+ 'center_sample_radius': 1.5,
216
+ 'clip_grad_l2norm': 1.0,
217
+ 'cls_prior_prob': 0.01,
218
+ 'dropout': 0.0,
219
+ 'droppath': 0.1,
220
+ 'head_empty_cls': [],
221
+ 'init_loss_norm': 200,
222
+ 'iou_loss_weight': 1.0,
223
+ 'label_smoothing': 0.1,
224
+ 'loss_weight': 1.0,
225
+ 'mamba_arch': ['bimamba1', 'mlp', 'obj'],
226
+ 'num_decoder_layer': 6},
227
+ 'train_split': ['training'],
228
+ 'val_split': ['validation']}
ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/eval_results.txt ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ +----------+----------+----------+----------+----------+----------+
2
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
3
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
4
+ +----------+----------+----------+----------+----------+----------+
5
+ | 8.00 | 4.31 | 23.26 | 12.13 | 23.26 | 12.13 |
6
+ +----------+----------+----------+----------+----------+----------+avgiou=8.002065
7
+ epoch0
8
+ final_loss 0.67 (1.66)
9
+ cls_loss 0.24 (0.91)
10
+ reg_loss 0.17 (0.39)
11
+ vtm_loss 0.18 (0.24)
12
+ +----------+----------+----------+----------+----------+----------+
13
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
14
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
15
+ +----------+----------+----------+----------+----------+----------+
16
+ | 20.03 | 13.29 | 41.35 | 29.04 | 41.35 | 29.04 |
17
+ +----------+----------+----------+----------+----------+----------+avgiou=20.030976
18
+ epoch1
19
+ final_loss 0.71 (1.61)
20
+ cls_loss 0.29 (0.90)
21
+ reg_loss 0.15 (0.37)
22
+ vtm_loss 0.18 (0.23)
23
+ +----------+----------+----------+----------+----------+----------+
24
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
25
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
26
+ +----------+----------+----------+----------+----------+----------+
27
+ | 21.35 | 14.61 | 43.78 | 32.11 | 43.78 | 32.11 |
28
+ +----------+----------+----------+----------+----------+----------+avgiou=21.347445
29
+ epoch2
30
+ final_loss 0.63 (1.59)
31
+ cls_loss 0.22 (0.89)
32
+ reg_loss 0.15 (0.36)
33
+ vtm_loss 0.18 (0.23)
34
+ +----------+----------+----------+----------+----------+----------+
35
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
36
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
37
+ +----------+----------+----------+----------+----------+----------+
38
+ | 21.97 | 15.20 | 44.61 | 32.96 | 44.61 | 32.96 |
39
+ +----------+----------+----------+----------+----------+----------+avgiou=21.966959
40
+ epoch3
41
+ final_loss 0.54 (1.60)
42
+ cls_loss 0.16 (0.90)
43
+ reg_loss 0.11 (0.36)
44
+ vtm_loss 0.18 (0.23)
45
+ +----------+----------+----------+----------+----------+----------+
46
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
47
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
48
+ +----------+----------+----------+----------+----------+----------+
49
+ | 21.55 | 14.79 | 42.82 | 31.62 | 42.82 | 31.62 |
50
+ +----------+----------+----------+----------+----------+----------+avgiou=21.553949
51
+ epoch4
52
+ final_loss 0.68 (1.64)
53
+ cls_loss 0.26 (0.93)
54
+ reg_loss 0.16 (0.36)
55
+ vtm_loss 0.17 (0.23)
56
+ +----------+----------+----------+----------+----------+----------+
57
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
58
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
59
+ +----------+----------+----------+----------+----------+----------+
60
+ | 20.55 | 14.09 | 41.40 | 30.23 | 41.40 | 30.23 |
61
+ +----------+----------+----------+----------+----------+----------+avgiou=20.547238
62
+ epoch5
63
+ final_loss 0.61 (1.69)
64
+ cls_loss 0.20 (0.98)
65
+ reg_loss 0.13 (0.36)
66
+ vtm_loss 0.19 (0.23)
67
+ +----------+----------+----------+----------+----------+----------+
68
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
69
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
70
+ +----------+----------+----------+----------+----------+----------+
71
+ | 19.90 | 13.42 | 39.03 | 28.32 | 39.03 | 28.32 |
72
+ +----------+----------+----------+----------+----------+----------+avgiou=19.901910
73
+ epoch6
74
+ final_loss 0.72 (1.78)
75
+ cls_loss 0.23 (1.04)
76
+ reg_loss 0.13 (0.37)
77
+ vtm_loss 0.24 (0.25)
78
+ +----------+----------+----------+----------+----------+----------+
79
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
80
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
81
+ +----------+----------+----------+----------+----------+----------+
82
+ | 18.61 | 12.39 | 36.91 | 27.00 | 36.91 | 27.00 |
83
+ +----------+----------+----------+----------+----------+----------+avgiou=18.611255
84
+ epoch7
85
+ final_loss 0.64 (1.85)
86
+ cls_loss 0.20 (1.08)
87
+ reg_loss 0.10 (0.38)
88
+ vtm_loss 0.23 (0.26)
89
+ +----------+----------+----------+----------+----------+----------+
90
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
91
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
92
+ +----------+----------+----------+----------+----------+----------+
93
+ | 17.73 | 11.51 | 35.26 | 25.27 | 35.26 | 25.27 |
94
+ +----------+----------+----------+----------+----------+----------+avgiou=17.733609
95
+ epoch8
96
+ final_loss 0.74 (1.92)
97
+ cls_loss 0.25 (1.12)
98
+ reg_loss 0.13 (0.38)
99
+ vtm_loss 0.23 (0.28)
100
+ +----------+----------+----------+----------+----------+----------+
101
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
102
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
103
+ +----------+----------+----------+----------+----------+----------+
104
+ | 17.42 | 11.51 | 33.66 | 24.26 | 33.66 | 24.26 |
105
+ +----------+----------+----------+----------+----------+----------+avgiou=17.423851
106
+ epoch9
107
+ final_loss 0.70 (1.95)
108
+ cls_loss 0.25 (1.14)
109
+ reg_loss 0.11 (0.38)
110
+ vtm_loss 0.23 (0.29)
ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/EVAL_GPU_0_all_losses_cls_loss/events.out.tfevents.1729874043.autodl-container-b3ec4da47b-bc5fbea1.663163.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f92dbc196d204a5bbf192b730190cda2c485c7914f7e4e2dcd701038c89a6d
3
+ size 5486
ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/EVAL_GPU_0_all_losses_reg_loss/events.out.tfevents.1729874043.autodl-container-b3ec4da47b-bc5fbea1.663163.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ee9efa1cd173238395f72430ea8fd85b71d712a988b1191c069ddec5230161c
3
+ size 5486
ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/EVAL_GPU_0_all_losses_vtm_loss/events.out.tfevents.1729874043.autodl-container-b3ec4da47b-bc5fbea1.663163.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d610e01810c519a7e41dc31e8ecf599103f3e00022a6fc343e597d52875fddad
3
+ size 5486
ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/events.out.tfevents.1729871205.autodl-container-b3ec4da47b-bc5fbea1.663163.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4045b4b0d93a6cdee08d58dce600856cb25a359a55874c68b4e03b29e86e87d
3
+ size 20484
ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/events.out.tfevents.1729871205.autodl-container-b3ec4da47b-bc5fbea1.663164.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44b680fd20310c4773deaddbb8c93562931ca0921fef7900c2a67c03ce628aa6
3
+ size 88
ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/train_GPU_0_all_losses_cls_loss/events.out.tfevents.1729871427.autodl-container-b3ec4da47b-bc5fbea1.663163.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53de0e531f68e19336dfb0fe941113e31478e4918067f9435f9efb3b0069cee7
3
+ size 7407
ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/train_GPU_0_all_losses_reg_loss/events.out.tfevents.1729871427.autodl-container-b3ec4da47b-bc5fbea1.663163.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b50081741ae2873022666decf314f8cb1a5c2ab4af4d7fdf103d21877b1b481d
3
+ size 7407
ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/logs/train_GPU_0_all_losses_vtm_loss/events.out.tfevents.1729871427.autodl-container-b3ec4da47b-bc5fbea1.663163.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:995c1012741d37f3f379da79a4ef7399703746711bb50f4160d4b5e41ed92a7f
3
+ size 7407
ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/model_3_21.966959215281364.pth.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:941a926ec2879bd0cc2a245a2eaa5ac4c03cbbed2deebc26c86ae15ed59c340b
3
+ size 122083183
ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/nlq_predictions_epoch_val_top10_3.json ADDED
The diff for this file is too large to render. See raw diff
 
ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/nlq_predictions_epoch_val_top10_3_noscore.json ADDED
The diff for this file is too large to render. See raw diff
 
ego4d_nlq_v1/egovlp/finetuned/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4_objectmambafinetune173/run.sh ADDED
@@ -0,0 +1 @@
 
 
1
+ bash tools/train_ego4d_finetune_head_twogpu.sh configs/ego4d_nlq_v1_multitask_egovlp_256_finetune_2e-4.yaml /root/autodl-tmp/model/GroundNLQ/ckpt/save/model_5_pretrain_egovlp.pth.tar objectmambafinetune173 0,1
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/config.txt ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {'dataset': {'classname_feat_concat': 'only',
2
+ 'classname_feat_dir': '/root/autodl-tmp/data/ego4d/nlq/classname-clip-base/a_photo_of.pt',
3
+ 'default_fps': 30,
4
+ 'downsample_rate': 1,
5
+ 'enable_temporal_jittering': False,
6
+ 'feat_stride': 16.0,
7
+ 'fix_video_frames': 0,
8
+ 'input_txt_dim': 512,
9
+ 'input_vid_dim': 2304,
10
+ 'json_file': './ego4d_data/ego4d_nlq_v2_ori_data/nlq_val.json',
11
+ 'lavila_caption_dir': '/root/autodl-tmp/data/ego4d/nlq/lavila/narration',
12
+ 'max_seq_len': 2560,
13
+ 'num_classes': 1,
14
+ 'num_frames': 16.0,
15
+ 'object_feat_dir': '/root/autodl-tmp/data/ego4d/nlq/co-detr/class-score0.6-minnum10-lmdb',
16
+ 'object_feat_type': 'class-score',
17
+ 'text_feat_dir': '/root/autodl-tmp/data/ego4d/nlq/nlq_v2_clip_token_features',
18
+ 'train_jsonl_file': './ego4d_data/ego4d_nlq_train_v2_label_lemma.jsonl',
19
+ 'val_jsonl_file': './ego4d_data/ego4d_nlq_val_v2_label_lemma.jsonl',
20
+ 'val_text_feat_dir': '/root/autodl-tmp/data/ego4d/nlq/nlq_v2_clip_token_features',
21
+ 'video_feat_dir': '/root/autodl-tmp/data/ego4d/nlq/em_egovlp+internvideo_visual_features_1.87fps'},
22
+ 'dataset_name': 'ego4d_multitask',
23
+ 'devices': 'cuda:0',
24
+ 'init_rand_seed': 12345678,
25
+ 'loader': {'batch_size': 4, 'num_workers': 4},
26
+ 'model': {'backbone_arch': [2, 4, 4, 0, 6],
27
+ 'backbone_type': 'ObjectMambaTransformer',
28
+ 'embd_dim': 512,
29
+ 'embd_kernel_size': 3,
30
+ 'embd_with_ln': True,
31
+ 'fpn_dim': 512,
32
+ 'fpn_start_level': 0,
33
+ 'fpn_type': 'identity',
34
+ 'fpn_with_ln': True,
35
+ 'generator': {'generator_type': 'point'},
36
+ 'head_dim': 512,
37
+ 'head_kernel_size': 3,
38
+ 'head_num_layers': 3,
39
+ 'head_with_ln': True,
40
+ 'input_txt_dim': 512,
41
+ 'input_vid_dim': 2304,
42
+ 'max_buffer_len_factor': 4.0,
43
+ 'max_query': 230,
44
+ 'max_seq_len': 2560,
45
+ 'max_shot_num': 1800,
46
+ 'multiscale_encoder_cfg': [{'layer_cfg': {'mha_win_size': 9,
47
+ 'n_ds_strides': [2, 2],
48
+ 'n_embd': 512,
49
+ 'n_head': 4,
50
+ 'path_pdrop': 0.1},
51
+ 'layer_num': 6,
52
+ 'layer_type': 'TransformerBlock'},
53
+ {'layer_cfg': {'in_channels': [512,
54
+ 512,
55
+ 512,
56
+ 512,
57
+ 512,
58
+ 512,
59
+ 512],
60
+ 'out_channel': 512},
61
+ 'layer_num': 1,
62
+ 'layer_type': 'FPNLayernorm'}],
63
+ 'n_head': 4,
64
+ 'n_mha_win_size': 9,
65
+ 'nlq_heads_cfg': {'center_sample_radius': 1.5,
66
+ 'cls_head_cfg': {'empty_cls': [],
67
+ 'feat_dim': 512,
68
+ 'input_dim': 512,
69
+ 'kernel_size': 3,
70
+ 'num_classes': 1,
71
+ 'num_layers': 3,
72
+ 'prior_prob': 0.01,
73
+ 'with_ln': True},
74
+ 'duration_thresh': 0.001,
75
+ 'iou_threshold': 0.1,
76
+ 'loss_normalizer': 200,
77
+ 'loss_normalizer_momentum': 0.9,
78
+ 'max_seg_num': 5,
79
+ 'min_score': 0.001,
80
+ 'pre_nms_thresh': 0.001,
81
+ 'pre_nms_topk': 2000,
82
+ 'reg_head_cfg': {'feat_dim': 512,
83
+ 'fpn_levels': 7,
84
+ 'input_dim': 512,
85
+ 'kernel_size': 3,
86
+ 'num_layers': 3,
87
+ 'with_ln': True},
88
+ 'reg_loss_weight': 1.0,
89
+ 'train_label_smoothing': 0.1},
90
+ 'num_classes': 1,
91
+ 'obj_encoder_cfg': [{'layer_cfg': {'act': 'relu',
92
+ 'n_in': 512,
93
+ 'num_layer': 2},
94
+ 'layer_num': 1,
95
+ 'layer_type': 'MaskedConv1DLayer'},
96
+ {'layer_cfg': {'n_embd': 512, 'path_pdrop': 0.1},
97
+ 'layer_num': 4,
98
+ 'layer_type': 'ObjectEncoderBlock'}],
99
+ 'object_dim': 512,
100
+ 'object_use_cross_model': True,
101
+ 'object_win_size': 1,
102
+ 'regression_range': [[0, 4],
103
+ [2, 8],
104
+ [4, 16],
105
+ [8, 32],
106
+ [16, 64],
107
+ [32, 128],
108
+ [64, 10000]],
109
+ 'scale_factor': 2,
110
+ 'tasks': ['NLQ', 'VTM'],
111
+ 'test_cfg': {'duration_thresh': 0.001,
112
+ 'ext_score_file': None,
113
+ 'iou_threshold': 0.1,
114
+ 'max_seg_num': 5,
115
+ 'min_score': 0.001,
116
+ 'multiclass_nms': True,
117
+ 'nms_method': 'soft',
118
+ 'nms_sigma': 0.75,
119
+ 'pre_nms_thresh': 0.001,
120
+ 'pre_nms_topk': 2000,
121
+ 'test_num': 1,
122
+ 'test_start_epoch': 2,
123
+ 'voting_thresh': 0.9},
124
+ 'text_encoder_cfg': [{'layer_cfg': {'act': 'relu',
125
+ 'n_in': 512,
126
+ 'num_layer': 2},
127
+ 'layer_num': 1,
128
+ 'layer_type': 'MaskedConv1DLayer'},
129
+ {'layer_cfg': {'n_embd': 512,
130
+ 'n_head': 4,
131
+ 'path_pdrop': 0.1},
132
+ 'layer_num': 4,
133
+ 'layer_type': 'TransformerBlock'}],
134
+ 'train_cfg': {'box_loss_weight': 5.0,
135
+ 'center_sample': 'radius',
136
+ 'center_sample_radius': 1.5,
137
+ 'clip_grad_l2norm': 1.0,
138
+ 'cls_prior_prob': 0.01,
139
+ 'dropout': 0.0,
140
+ 'droppath': 0.1,
141
+ 'head_empty_cls': [],
142
+ 'init_loss_norm': 200,
143
+ 'iou_loss_weight': 1.0,
144
+ 'label_smoothing': 0.1,
145
+ 'loss_weight': 1.0,
146
+ 'mamba_arch': ['bimamba1', 'mlp', 'obj'],
147
+ 'num_decoder_layer': 6},
148
+ 'use_abs_pe': True,
149
+ 'use_lmha_in_fpn': True,
150
+ 'use_rel_pe': False,
151
+ 'video_encoder_cfg': [{'layer_cfg': {'act': 'relu',
152
+ 'kernel_size': 3,
153
+ 'n_hidden': 512,
154
+ 'n_in': 2304,
155
+ 'n_out': 512,
156
+ 'num_layer': 2},
157
+ 'layer_num': 1,
158
+ 'layer_type': 'MaskedConv1DLayer'},
159
+ {'layer_cfg': {'mamba_arch': ['bimamba1',
160
+ 'mlp',
161
+ 'obj'],
162
+ 'n_embd': 512,
163
+ 'n_head': 4,
164
+ 'path_pdrop': 0.1},
165
+ 'layer_num': 4,
166
+ 'layer_type': 'ObjectMambaBlock'}],
167
+ 'vtm_heads_cfg': {'loss_weight': 1.5,
168
+ 'multiscale': False,
169
+ 'shot_aggregator_cfg': {'layer_cfg': {'cross_mixer_cfg': {'block_cfg': {'n_embd': 512,
170
+ 'n_head': 4},
171
+ 'block_type': 'MaskedMHA'},
172
+ 'num_layer': 1,
173
+ 'path_pdrop': 0.1,
174
+ 'query_num': 5,
175
+ 'self_mixer_cfg': {'block_cfg': {'n_embd': 512,
176
+ 'n_head': 4},
177
+ 'block_type': 'MaskedMHCA'}},
178
+ 'layer_type': 'QFormerLayer'},
179
+ 'similarity_head_cfg': {'layer_cfg': {'con_dim': 512,
180
+ 'x_dim': 512,
181
+ 'y_dim': 512},
182
+ 'layer_type': 'Cosine'},
183
+ 'soft_label': False}},
184
+ 'model_name': 'MultiTaskArch',
185
+ 'opt': {'backbone_lr_weight': 1,
186
+ 'epochs': 6,
187
+ 'learning_rate': 0.0004,
188
+ 'momentum': 0.9,
189
+ 'schedule_gamma': 0.1,
190
+ 'schedule_steps': [],
191
+ 'schedule_type': 'cosine',
192
+ 'type': 'AdamW',
193
+ 'warmup': True,
194
+ 'warmup_epochs': 4,
195
+ 'weight_decay': 0.05},
196
+ 'output_folder': '/root/autodl-tmp/model/GroundNLQ/ckpt/',
197
+ 'test_cfg': {'duration_thresh': 0.001,
198
+ 'ext_score_file': None,
199
+ 'iou_threshold': 0.1,
200
+ 'max_seg_num': 5,
201
+ 'min_score': 0.001,
202
+ 'multiclass_nms': True,
203
+ 'nms_method': 'soft',
204
+ 'nms_sigma': 0.75,
205
+ 'pre_nms_thresh': 0.001,
206
+ 'pre_nms_topk': 2000,
207
+ 'test_num': 1,
208
+ 'test_start_epoch': 2,
209
+ 'voting_thresh': 0.9},
210
+ 'track': 'nlq',
211
+ 'train_cfg': {'box_loss_weight': 5.0,
212
+ 'center_sample': 'radius',
213
+ 'center_sample_radius': 1.5,
214
+ 'clip_grad_l2norm': 1.0,
215
+ 'cls_prior_prob': 0.01,
216
+ 'dropout': 0.0,
217
+ 'droppath': 0.1,
218
+ 'head_empty_cls': [],
219
+ 'init_loss_norm': 200,
220
+ 'iou_loss_weight': 1.0,
221
+ 'label_smoothing': 0.1,
222
+ 'loss_weight': 1.0,
223
+ 'mamba_arch': ['bimamba1', 'mlp', 'obj'],
224
+ 'num_decoder_layer': 6},
225
+ 'train_split': ['training'],
226
+ 'val_split': ['validation']}
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/eval_results.txt ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ +----------+----------+----------+----------+----------+----------+
2
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
3
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
4
+ +----------+----------+----------+----------+----------+----------+
5
+ | 22.28 | 13.51 | 47.47 | 30.82 | 47.47 | 30.82 |
6
+ +----------+----------+----------+----------+----------+----------+avgiou=17.893234
7
+ epoch0
8
+ final_loss 1.49 (1.50)
9
+ cls_loss 0.80 (0.82)
10
+ reg_loss 0.41 (0.37)
11
+ vtm_loss 0.18 (0.21)
12
+ +----------+----------+----------+----------+----------+----------+
13
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
14
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
15
+ +----------+----------+----------+----------+----------+----------+
16
+ | 30.80 | 21.22 | 57.93 | 43.91 | 57.93 | 43.91 |
17
+ +----------+----------+----------+----------+----------+----------+avgiou=26.010545
18
+ epoch1
19
+ final_loss 1.32 (1.47)
20
+ cls_loss 0.76 (0.81)
21
+ reg_loss 0.29 (0.34)
22
+ vtm_loss 0.18 (0.21)
23
+ +----------+----------+----------+----------+----------+----------+
24
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
25
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
26
+ +----------+----------+----------+----------+----------+----------+
27
+ | 31.63 | 22.03 | 57.91 | 45.19 | 57.91 | 45.19 |
28
+ +----------+----------+----------+----------+----------+----------+avgiou=26.834359
29
+ epoch2
30
+ final_loss 1.39 (1.48)
31
+ cls_loss 0.82 (0.84)
32
+ reg_loss 0.31 (0.34)
33
+ vtm_loss 0.17 (0.20)
34
+ +----------+----------+----------+----------+----------+----------+
35
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
36
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
37
+ +----------+----------+----------+----------+----------+----------+
38
+ | 30.82 | 20.98 | 56.81 | 43.83 | 56.81 | 43.83 |
39
+ +----------+----------+----------+----------+----------+----------+avgiou=25.900703
40
+ epoch3
41
+ final_loss 1.45 (1.50)
42
+ cls_loss 0.80 (0.86)
43
+ reg_loss 0.40 (0.34)
44
+ vtm_loss 0.17 (0.20)
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/EVAL_GPU_0_all_losses_cls_loss/events.out.tfevents.1728637028.autodl-container-b3ec4da47b-bc5fbea1.33573.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:204898598341fc7043ef1565613b7d8f4f72c38b4f00cc21786f8cbaff7fd1cb
3
+ size 2726
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/EVAL_GPU_0_all_losses_reg_loss/events.out.tfevents.1728637028.autodl-container-b3ec4da47b-bc5fbea1.33573.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:902bc45d32111db78cc6e77989359e1a7a6a84cd731e3cbd7ebd78db976394a7
3
+ size 2726
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/EVAL_GPU_0_all_losses_vtm_loss/events.out.tfevents.1728637028.autodl-container-b3ec4da47b-bc5fbea1.33573.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2fb79a698b30e5224fea25ec682384c511beaf6adedc29c26457aa1e286dd94
3
+ size 2726
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/events.out.tfevents.1728632235.autodl-container-b3ec4da47b-bc5fbea1.31554.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a939c2d48a6aea6f129586ee4d0fa6ba50f075927233c253ca3544769e15d79
3
+ size 88
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/events.out.tfevents.1728632235.autodl-container-b3ec4da47b-bc5fbea1.31555.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5435840bf4ee16117de08cd1862d324adee13f0f8474bf803994ae794ad6b182
3
+ size 88
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/events.out.tfevents.1728632793.autodl-container-b3ec4da47b-bc5fbea1.33573.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2835cf0277a91e459c72e8eed594345f5b97e35298e453e192332afa8cd47de3
3
+ size 12974
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/events.out.tfevents.1728632793.autodl-container-b3ec4da47b-bc5fbea1.33574.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46d617f8b7b1e0cf8f671677c63e908bd78be53699e32afa09e17e4536bfbcfc
3
+ size 88
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/train_GPU_0_all_losses_cls_loss/events.out.tfevents.1728633033.autodl-container-b3ec4da47b-bc5fbea1.33573.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:418ff6da494fef248a78f29ec7bd762321dd4fcdd73bca21d8a00876b14fcb8a
3
+ size 5089
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/train_GPU_0_all_losses_reg_loss/events.out.tfevents.1728633033.autodl-container-b3ec4da47b-bc5fbea1.33573.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30a8ce87fb2738bb17843b94845afcfb532659e2426c7ff0cb7871f6dcc89bcf
3
+ size 5089
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/logs/train_GPU_0_all_losses_vtm_loss/events.out.tfevents.1728633033.autodl-container-b3ec4da47b-bc5fbea1.33573.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44a0fc1c4df109b6a12877a31941824addd99bfdf6ec269d0ed190ea7a024a6b
3
+ size 5089
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/model_2_26.834358523725836.pth.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:382435ff1efe5a1167474e66d3bb31c1f33d80414bdd9c532ddeb08d39ccd139
3
+ size 486572975
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/nlq_predictions_epoch_val_top10_2.json ADDED
The diff for this file is too large to render. See raw diff
 
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/nlq_predictions_epoch_val_top10_2_noscore.json ADDED
The diff for this file is too large to render. See raw diff
 
ego4d_nlq_v2/internvideo/finetune/ego4d_nlq_v2_multitask_finetune_2e-4_objectmambafinetune144/run.sh ADDED
@@ -0,0 +1 @@
 
 
1
+ bash tools/train_ego4d_finetune_head_twogpu.sh configs/ego4d_nlq_v2_multitask_finetune_2e-4.yaml /root/autodl-tmp/model/GroundNLQ/ckpt/save/model_7_pretrain.pth.tar objectmambafinetune144 0,1
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/config.txt ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {'dataset': {'classname_feat_concat': 'only',
2
+ 'classname_feat_dir': '/root/autodl-tmp/data/ego4d/nlq/classname-clip-base/a_photo_of.pt',
3
+ 'default_fps': 30,
4
+ 'downsample_rate': 1,
5
+ 'enable_temporal_jittering': False,
6
+ 'feat_stride': 16.0,
7
+ 'fix_video_frames': 0,
8
+ 'input_txt_dim': 512,
9
+ 'input_vid_dim': 2304,
10
+ 'json_file': 'ego4d_data/goalstep_data/ego4d_goal_step_val_v2_lemma.jsonl',
11
+ 'lavila_caption_dir': '/root/autodl-tmp/data/ego4d/goalstep/lavila-64/',
12
+ 'max_seq_len': 9216,
13
+ 'num_classes': 1,
14
+ 'num_frames': 16.0,
15
+ 'object_feat_dir': '/root/autodl-tmp/data/ego4d/goalstep/co-detr/clip-class-lmdb',
16
+ 'object_feat_type': 'class-score',
17
+ 'text_feat_dir': '/root/autodl-tmp/data/ego4d/goalstep/clip_query_lmdb',
18
+ 'train_jsonl_file': 'ego4d_data/goalstep_data/clip/ego4d_goal_step_train_v2.jsonl',
19
+ 'val_jsonl_file': 'ego4d_data/goalstep_data/ego4d_goal_step_val_v2_lemma.jsonl',
20
+ 'val_text_feat_dir': '/root/autodl-tmp/data/ego4d/goalstep/clip_query_lmdb',
21
+ 'video_feat_dir': '/root/autodl-tmp/data/ego4d/goalstep/video_feature/internvideo_clip_lmdb'},
22
+ 'dataset_name': 'ego4d_multitask',
23
+ 'devices': 'cuda:0',
24
+ 'init_rand_seed': 12345678,
25
+ 'loader': {'batch_size': 1, 'num_workers': 1},
26
+ 'model': {'backbone_arch': (2, 2, 2, 0, 6),
27
+ 'backbone_type': 'convTransformer',
28
+ 'embd_dim': 512,
29
+ 'embd_kernel_size': 3,
30
+ 'embd_with_ln': True,
31
+ 'fpn_dim': 512,
32
+ 'fpn_start_level': 0,
33
+ 'fpn_type': 'identity',
34
+ 'fpn_with_ln': True,
35
+ 'generator': {'generator_type': 'point'},
36
+ 'head_dim': 512,
37
+ 'head_kernel_size': 3,
38
+ 'head_num_layers': 3,
39
+ 'head_with_ln': True,
40
+ 'input_txt_dim': 512,
41
+ 'input_vid_dim': 2304,
42
+ 'max_buffer_len_factor': 4.0,
43
+ 'max_query': 560,
44
+ 'max_seq_len': 9216,
45
+ 'max_shot_num': 3400,
46
+ 'multiscale_encoder_cfg': [{'layer_cfg': {'mha_win_size': 9,
47
+ 'n_ds_strides': [2, 2],
48
+ 'n_embd': 512,
49
+ 'n_head': 4,
50
+ 'path_pdrop': 0.1},
51
+ 'layer_num': 6,
52
+ 'layer_type': 'TransformerBlock'},
53
+ {'layer_cfg': {'in_channels': [512,
54
+ 512,
55
+ 512,
56
+ 512,
57
+ 512,
58
+ 512,
59
+ 512],
60
+ 'out_channel': 512},
61
+ 'layer_num': 1,
62
+ 'layer_type': 'FPNLayernorm'}],
63
+ 'n_head': 4,
64
+ 'n_mha_win_size': -1,
65
+ 'nlq_heads_cfg': {'center_sample_radius': 1.5,
66
+ 'cls_head_cfg': {'empty_cls': [],
67
+ 'feat_dim': 512,
68
+ 'input_dim': 512,
69
+ 'kernel_size': 3,
70
+ 'num_classes': 1,
71
+ 'num_layers': 3,
72
+ 'prior_prob': 0.01,
73
+ 'with_ln': True},
74
+ 'duration_thresh': 0.001,
75
+ 'iou_threshold': 0.1,
76
+ 'loss_normalizer': 200,
77
+ 'loss_normalizer_momentum': 0.9,
78
+ 'max_seg_num': 5,
79
+ 'min_score': 0.001,
80
+ 'pre_nms_thresh': 0.001,
81
+ 'pre_nms_topk': 2000,
82
+ 'reg_head_cfg': {'feat_dim': 512,
83
+ 'fpn_levels': 7,
84
+ 'input_dim': 512,
85
+ 'kernel_size': 3,
86
+ 'num_layers': 3,
87
+ 'with_ln': True},
88
+ 'reg_loss_weight': 1.0,
89
+ 'train_label_smoothing': 0.1},
90
+ 'num_classes': 1,
91
+ 'obj_encoder_cfg': [{'layer_cfg': {'act': 'relu',
92
+ 'n_in': 512,
93
+ 'num_layer': 2},
94
+ 'layer_num': 1,
95
+ 'layer_type': 'MaskedConv1DLayer'},
96
+ {'layer_cfg': {'n_embd': 512, 'path_pdrop': 0.1},
97
+ 'layer_num': 4,
98
+ 'layer_type': 'ObjectEncoderBlock'}],
99
+ 'regression_range': [[0, 4],
100
+ [2, 8],
101
+ [4, 16],
102
+ [8, 32],
103
+ [16, 64],
104
+ [32, 128],
105
+ [64, 10000]],
106
+ 'scale_factor': 2,
107
+ 'tasks': ['NLQ', 'VTM'],
108
+ 'test_cfg': {'duration_thresh': 0.001,
109
+ 'ext_score_file': None,
110
+ 'iou_threshold': 0.1,
111
+ 'max_seg_num': 5,
112
+ 'min_score': 0.001,
113
+ 'multiclass_nms': True,
114
+ 'nms_method': 'soft',
115
+ 'nms_sigma': 0.75,
116
+ 'pre_nms_thresh': 0.001,
117
+ 'pre_nms_topk': 2000,
118
+ 'test_num': 1,
119
+ 'test_start_epoch': 2,
120
+ 'voting_thresh': 0.9},
121
+ 'text_encoder_cfg': [{'layer_cfg': {'act': 'relu',
122
+ 'n_in': 512,
123
+ 'num_layer': 2},
124
+ 'layer_num': 1,
125
+ 'layer_type': 'MaskedConv1DLayer'},
126
+ {'layer_cfg': {'n_embd': 512,
127
+ 'n_head': 4,
128
+ 'path_pdrop': 0.1},
129
+ 'layer_num': 4,
130
+ 'layer_type': 'TransformerBlock'}],
131
+ 'train_cfg': {'box_loss_weight': 5.0,
132
+ 'center_sample': 'radius',
133
+ 'center_sample_radius': 1.5,
134
+ 'clip_grad_l2norm': 1.0,
135
+ 'cls_prior_prob': 0.01,
136
+ 'dropout': 0.0,
137
+ 'droppath': 0.1,
138
+ 'head_empty_cls': [],
139
+ 'init_loss_norm': 200,
140
+ 'iou_loss_weight': 1.0,
141
+ 'label_smoothing': 0.1,
142
+ 'loss_weight': 1.0,
143
+ 'mamba_arch': ['bimamba1', 'mlp', 'obj'],
144
+ 'num_decoder_layer': 6},
145
+ 'use_abs_pe': True,
146
+ 'use_lmha_in_fpn': True,
147
+ 'use_rel_pe': False,
148
+ 'video_encoder_cfg': [{'layer_cfg': {'act': 'relu',
149
+ 'kernel_size': 3,
150
+ 'n_hidden': 512,
151
+ 'n_in': 2304,
152
+ 'n_out': 512,
153
+ 'num_layer': 2},
154
+ 'layer_num': 1,
155
+ 'layer_type': 'MaskedConv1DLayer'},
156
+ {'layer_cfg': {'mamba_arch': ['bimamba1',
157
+ 'mlp',
158
+ 'obj'],
159
+ 'n_embd': 512,
160
+ 'n_head': 4,
161
+ 'path_pdrop': 0.1},
162
+ 'layer_num': 4,
163
+ 'layer_type': 'ObjectMambaBlock'}],
164
+ 'vtm_heads_cfg': {'loss_weight': 1.5,
165
+ 'multiscale': False,
166
+ 'shot_aggregator_cfg': {'layer_cfg': {'cross_mixer_cfg': {'block_cfg': {'n_embd': 512,
167
+ 'n_head': 4},
168
+ 'block_type': 'MaskedMHA'},
169
+ 'num_layer': 1,
170
+ 'path_pdrop': 0.1,
171
+ 'query_num': 5,
172
+ 'self_mixer_cfg': {'block_cfg': {'n_embd': 512,
173
+ 'n_head': 4},
174
+ 'block_type': 'MaskedMHCA'}},
175
+ 'layer_type': 'QFormerLayer'},
176
+ 'similarity_head_cfg': {'layer_cfg': {'con_dim': 512,
177
+ 'x_dim': 512,
178
+ 'y_dim': 512},
179
+ 'layer_type': 'Cosine'},
180
+ 'soft_label': False}},
181
+ 'model_name': 'MultiTaskArch',
182
+ 'opt': {'backbone_lr_weight': 1,
183
+ 'epochs': 6,
184
+ 'learning_rate': 0.0008,
185
+ 'momentum': 0.9,
186
+ 'schedule_gamma': 0.1,
187
+ 'schedule_steps': [],
188
+ 'schedule_type': 'cosine',
189
+ 'type': 'AdamW',
190
+ 'warmup': True,
191
+ 'warmup_epochs': 4,
192
+ 'weight_decay': 0.05},
193
+ 'output_folder': '/root/autodl-tmp/model/GroundNLQ/goalstep/',
194
+ 'test_cfg': {'duration_thresh': 0.001,
195
+ 'ext_score_file': None,
196
+ 'iou_threshold': 0.1,
197
+ 'max_seg_num': 5,
198
+ 'min_score': 0.001,
199
+ 'multiclass_nms': True,
200
+ 'nms_method': 'soft',
201
+ 'nms_sigma': 0.75,
202
+ 'pre_nms_thresh': 0.001,
203
+ 'pre_nms_topk': 2000,
204
+ 'test_num': 1,
205
+ 'test_start_epoch': 2,
206
+ 'voting_thresh': 0.9},
207
+ 'track': 'goal_step',
208
+ 'train_cfg': {'box_loss_weight': 5.0,
209
+ 'center_sample': 'radius',
210
+ 'center_sample_radius': 1.5,
211
+ 'clip_grad_l2norm': 1.0,
212
+ 'cls_prior_prob': 0.01,
213
+ 'dropout': 0.0,
214
+ 'droppath': 0.1,
215
+ 'head_empty_cls': [],
216
+ 'init_loss_norm': 200,
217
+ 'iou_loss_weight': 1.0,
218
+ 'label_smoothing': 0.1,
219
+ 'loss_weight': 1.0,
220
+ 'mamba_arch': ['bimamba1', 'mlp', 'obj'],
221
+ 'num_decoder_layer': 6},
222
+ 'train_split': ['training'],
223
+ 'val_split': ['validation']}
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/eval_results.txt ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ +----------+----------+----------+----------+----------+----------+
2
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
3
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
4
+ +----------+----------+----------+----------+----------+----------+
5
+ | 26.13 | 21.61 | 56.73 | 49.30 | 56.73 | 49.30 |
6
+ +----------+----------+----------+----------+----------+----------+avgiou=26.130457
7
+ epoch0
8
+ +----------+----------+----------+----------+----------+----------+
9
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
10
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
11
+ +----------+----------+----------+----------+----------+----------+
12
+ | 27.99 | 23.83 | 58.72 | 51.57 | 58.72 | 51.57 |
13
+ +----------+----------+----------+----------+----------+----------+avgiou=27.988565
14
+ epoch1
15
+ +----------+----------+----------+----------+----------+----------+
16
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
17
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
18
+ +----------+----------+----------+----------+----------+----------+
19
+ | 28.42 | 23.79 | 59.50 | 52.39 | 59.50 | 52.39 |
20
+ +----------+----------+----------+----------+----------+----------+avgiou=28.417360
21
+ epoch2
22
+ +----------+----------+----------+----------+----------+----------+
23
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
24
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
25
+ +----------+----------+----------+----------+----------+----------+
26
+ | 28.17 | 24.06 | 58.74 | 51.74 | 58.74 | 51.74 |
27
+ +----------+----------+----------+----------+----------+----------+avgiou=28.170478
28
+ epoch3
29
+ +----------+----------+----------+----------+----------+----------+
30
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
31
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
32
+ +----------+----------+----------+----------+----------+----------+
33
+ | 28.74 | 24.27 | 59.16 | 52.27 | 59.16 | 52.27 |
34
+ +----------+----------+----------+----------+----------+----------+avgiou=28.742204
35
+ epoch4
36
+ +----------+----------+----------+----------+----------+----------+
37
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
38
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
39
+ +----------+----------+----------+----------+----------+----------+
40
+ | 28.60 | 24.44 | 59.65 | 53.13 | 59.65 | 53.13 |
41
+ +----------+----------+----------+----------+----------+----------+avgiou=28.599272
42
+ epoch5
43
+ +----------+----------+----------+----------+----------+----------+
44
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
45
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
46
+ +----------+----------+----------+----------+----------+----------+
47
+ | 29.61 | 24.94 | 59.51 | 52.48 | 59.51 | 52.48 |
48
+ +----------+----------+----------+----------+----------+----------+avgiou=29.612786
49
+ epoch6
50
+ +----------+----------+----------+----------+----------+----------+
51
+ | Rank@1 | Rank@1 | Rank@5 | Rank@5 | Rank@10 | Rank@10 |
52
+ | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 | mIoU@0.3 | mIoU@0.5 |
53
+ +----------+----------+----------+----------+----------+----------+
54
+ | 28.92 | 24.57 | 59.17 | 52.43 | 59.17 | 52.43 |
55
+ +----------+----------+----------+----------+----------+----------+avgiou=28.924116
56
+ epoch7
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/events.out.tfevents.1726904924.autodl-container-b3ec4da47b-bc5fbea1.458057.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecca71d48639775ce1f6c217bfe928294fc08b01b5568b66109cb367ea1b9f63
3
+ size 88
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/events.out.tfevents.1726904924.autodl-container-b3ec4da47b-bc5fbea1.458059.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00aaeab73622930454886f391bb37812060334fc4e955468ec0796ca6f992f96
3
+ size 88
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/events.out.tfevents.1726904924.autodl-container-b3ec4da47b-bc5fbea1.458060.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64ac5b1566c4058ef555d97b101936cee9aeb90cfcb2885b648f348e1b6fd8d9
3
+ size 88
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/events.out.tfevents.1726906438.autodl-container-b3ec4da47b-bc5fbea1.460310.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d096f2b7006f2deb0a4c10eaa54f0a69f89c4ff41eaa521de174b436e93169b
3
+ size 88
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/events.out.tfevents.1726906439.autodl-container-b3ec4da47b-bc5fbea1.460307.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4e28bd158929f7db87bfc69169443fa99dcb61a8e47c4fd235ea86cfebe403a
3
+ size 79899
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/events.out.tfevents.1726906439.autodl-container-b3ec4da47b-bc5fbea1.460308.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91a45dc73a5396e958bb5b8b0ed845bf85c3d7625c807f712686c3b41b9af2cd
3
+ size 88
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/events.out.tfevents.1726906439.autodl-container-b3ec4da47b-bc5fbea1.460309.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b33462cec9f03325622b13d84818afb653ab1aea89bfe5f3449bc670b0ea288
3
+ size 88
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/train_GPU_0_all_losses_cls_loss/events.out.tfevents.1726906838.autodl-container-b3ec4da47b-bc5fbea1.460307.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:695ef39d29320f4820ae3af21b36d103b3b88a547c56db757caf66deae948f36
3
+ size 39047
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/train_GPU_0_all_losses_reg_loss/events.out.tfevents.1726906838.autodl-container-b3ec4da47b-bc5fbea1.460307.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74c8ee73938de8a9ebbf72aa4ed3401902ef7ad497b49ca30a5042dccf8a001b
3
+ size 39047
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/logs/train_GPU_0_all_losses_vtm_loss/events.out.tfevents.1726906838.autodl-container-b3ec4da47b-bc5fbea1.460307.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:692dfc9b278fc5fa5ebcb849f351b4c6c14636f88c16f4c3b6de74ba5ac0d545
3
+ size 39047
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/nlq_predictions_epoch_val_top10_6.json ADDED
The diff for this file is too large to render. See raw diff
 
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/nlq_predictions_epoch_val_top10_6_Bayesian.json ADDED
The diff for this file is too large to render. See raw diff
 
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/nlq_predictions_epoch_val_top10_6_noscore.json ADDED
The diff for this file is too large to render. See raw diff
 
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/nlq_predictions_epoch_val_top10_6_noscore_Bayesian.json ADDED
The diff for this file is too large to render. See raw diff
 
goalstep/internvideo/finetuned/ego4d_goalstep_v2_baseline_2e-4_objectmambafinetune135/run.sh ADDED
@@ -0,0 +1 @@
 
 
1
+ bash tools/train_ego4d_finetune_head_4gpu_noeval.sh configs/ego4d_goalstep_v2_baseline_2e-4.yaml /root/autodl-tmp/model/GroundNLQ/ckpt/save/model_7_pretrain.pth.tar objectmambafinetune135 0,1,2,3
pretrain_naq/egovlp/model_5_pretrain_egovlp.pth.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0582a3fc28fd33b0c1a183014ca70eaa39faffad9c5955f97ce460f01324b04d
3
+ size 122080780
pretrain_naq/internvideo/model_7_pretrain.pth.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55ea828749455f900e0d7bad3556d6d4035d31a7fc247a1e3d6b697a86ec5ed1
3
+ size 425493745
tacos/c3d/scratch/tacos_c3d_glove_weight1_5e-5_objectmambafinetune150/config.txt ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {'dataset': {'classname_feat_concat': 'only',
2
+ 'classname_feat_dir': '/root/autodl-tmp/data/ego4d/nlq/classname-clip-base/a_photo_of.pt',
3
+ 'default_fps': 29.4,
4
+ 'downsample_rate': 1,
5
+ 'enable_temporal_jittering': False,
6
+ 'feat_stride': 16.0,
7
+ 'fix_video_frames': 0,
8
+ 'input_txt_dim': 512,
9
+ 'input_vid_dim': 4096,
10
+ 'json_file': 'ego4d_data/tacos/test_lemma.jsonl',
11
+ 'lavila_caption_dir': '/root/autodl-tmp/data/tacos/lavila',
12
+ 'max_seq_len': 3072,
13
+ 'num_classes': 1,
14
+ 'num_frames': 16.0,
15
+ 'object_feat_dir': '/root/autodl-tmp/data/tacos/class-score0.6-minnum10-lmdb',
16
+ 'object_feat_type': 'class-score',
17
+ 'text_feat_dir': '/root/autodl-tmp/data/tacos/glove_clip_token_features',
18
+ 'train_jsonl_file': 'ego4d_data/tacos/train_lemma.jsonl',
19
+ 'val_jsonl_file': 'ego4d_data/tacos/test_lemma.jsonl',
20
+ 'val_text_feat_dir': '/root/autodl-tmp/data/tacos/glove_clip_token_features',
21
+ 'video_feat_dir': '/root/autodl-tmp/data/tacos/c3d_lmdb'},
22
+ 'dataset_name': 'ego4d_multitask',
23
+ 'devices': 'cuda:0',
24
+ 'init_rand_seed': 12345678,
25
+ 'loader': {'batch_size': 2, 'num_workers': 2},
26
+ 'model': {'backbone_arch': [2, 4, 4, 0, 6],
27
+ 'backbone_type': 'ObjectMambaTransformer',
28
+ 'embd_dim': 512,
29
+ 'embd_kernel_size': 3,
30
+ 'embd_with_ln': True,
31
+ 'fpn_dim': 512,
32
+ 'fpn_start_level': 0,
33
+ 'fpn_type': 'identity',
34
+ 'fpn_with_ln': True,
35
+ 'generator': {'generator_type': 'point'},
36
+ 'head_dim': 512,
37
+ 'head_kernel_size': 3,
38
+ 'head_num_layers': 3,
39
+ 'head_with_ln': True,
40
+ 'input_txt_dim': 512,
41
+ 'input_vid_dim': 4096,
42
+ 'max_buffer_len_factor': 4.0,
43
+ 'max_query': 500,
44
+ 'max_seq_len': 3072,
45
+ 'max_shot_num': 100,
46
+ 'multiscale_encoder_cfg': [{'layer_cfg': {'mha_win_size': 9,
47
+ 'n_ds_strides': [2, 2],
48
+ 'n_embd': 512,
49
+ 'n_head': 4,
50
+ 'path_pdrop': 0.1},
51
+ 'layer_num': 6,
52
+ 'layer_type': 'TransformerBlock'},
53
+ {'layer_cfg': {'in_channels': [512,
54
+ 512,
55
+ 512,
56
+ 512,
57
+ 512,
58
+ 512,
59
+ 512],
60
+ 'out_channel': 512},
61
+ 'layer_num': 1,
62
+ 'layer_type': 'FPNLayernorm'}],
63
+ 'n_head': 4,
64
+ 'n_mha_win_size': 9,
65
+ 'nlq_heads_cfg': {'center_sample_radius': 1.5,
66
+ 'cls_head_cfg': {'empty_cls': [],
67
+ 'feat_dim': 512,
68
+ 'input_dim': 512,
69
+ 'kernel_size': 3,
70
+ 'num_classes': 1,
71
+ 'num_layers': 3,
72
+ 'prior_prob': 0.01,
73
+ 'with_ln': True},
74
+ 'duration_thresh': 0.001,
75
+ 'iou_threshold': 0.1,
76
+ 'loss_normalizer': 200,
77
+ 'loss_normalizer_momentum': 0.9,
78
+ 'max_seg_num': 5,
79
+ 'min_score': 0.001,
80
+ 'pre_nms_thresh': 0.001,
81
+ 'pre_nms_topk': 2000,
82
+ 'reg_head_cfg': {'feat_dim': 512,
83
+ 'fpn_levels': 7,
84
+ 'input_dim': 512,
85
+ 'kernel_size': 3,
86
+ 'num_layers': 3,
87
+ 'with_ln': True},
88
+ 'reg_loss_weight': 1.0,
89
+ 'train_label_smoothing': 0.1},
90
+ 'num_classes': 1,
91
+ 'obj_encoder_cfg': [{'layer_cfg': {'act': 'relu',
92
+ 'n_in': 512,
93
+ 'num_layer': 2},
94
+ 'layer_num': 1,
95
+ 'layer_type': 'MaskedConv1DLayer'},
96
+ {'layer_cfg': {'n_embd': 512, 'path_pdrop': 0.1},
97
+ 'layer_num': 4,
98
+ 'layer_type': 'ObjectEncoderBlock'}],
99
+ 'object_dim': 512,
100
+ 'object_use_cross_model': True,
101
+ 'object_win_size': 1,
102
+ 'regression_range': [[0, 4],
103
+ [2, 8],
104
+ [4, 16],
105
+ [8, 32],
106
+ [16, 64],
107
+ [32, 128],
108
+ [64, 10000]],
109
+ 'scale_factor': 2,
110
+ 'tasks': ['NLQ', 'VTM'],
111
+ 'test_cfg': {'duration_thresh': 0.001,
112
+ 'ext_score_file': None,
113
+ 'iou_threshold': 0.1,
114
+ 'max_seg_num': 5,
115
+ 'min_score': 0.001,
116
+ 'multiclass_nms': True,
117
+ 'nms_method': 'soft',
118
+ 'nms_sigma': 0.75,
119
+ 'pre_nms_thresh': 0.001,
120
+ 'pre_nms_topk': 2000,
121
+ 'test_num': 1,
122
+ 'test_start_epoch': 2,
123
+ 'voting_thresh': 0.9},
124
+ 'text_encoder_cfg': [{'layer_cfg': {'act': 'relu',
125
+ 'n_in': 300,
126
+ 'n_out': 512,
127
+ 'num_layer': 2},
128
+ 'layer_num': 1,
129
+ 'layer_type': 'MaskedConv1DLayer'},
130
+ {'layer_cfg': {'n_embd': 512,
131
+ 'n_head': 4,
132
+ 'path_pdrop': 0.1},
133
+ 'layer_num': 4,
134
+ 'layer_type': 'TransformerBlock',
135
+ 'use_abs_pe': True}],
136
+ 'train_cfg': {'box_loss_weight': 5.0,
137
+ 'center_sample': 'radius',
138
+ 'center_sample_radius': 1.5,
139
+ 'clip_grad_l2norm': 1.0,
140
+ 'cls_prior_prob': 0.01,
141
+ 'dropout': 0.0,
142
+ 'droppath': 0.1,
143
+ 'head_empty_cls': [],
144
+ 'init_loss_norm': 200,
145
+ 'iou_loss_weight': 1.0,
146
+ 'label_smoothing': 0.1,
147
+ 'loss_weight': 1.0,
148
+ 'mamba_arch': ['bimamba1', 'mlp', 'obj'],
149
+ 'num_decoder_layer': 6},
150
+ 'use_abs_pe': True,
151
+ 'use_lmha_in_fpn': True,
152
+ 'use_rel_pe': False,
153
+ 'video_encoder_cfg': [{'layer_cfg': {'act': 'relu',
154
+ 'kernel_size': 3,
155
+ 'n_hidden': 512,
156
+ 'n_in': 4096,
157
+ 'n_out': 512,
158
+ 'num_layer': 2},
159
+ 'layer_num': 1,
160
+ 'layer_type': 'MaskedConv1DLayer'},
161
+ {'layer_cfg': {'mamba_arch': ['bimamba1',
162
+ 'mlp',
163
+ 'obj'],
164
+ 'n_embd': 512,
165
+ 'n_head': 4,
166
+ 'path_pdrop': 0.1},
167
+ 'layer_num': 4,
168
+ 'layer_type': 'ObjectMambaBlock'}],
169
+ 'vtm_heads_cfg': {'loss_weight': 1.0,
170
+ 'multiscale': False,
171
+ 'shot_aggregator_cfg': {'layer_cfg': {'cross_mixer_cfg': {'block_cfg': {'n_embd': 512,
172
+ 'n_head': 4},
173
+ 'block_type': 'MaskedMHA'},
174
+ 'num_layer': 1,
175
+ 'path_pdrop': 0.1,
176
+ 'query_num': 5,
177
+ 'self_mixer_cfg': {'block_cfg': {'n_embd': 512,
178
+ 'n_head': 4},
179
+ 'block_type': 'MaskedMHCA'}},
180
+ 'layer_type': 'QFormerLayer'},
181
+ 'similarity_head_cfg': {'layer_cfg': {'con_dim': 512,
182
+ 'x_dim': 512,
183
+ 'y_dim': 512},
184
+ 'layer_type': 'Cosine'},
185
+ 'soft_label': False}},
186
+ 'model_name': 'MultiTaskArch',
187
+ 'opt': {'backbone_lr_weight': 1,
188
+ 'epochs': 6,
189
+ 'learning_rate': 0.0002,
190
+ 'momentum': 0.9,
191
+ 'schedule_gamma': 0.1,
192
+ 'schedule_steps': [],
193
+ 'schedule_type': 'cosine',
194
+ 'type': 'AdamW',
195
+ 'warmup': True,
196
+ 'warmup_epochs': 4,
197
+ 'weight_decay': 0.05},
198
+ 'output_folder': '/root/autodl-tmp/model/GroundNLQ/tacos/',
199
+ 'test_cfg': {'duration_thresh': 0.001,
200
+ 'ext_score_file': None,
201
+ 'iou_threshold': 0.1,
202
+ 'max_seg_num': 5,
203
+ 'min_score': 0.001,
204
+ 'multiclass_nms': True,
205
+ 'nms_method': 'soft',
206
+ 'nms_sigma': 0.75,
207
+ 'pre_nms_thresh': 0.001,
208
+ 'pre_nms_topk': 2000,
209
+ 'test_num': 1,
210
+ 'test_start_epoch': 2,
211
+ 'voting_thresh': 0.9},
212
+ 'track': 'goal_step',
213
+ 'train_cfg': {'box_loss_weight': 5.0,
214
+ 'center_sample': 'radius',
215
+ 'center_sample_radius': 1.5,
216
+ 'clip_grad_l2norm': 1.0,
217
+ 'cls_prior_prob': 0.01,
218
+ 'dropout': 0.0,
219
+ 'droppath': 0.1,
220
+ 'head_empty_cls': [],
221
+ 'init_loss_norm': 200,
222
+ 'iou_loss_weight': 1.0,
223
+ 'label_smoothing': 0.1,
224
+ 'loss_weight': 1.0,
225
+ 'mamba_arch': ['bimamba1', 'mlp', 'obj'],
226
+ 'num_decoder_layer': 6},
227
+ 'train_split': ['training'],
228
+ 'val_split': ['validation']}