Add files using upload-large-folder tool
Browse files- experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_Classifier_Layer12_ImgText_V5_i_ret/checkpoint-200/merges.txt +0 -0
- experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_Classifier_Layer12_ImgText_V5_i_ret/checkpoint-200/vocab.json +0 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz1024/checkpoint-200/merges.txt +0 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz1024/checkpoint-200/vocab.json +0 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz1024/checkpoint-300/vocab.json +0 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/added_tokens.json +24 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/chat_template.jinja +7 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/added_tokens.json +24 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/chat_template.jinja +7 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/preprocessor_config.json +29 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/special_tokens_map.json +31 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/tokenizer_config.json +208 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/trainer_state.json +734 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/added_tokens.json +24 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/chat_template.jinja +7 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/preprocessor_config.json +29 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/special_tokens_map.json +31 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/tokenizer_config.json +208 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/merges.txt +0 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/preprocessor_config.json +29 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/tokenizer_config.json +208 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/train_cls.log +0 -0
- experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/vocab.json +0 -0
- experiments/public/eval/eval_1gpu.sh +81 -0
- experiments/public/eval/eval_1gpu_aop.sh +79 -0
- experiments/public/eval/eval_1gpu_cut_layer.sh +76 -0
- experiments/public/eval/eval_1gpu_cut_layer_AOP_text.sh +103 -0
- experiments/public/eval/eval_1gpu_cut_layer_unified_new.sh +131 -0
- experiments/public/eval/eval_1gpu_early_exit_classifier.sh +70 -0
- experiments/public/eval/eval_1gpu_early_exit_classifier_AOP_attn_pooling.sh +102 -0
- experiments/public/eval/eval_1gpu_early_exit_classifier_AOP_pooling.sh +212 -0
- experiments/public/eval/eval_1gpu_early_exit_classifier_AOP_pooling_new.sh +102 -0
- experiments/public/eval/eval_1gpu_early_exit_classifier_V5.sh +88 -0
- experiments/public/eval/eval_1gpu_early_exit_classifier_V5_new.sh +99 -0
- experiments/public/eval/eval_1gpu_multilayer_AOP_attn_pooling.sh +108 -0
- experiments/public/eval/eval_1gpu_multilayer_AOP_new.sh +106 -0
- experiments/public/eval/eval_1gpu_output_attn.sh +83 -0
- experiments/public/eval/eval_vlm2vecv1_8gpu.sh +71 -0
- experiments/public/eval/image_retrival.yaml +101 -0
- experiments/public/eval/mieb_any2any_retrieval_lite.yaml +55 -0
- experiments/public/eval/mieb_any2any_retrieval_lite2.yaml +55 -0
- experiments/public/eval/run_batch_benchmark.sh +112 -0
- experiments/public/eval/scan_threshold.sh +176 -0
- experiments/public/eval/visdoc_retrival.yaml +141 -0
- experiments/public/train/train_alltasks.yaml +395 -0
- experiments/public/train/train_image.yaml +161 -0
- experiments/public/train/train_image1.yaml +160 -0
- experiments/public/train/train_v2-gp.sh +103 -0
- experiments/public/train/train_v2-qwen2vl-2B_imageonly_add_CRD.sh +109 -0
- experiments/public/train/train_v2-qwen2vl-2B_imageonly_layer_prune.sh +99 -0
experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_Classifier_Layer12_ImgText_V5_i_ret/checkpoint-200/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_Classifier_Layer12_ImgText_V5_i_ret/checkpoint-200/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz1024/checkpoint-200/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz1024/checkpoint-200/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz1024/checkpoint-300/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/added_tokens.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</tool_call>": 151658,
|
| 3 |
+
"<tool_call>": 151657,
|
| 4 |
+
"<|box_end|>": 151649,
|
| 5 |
+
"<|box_start|>": 151648,
|
| 6 |
+
"<|endoftext|>": 151643,
|
| 7 |
+
"<|file_sep|>": 151664,
|
| 8 |
+
"<|fim_middle|>": 151660,
|
| 9 |
+
"<|fim_pad|>": 151662,
|
| 10 |
+
"<|fim_prefix|>": 151659,
|
| 11 |
+
"<|fim_suffix|>": 151661,
|
| 12 |
+
"<|im_end|>": 151645,
|
| 13 |
+
"<|im_start|>": 151644,
|
| 14 |
+
"<|image_pad|>": 151655,
|
| 15 |
+
"<|object_ref_end|>": 151647,
|
| 16 |
+
"<|object_ref_start|>": 151646,
|
| 17 |
+
"<|quad_end|>": 151651,
|
| 18 |
+
"<|quad_start|>": 151650,
|
| 19 |
+
"<|repo_name|>": 151663,
|
| 20 |
+
"<|video_pad|>": 151656,
|
| 21 |
+
"<|vision_end|>": 151653,
|
| 22 |
+
"<|vision_pad|>": 151654,
|
| 23 |
+
"<|vision_start|>": 151652
|
| 24 |
+
}
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/chat_template.jinja
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
|
| 2 |
+
You are a helpful assistant.<|im_end|>
|
| 3 |
+
{% endif %}<|im_start|>{{ message['role'] }}
|
| 4 |
+
{% if message['content'] is string %}{{ message['content'] }}<|im_end|>
|
| 5 |
+
{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
|
| 6 |
+
{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
|
| 7 |
+
{% endif %}
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/added_tokens.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</tool_call>": 151658,
|
| 3 |
+
"<tool_call>": 151657,
|
| 4 |
+
"<|box_end|>": 151649,
|
| 5 |
+
"<|box_start|>": 151648,
|
| 6 |
+
"<|endoftext|>": 151643,
|
| 7 |
+
"<|file_sep|>": 151664,
|
| 8 |
+
"<|fim_middle|>": 151660,
|
| 9 |
+
"<|fim_pad|>": 151662,
|
| 10 |
+
"<|fim_prefix|>": 151659,
|
| 11 |
+
"<|fim_suffix|>": 151661,
|
| 12 |
+
"<|im_end|>": 151645,
|
| 13 |
+
"<|im_start|>": 151644,
|
| 14 |
+
"<|image_pad|>": 151655,
|
| 15 |
+
"<|object_ref_end|>": 151647,
|
| 16 |
+
"<|object_ref_start|>": 151646,
|
| 17 |
+
"<|quad_end|>": 151651,
|
| 18 |
+
"<|quad_start|>": 151650,
|
| 19 |
+
"<|repo_name|>": 151663,
|
| 20 |
+
"<|video_pad|>": 151656,
|
| 21 |
+
"<|vision_end|>": 151653,
|
| 22 |
+
"<|vision_pad|>": 151654,
|
| 23 |
+
"<|vision_start|>": 151652
|
| 24 |
+
}
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/chat_template.jinja
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
|
| 2 |
+
You are a helpful assistant.<|im_end|>
|
| 3 |
+
{% endif %}<|im_start|>{{ message['role'] }}
|
| 4 |
+
{% if message['content'] is string %}{{ message['content'] }}<|im_end|>
|
| 5 |
+
{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
|
| 6 |
+
{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
|
| 7 |
+
{% endif %}
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/preprocessor_config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_convert_rgb": true,
|
| 3 |
+
"do_normalize": true,
|
| 4 |
+
"do_rescale": true,
|
| 5 |
+
"do_resize": true,
|
| 6 |
+
"image_mean": [
|
| 7 |
+
0.48145466,
|
| 8 |
+
0.4578275,
|
| 9 |
+
0.40821073
|
| 10 |
+
],
|
| 11 |
+
"image_processor_type": "Qwen2_5_VLImageProcessor",
|
| 12 |
+
"image_std": [
|
| 13 |
+
0.26862954,
|
| 14 |
+
0.26130258,
|
| 15 |
+
0.27577711
|
| 16 |
+
],
|
| 17 |
+
"max_pixels": 1003520,
|
| 18 |
+
"merge_size": 2,
|
| 19 |
+
"min_pixels": 3136,
|
| 20 |
+
"patch_size": 14,
|
| 21 |
+
"processor_class": "Qwen2_5_VLProcessor",
|
| 22 |
+
"resample": 3,
|
| 23 |
+
"rescale_factor": 0.00392156862745098,
|
| 24 |
+
"size": {
|
| 25 |
+
"max_pixels": 1003520,
|
| 26 |
+
"min_pixels": 3136
|
| 27 |
+
},
|
| 28 |
+
"temporal_patch_size": 2
|
| 29 |
+
}
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/tokenizer_config.json
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
}
|
| 181 |
+
},
|
| 182 |
+
"additional_special_tokens": [
|
| 183 |
+
"<|im_start|>",
|
| 184 |
+
"<|im_end|>",
|
| 185 |
+
"<|object_ref_start|>",
|
| 186 |
+
"<|object_ref_end|>",
|
| 187 |
+
"<|box_start|>",
|
| 188 |
+
"<|box_end|>",
|
| 189 |
+
"<|quad_start|>",
|
| 190 |
+
"<|quad_end|>",
|
| 191 |
+
"<|vision_start|>",
|
| 192 |
+
"<|vision_end|>",
|
| 193 |
+
"<|vision_pad|>",
|
| 194 |
+
"<|image_pad|>",
|
| 195 |
+
"<|video_pad|>"
|
| 196 |
+
],
|
| 197 |
+
"bos_token": null,
|
| 198 |
+
"clean_up_tokenization_spaces": false,
|
| 199 |
+
"eos_token": "<|im_end|>",
|
| 200 |
+
"errors": "replace",
|
| 201 |
+
"extra_special_tokens": {},
|
| 202 |
+
"model_max_length": 131072,
|
| 203 |
+
"pad_token": "<|endoftext|>",
|
| 204 |
+
"processor_class": "Qwen2_5_VLProcessor",
|
| 205 |
+
"split_special_tokens": false,
|
| 206 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 207 |
+
"unk_token": null
|
| 208 |
+
}
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/trainer_state.json
ADDED
|
@@ -0,0 +1,734 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.013017443374121323,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 100,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.00013017443374121324,
|
| 14 |
+
"grad_norm": 9.027134895324707,
|
| 15 |
+
"learning_rate": 0.0,
|
| 16 |
+
"loss": 1.7121,
|
| 17 |
+
"step": 1
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.0002603488674824265,
|
| 21 |
+
"grad_norm": 6.82881498336792,
|
| 22 |
+
"learning_rate": 5e-06,
|
| 23 |
+
"loss": 1.3212,
|
| 24 |
+
"step": 2
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.00039052330122363966,
|
| 28 |
+
"grad_norm": 9.780951499938965,
|
| 29 |
+
"learning_rate": 1e-05,
|
| 30 |
+
"loss": 1.3029,
|
| 31 |
+
"step": 3
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.000520697734964853,
|
| 35 |
+
"grad_norm": 6.956725597381592,
|
| 36 |
+
"learning_rate": 1.5e-05,
|
| 37 |
+
"loss": 1.3596,
|
| 38 |
+
"step": 4
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.0006508721687060661,
|
| 42 |
+
"grad_norm": 7.1865010261535645,
|
| 43 |
+
"learning_rate": 2e-05,
|
| 44 |
+
"loss": 1.0132,
|
| 45 |
+
"step": 5
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.0007810466024472793,
|
| 49 |
+
"grad_norm": 6.9718475341796875,
|
| 50 |
+
"learning_rate": 2.5e-05,
|
| 51 |
+
"loss": 1.0072,
|
| 52 |
+
"step": 6
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.0009112210361884926,
|
| 56 |
+
"grad_norm": 7.56270170211792,
|
| 57 |
+
"learning_rate": 3e-05,
|
| 58 |
+
"loss": 0.9878,
|
| 59 |
+
"step": 7
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"epoch": 0.001041395469929706,
|
| 63 |
+
"grad_norm": 9.146328926086426,
|
| 64 |
+
"learning_rate": 3.5000000000000004e-05,
|
| 65 |
+
"loss": 1.0033,
|
| 66 |
+
"step": 8
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 0.001171569903670919,
|
| 70 |
+
"grad_norm": 7.27562952041626,
|
| 71 |
+
"learning_rate": 4e-05,
|
| 72 |
+
"loss": 1.2293,
|
| 73 |
+
"step": 9
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"epoch": 0.0013017443374121322,
|
| 77 |
+
"grad_norm": 7.683845520019531,
|
| 78 |
+
"learning_rate": 4.4999999999999996e-05,
|
| 79 |
+
"loss": 1.0291,
|
| 80 |
+
"step": 10
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"epoch": 0.0014319187711533455,
|
| 84 |
+
"grad_norm": 6.271151542663574,
|
| 85 |
+
"learning_rate": 5e-05,
|
| 86 |
+
"loss": 0.9344,
|
| 87 |
+
"step": 11
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"epoch": 0.0015620932048945586,
|
| 91 |
+
"grad_norm": 7.351341247558594,
|
| 92 |
+
"learning_rate": 5.5e-05,
|
| 93 |
+
"loss": 1.1735,
|
| 94 |
+
"step": 12
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"epoch": 0.001692267638635772,
|
| 98 |
+
"grad_norm": 9.452715873718262,
|
| 99 |
+
"learning_rate": 6e-05,
|
| 100 |
+
"loss": 1.6768,
|
| 101 |
+
"step": 13
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"epoch": 0.0018224420723769851,
|
| 105 |
+
"grad_norm": 4.848631858825684,
|
| 106 |
+
"learning_rate": 6.500000000000001e-05,
|
| 107 |
+
"loss": 1.1856,
|
| 108 |
+
"step": 14
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"epoch": 0.0019526165061181985,
|
| 112 |
+
"grad_norm": 4.508008003234863,
|
| 113 |
+
"learning_rate": 7.000000000000001e-05,
|
| 114 |
+
"loss": 1.1299,
|
| 115 |
+
"step": 15
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 0.002082790939859412,
|
| 119 |
+
"grad_norm": 7.280736446380615,
|
| 120 |
+
"learning_rate": 7.5e-05,
|
| 121 |
+
"loss": 1.1765,
|
| 122 |
+
"step": 16
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"epoch": 0.0022129653736006247,
|
| 126 |
+
"grad_norm": 5.3748979568481445,
|
| 127 |
+
"learning_rate": 8e-05,
|
| 128 |
+
"loss": 0.7167,
|
| 129 |
+
"step": 17
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 0.002343139807341838,
|
| 133 |
+
"grad_norm": 4.538390636444092,
|
| 134 |
+
"learning_rate": 8.5e-05,
|
| 135 |
+
"loss": 0.8025,
|
| 136 |
+
"step": 18
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"epoch": 0.0024733142410830514,
|
| 140 |
+
"grad_norm": 5.350919723510742,
|
| 141 |
+
"learning_rate": 8.999999999999999e-05,
|
| 142 |
+
"loss": 1.0602,
|
| 143 |
+
"step": 19
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"epoch": 0.0026034886748242643,
|
| 147 |
+
"grad_norm": 7.6904802322387695,
|
| 148 |
+
"learning_rate": 9.5e-05,
|
| 149 |
+
"loss": 0.9535,
|
| 150 |
+
"step": 20
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"epoch": 0.0027336631085654777,
|
| 154 |
+
"grad_norm": 3.215092658996582,
|
| 155 |
+
"learning_rate": 0.0001,
|
| 156 |
+
"loss": 0.6306,
|
| 157 |
+
"step": 21
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"epoch": 0.002863837542306691,
|
| 161 |
+
"grad_norm": 3.4990460872650146,
|
| 162 |
+
"learning_rate": 0.000105,
|
| 163 |
+
"loss": 0.8273,
|
| 164 |
+
"step": 22
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"epoch": 0.0029940119760479044,
|
| 168 |
+
"grad_norm": 6.226487159729004,
|
| 169 |
+
"learning_rate": 0.00011,
|
| 170 |
+
"loss": 0.7278,
|
| 171 |
+
"step": 23
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 0.0031241864097891173,
|
| 175 |
+
"grad_norm": 8.820632934570312,
|
| 176 |
+
"learning_rate": 0.000115,
|
| 177 |
+
"loss": 1.1691,
|
| 178 |
+
"step": 24
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"epoch": 0.0032543608435303306,
|
| 182 |
+
"grad_norm": 4.559078216552734,
|
| 183 |
+
"learning_rate": 0.00012,
|
| 184 |
+
"loss": 0.7181,
|
| 185 |
+
"step": 25
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"epoch": 0.003384535277271544,
|
| 189 |
+
"grad_norm": 4.220932960510254,
|
| 190 |
+
"learning_rate": 0.000125,
|
| 191 |
+
"loss": 1.1495,
|
| 192 |
+
"step": 26
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"epoch": 0.003514709711012757,
|
| 196 |
+
"grad_norm": 3.457106590270996,
|
| 197 |
+
"learning_rate": 0.00013000000000000002,
|
| 198 |
+
"loss": 0.6497,
|
| 199 |
+
"step": 27
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"epoch": 0.0036448841447539702,
|
| 203 |
+
"grad_norm": 4.938692092895508,
|
| 204 |
+
"learning_rate": 0.000135,
|
| 205 |
+
"loss": 0.8021,
|
| 206 |
+
"step": 28
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 0.0037750585784951836,
|
| 210 |
+
"grad_norm": 4.849185943603516,
|
| 211 |
+
"learning_rate": 0.00014000000000000001,
|
| 212 |
+
"loss": 0.4922,
|
| 213 |
+
"step": 29
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"epoch": 0.003905233012236397,
|
| 217 |
+
"grad_norm": 2.6389944553375244,
|
| 218 |
+
"learning_rate": 0.000145,
|
| 219 |
+
"loss": 0.7901,
|
| 220 |
+
"step": 30
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"epoch": 0.00403540744597761,
|
| 224 |
+
"grad_norm": 3.179384231567383,
|
| 225 |
+
"learning_rate": 0.00015,
|
| 226 |
+
"loss": 0.4517,
|
| 227 |
+
"step": 31
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"epoch": 0.004165581879718824,
|
| 231 |
+
"grad_norm": 3.68798828125,
|
| 232 |
+
"learning_rate": 0.000155,
|
| 233 |
+
"loss": 0.7674,
|
| 234 |
+
"step": 32
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"epoch": 0.004295756313460036,
|
| 238 |
+
"grad_norm": 3.3014638423919678,
|
| 239 |
+
"learning_rate": 0.00016,
|
| 240 |
+
"loss": 0.6232,
|
| 241 |
+
"step": 33
|
| 242 |
+
},
|
| 243 |
+
{
|
| 244 |
+
"epoch": 0.0044259307472012495,
|
| 245 |
+
"grad_norm": 5.8319993019104,
|
| 246 |
+
"learning_rate": 0.000165,
|
| 247 |
+
"loss": 0.7051,
|
| 248 |
+
"step": 34
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"epoch": 0.004556105180942463,
|
| 252 |
+
"grad_norm": 5.789146423339844,
|
| 253 |
+
"learning_rate": 0.00017,
|
| 254 |
+
"loss": 0.9646,
|
| 255 |
+
"step": 35
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"epoch": 0.004686279614683676,
|
| 259 |
+
"grad_norm": 3.3160910606384277,
|
| 260 |
+
"learning_rate": 0.000175,
|
| 261 |
+
"loss": 0.7404,
|
| 262 |
+
"step": 36
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"epoch": 0.0048164540484248895,
|
| 266 |
+
"grad_norm": 2.0886712074279785,
|
| 267 |
+
"learning_rate": 0.00017999999999999998,
|
| 268 |
+
"loss": 0.4553,
|
| 269 |
+
"step": 37
|
| 270 |
+
},
|
| 271 |
+
{
|
| 272 |
+
"epoch": 0.004946628482166103,
|
| 273 |
+
"grad_norm": 3.526718854904175,
|
| 274 |
+
"learning_rate": 0.000185,
|
| 275 |
+
"loss": 0.6724,
|
| 276 |
+
"step": 38
|
| 277 |
+
},
|
| 278 |
+
{
|
| 279 |
+
"epoch": 0.005076802915907316,
|
| 280 |
+
"grad_norm": 1.9652310609817505,
|
| 281 |
+
"learning_rate": 0.00019,
|
| 282 |
+
"loss": 0.4729,
|
| 283 |
+
"step": 39
|
| 284 |
+
},
|
| 285 |
+
{
|
| 286 |
+
"epoch": 0.005206977349648529,
|
| 287 |
+
"grad_norm": 3.9210290908813477,
|
| 288 |
+
"learning_rate": 0.00019500000000000002,
|
| 289 |
+
"loss": 0.9257,
|
| 290 |
+
"step": 40
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"epoch": 0.005337151783389742,
|
| 294 |
+
"grad_norm": 2.2785885334014893,
|
| 295 |
+
"learning_rate": 0.0002,
|
| 296 |
+
"loss": 0.3922,
|
| 297 |
+
"step": 41
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"epoch": 0.005467326217130955,
|
| 301 |
+
"grad_norm": 5.556844711303711,
|
| 302 |
+
"learning_rate": 0.000205,
|
| 303 |
+
"loss": 0.8272,
|
| 304 |
+
"step": 42
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"epoch": 0.005597500650872169,
|
| 308 |
+
"grad_norm": 1.7946547269821167,
|
| 309 |
+
"learning_rate": 0.00021,
|
| 310 |
+
"loss": 0.2776,
|
| 311 |
+
"step": 43
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"epoch": 0.005727675084613382,
|
| 315 |
+
"grad_norm": 1.6659146547317505,
|
| 316 |
+
"learning_rate": 0.000215,
|
| 317 |
+
"loss": 0.2818,
|
| 318 |
+
"step": 44
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"epoch": 0.005857849518354595,
|
| 322 |
+
"grad_norm": 2.9105308055877686,
|
| 323 |
+
"learning_rate": 0.00022,
|
| 324 |
+
"loss": 0.733,
|
| 325 |
+
"step": 45
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"epoch": 0.005988023952095809,
|
| 329 |
+
"grad_norm": 1.707923173904419,
|
| 330 |
+
"learning_rate": 0.00022500000000000002,
|
| 331 |
+
"loss": 0.2418,
|
| 332 |
+
"step": 46
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"epoch": 0.006118198385837021,
|
| 336 |
+
"grad_norm": 1.9957884550094604,
|
| 337 |
+
"learning_rate": 0.00023,
|
| 338 |
+
"loss": 0.6494,
|
| 339 |
+
"step": 47
|
| 340 |
+
},
|
| 341 |
+
{
|
| 342 |
+
"epoch": 0.006248372819578235,
|
| 343 |
+
"grad_norm": 2.3562097549438477,
|
| 344 |
+
"learning_rate": 0.000235,
|
| 345 |
+
"loss": 0.4304,
|
| 346 |
+
"step": 48
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"epoch": 0.006378547253319448,
|
| 350 |
+
"grad_norm": 2.6113295555114746,
|
| 351 |
+
"learning_rate": 0.00024,
|
| 352 |
+
"loss": 0.6548,
|
| 353 |
+
"step": 49
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"epoch": 0.006508721687060661,
|
| 357 |
+
"grad_norm": 2.3105404376983643,
|
| 358 |
+
"learning_rate": 0.000245,
|
| 359 |
+
"loss": 0.5747,
|
| 360 |
+
"step": 50
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"epoch": 0.006638896120801875,
|
| 364 |
+
"grad_norm": 2.4724414348602295,
|
| 365 |
+
"learning_rate": 0.00025,
|
| 366 |
+
"loss": 0.3501,
|
| 367 |
+
"step": 51
|
| 368 |
+
},
|
| 369 |
+
{
|
| 370 |
+
"epoch": 0.006769070554543088,
|
| 371 |
+
"grad_norm": 2.129112482070923,
|
| 372 |
+
"learning_rate": 0.000255,
|
| 373 |
+
"loss": 0.3983,
|
| 374 |
+
"step": 52
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"epoch": 0.006899244988284301,
|
| 378 |
+
"grad_norm": 1.734704852104187,
|
| 379 |
+
"learning_rate": 0.00026000000000000003,
|
| 380 |
+
"loss": 0.4274,
|
| 381 |
+
"step": 53
|
| 382 |
+
},
|
| 383 |
+
{
|
| 384 |
+
"epoch": 0.007029419422025514,
|
| 385 |
+
"grad_norm": 1.7710378170013428,
|
| 386 |
+
"learning_rate": 0.00026500000000000004,
|
| 387 |
+
"loss": 0.2733,
|
| 388 |
+
"step": 54
|
| 389 |
+
},
|
| 390 |
+
{
|
| 391 |
+
"epoch": 0.007159593855766727,
|
| 392 |
+
"grad_norm": 3.876213788986206,
|
| 393 |
+
"learning_rate": 0.00027,
|
| 394 |
+
"loss": 0.2934,
|
| 395 |
+
"step": 55
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"epoch": 0.0072897682895079405,
|
| 399 |
+
"grad_norm": 1.6544724702835083,
|
| 400 |
+
"learning_rate": 0.000275,
|
| 401 |
+
"loss": 0.42,
|
| 402 |
+
"step": 56
|
| 403 |
+
},
|
| 404 |
+
{
|
| 405 |
+
"epoch": 0.007419942723249154,
|
| 406 |
+
"grad_norm": 4.511378288269043,
|
| 407 |
+
"learning_rate": 0.00028000000000000003,
|
| 408 |
+
"loss": 0.7193,
|
| 409 |
+
"step": 57
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"epoch": 0.007550117156990367,
|
| 413 |
+
"grad_norm": 1.969791293144226,
|
| 414 |
+
"learning_rate": 0.000285,
|
| 415 |
+
"loss": 0.2931,
|
| 416 |
+
"step": 58
|
| 417 |
+
},
|
| 418 |
+
{
|
| 419 |
+
"epoch": 0.0076802915907315805,
|
| 420 |
+
"grad_norm": 1.4399250745773315,
|
| 421 |
+
"learning_rate": 0.00029,
|
| 422 |
+
"loss": 0.2678,
|
| 423 |
+
"step": 59
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"epoch": 0.007810466024472794,
|
| 427 |
+
"grad_norm": 2.075308084487915,
|
| 428 |
+
"learning_rate": 0.000295,
|
| 429 |
+
"loss": 0.5184,
|
| 430 |
+
"step": 60
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"epoch": 0.007940640458214007,
|
| 434 |
+
"grad_norm": 2.092390775680542,
|
| 435 |
+
"learning_rate": 0.0003,
|
| 436 |
+
"loss": 0.501,
|
| 437 |
+
"step": 61
|
| 438 |
+
},
|
| 439 |
+
{
|
| 440 |
+
"epoch": 0.00807081489195522,
|
| 441 |
+
"grad_norm": 1.3803796768188477,
|
| 442 |
+
"learning_rate": 0.000305,
|
| 443 |
+
"loss": 0.0933,
|
| 444 |
+
"step": 62
|
| 445 |
+
},
|
| 446 |
+
{
|
| 447 |
+
"epoch": 0.008200989325696434,
|
| 448 |
+
"grad_norm": 2.6716833114624023,
|
| 449 |
+
"learning_rate": 0.00031,
|
| 450 |
+
"loss": 0.4907,
|
| 451 |
+
"step": 63
|
| 452 |
+
},
|
| 453 |
+
{
|
| 454 |
+
"epoch": 0.008331163759437647,
|
| 455 |
+
"grad_norm": 2.602332353591919,
|
| 456 |
+
"learning_rate": 0.000315,
|
| 457 |
+
"loss": 0.5355,
|
| 458 |
+
"step": 64
|
| 459 |
+
},
|
| 460 |
+
{
|
| 461 |
+
"epoch": 0.008461338193178859,
|
| 462 |
+
"grad_norm": 1.9427075386047363,
|
| 463 |
+
"learning_rate": 0.00032,
|
| 464 |
+
"loss": 0.2417,
|
| 465 |
+
"step": 65
|
| 466 |
+
},
|
| 467 |
+
{
|
| 468 |
+
"epoch": 0.008591512626920072,
|
| 469 |
+
"grad_norm": 2.076782703399658,
|
| 470 |
+
"learning_rate": 0.00032500000000000004,
|
| 471 |
+
"loss": 0.2974,
|
| 472 |
+
"step": 66
|
| 473 |
+
},
|
| 474 |
+
{
|
| 475 |
+
"epoch": 0.008721687060661286,
|
| 476 |
+
"grad_norm": 1.6976258754730225,
|
| 477 |
+
"learning_rate": 0.00033,
|
| 478 |
+
"loss": 0.2383,
|
| 479 |
+
"step": 67
|
| 480 |
+
},
|
| 481 |
+
{
|
| 482 |
+
"epoch": 0.008851861494402499,
|
| 483 |
+
"grad_norm": 1.6441351175308228,
|
| 484 |
+
"learning_rate": 0.000335,
|
| 485 |
+
"loss": 0.2017,
|
| 486 |
+
"step": 68
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"epoch": 0.008982035928143712,
|
| 490 |
+
"grad_norm": 2.251415252685547,
|
| 491 |
+
"learning_rate": 0.00034,
|
| 492 |
+
"loss": 0.3529,
|
| 493 |
+
"step": 69
|
| 494 |
+
},
|
| 495 |
+
{
|
| 496 |
+
"epoch": 0.009112210361884926,
|
| 497 |
+
"grad_norm": 1.3723615407943726,
|
| 498 |
+
"learning_rate": 0.000345,
|
| 499 |
+
"loss": 0.2136,
|
| 500 |
+
"step": 70
|
| 501 |
+
},
|
| 502 |
+
{
|
| 503 |
+
"epoch": 0.009242384795626139,
|
| 504 |
+
"grad_norm": 2.3022258281707764,
|
| 505 |
+
"learning_rate": 0.00035,
|
| 506 |
+
"loss": 0.3552,
|
| 507 |
+
"step": 71
|
| 508 |
+
},
|
| 509 |
+
{
|
| 510 |
+
"epoch": 0.009372559229367352,
|
| 511 |
+
"grad_norm": 1.7158514261245728,
|
| 512 |
+
"learning_rate": 0.000355,
|
| 513 |
+
"loss": 0.2876,
|
| 514 |
+
"step": 72
|
| 515 |
+
},
|
| 516 |
+
{
|
| 517 |
+
"epoch": 0.009502733663108566,
|
| 518 |
+
"grad_norm": 2.0729708671569824,
|
| 519 |
+
"learning_rate": 0.00035999999999999997,
|
| 520 |
+
"loss": 0.3345,
|
| 521 |
+
"step": 73
|
| 522 |
+
},
|
| 523 |
+
{
|
| 524 |
+
"epoch": 0.009632908096849779,
|
| 525 |
+
"grad_norm": 0.8926207423210144,
|
| 526 |
+
"learning_rate": 0.000365,
|
| 527 |
+
"loss": 0.145,
|
| 528 |
+
"step": 74
|
| 529 |
+
},
|
| 530 |
+
{
|
| 531 |
+
"epoch": 0.009763082530590992,
|
| 532 |
+
"grad_norm": 1.281984806060791,
|
| 533 |
+
"learning_rate": 0.00037,
|
| 534 |
+
"loss": 0.2553,
|
| 535 |
+
"step": 75
|
| 536 |
+
},
|
| 537 |
+
{
|
| 538 |
+
"epoch": 0.009893256964332206,
|
| 539 |
+
"grad_norm": 2.1244750022888184,
|
| 540 |
+
"learning_rate": 0.000375,
|
| 541 |
+
"loss": 0.4454,
|
| 542 |
+
"step": 76
|
| 543 |
+
},
|
| 544 |
+
{
|
| 545 |
+
"epoch": 0.010023431398073419,
|
| 546 |
+
"grad_norm": 2.00681209564209,
|
| 547 |
+
"learning_rate": 0.00038,
|
| 548 |
+
"loss": 0.2888,
|
| 549 |
+
"step": 77
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"epoch": 0.010153605831814632,
|
| 553 |
+
"grad_norm": 2.414694309234619,
|
| 554 |
+
"learning_rate": 0.00038500000000000003,
|
| 555 |
+
"loss": 0.3445,
|
| 556 |
+
"step": 78
|
| 557 |
+
},
|
| 558 |
+
{
|
| 559 |
+
"epoch": 0.010283780265555844,
|
| 560 |
+
"grad_norm": 1.4376050233840942,
|
| 561 |
+
"learning_rate": 0.00039000000000000005,
|
| 562 |
+
"loss": 0.3805,
|
| 563 |
+
"step": 79
|
| 564 |
+
},
|
| 565 |
+
{
|
| 566 |
+
"epoch": 0.010413954699297057,
|
| 567 |
+
"grad_norm": 1.5109490156173706,
|
| 568 |
+
"learning_rate": 0.000395,
|
| 569 |
+
"loss": 0.298,
|
| 570 |
+
"step": 80
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"epoch": 0.01054412913303827,
|
| 574 |
+
"grad_norm": 1.4980159997940063,
|
| 575 |
+
"learning_rate": 0.0004,
|
| 576 |
+
"loss": 0.3296,
|
| 577 |
+
"step": 81
|
| 578 |
+
},
|
| 579 |
+
{
|
| 580 |
+
"epoch": 0.010674303566779484,
|
| 581 |
+
"grad_norm": 0.8917379379272461,
|
| 582 |
+
"learning_rate": 0.00040500000000000003,
|
| 583 |
+
"loss": 0.2573,
|
| 584 |
+
"step": 82
|
| 585 |
+
},
|
| 586 |
+
{
|
| 587 |
+
"epoch": 0.010804478000520697,
|
| 588 |
+
"grad_norm": 1.4543973207473755,
|
| 589 |
+
"learning_rate": 0.00041,
|
| 590 |
+
"loss": 0.3317,
|
| 591 |
+
"step": 83
|
| 592 |
+
},
|
| 593 |
+
{
|
| 594 |
+
"epoch": 0.01093465243426191,
|
| 595 |
+
"grad_norm": 1.2531291246414185,
|
| 596 |
+
"learning_rate": 0.000415,
|
| 597 |
+
"loss": 0.3687,
|
| 598 |
+
"step": 84
|
| 599 |
+
},
|
| 600 |
+
{
|
| 601 |
+
"epoch": 0.011064826868003124,
|
| 602 |
+
"grad_norm": 1.4232031106948853,
|
| 603 |
+
"learning_rate": 0.00042,
|
| 604 |
+
"loss": 0.1944,
|
| 605 |
+
"step": 85
|
| 606 |
+
},
|
| 607 |
+
{
|
| 608 |
+
"epoch": 0.011195001301744337,
|
| 609 |
+
"grad_norm": 1.066874384880066,
|
| 610 |
+
"learning_rate": 0.000425,
|
| 611 |
+
"loss": 0.2827,
|
| 612 |
+
"step": 86
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"epoch": 0.01132517573548555,
|
| 616 |
+
"grad_norm": 1.0397121906280518,
|
| 617 |
+
"learning_rate": 0.00043,
|
| 618 |
+
"loss": 0.2561,
|
| 619 |
+
"step": 87
|
| 620 |
+
},
|
| 621 |
+
{
|
| 622 |
+
"epoch": 0.011455350169226764,
|
| 623 |
+
"grad_norm": 1.2276612520217896,
|
| 624 |
+
"learning_rate": 0.000435,
|
| 625 |
+
"loss": 0.0961,
|
| 626 |
+
"step": 88
|
| 627 |
+
},
|
| 628 |
+
{
|
| 629 |
+
"epoch": 0.011585524602967977,
|
| 630 |
+
"grad_norm": 1.4861217737197876,
|
| 631 |
+
"learning_rate": 0.00044,
|
| 632 |
+
"loss": 0.2329,
|
| 633 |
+
"step": 89
|
| 634 |
+
},
|
| 635 |
+
{
|
| 636 |
+
"epoch": 0.01171569903670919,
|
| 637 |
+
"grad_norm": 1.859115481376648,
|
| 638 |
+
"learning_rate": 0.00044500000000000003,
|
| 639 |
+
"loss": 0.2767,
|
| 640 |
+
"step": 90
|
| 641 |
+
},
|
| 642 |
+
{
|
| 643 |
+
"epoch": 0.011845873470450404,
|
| 644 |
+
"grad_norm": 1.5194251537322998,
|
| 645 |
+
"learning_rate": 0.00045000000000000004,
|
| 646 |
+
"loss": 0.2665,
|
| 647 |
+
"step": 91
|
| 648 |
+
},
|
| 649 |
+
{
|
| 650 |
+
"epoch": 0.011976047904191617,
|
| 651 |
+
"grad_norm": 1.2869577407836914,
|
| 652 |
+
"learning_rate": 0.000455,
|
| 653 |
+
"loss": 0.128,
|
| 654 |
+
"step": 92
|
| 655 |
+
},
|
| 656 |
+
{
|
| 657 |
+
"epoch": 0.01210622233793283,
|
| 658 |
+
"grad_norm": 1.3539648056030273,
|
| 659 |
+
"learning_rate": 0.00046,
|
| 660 |
+
"loss": 0.2405,
|
| 661 |
+
"step": 93
|
| 662 |
+
},
|
| 663 |
+
{
|
| 664 |
+
"epoch": 0.012236396771674042,
|
| 665 |
+
"grad_norm": 1.1017889976501465,
|
| 666 |
+
"learning_rate": 0.000465,
|
| 667 |
+
"loss": 0.2318,
|
| 668 |
+
"step": 94
|
| 669 |
+
},
|
| 670 |
+
{
|
| 671 |
+
"epoch": 0.012366571205415256,
|
| 672 |
+
"grad_norm": 1.0330371856689453,
|
| 673 |
+
"learning_rate": 0.00047,
|
| 674 |
+
"loss": 0.2629,
|
| 675 |
+
"step": 95
|
| 676 |
+
},
|
| 677 |
+
{
|
| 678 |
+
"epoch": 0.01249674563915647,
|
| 679 |
+
"grad_norm": 1.0031756162643433,
|
| 680 |
+
"learning_rate": 0.000475,
|
| 681 |
+
"loss": 0.152,
|
| 682 |
+
"step": 96
|
| 683 |
+
},
|
| 684 |
+
{
|
| 685 |
+
"epoch": 0.012626920072897682,
|
| 686 |
+
"grad_norm": 0.9949682950973511,
|
| 687 |
+
"learning_rate": 0.00048,
|
| 688 |
+
"loss": 0.2203,
|
| 689 |
+
"step": 97
|
| 690 |
+
},
|
| 691 |
+
{
|
| 692 |
+
"epoch": 0.012757094506638896,
|
| 693 |
+
"grad_norm": 1.5362247228622437,
|
| 694 |
+
"learning_rate": 0.00048499999999999997,
|
| 695 |
+
"loss": 0.2322,
|
| 696 |
+
"step": 98
|
| 697 |
+
},
|
| 698 |
+
{
|
| 699 |
+
"epoch": 0.01288726894038011,
|
| 700 |
+
"grad_norm": 1.273103952407837,
|
| 701 |
+
"learning_rate": 0.00049,
|
| 702 |
+
"loss": 0.2898,
|
| 703 |
+
"step": 99
|
| 704 |
+
},
|
| 705 |
+
{
|
| 706 |
+
"epoch": 0.013017443374121323,
|
| 707 |
+
"grad_norm": 0.6677097678184509,
|
| 708 |
+
"learning_rate": 0.000495,
|
| 709 |
+
"loss": 0.1565,
|
| 710 |
+
"step": 100
|
| 711 |
+
}
|
| 712 |
+
],
|
| 713 |
+
"logging_steps": 1,
|
| 714 |
+
"max_steps": 1000,
|
| 715 |
+
"num_input_tokens_seen": 0,
|
| 716 |
+
"num_train_epochs": 1,
|
| 717 |
+
"save_steps": 100,
|
| 718 |
+
"stateful_callbacks": {
|
| 719 |
+
"TrainerControl": {
|
| 720 |
+
"args": {
|
| 721 |
+
"should_epoch_stop": false,
|
| 722 |
+
"should_evaluate": false,
|
| 723 |
+
"should_log": false,
|
| 724 |
+
"should_save": true,
|
| 725 |
+
"should_training_stop": false
|
| 726 |
+
},
|
| 727 |
+
"attributes": {}
|
| 728 |
+
}
|
| 729 |
+
},
|
| 730 |
+
"total_flos": 0.0,
|
| 731 |
+
"train_batch_size": 128,
|
| 732 |
+
"trial_name": null,
|
| 733 |
+
"trial_params": null
|
| 734 |
+
}
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/added_tokens.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</tool_call>": 151658,
|
| 3 |
+
"<tool_call>": 151657,
|
| 4 |
+
"<|box_end|>": 151649,
|
| 5 |
+
"<|box_start|>": 151648,
|
| 6 |
+
"<|endoftext|>": 151643,
|
| 7 |
+
"<|file_sep|>": 151664,
|
| 8 |
+
"<|fim_middle|>": 151660,
|
| 9 |
+
"<|fim_pad|>": 151662,
|
| 10 |
+
"<|fim_prefix|>": 151659,
|
| 11 |
+
"<|fim_suffix|>": 151661,
|
| 12 |
+
"<|im_end|>": 151645,
|
| 13 |
+
"<|im_start|>": 151644,
|
| 14 |
+
"<|image_pad|>": 151655,
|
| 15 |
+
"<|object_ref_end|>": 151647,
|
| 16 |
+
"<|object_ref_start|>": 151646,
|
| 17 |
+
"<|quad_end|>": 151651,
|
| 18 |
+
"<|quad_start|>": 151650,
|
| 19 |
+
"<|repo_name|>": 151663,
|
| 20 |
+
"<|video_pad|>": 151656,
|
| 21 |
+
"<|vision_end|>": 151653,
|
| 22 |
+
"<|vision_pad|>": 151654,
|
| 23 |
+
"<|vision_start|>": 151652
|
| 24 |
+
}
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/chat_template.jinja
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
|
| 2 |
+
You are a helpful assistant.<|im_end|>
|
| 3 |
+
{% endif %}<|im_start|>{{ message['role'] }}
|
| 4 |
+
{% if message['content'] is string %}{{ message['content'] }}<|im_end|>
|
| 5 |
+
{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
|
| 6 |
+
{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
|
| 7 |
+
{% endif %}
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/preprocessor_config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_convert_rgb": true,
|
| 3 |
+
"do_normalize": true,
|
| 4 |
+
"do_rescale": true,
|
| 5 |
+
"do_resize": true,
|
| 6 |
+
"image_mean": [
|
| 7 |
+
0.48145466,
|
| 8 |
+
0.4578275,
|
| 9 |
+
0.40821073
|
| 10 |
+
],
|
| 11 |
+
"image_processor_type": "Qwen2_5_VLImageProcessor",
|
| 12 |
+
"image_std": [
|
| 13 |
+
0.26862954,
|
| 14 |
+
0.26130258,
|
| 15 |
+
0.27577711
|
| 16 |
+
],
|
| 17 |
+
"max_pixels": 1003520,
|
| 18 |
+
"merge_size": 2,
|
| 19 |
+
"min_pixels": 3136,
|
| 20 |
+
"patch_size": 14,
|
| 21 |
+
"processor_class": "Qwen2_5_VLProcessor",
|
| 22 |
+
"resample": 3,
|
| 23 |
+
"rescale_factor": 0.00392156862745098,
|
| 24 |
+
"size": {
|
| 25 |
+
"max_pixels": 1003520,
|
| 26 |
+
"min_pixels": 3136
|
| 27 |
+
},
|
| 28 |
+
"temporal_patch_size": 2
|
| 29 |
+
}
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/tokenizer_config.json
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
}
|
| 181 |
+
},
|
| 182 |
+
"additional_special_tokens": [
|
| 183 |
+
"<|im_start|>",
|
| 184 |
+
"<|im_end|>",
|
| 185 |
+
"<|object_ref_start|>",
|
| 186 |
+
"<|object_ref_end|>",
|
| 187 |
+
"<|box_start|>",
|
| 188 |
+
"<|box_end|>",
|
| 189 |
+
"<|quad_start|>",
|
| 190 |
+
"<|quad_end|>",
|
| 191 |
+
"<|vision_start|>",
|
| 192 |
+
"<|vision_end|>",
|
| 193 |
+
"<|vision_pad|>",
|
| 194 |
+
"<|image_pad|>",
|
| 195 |
+
"<|video_pad|>"
|
| 196 |
+
],
|
| 197 |
+
"bos_token": null,
|
| 198 |
+
"clean_up_tokenization_spaces": false,
|
| 199 |
+
"eos_token": "<|im_end|>",
|
| 200 |
+
"errors": "replace",
|
| 201 |
+
"extra_special_tokens": {},
|
| 202 |
+
"model_max_length": 131072,
|
| 203 |
+
"pad_token": "<|endoftext|>",
|
| 204 |
+
"processor_class": "Qwen2_5_VLProcessor",
|
| 205 |
+
"split_special_tokens": false,
|
| 206 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 207 |
+
"unk_token": null
|
| 208 |
+
}
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/preprocessor_config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_convert_rgb": true,
|
| 3 |
+
"do_normalize": true,
|
| 4 |
+
"do_rescale": true,
|
| 5 |
+
"do_resize": true,
|
| 6 |
+
"image_mean": [
|
| 7 |
+
0.48145466,
|
| 8 |
+
0.4578275,
|
| 9 |
+
0.40821073
|
| 10 |
+
],
|
| 11 |
+
"image_processor_type": "Qwen2_5_VLImageProcessor",
|
| 12 |
+
"image_std": [
|
| 13 |
+
0.26862954,
|
| 14 |
+
0.26130258,
|
| 15 |
+
0.27577711
|
| 16 |
+
],
|
| 17 |
+
"max_pixels": 1003520,
|
| 18 |
+
"merge_size": 2,
|
| 19 |
+
"min_pixels": 3136,
|
| 20 |
+
"patch_size": 14,
|
| 21 |
+
"processor_class": "Qwen2_5_VLProcessor",
|
| 22 |
+
"resample": 3,
|
| 23 |
+
"rescale_factor": 0.00392156862745098,
|
| 24 |
+
"size": {
|
| 25 |
+
"max_pixels": 1003520,
|
| 26 |
+
"min_pixels": 3136
|
| 27 |
+
},
|
| 28 |
+
"temporal_patch_size": 2
|
| 29 |
+
}
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/tokenizer_config.json
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
}
|
| 181 |
+
},
|
| 182 |
+
"additional_special_tokens": [
|
| 183 |
+
"<|im_start|>",
|
| 184 |
+
"<|im_end|>",
|
| 185 |
+
"<|object_ref_start|>",
|
| 186 |
+
"<|object_ref_end|>",
|
| 187 |
+
"<|box_start|>",
|
| 188 |
+
"<|box_end|>",
|
| 189 |
+
"<|quad_start|>",
|
| 190 |
+
"<|quad_end|>",
|
| 191 |
+
"<|vision_start|>",
|
| 192 |
+
"<|vision_end|>",
|
| 193 |
+
"<|vision_pad|>",
|
| 194 |
+
"<|image_pad|>",
|
| 195 |
+
"<|video_pad|>"
|
| 196 |
+
],
|
| 197 |
+
"bos_token": null,
|
| 198 |
+
"clean_up_tokenization_spaces": false,
|
| 199 |
+
"eos_token": "<|im_end|>",
|
| 200 |
+
"errors": "replace",
|
| 201 |
+
"extra_special_tokens": {},
|
| 202 |
+
"model_max_length": 131072,
|
| 203 |
+
"pad_token": "<|endoftext|>",
|
| 204 |
+
"processor_class": "Qwen2_5_VLProcessor",
|
| 205 |
+
"split_special_tokens": false,
|
| 206 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 207 |
+
"unk_token": null
|
| 208 |
+
}
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/train_cls.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/public/eval/eval_1gpu.sh
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# Evaluate VLM2Vec checkpoints on MMEB-V2 retrieval tasks.
# For each (model spec, modality) pair, runs eval_test_time.py and writes
# encoded outputs under $BASE_OUTPUT_PATH/$MODALITY/.

echo "==> Environment"
echo "conda location: $(which conda)"
echo "Python location: $(which python)"
echo "Python version: $(python --version)"
echo ""

# Exit with a failure status if the working directory is missing
# (a bare `exit` would propagate the last command's status, i.e. 0 here).
cd VLM2Vec/ || exit 1

# ==============================================================================
# Configuration
# ==============================================================================
CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
BATCH_SIZE=1
# MODALITIES=("image_retrival" "video_retrival")
# MODALITIES=("image_retrival")
MODALITIES=("mieb_any2any_retrieval_lite")
# "~" does not expand inside double quotes, so the literal string "~/data/..."
# would be handed to Python; use $HOME to get a real absolute path.
DATA_BASEDIR="$HOME/data/vlm2vec_eval/MMEB-V2"
# OUTPUT_BASEDIR="~/exps/vlm2vec_bsz128"
# OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2vl_2B.add_mlp_try1/checkpoint-500" #_qry_cand_diff_ratio
OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_single_node_image_ret_10_29_h100/checkpoint-5000_DART_2_0.75_0_0" #_qry_cand_diff_ratio
# OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/try_add_mlp_try/stage1" #_qry_cand_diff_ratio

# ==> Define models and their base output paths here
# Format: "MODEL_NAME;MODEL_BACKBONE;BASE_OUTPUT_PATH"
declare -a MODEL_SPECS
MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_single_node_image_ret_10_29_h100/checkpoint-5000;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
# /home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.image/checkpoint-1000
# MODEL_SPECS+=( "code-kunkun/LamRA-Ret-Qwen2.5VL-7b;lamra_qwen25;$OUTPUT_BASEDIR/LamRA-Ret-Qwen2.5VL-7b" )
# MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_multi_layer_12_-1_0.1_0.9/checkpoint-5000;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
# MODEL_SPECS+=( "Qwen/Qwen2.5-VL-3B-Instruct;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-Qwen2.5VL-3B" )
# MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-2B-Instruct" )
# MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-7B-Instruct" )
# MODEL_SPECS+=( "code-kunkun/LamRA-Ret;lamra;$OUTPUT_BASEDIR/LamRA-Ret" )
# MODEL_SPECS+=( "code-kunkun/LamRA-Ret-Qwen2.5VL-7b;lamra_qwen25;$OUTPUT_BASEDIR/LamRA-Ret-Qwen2.5VL-7b" )
# MODEL_SPECS+=( "vidore/colpali-v1.3;colpali;$OUTPUT_BASEDIR/colpali-v1.3" )

# ==============================================================================
# Main Execution Loop
# ==============================================================================
# Loop through each model specification
for spec in "${MODEL_SPECS[@]}"; do
  # Parse "name;backbone;output" from the spec string
  IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"

  echo "================================================="
  echo "🚀 Processing Model: $MODEL_NAME"
  echo "================================================="

  # Loop through each modality for the current model
  for MODALITY in "${MODALITIES[@]}"; do
    DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
    OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"

    echo "-------------------------------------------------"
    echo " - Modality: $MODALITY"
    echo " - Output Path: $OUTPUT_PATH"

    # Ensure the output directory exists
    mkdir -p "$OUTPUT_PATH"

    # Build argv as an array instead of a string fed to `eval`: robust
    # against spaces/metacharacters in paths and easier to audit.
    cmd=(python eval_test_time.py
      --pooling eos
      --normalize true
      --per_device_eval_batch_size "$BATCH_SIZE"
      --model_backbone "$MODEL_BACKBONE"
      --model_name "$MODEL_NAME"
      --dataset_config "$DATA_CONFIG_PATH"
      --encode_output_path "$OUTPUT_PATH"
      --data_basedir "$DATA_BASEDIR")

    echo " - Executing command..."
    # printf '%q ' "${cmd[@]}"; echo   # Uncomment for debugging the exact command
    CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES" "${cmd[@]}"
    echo " - Done."
    echo "-------------------------------------------------"
  done
done

echo "✅ All jobs completed."
|
experiments/public/eval/eval_1gpu_aop.sh
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# Evaluate VLM2Vec checkpoints on MMEB-V2 with the AOP (attention-guided
# token pruning) test-time variant: runs eval_test_time_aop.py per
# (model spec, modality) pair.

echo "==> Environment"
echo "conda location: $(which conda)"
echo "Python location: $(which python)"
echo "Python version: $(python --version)"
echo ""

# Exit with a failure status if the working directory is missing
# (a bare `exit` would propagate the last command's status, i.e. 0 here).
cd VLM2Vec/ || exit 1

# ==============================================================================
# Configuration
# ==============================================================================
CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
BATCH_SIZE=1
# MODALITIES=("image_retrival" "video_retrival")
MODALITIES=("image_retrival")
# MODALITIES=("visdoc_retrival")
# "~" does not expand inside double quotes, so the literal string "~/data/..."
# would be handed to Python; use $HOME to get a real absolute path.
DATA_BASEDIR="$HOME/data/vlm2vec_eval/MMEB-V2"
# OUTPUT_BASEDIR="~/exps/vlm2vec_bsz128"
OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/VLM2Vec_2_5_analysis_aop" #_qry_cand_diff_ratio

# ==> Define models and their base output paths here
# Format: "MODEL_NAME;MODEL_BACKBONE;BASE_OUTPUT_PATH"
declare -a MODEL_SPECS
# MODEL_SPECS+=( "VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
# MODEL_SPECS+=( "code-kunkun/LamRA-Ret-Qwen2.5VL-7b;lamra_qwen25;$OUTPUT_BASEDIR/LamRA-Ret-Qwen2.5VL-7b" )
# MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_multi_layer_12_-1_0.1_0.9/checkpoint-5000;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_7B_single_node_alltask_online_doc_data_12_11_h100;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-Qwen2.5VL-7B" ) # Qwen/Qwen2.5-VL-3B-Instruct
# MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-2B-Instruct" )
# MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-7B-Instruct" )
# MODEL_SPECS+=( "code-kunkun/LamRA-Ret;lamra;$OUTPUT_BASEDIR/LamRA-Ret" )
# MODEL_SPECS+=( "code-kunkun/LamRA-Ret-Qwen2.5VL-7b;lamra_qwen25;$OUTPUT_BASEDIR/LamRA-Ret-Qwen2.5VL-7b" )
# MODEL_SPECS+=( "vidore/colpali-v1.3;colpali;$OUTPUT_BASEDIR/colpali-v1.3" )

# ==============================================================================
# Main Execution Loop
# ==============================================================================
# Loop through each model specification
for spec in "${MODEL_SPECS[@]}"; do
  # Parse "name;backbone;output" from the spec string
  IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"

  echo "================================================="
  echo "🚀 Processing Model: $MODEL_NAME"
  echo "================================================="

  # Loop through each modality for the current model
  for MODALITY in "${MODALITIES[@]}"; do
    DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
    OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"

    echo "-------------------------------------------------"
    echo " - Modality: $MODALITY"
    echo " - Output Path: $OUTPUT_PATH"

    # Ensure the output directory exists
    mkdir -p "$OUTPUT_PATH"

    # Build argv as an array instead of a string fed to `eval`: robust
    # against spaces/metacharacters in paths and easier to audit.
    cmd=(python eval_test_time_aop.py
      --pooling eos
      --normalize true
      --per_device_eval_batch_size "$BATCH_SIZE"
      --model_backbone "$MODEL_BACKBONE"
      --model_name "$MODEL_NAME"
      --dataset_config "$DATA_CONFIG_PATH"
      --encode_output_path "$OUTPUT_PATH"
      --data_basedir "$DATA_BASEDIR")

    echo " - Executing command..."
    # printf '%q ' "${cmd[@]}"; echo   # Uncomment for debugging the exact command
    CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES" "${cmd[@]}"
    echo " - Done."
    echo "-------------------------------------------------"
  done
done

echo "✅ All jobs completed."
|
experiments/public/eval/eval_1gpu_cut_layer.sh
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# Evaluate a multi-layer-distilled VLM2Vec checkpoint at several LM layer
# cut points (LM_LAYERS): runs eval_test_time_cut_layer.py per
# (model spec, modality) pair.

echo "==> Environment"
echo "conda location: $(which conda)"
echo "Python location: $(which python)"
echo "Python version: $(python --version)"
echo ""

# Exit with a failure status if the working directory is missing
# (a bare `exit` would propagate the last command's status, i.e. 0 here).
cd VLM2Vec/ || exit 1

# ==============================================================================
# Configuration
# ==============================================================================
CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
BATCH_SIZE=4
MODALITIES=("image_retrival")
# MODALITIES=("image_retrival" "video_retrival" "visdoc_retrival")
# MODALITIES=("video_retrival")
# "~" does not expand inside double quotes, so the literal string "~/data/..."
# would be handed to Python; use $HOME to get a real absolute path.
DATA_BASEDIR="$HOME/data/vlm2vec_eval/MMEB-V2"
# OUTPUT_BASEDIR="~/exps/vlm2vec_bsz128"
# OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/VLM2Vec/cut_layer" #_qry_cand_diff_ratio
OUTPUT_BASEDIR="/home/v-menggao/code/vlmvector_qwen25vl_train_multi_layer_distill_AOP_pooling_layer8_ablation_1230/result" #_qry_cand_diff_ratio

# ==> Define models and their base output paths here
# Format: "MODEL_NAME;MODEL_BACKBONE;BASE_OUTPUT_PATH"
declare -a MODEL_SPECS
# MODEL_SPECS+=( "VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
MODEL_SPECS+=( "/home/v-menggao/code/vlmvector_qwen25vl_train_multi_layer_distill_AOP_pooling_layer8_ablation_1230/checkpoint-900;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
# /home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.image/checkpoint-1000
# MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B_single_node_image_ret_10_30_h100/checkpoint-1200;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
# MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/vlm2vec_train_2.5_3b_multilayer_distill_add_weight_image_ret_11_18_a100_2/checkpoint-1000;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
# MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-2B-Instruct" )
# MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-7B-Instruct" )

# ==============================================================================
# Main Execution Loop
# ==============================================================================
# Loop through each model specification
for spec in "${MODEL_SPECS[@]}"; do
  # Parse "name;backbone;output" from the spec string
  IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"

  echo "================================================="
  echo "🚀 Processing Model: $MODEL_NAME"
  echo "================================================="

  # Loop through each modality for the current model
  for MODALITY in "${MODALITIES[@]}"; do
    DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
    OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"

    echo "-------------------------------------------------"
    echo " - Modality: $MODALITY"
    echo " - Output Path: $OUTPUT_PATH"

    # Ensure the output directory exists
    mkdir -p "$OUTPUT_PATH"

    # Build argv as an array instead of a string fed to `eval`: robust
    # against spaces/metacharacters in paths and easier to audit.
    cmd=(python eval_test_time_cut_layer.py
      --pooling eos
      --normalize true
      --per_device_eval_batch_size "$BATCH_SIZE"
      --model_backbone "$MODEL_BACKBONE"
      --model_name "$MODEL_NAME"
      --dataset_config "$DATA_CONFIG_PATH"
      --encode_output_path "$OUTPUT_PATH"
      --data_basedir "$DATA_BASEDIR")

    echo " - Executing command..."
    # printf '%q ' "${cmd[@]}"; echo   # Uncomment for debugging the exact command
    # LM_LAYERS selects which transformer layers to extract embeddings from.
    LM_LAYERS='4,8,12,16,20,last' CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES" "${cmd[@]}"
    echo " - Done."
    echo "-------------------------------------------------"
  done
done

echo "✅ All jobs completed."
|
experiments/public/eval/eval_1gpu_cut_layer_AOP_text.sh
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# Evaluate a VLM2Vec checkpoint with AOP token pruning applied to both
# vision and text tokens: configures the AOP_* environment variables and
# runs eval_test_time_cut_layer_AOP_add_text_cut.py per modality.

echo "==> Environment"
echo "conda location: $(which conda)"
echo "Python location: $(which python)"
echo "Python version: $(python --version)"
echo ""

# Exit with a failure status if the working directory is missing
# (a bare `exit` would propagate the last command's status, i.e. 0 here).
cd VLM2Vec/ || exit 1

# ==============================================================================
# Configuration
# ==============================================================================
CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
BATCH_SIZE=64
MODALITIES=("image_retrival")
# "~" does not expand inside double quotes, so the literal string "~/data/..."
# would be handed to Python; use $HOME to get a real absolute path.
DATA_BASEDIR="$HOME/data/vlm2vec_eval/MMEB-V2"
# OUTPUT_BASEDIR="~/exps/vlm2vec_bsz128"
# OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/VLM2Vec_AOP/t_0.5_8_i_0.5_16_both_l12_bsz64" #_qry_cand_diff_ratio
OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_single_node_image_ret_10_29_h100/checkpoint-5000/t_0.5_both_l10_bsz64_new" #_qry_cand_diff_ratio

# --- Alternative AOP config (delta mode, query-only), kept for reference ---
# export AOP_ENABLED=1
# export AOP_APPLY=qry
# export AOP_LAYER=8
# export AOP_MODE=delta
# export AOP_DELTA=0.12
# export AOP_KHAT=1.6
# export AOP_MIN_KEEP=64
# export AOP_USE_BIAS=1

export AOP_ENABLED=1
export AOP_APPLY=both
export AOP_LAYER=10
export AOP_MODE=ratio
# export AOP_KEEP_RATIO=0.1
export AOP_MIN_KEEP=0
# export AOP_DEBUG=1
export AOP_SELECTION=random # key setting: aop | attention | random

export AOP_PRUNE_VISION=0
export AOP_PRUNE_TEXT=1
# Ratio mode keep ratios per token type.
# NOTE(review): an earlier comment said "vision 10%, text 80%" but the
# values here are 0.5/0.5 — confirm which configuration is intended.
export AOP_KEEP_RATIO_VISION=0.5
export AOP_KEEP_RATIO_TEXT=0.5
# Minimum number of tokens to keep (floor)
export AOP_MIN_KEEP_VISION=8
export AOP_MIN_KEEP_TEXT=8
# Text protection: never prune the last N text tokens / special tokens
export AOP_PROTECT_TEXT_LAST=8
export AOP_PROTECT_SPECIAL=1
export AOP_RANDOM_SEED=42
export AOP_DEBUG=1

# ==> Define models and their base output paths here
# Format: "MODEL_NAME;MODEL_BACKBONE;BASE_OUTPUT_PATH"
declare -a MODEL_SPECS
# MODEL_SPECS+=( "VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
# /home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.image/checkpoint-1000
MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_single_node_image_ret_10_29_h100/checkpoint-5000;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
# MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-2B-Instruct" )
# MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-7B-Instruct" )

# ==============================================================================
# Main Execution Loop
# ==============================================================================
# Loop through each model specification
for spec in "${MODEL_SPECS[@]}"; do
  # Parse "name;backbone;output" from the spec string
  IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"

  echo "================================================="
  echo "🚀 Processing Model: $MODEL_NAME"
  echo "================================================="

  # Loop through each modality for the current model
  for MODALITY in "${MODALITIES[@]}"; do
    DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
    OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"

    echo "-------------------------------------------------"
    echo " - Modality: $MODALITY"
    echo " - Output Path: $OUTPUT_PATH"

    # Ensure the output directory exists
    mkdir -p "$OUTPUT_PATH"

    # Build argv as an array instead of a string fed to `eval`: robust
    # against spaces/metacharacters in paths and easier to audit.
    cmd=(python eval_test_time_cut_layer_AOP_add_text_cut.py
      --pooling eos
      --normalize true
      --per_device_eval_batch_size "$BATCH_SIZE"
      --model_backbone "$MODEL_BACKBONE"
      --model_name "$MODEL_NAME"
      --dataset_config "$DATA_CONFIG_PATH"
      --encode_output_path "$OUTPUT_PATH"
      --data_basedir "$DATA_BASEDIR")

    echo " - Executing command..."
    # printf '%q ' "${cmd[@]}"; echo   # Uncomment for debugging the exact command
    LM_LAYERS='last' CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES" "${cmd[@]}"
    echo " - Done."
    echo "-------------------------------------------------"
  done
done

echo "✅ All jobs completed."
|
experiments/public/eval/eval_1gpu_cut_layer_unified_new.sh
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# Unified, environment-driven evaluation launcher: every knob (devices,
# batch size, modalities, paths, model spec, cut-layer / VisionZip / AOP
# settings) can be overridden via EVAL_* environment variables, with
# sensible defaults. Runs eval_cut_layer_unified.py per (model, modality).
set -euo pipefail

echo "==> Environment"
echo "conda: $(which conda)"
echo "python: $(which python)"
python --version
echo ""

# Make sure we are inside the VLM2Vec directory.
# Note: the `cd VLM2Vec/` was removed because the cluster invocation
# clones Code/ and then cd's into Code/.
# If VLM2Vec lives under Code/, uncomment the next line:
# cd VLM2Vec/ || exit 1
echo "Current directory: $(pwd)"


# ==============================================================================
# Config (Read from Environment Variables or use Defaults)
# ==============================================================================
echo "==> Loading Configuration..."

# --- Basic configuration ---
CUDA_VISIBLE_DEVICES="${EVAL_CUDA_DEVICES:-"0,1,2,3,4,5,6,7"}"
BATCH_SIZE="${EVAL_BATCH_SIZE:-64}"

# --- Modalities (read from the space-separated string EVAL_MODALITIES) ---
MODALITIES_STR="${EVAL_MODALITIES:-"image_retrival video_retrival visdoc_retrival"}"
read -r -a MODALITIES <<< "$MODALITIES_STR"

# --- Paths ---
# "~" does not expand inside double quotes, so a default of "~/data/..."
# would produce a literal "~" path (and mkdir would create a directory
# literally named "~"); use $HOME instead.
DATA_BASEDIR="${EVAL_DATA_BASEDIR:-"$HOME/data/vlm2vec_eval/MMEB-V2"}"
OUTPUT_BASEDIR="${EVAL_OUTPUT_BASEDIR:-"$HOME/exps/vlm2vec_unified_eval"}"

# --- Model list ---
# (Important) The default model spec is set *after* OUTPUT_BASEDIR is
# defined so that the embedded path is correct.
DEFAULT_MODEL_SPEC="VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B"
MODEL_SPEC_TO_ADD="${EVAL_MODEL_SPEC:-"$DEFAULT_MODEL_SPEC"}"

declare -a MODEL_SPECS
MODEL_SPECS+=("$MODEL_SPEC_TO_ADD")

# ==============================================================================
# Cut-layer configuration (exported for Python)
# ==============================================================================
# BUG FIX: the default used to be written as :-'16,20,24,last', which made
# the literal single quotes part of the value. The quotes are removed here.
export LM_LAYERS="${EVAL_LM_LAYERS:-16,20,24,last}"

# ==============================================================================
# VisionZip configuration (exported for Python)
# ==============================================================================
export ZIP_ENABLED=${EVAL_ZIP_ENABLED:-0}
export ZIP_APPLY="${EVAL_ZIP_APPLY:-"both"}"
export ZIP_METHOD="${EVAL_ZIP_METHOD:-"visionzip"}"
export ZIP_KEEP_DOM=${EVAL_ZIP_KEEP_DOM:-0.90}
export ZIP_KEEP_CTX=${EVAL_ZIP_KEEP_CTX:-0.10}

# ==============================================================================
# AOP configuration (exported for Python)
# ==============================================================================
export AOP_ENABLED=${EVAL_AOP_ENABLED:-0}
export AOP_APPLY="${EVAL_AOP_APPLY:-"both"}"
export AOP_LAYER=${EVAL_AOP_LAYER:-20}
export AOP_MODE="${EVAL_AOP_MODE:-"ratio"}"
export AOP_KEEP_RATIO=${EVAL_AOP_KEEP_RATIO:-0.10}
export AOP_MIN_KEEP=${EVAL_AOP_MIN_KEEP:-64}
export AOP_DELTA=${EVAL_AOP_DELTA:-0.10}
export AOP_KHAT=${EVAL_AOP_KHAT:-1.0}
export AOP_USE_BIAS=${EVAL_AOP_USE_BIAS:-1}
export AOP_ATTN_IMPL="${EVAL_AOP_ATTN_IMPL:-"sdpa"}"
export AOP_DEBUG=${EVAL_AOP_DEBUG:-0} # debug off by default

# ==============================================================================
# Print the final configuration
# ==============================================================================
echo "--- Final Configuration ---"
echo "CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES"
echo "BATCH_SIZE: $BATCH_SIZE"
echo "MODALITIES: ${MODALITIES[*]}"
echo "DATA_BASEDIR: $DATA_BASEDIR"
echo "OUTPUT_BASEDIR: $OUTPUT_BASEDIR"
echo "MODEL_SPECS: ${MODEL_SPECS[*]}"
echo "LM_LAYERS: $LM_LAYERS"
echo "ZIP_ENABLED: $ZIP_ENABLED"
echo "AOP_ENABLED: $AOP_ENABLED"
if [ "$ZIP_ENABLED" -ne 0 ]; then
  echo "ZIP_APPLY: $ZIP_APPLY, ZIP_METHOD: $ZIP_METHOD, ZIP_KEEP_DOM: $ZIP_KEEP_DOM, ZIP_KEEP_CTX: $ZIP_KEEP_CTX"
fi
if [ "$AOP_ENABLED" -ne 0 ]; then
  echo "AOP_APPLY: $AOP_APPLY, AOP_LAYER: $AOP_LAYER, AOP_MODE: $AOP_MODE, AOP_KEEP_RATIO: $AOP_KEEP_RATIO"
fi
echo "---------------------------"

# ==============================================================================
# Run
# ==============================================================================
for spec in "${MODEL_SPECS[@]}"; do
  IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"

  echo "================================================="
  echo "🚀 Model: $MODEL_NAME"
  echo "================================================="

  for MODALITY in "${MODALITIES[@]}"; do
    # Assumes the VLM2Vec directory is the Code/ directory (or the script
    # runs from Code/) and that experiments/ lives under it.
    DATA_CONFIG_PATH="experiments/public/eval/${MODALITY}.yaml"
    OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"
    mkdir -p "$OUTPUT_PATH"

    echo "-------------------------------------------------"
    echo " - Modality: $MODALITY"
    echo " - Output: $OUTPUT_PATH"
    echo " - Config: $DATA_CONFIG_PATH"

    # Build argv as an array instead of a string fed to `eval`: robust
    # against spaces/metacharacters in paths and easier to audit.
    cmd=(python eval_cut_layer_unified.py
      --pooling eos
      --normalize true
      --per_device_eval_batch_size "$BATCH_SIZE"
      --model_backbone "$MODEL_BACKBONE"
      --model_name "$MODEL_NAME"
      --dataset_config "$DATA_CONFIG_PATH"
      --encode_output_path "$OUTPUT_PATH"
      --data_basedir "$DATA_BASEDIR")

    echo " - Executing..."
    # printf '%q ' "${cmd[@]}"; echo   # Debug: print the full command
    CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES" "${cmd[@]}"
    echo " - Done."
  done
done

echo "✅ All jobs completed."
|
experiments/public/eval/eval_1gpu_early_exit_classifier.sh
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
echo "==> Environment"
|
| 3 |
+
echo "conda location: $(which conda)"
|
| 4 |
+
echo "Python location: $(which python)"
|
| 5 |
+
echo "Python version: $(python --version)"
|
| 6 |
+
echo ""
|
| 7 |
+
|
| 8 |
+
cd VLM2Vec/ || exit
|
| 9 |
+
|
| 10 |
+
# ==============================================================================
|
| 11 |
+
# Configuration
|
| 12 |
+
# ==============================================================================
|
| 13 |
+
CUDA_VISIBLE_DEVICES="0"
|
| 14 |
+
BATCH_SIZE=32
|
| 15 |
+
|
| 16 |
+
# 【AOP 配置】Token Pruning - 暂时关闭以匹配baseline
|
| 17 |
+
export AOP_ENABLED=0
|
| 18 |
+
# export AOP_APPLY=qry
|
| 19 |
+
# export AOP_LAYER=12 # AOP 剪裁发生在哪一层 (需要 < EE_LAYER)
|
| 20 |
+
# export AOP_MODE=ratio
|
| 21 |
+
# export AOP_KEEP_RATIO_VISION=1.0 # 视觉 Token 保留 100%
|
| 22 |
+
# export AOP_KEEP_RATIO_TEXT=1.0 # 文本 Token 不剪
|
| 23 |
+
# export AOP_SELECTION=aop # 使用注意力选择
|
| 24 |
+
|
| 25 |
+
# 【EE 配置】Early Exit via Classifier
|
| 26 |
+
export EE_ENABLED=1
|
| 27 |
+
export EE_LAYER=12 # 早停判定层
|
| 28 |
+
export EE_METHOD=classifier # 使用分类器
|
| 29 |
+
# export EE_DEBUG_MODE=1
|
| 30 |
+
export EE_THRESHOLD=0.99 # 早停阈值
|
| 31 |
+
export EE_TOPK=200
|
| 32 |
+
# 分类器路径:可以是 checkpoint 目录或 .pt 文件
|
| 33 |
+
export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/~/experiments/checkpoint-600"
|
| 34 |
+
|
| 35 |
+
MODALITIES=("image_retrival")
|
| 36 |
+
DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
|
| 37 |
+
OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_Classifier_Eval_AOP_Fused_0.99"
|
| 38 |
+
|
| 39 |
+
# 模型路径 (VLM)
|
| 40 |
+
MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_multi_layer_12_-1_0.1_0.9/checkpoint-5000"
|
| 41 |
+
MODEL_NAME="qwen2_5_vl"
|
| 42 |
+
|
| 43 |
+
echo "================================================="
|
| 44 |
+
echo "🚀 Pipeline: AOP + Early Exit Classifier"
|
| 45 |
+
echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
|
| 46 |
+
echo "🚀 Threshold: $EE_THRESHOLD"
|
| 47 |
+
echo "================================================="
|
| 48 |
+
|
| 49 |
+
for MODALITY in "${MODALITIES[@]}"; do
|
| 50 |
+
DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
|
| 51 |
+
OUTPUT_PATH="$OUTPUT_BASEDIR/$MODALITY/"
|
| 52 |
+
|
| 53 |
+
mkdir -p "$OUTPUT_PATH"
|
| 54 |
+
|
| 55 |
+
cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier.py \
|
| 56 |
+
--pooling eos \
|
| 57 |
+
--normalize true \
|
| 58 |
+
--per_device_eval_batch_size $BATCH_SIZE \
|
| 59 |
+
--model_backbone \"$MODEL_NAME\" \
|
| 60 |
+
--model_name \"$MODEL_CHECKPOINT\" \
|
| 61 |
+
--dataset_config \"$DATA_CONFIG_PATH\" \
|
| 62 |
+
--encode_output_path \"$OUTPUT_PATH\" \
|
| 63 |
+
--data_basedir \"$DATA_BASEDIR\""
|
| 64 |
+
|
| 65 |
+
echo " - Executing command..."
|
| 66 |
+
eval "$cmd"
|
| 67 |
+
echo " - Done."
|
| 68 |
+
done
|
| 69 |
+
|
| 70 |
+
echo "✅ All jobs completed."
|
experiments/public/eval/eval_1gpu_early_exit_classifier_AOP_attn_pooling.sh
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
echo "==> Environment"
|
| 3 |
+
echo "conda location: $(which conda)"
|
| 4 |
+
echo "Python location: $(which python)"
|
| 5 |
+
echo "Python version: $(python --version)"
|
| 6 |
+
echo ""
|
| 7 |
+
|
| 8 |
+
cd VLM2Vec/ || exit
|
| 9 |
+
|
| 10 |
+
# ==============================================================================
|
| 11 |
+
# Configuration
|
| 12 |
+
# ==============================================================================
|
| 13 |
+
CUDA_VISIBLE_DEVICES="0"
|
| 14 |
+
BATCH_SIZE=64
|
| 15 |
+
|
| 16 |
+
# 【AOP 配置】Token Pruning
|
| 17 |
+
export AOP_ENABLED=1
|
| 18 |
+
export AOP_APPLY=both # 只剪 query;要剪 cand 可以设 both
|
| 19 |
+
export AOP_LAYER=12
|
| 20 |
+
export AOP_MODE=ratio
|
| 21 |
+
export AOP_SELECTION=attention # 使用注意力作为重要性分数
|
| 22 |
+
export AOP_ATTENTION_AGG=mean # head 聚合方式:mean/max/sum
|
| 23 |
+
|
| 24 |
+
# 开启图像 / 文本剪枝
|
| 25 |
+
export AOP_PRUNE_VISION=1
|
| 26 |
+
export AOP_PRUNE_TEXT=1
|
| 27 |
+
|
| 28 |
+
# 可选:合理的比例和保底
|
| 29 |
+
export AOP_KEEP_RATIO_VISION=1.0 # 先不剪图像也行,想剪再改成 0.5 等
|
| 30 |
+
export AOP_KEEP_RATIO_TEXT=0.5
|
| 31 |
+
export AOP_MIN_KEEP_VISION=8
|
| 32 |
+
export AOP_MIN_KEEP_TEXT=8
|
| 33 |
+
export AOP_PROTECT_TEXT_LAST=8
|
| 34 |
+
export AOP_PROTECT_SPECIAL=1
|
| 35 |
+
export AOP_MONITOR=0
|
| 36 |
+
|
| 37 |
+
# 【VPOOL 配置】Vision Token Pooling
|
| 38 |
+
export VPOOL_ENABLED=1
|
| 39 |
+
export VPOOL_APPLY=both # qry | tgt | both
|
| 40 |
+
export VPOOL_LAYER=1 # pooling 发生的层(进入第1层前)
|
| 41 |
+
export VPOOL_KERNEL=2 # 2x2
|
| 42 |
+
export VPOOL_STRIDE=2
|
| 43 |
+
export VPOOL_METHOD=attn # NEW: attn pooling
|
| 44 |
+
export VPOOL_ATTN_TAU=1.0 # NEW: attn pooling temperature
|
| 45 |
+
export VPOOL_ONLY_VISION=1
|
| 46 |
+
export VPOOL_PROTECT_CLS=1
|
| 47 |
+
export VPOOL_MONITOR=1
|
| 48 |
+
|
| 49 |
+
# 如果想看 debug,可以加:
|
| 50 |
+
# export AOP_DEBUG=1
|
| 51 |
+
|
| 52 |
+
# 【EE 配置】Early Exit via Classifier
|
| 53 |
+
export EE_ENABLED=1
|
| 54 |
+
export EE_LAYER=12
|
| 55 |
+
export EE_METHOD=classifier
|
| 56 |
+
export EE_DEBUG_MODE=1
|
| 57 |
+
# 阈值根据 V5 分析结果填入,或者保留默认值
|
| 58 |
+
export EE_THRESHOLD=0
|
| 59 |
+
export EE_TOPK=200
|
| 60 |
+
export EE_PROFILE=1
|
| 61 |
+
export EE_TOPK_EMB=10
|
| 62 |
+
export EE_SKIP_LM_HEAD=1
|
| 63 |
+
|
| 64 |
+
# 【关键】分类器路径 (替换为您 V5 训练的 checkpoint 路径)
|
| 65 |
+
export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000"
|
| 66 |
+
|
| 67 |
+
MODALITIES=("image_retrival")
|
| 68 |
+
DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
|
| 69 |
+
# 【关键】修改输出目录
|
| 70 |
+
OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000_0.45_try2"
|
| 71 |
+
|
| 72 |
+
MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_12_attn_pooling_new_all_12_26_h100_4_node_sigma4/checkpoint-1300"
|
| 73 |
+
MODEL_NAME="qwen2_5_vl"
|
| 74 |
+
|
| 75 |
+
echo "================================================="
|
| 76 |
+
echo "🚀 Pipeline: AOP + Early Exit Classifier (V5)"
|
| 77 |
+
echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
|
| 78 |
+
echo "================================================="
|
| 79 |
+
|
| 80 |
+
for MODALITY in "${MODALITIES[@]}"; do
|
| 81 |
+
DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
|
| 82 |
+
OUTPUT_PATH="$OUTPUT_BASEDIR/$MODALITY/"
|
| 83 |
+
|
| 84 |
+
mkdir -p "$OUTPUT_PATH"
|
| 85 |
+
|
| 86 |
+
# 【关键】调用 V5 脚本
|
| 87 |
+
cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier_AOP_attn_pooling.py \
|
| 88 |
+
--pooling eos \
|
| 89 |
+
--normalize true \
|
| 90 |
+
--per_device_eval_batch_size $BATCH_SIZE \
|
| 91 |
+
--model_backbone \"$MODEL_NAME\" \
|
| 92 |
+
--model_name \"$MODEL_CHECKPOINT\" \
|
| 93 |
+
--dataset_config \"$DATA_CONFIG_PATH\" \
|
| 94 |
+
--encode_output_path \"$OUTPUT_PATH\" \
|
| 95 |
+
--data_basedir \"$DATA_BASEDIR\""
|
| 96 |
+
|
| 97 |
+
echo " - Executing command..."
|
| 98 |
+
eval "$cmd"
|
| 99 |
+
echo " - Done."
|
| 100 |
+
done
|
| 101 |
+
|
| 102 |
+
echo "✅ All jobs completed."
|
experiments/public/eval/eval_1gpu_early_exit_classifier_AOP_pooling.sh
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
echo "==> Environment"
|
| 3 |
+
echo "conda location: $(which conda)"
|
| 4 |
+
echo "Python location: $(which python)"
|
| 5 |
+
echo "Python version: $(python --version)"
|
| 6 |
+
echo ""
|
| 7 |
+
|
| 8 |
+
cd VLM2Vec/ || exit
|
| 9 |
+
|
| 10 |
+
# ==============================================================================
|
| 11 |
+
# Configuration
|
| 12 |
+
# ==============================================================================
|
| 13 |
+
CUDA_VISIBLE_DEVICES="0"
|
| 14 |
+
BATCH_SIZE=64
|
| 15 |
+
|
| 16 |
+
# 【AOP 配置】Token Pruning
|
| 17 |
+
export AOP_ENABLED=0
|
| 18 |
+
export AOP_APPLY=both # 只剪 query;要剪 cand 可以设 both
|
| 19 |
+
export AOP_LAYER=12
|
| 20 |
+
export AOP_MODE=ratio
|
| 21 |
+
export AOP_SELECTION=attention # 使用注意力作为重要性分数
|
| 22 |
+
export AOP_ATTENTION_AGG=mean # head 聚合方式:mean/max/sum
|
| 23 |
+
|
| 24 |
+
# 开启图像 / 文本剪枝
|
| 25 |
+
export AOP_PRUNE_VISION=0
|
| 26 |
+
export AOP_PRUNE_TEXT=1
|
| 27 |
+
|
| 28 |
+
# 可选:合理的比例和保底
|
| 29 |
+
export AOP_KEEP_RATIO_VISION=1.0 # 先不剪图像也行,想剪再改成 0.5 等
|
| 30 |
+
export AOP_KEEP_RATIO_TEXT=0.5
|
| 31 |
+
export AOP_MIN_KEEP_VISION=8
|
| 32 |
+
export AOP_MIN_KEEP_TEXT=8
|
| 33 |
+
export AOP_PROTECT_TEXT_LAST=8
|
| 34 |
+
export AOP_PROTECT_SPECIAL=1
|
| 35 |
+
export AOP_MONITOR=0
|
| 36 |
+
|
| 37 |
+
# 【VPOOL 配置】Vision Token Pooling
|
| 38 |
+
export VPOOL_ENABLED=0
|
| 39 |
+
export VPOOL_APPLY=both # qry | tgt | both
|
| 40 |
+
export VPOOL_LAYER=1 # pooling 发生的层(进入第1层前)
|
| 41 |
+
export VPOOL_KERNEL=2 # 2x2
|
| 42 |
+
export VPOOL_STRIDE=2
|
| 43 |
+
export VPOOL_METHOD=avg # avg | max | linear | conv
|
| 44 |
+
export VPOOL_ONLY_VISION=1 # 仅对视觉 token 生效
|
| 45 |
+
export VPOOL_PROTECT_CLS=1
|
| 46 |
+
export VPOOL_MONITOR=0 # 如需打印长度变化可设为 1
|
| 47 |
+
|
| 48 |
+
# 如果想看 debug,可以加:
|
| 49 |
+
# export AOP_DEBUG=1
|
| 50 |
+
|
| 51 |
+
# 【EE 配置】Early Exit via Classifier
|
| 52 |
+
export EE_ENABLED=0
|
| 53 |
+
export EE_LAYER=12
|
| 54 |
+
export EE_METHOD=classifier
|
| 55 |
+
export EE_DEBUG_MODE=1
|
| 56 |
+
# 阈值根据 V5 分析结果填入,或者保留默认值
|
| 57 |
+
export EE_THRESHOLD=0
|
| 58 |
+
export EE_TOPK=200
|
| 59 |
+
export EE_PROFILE=1
|
| 60 |
+
export VPOOL_COMPILE=1
|
| 61 |
+
export EE_TORCH_PROFILE=1
|
| 62 |
+
export EE_TOPK_EMB=10
|
| 63 |
+
|
| 64 |
+
export STAGE_PROFILE=1
|
| 65 |
+
export STAGE_PROFILE_PRINT=1 # 可选:每个 dataset 结束打印
|
| 66 |
+
|
| 67 |
+
# 【关键】分类器路径 (替换为您 V5 训练的 checkpoint 路径)
|
| 68 |
+
export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000"
|
| 69 |
+
|
| 70 |
+
MODALITIES=("image_retrival")
|
| 71 |
+
DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
|
| 72 |
+
# 【关键】修改输出目录
|
| 73 |
+
OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000_optimaized_wo"
|
| 74 |
+
|
| 75 |
+
MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4/checkpoint-5000"
|
| 76 |
+
MODEL_NAME="qwen2_5_vl"
|
| 77 |
+
|
| 78 |
+
echo "================================================="
|
| 79 |
+
echo "🚀 Pipeline: AOP + Early Exit Classifier (V5)"
|
| 80 |
+
echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
|
| 81 |
+
echo "================================================="
|
| 82 |
+
|
| 83 |
+
for MODALITY in "${MODALITIES[@]}"; do
|
| 84 |
+
DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
|
| 85 |
+
OUTPUT_PATH="$OUTPUT_BASEDIR/$MODALITY/"
|
| 86 |
+
|
| 87 |
+
mkdir -p "$OUTPUT_PATH"
|
| 88 |
+
|
| 89 |
+
# 【关键】调用 V5 脚本
|
| 90 |
+
cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier_AOP_pooling.py \
|
| 91 |
+
--pooling eos \
|
| 92 |
+
--normalize true \
|
| 93 |
+
--per_device_eval_batch_size $BATCH_SIZE \
|
| 94 |
+
--model_backbone \"$MODEL_NAME\" \
|
| 95 |
+
--model_name \"$MODEL_CHECKPOINT\" \
|
| 96 |
+
--dataset_config \"$DATA_CONFIG_PATH\" \
|
| 97 |
+
--encode_output_path \"$OUTPUT_PATH\" \
|
| 98 |
+
--data_basedir \"$DATA_BASEDIR\""
|
| 99 |
+
|
| 100 |
+
echo " - Executing command..."
|
| 101 |
+
eval "$cmd"
|
| 102 |
+
echo " - Done."
|
| 103 |
+
done
|
| 104 |
+
|
| 105 |
+
echo "✅ All jobs completed."
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# #!/bin/bash
|
| 109 |
+
# echo "==> Environment"
|
| 110 |
+
# echo "conda location: $(which conda)"
|
| 111 |
+
# echo "Python location: $(which python)"
|
| 112 |
+
# echo "Python version: $(python --version)"
|
| 113 |
+
# echo ""
|
| 114 |
+
|
| 115 |
+
# cd VLM2Vec/ || exit
|
| 116 |
+
|
| 117 |
+
# # ==============================================================================
|
| 118 |
+
# # Configuration
|
| 119 |
+
# # ==============================================================================
|
| 120 |
+
# CUDA_VISIBLE_DEVICES="0"
|
| 121 |
+
# BATCH_SIZE=64
|
| 122 |
+
|
| 123 |
+
# # 【AOP 配置】Token Pruning
|
| 124 |
+
# export AOP_ENABLED=1
|
| 125 |
+
# export AOP_APPLY=both # 只剪 query;要剪 cand 可以设 both
|
| 126 |
+
# export AOP_LAYER=12
|
| 127 |
+
# export AOP_MODE=ratio
|
| 128 |
+
# export AOP_SELECTION=attention # 使用注意力作为重要性分数
|
| 129 |
+
# export AOP_ATTENTION_AGG=mean # head 聚合方式:mean/max/sum
|
| 130 |
+
|
| 131 |
+
# # 开启图像 / 文本剪枝
|
| 132 |
+
# export AOP_PRUNE_VISION=0
|
| 133 |
+
# export AOP_PRUNE_TEXT=1
|
| 134 |
+
|
| 135 |
+
# # 可选:合理的比例和保底
|
| 136 |
+
# export AOP_KEEP_RATIO_VISION=1.0 # 先不剪图像也行,想剪再改成 0.5 等
|
| 137 |
+
# export AOP_KEEP_RATIO_TEXT=0.5
|
| 138 |
+
# export AOP_MIN_KEEP_VISION=8
|
| 139 |
+
# export AOP_MIN_KEEP_TEXT=8
|
| 140 |
+
# export AOP_PROTECT_TEXT_LAST=8
|
| 141 |
+
# export AOP_PROTECT_SPECIAL=1
|
| 142 |
+
# export AOP_MONITOR=0
|
| 143 |
+
|
| 144 |
+
# # 【VPOOL 配置】Vision Token Pooling
|
| 145 |
+
# export VPOOL_ENABLED=1
|
| 146 |
+
# export VPOOL_APPLY=both # qry | tgt | both
|
| 147 |
+
# export VPOOL_LAYER=1 # pooling 发生的层(进入第1层前)
|
| 148 |
+
# export VPOOL_KERNEL=2 # 2x2
|
| 149 |
+
# export VPOOL_STRIDE=2
|
| 150 |
+
# export VPOOL_METHOD=avg # avg | max | linear | conv
|
| 151 |
+
# export VPOOL_ONLY_VISION=1 # 仅对视觉 token 生效
|
| 152 |
+
# export VPOOL_PROTECT_CLS=1
|
| 153 |
+
# export VPOOL_MONITOR=0 # 如需打印长度变化可设为 1
|
| 154 |
+
|
| 155 |
+
# # 如果想看 debug,可以加:
|
| 156 |
+
# # export AOP_DEBUG=1
|
| 157 |
+
|
| 158 |
+
# # 【EE 配置】Early Exit via Classifier
|
| 159 |
+
# export EE_ENABLED=1
|
| 160 |
+
# export EE_LAYER=12
|
| 161 |
+
# export EE_METHOD=classifier
|
| 162 |
+
# export EE_DEBUG_MODE=1
|
| 163 |
+
# # 阈值根据 V5 分析结果填入,或者保留默认值
|
| 164 |
+
# export EE_THRESHOLD=0
|
| 165 |
+
# export EE_TOPK=200
|
| 166 |
+
# export EE_PROFILE=1
|
| 167 |
+
# export VPOOL_COMPILE=1
|
| 168 |
+
# export EE_TORCH_PROFILE=1
|
| 169 |
+
# export EE_TOPK_EMB=10
|
| 170 |
+
|
| 171 |
+
# export STAGE_PROFILE=1
|
| 172 |
+
# export STAGE_PROFILE_PRINT=1 # 可选:每个 dataset 结束打印
|
| 173 |
+
|
| 174 |
+
# # 【关键】分类器路径 (替换为您 V5 训练的 checkpoint 路径)
|
| 175 |
+
# export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_7B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz512/checkpoint-1000"
|
| 176 |
+
|
| 177 |
+
# MODALITIES=("image_retrival")
|
| 178 |
+
# DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
|
| 179 |
+
# # 【关键】修改输出目录
|
| 180 |
+
# OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_7B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz512/checkpoint-1000_new"
|
| 181 |
+
|
| 182 |
+
# MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_7B_multilayer_distill_AOP_pooling_all_12_10_h100_4/checkpoint-5000"
|
| 183 |
+
# MODEL_NAME="qwen2_5_vl"
|
| 184 |
+
|
| 185 |
+
# echo "================================================="
|
| 186 |
+
# echo "🚀 Pipeline: AOP + Early Exit Classifier (V5)"
|
| 187 |
+
# echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
|
| 188 |
+
# echo "================================================="
|
| 189 |
+
|
| 190 |
+
# for MODALITY in "${MODALITIES[@]}"; do
|
| 191 |
+
# DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
|
| 192 |
+
# OUTPUT_PATH="$OUTPUT_BASEDIR/$MODALITY/"
|
| 193 |
+
|
| 194 |
+
# mkdir -p "$OUTPUT_PATH"
|
| 195 |
+
|
| 196 |
+
# # 【关键】调用 V5 脚本
|
| 197 |
+
# cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier_AOP_pooling.py \
|
| 198 |
+
# --pooling eos \
|
| 199 |
+
# --normalize true \
|
| 200 |
+
# --per_device_eval_batch_size $BATCH_SIZE \
|
| 201 |
+
# --model_backbone \"$MODEL_NAME\" \
|
| 202 |
+
# --model_name \"$MODEL_CHECKPOINT\" \
|
| 203 |
+
# --dataset_config \"$DATA_CONFIG_PATH\" \
|
| 204 |
+
# --encode_output_path \"$OUTPUT_PATH\" \
|
| 205 |
+
# --data_basedir \"$DATA_BASEDIR\""
|
| 206 |
+
|
| 207 |
+
# echo " - Executing command..."
|
| 208 |
+
# eval "$cmd"
|
| 209 |
+
# echo " - Done."
|
| 210 |
+
# done
|
| 211 |
+
|
| 212 |
+
# echo "✅ All jobs completed."
|
experiments/public/eval/eval_1gpu_early_exit_classifier_AOP_pooling_new.sh
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
echo "==> Environment"
|
| 3 |
+
echo "conda location: $(which conda)"
|
| 4 |
+
echo "Python location: $(which python)"
|
| 5 |
+
echo "Python version: $(python --version)"
|
| 6 |
+
echo ""
|
| 7 |
+
|
| 8 |
+
cd VLM2Vec/ || exit
|
| 9 |
+
|
| 10 |
+
# ==============================================================================
|
| 11 |
+
# Configuration
|
| 12 |
+
# ==============================================================================
|
| 13 |
+
CUDA_VISIBLE_DEVICES="0"
|
| 14 |
+
BATCH_SIZE=64
|
| 15 |
+
|
| 16 |
+
# 【AOP 配置】Token Pruning
|
| 17 |
+
export AOP_ENABLED=0
|
| 18 |
+
export AOP_APPLY=both # 只剪 query;要剪 cand 可以设 both
|
| 19 |
+
export AOP_LAYER=10
|
| 20 |
+
export AOP_MODE=ratio
|
| 21 |
+
export AOP_SELECTION=attention # 使用注意力作为重要性分数
|
| 22 |
+
export AOP_ATTENTION_AGG=mean # head 聚合方式:mean/max/sum
|
| 23 |
+
|
| 24 |
+
# 开启图像 / 文本剪枝
|
| 25 |
+
export AOP_PRUNE_VISION=1
|
| 26 |
+
export AOP_PRUNE_TEXT=1
|
| 27 |
+
|
| 28 |
+
# 可选:合理的比例和保底
|
| 29 |
+
export AOP_KEEP_RATIO_VISION=1.0 # 先不剪图像也行,想剪再改成 0.5 等
|
| 30 |
+
export AOP_KEEP_RATIO_TEXT=0.5
|
| 31 |
+
export AOP_MIN_KEEP_VISION=8
|
| 32 |
+
export AOP_MIN_KEEP_TEXT=8
|
| 33 |
+
export AOP_PROTECT_TEXT_LAST=8
|
| 34 |
+
export AOP_PROTECT_SPECIAL=1
|
| 35 |
+
export AOP_MONITOR=0
|
| 36 |
+
|
| 37 |
+
# 【VPOOL 配置】Vision Token Pooling
|
| 38 |
+
export VPOOL_ENABLED=1
|
| 39 |
+
export VPOOL_APPLY=both # qry | tgt | both
|
| 40 |
+
export VPOOL_LAYER=1 # pooling 发生的层(进入第1层前)
|
| 41 |
+
export VPOOL_KERNEL=2 # 2x2
|
| 42 |
+
export VPOOL_STRIDE=2
|
| 43 |
+
export VPOOL_METHOD=avg # avg | max | linear | conv
|
| 44 |
+
export VPOOL_ONLY_VISION=1 # 仅对视觉 token 生效
|
| 45 |
+
export VPOOL_PROTECT_CLS=1
|
| 46 |
+
export VPOOL_MONITOR=0 # 如需打印长度变化可设为 1
|
| 47 |
+
|
| 48 |
+
# 如果想看 debug,可以加:
|
| 49 |
+
# export AOP_DEBUG=1
|
| 50 |
+
|
| 51 |
+
# 【EE 配置】Early Exit via Classifier
|
| 52 |
+
export EE_ENABLED=1
|
| 53 |
+
export EE_LAYER=12
|
| 54 |
+
export EE_METHOD=classifier
|
| 55 |
+
export EE_DEBUG_MODE=1
|
| 56 |
+
# 阈值根据 V5 分析结果填入,或者保留默认值
|
| 57 |
+
export EE_THRESHOLD=0
|
| 58 |
+
export EE_TOPK=200
|
| 59 |
+
export EE_PROFILE=1
|
| 60 |
+
export EE_TOPK_EMB=10
|
| 61 |
+
export EE_SKIP_LM_HEAD=1
|
| 62 |
+
|
| 63 |
+
# 【关键】分类器路径 (替换为您 V5 训练的 checkpoint 路径)
|
| 64 |
+
# export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_7B_multilayer_distill_aop_10_pooling_i_ret_12_30_h100_2_node_Classifier_L12_i_ret_bsz512/checkpoint-1000"
|
| 65 |
+
export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000"
|
| 66 |
+
|
| 67 |
+
MODALITIES=("image_retrival")
|
| 68 |
+
DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
|
| 69 |
+
# 【关键】修改输出目录
|
| 70 |
+
OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000_only_Vision"
|
| 71 |
+
|
| 72 |
+
MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_7B_multilayer_distill_aop_10_pooling_i_ret_12_30_h100_2_node/checkpoint-5000"
|
| 73 |
+
MODEL_NAME="qwen2_5_vl"
|
| 74 |
+
|
| 75 |
+
echo "================================================="
|
| 76 |
+
echo "🚀 Pipeline: AOP + Early Exit Classifier (V5)"
|
| 77 |
+
echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
|
| 78 |
+
echo "================================================="
|
| 79 |
+
|
| 80 |
+
for MODALITY in "${MODALITIES[@]}"; do
|
| 81 |
+
DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
|
| 82 |
+
OUTPUT_PATH="$OUTPUT_BASEDIR/$MODALITY/"
|
| 83 |
+
|
| 84 |
+
mkdir -p "$OUTPUT_PATH"
|
| 85 |
+
|
| 86 |
+
# 【关键】调用 V5 脚本
|
| 87 |
+
cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier_AOP_pooling_new.py \
|
| 88 |
+
--pooling eos \
|
| 89 |
+
--normalize true \
|
| 90 |
+
--per_device_eval_batch_size $BATCH_SIZE \
|
| 91 |
+
--model_backbone \"$MODEL_NAME\" \
|
| 92 |
+
--model_name \"$MODEL_CHECKPOINT\" \
|
| 93 |
+
--dataset_config \"$DATA_CONFIG_PATH\" \
|
| 94 |
+
--encode_output_path \"$OUTPUT_PATH\" \
|
| 95 |
+
--data_basedir \"$DATA_BASEDIR\""
|
| 96 |
+
|
| 97 |
+
echo " - Executing command..."
|
| 98 |
+
eval "$cmd"
|
| 99 |
+
echo " - Done."
|
| 100 |
+
done
|
| 101 |
+
|
| 102 |
+
echo "✅ All jobs completed."
|
experiments/public/eval/eval_1gpu_early_exit_classifier_V5.sh
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
echo "==> Environment"
|
| 3 |
+
echo "conda location: $(which conda)"
|
| 4 |
+
echo "Python location: $(which python)"
|
| 5 |
+
echo "Python version: $(python --version)"
|
| 6 |
+
echo ""
|
| 7 |
+
|
| 8 |
+
cd VLM2Vec/ || exit
|
| 9 |
+
|
| 10 |
+
# ==============================================================================
|
| 11 |
+
# Configuration
|
| 12 |
+
# ==============================================================================
|
| 13 |
+
CUDA_VISIBLE_DEVICES="0"
|
| 14 |
+
BATCH_SIZE=64
|
| 15 |
+
|
| 16 |
+
# 【AOP 配置】Token Pruning
|
| 17 |
+
export AOP_ENABLED=1
|
| 18 |
+
export AOP_APPLY=qry # 只剪 query;要剪 cand 可以设 both
|
| 19 |
+
export AOP_LAYER=12
|
| 20 |
+
export AOP_MODE=ratio
|
| 21 |
+
export AOP_SELECTION=attention # 使用注意力作为重要性分数
|
| 22 |
+
export AOP_ATTENTION_AGG=mean # head 聚合方式:mean/max/sum
|
| 23 |
+
|
| 24 |
+
# 开启图像 / 文本剪枝
|
| 25 |
+
export AOP_PRUNE_VISION=1
|
| 26 |
+
export AOP_PRUNE_TEXT=1
|
| 27 |
+
|
| 28 |
+
# 可选:合理的比例和保底
|
| 29 |
+
export AOP_KEEP_RATIO_VISION=1.0 # 先不剪图像也行,想剪再改成 0.5 等
|
| 30 |
+
export AOP_KEEP_RATIO_TEXT=0.5
|
| 31 |
+
export AOP_MIN_KEEP_VISION=8
|
| 32 |
+
export AOP_MIN_KEEP_TEXT=8
|
| 33 |
+
export AOP_PROTECT_TEXT_LAST=8
|
| 34 |
+
export AOP_PROTECT_SPECIAL=1
|
| 35 |
+
|
| 36 |
+
# 如果想看 debug,可以加:
|
| 37 |
+
# export AOP_DEBUG=1
|
| 38 |
+
|
| 39 |
+
# 【EE 配置】Early Exit via Classifier
|
| 40 |
+
export EE_ENABLED=1
|
| 41 |
+
export EE_LAYER=12
|
| 42 |
+
export EE_METHOD=classifier
|
| 43 |
+
export EE_DEBUG_MODE=1
|
| 44 |
+
# 阈值根据 V5 分析结果填入,或者保留默认值
|
| 45 |
+
export EE_THRESHOLD=0
|
| 46 |
+
export EE_TOPK=200
|
| 47 |
+
export EE_PROFILE=1
|
| 48 |
+
export EE_TOPK_EMB=10
|
| 49 |
+
|
| 50 |
+
# 【关键】分类器路径 (替换为您 V5 训练的 checkpoint 路径)
|
| 51 |
+
export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_DISTILL_FLOOR_0_12_3_h100_3_Classifier_Layer12_V5_i_ret/checkpoint-800"
|
| 52 |
+
|
| 53 |
+
MODALITIES=("image_retrival")
|
| 54 |
+
DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
|
| 55 |
+
# 【关键】修改输出目录
|
| 56 |
+
OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_add_distill_0.2_0.6_DISTILL_FLOOR_0_12_3_h100_3_Classifier_Layer12_V5_i_ret/checkpoint-800_0.3"
|
| 57 |
+
|
| 58 |
+
MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_h100/checkpoint-5000"
|
| 59 |
+
MODEL_NAME="qwen2_5_vl"
|
| 60 |
+
|
| 61 |
+
echo "================================================="
|
| 62 |
+
echo "🚀 Pipeline: AOP + Early Exit Classifier (V5)"
|
| 63 |
+
echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
|
| 64 |
+
echo "================================================="
|
| 65 |
+
|
| 66 |
+
for MODALITY in "${MODALITIES[@]}"; do
|
| 67 |
+
DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
|
| 68 |
+
OUTPUT_PATH="$OUTPUT_BASEDIR/$MODALITY/"
|
| 69 |
+
|
| 70 |
+
mkdir -p "$OUTPUT_PATH"
|
| 71 |
+
|
| 72 |
+
# 【关键】调用 V5 脚本
|
| 73 |
+
cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier_V5.py \
|
| 74 |
+
--pooling eos \
|
| 75 |
+
--normalize true \
|
| 76 |
+
--per_device_eval_batch_size $BATCH_SIZE \
|
| 77 |
+
--model_backbone \"$MODEL_NAME\" \
|
| 78 |
+
--model_name \"$MODEL_CHECKPOINT\" \
|
| 79 |
+
--dataset_config \"$DATA_CONFIG_PATH\" \
|
| 80 |
+
--encode_output_path \"$OUTPUT_PATH\" \
|
| 81 |
+
--data_basedir \"$DATA_BASEDIR\""
|
| 82 |
+
|
| 83 |
+
echo " - Executing command..."
|
| 84 |
+
eval "$cmd"
|
| 85 |
+
echo " - Done."
|
| 86 |
+
done
|
| 87 |
+
|
| 88 |
+
echo "✅ All jobs completed."
|
experiments/public/eval/eval_1gpu_early_exit_classifier_V5_new.sh
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
echo "==> Environment"
|
| 3 |
+
echo "conda location: $(which conda)"
|
| 4 |
+
echo "Python location: $(which python)"
|
| 5 |
+
echo "Python version: $(python --version)"
|
| 6 |
+
echo ""
|
| 7 |
+
|
| 8 |
+
cd VLM2Vec/ || exit
|
| 9 |
+
|
| 10 |
+
# ==============================================================================
|
| 11 |
+
# Configuration
|
| 12 |
+
# ==============================================================================
|
| 13 |
+
CUDA_VISIBLE_DEVICES="0"
|
| 14 |
+
BATCH_SIZE=64
|
| 15 |
+
|
| 16 |
+
# 【VPOOL 配置】Vision Token Pooling(如需启用,设为1)
|
| 17 |
+
export VPOOL_ENABLED=1
|
| 18 |
+
export VPOOL_APPLY=both # qry|cand|both
|
| 19 |
+
export VPOOL_LAYER=1 # 进入第 1 层前做 pooling
|
| 20 |
+
export VPOOL_KERNEL=2
|
| 21 |
+
export VPOOL_STRIDE=2
|
| 22 |
+
export VPOOL_METHOD=avg # avg|max|linear|conv
|
| 23 |
+
export VPOOL_PROTECT_CLS=1
|
| 24 |
+
export VPOOL_ONLY_VISION=1
|
| 25 |
+
export VPOOL_MONITOR=1
|
| 26 |
+
|
| 27 |
+
# 显存优化(可选)
|
| 28 |
+
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
|
| 29 |
+
|
| 30 |
+
# 【AOP 配置】Token Pruning
|
| 31 |
+
export AOP_ENABLED=1
|
| 32 |
+
export AOP_APPLY=qry # 只剪 query;要剪 cand 可以设 both
|
| 33 |
+
export AOP_LAYER=12
|
| 34 |
+
export AOP_MODE=ratio
|
| 35 |
+
export AOP_SELECTION=attention # 使用注意力作为重要性分数
|
| 36 |
+
export AOP_ATTENTION_AGG=mean # head 聚合方式:mean/max/sum
|
| 37 |
+
|
| 38 |
+
# 开启图像 / 文本剪枝
|
| 39 |
+
export AOP_PRUNE_TEXT=1
|
| 40 |
+
export AOP_PRUNE_VISION=0
|
| 41 |
+
export AOP_KEEP_RATIO_TEXT=0.5
|
| 42 |
+
export AOP_KEEP_RATIO_VISION=0.5
|
| 43 |
+
export AOP_MIN_KEEP_TEXT=16
|
| 44 |
+
export AOP_MIN_KEEP_VISION=8
|
| 45 |
+
export AOP_PROTECT_TEXT_LAST=8
|
| 46 |
+
export AOP_PROTECT_SPECIAL=1
|
| 47 |
+
export AOP_RANDOM_SEED=42
|
| 48 |
+
export AOP_MONITOR=1
|
| 49 |
+
|
| 50 |
+
# 【EE 配置】Early Exit via Classifier
|
| 51 |
+
export EE_ENABLED=1
|
| 52 |
+
export EE_LAYER=12
|
| 53 |
+
export EE_METHOD=classifier
|
| 54 |
+
export EE_DEBUG_MODE=1
|
| 55 |
+
# 阈值根据 V5 分析结果填入,或者保留默认值
|
| 56 |
+
export EE_THRESHOLD=0
|
| 57 |
+
export EE_TOPK=200
|
| 58 |
+
export EE_PROFILE=1
|
| 59 |
+
export EE_TOPK_EMB=10
|
| 60 |
+
|
| 61 |
+
# 【关键】分类器路径 (替换为您 V5 训练的 checkpoint 路径)
|
| 62 |
+
export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_DISTILL_FLOOR_0_12_3_h100_3_Classifier_Layer12_V5_i_ret/checkpoint-800"
|
| 63 |
+
|
| 64 |
+
MODALITIES=("image_retrival")
|
| 65 |
+
DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
|
| 66 |
+
# 【关键】修改输出目录
|
| 67 |
+
OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_add_distill_0.2_0.6_DISTILL_FLOOR_0_12_3_h100_3_Classifier_Layer12_V5_i_ret/checkpoint-800_0"
|
| 68 |
+
|
| 69 |
+
MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_h100/checkpoint-5000"
|
| 70 |
+
MODEL_NAME="qwen2_5_vl"
|
| 71 |
+
|
| 72 |
+
echo "================================================="
|
| 73 |
+
echo "🚀 Pipeline: AOP + Early Exit Classifier (V5)"
|
| 74 |
+
echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
|
| 75 |
+
echo "================================================="
|
| 76 |
+
|
| 77 |
+
for MODALITY in "${MODALITIES[@]}"; do
|
| 78 |
+
DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
|
| 79 |
+
OUTPUT_PATH="$OUTPUT_BASEDIR/$MODALITY/"
|
| 80 |
+
|
| 81 |
+
mkdir -p "$OUTPUT_PATH"
|
| 82 |
+
|
| 83 |
+
# 【关键】调用 V5 脚本
|
| 84 |
+
cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier_V5_new.py \
|
| 85 |
+
--pooling eos \
|
| 86 |
+
--normalize true \
|
| 87 |
+
--per_device_eval_batch_size $BATCH_SIZE \
|
| 88 |
+
--model_backbone \"$MODEL_NAME\" \
|
| 89 |
+
--model_name \"$MODEL_CHECKPOINT\" \
|
| 90 |
+
--dataset_config \"$DATA_CONFIG_PATH\" \
|
| 91 |
+
--encode_output_path \"$OUTPUT_PATH\" \
|
| 92 |
+
--data_basedir \"$DATA_BASEDIR\""
|
| 93 |
+
|
| 94 |
+
echo " - Executing command..."
|
| 95 |
+
eval "$cmd"
|
| 96 |
+
echo " - Done."
|
| 97 |
+
done
|
| 98 |
+
|
| 99 |
+
echo "✅ All jobs completed."
|
experiments/public/eval/eval_1gpu_multilayer_AOP_attn_pooling.sh
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
echo "==> Environment"
|
| 3 |
+
echo "conda location: $(which conda)"
|
| 4 |
+
echo "Python location: $(which python)"
|
| 5 |
+
echo "Python version: $(python --version)"
|
| 6 |
+
echo ""
|
| 7 |
+
|
| 8 |
+
cd VLM2Vec/ || exit
|
| 9 |
+
# ==============================================================================
|
| 10 |
+
# Configuration
|
| 11 |
+
# ==============================================================================
|
| 12 |
+
CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
|
| 13 |
+
BATCH_SIZE=64
|
| 14 |
+
MODALITIES=("image_retrival")
|
| 15 |
+
DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
|
| 16 |
+
# OUTPUT_BASEDIR="~/exps/vlm2vec_bsz128"
|
| 17 |
+
# OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/VLM2Vec_AOP/t_0.5_8_i_0.5_16_both_l12_bsz64" #_qry_cand_diff_ratio
|
| 18 |
+
OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/checkpoint-20" #/t_0.5_8_i_0.5_16_both_l12_bsz64 #_qry_cand_diff_ratio
|
| 19 |
+
|
| 20 |
+
# ==== AOP + Vision Token Pooling(启用)====
|
| 21 |
+
export VPOOL_ENABLED=1
|
| 22 |
+
export VPOOL_APPLY=both
|
| 23 |
+
export VPOOL_LAYER=1
|
| 24 |
+
export VPOOL_KERNEL=2
|
| 25 |
+
export VPOOL_STRIDE=2
|
| 26 |
+
|
| 27 |
+
# === 改这里:启用 attention pooling ===
|
| 28 |
+
export VPOOL_METHOD=attn # attn | avg | max | ...
|
| 29 |
+
export VPOOL_ATTN_TAU=1.0 # NEW: attention pooling temperature
|
| 30 |
+
|
| 31 |
+
export VPOOL_PROTECT_CLS=1
|
| 32 |
+
export VPOOL_ONLY_VISION=1
|
| 33 |
+
export VPOOL_MONITOR=1
|
| 34 |
+
|
| 35 |
+
export AOP_ENABLED=1
|
| 36 |
+
export AOP_APPLY=both # qry|cand|both
|
| 37 |
+
export AOP_LAYER=13
|
| 38 |
+
export AOP_MODE=ratio
|
| 39 |
+
export AOP_SELECTION=attention
|
| 40 |
+
export AOP_ATTENTION_AGG=mean
|
| 41 |
+
|
| 42 |
+
export AOP_PRUNE_TEXT=1
|
| 43 |
+
export AOP_PRUNE_VISION=0
|
| 44 |
+
export AOP_KEEP_RATIO_TEXT=0.5
|
| 45 |
+
export AOP_KEEP_RATIO_VISION=0.5
|
| 46 |
+
export AOP_MIN_KEEP_TEXT=16
|
| 47 |
+
export AOP_MIN_KEEP_VISION=8
|
| 48 |
+
export AOP_PROTECT_TEXT_LAST=8
|
| 49 |
+
export AOP_PROTECT_SPECIAL=1
|
| 50 |
+
export AOP_RANDOM_SEED=42
|
| 51 |
+
export AOP_MONITOR=1
|
| 52 |
+
|
| 53 |
+
export EE_SKIP_LM_HEAD=1
|
| 54 |
+
|
| 55 |
+
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
|
| 56 |
+
|
| 57 |
+
# ==> Define models and their base output paths here
|
| 58 |
+
# Format: "MODEL_NAME;BASE_OUTPUT_PATH"
|
| 59 |
+
declare -a MODEL_SPECS
|
| 60 |
+
# MODEL_SPECS+=( "VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
|
| 61 |
+
# /home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.image/checkpoint-1000
|
| 62 |
+
MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/exps/checkpoint-20;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
|
| 63 |
+
# MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-2B-Instruct" )
|
| 64 |
+
# MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-7B-Instruct" )
|
| 65 |
+
|
| 66 |
+
# ==============================================================================
|
| 67 |
+
# Main Execution Loop
|
| 68 |
+
# ==============================================================================
|
| 69 |
+
# Loop through each model specification
|
| 70 |
+
for spec in "${MODEL_SPECS[@]}"; do
|
| 71 |
+
# Parse the model name and base output path from the spec string
|
| 72 |
+
IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"
|
| 73 |
+
|
| 74 |
+
echo "================================================="
|
| 75 |
+
echo "🚀 Processing Model: $MODEL_NAME"
|
| 76 |
+
echo "================================================="
|
| 77 |
+
|
| 78 |
+
# Loop through each modality for the current model
|
| 79 |
+
for MODALITY in "${MODALITIES[@]}"; do
|
| 80 |
+
DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
|
| 81 |
+
OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"
|
| 82 |
+
|
| 83 |
+
echo "-------------------------------------------------"
|
| 84 |
+
echo " - Modality: $MODALITY"
|
| 85 |
+
echo " - Output Path: $OUTPUT_PATH"
|
| 86 |
+
|
| 87 |
+
# Ensure the output directory exists
|
| 88 |
+
mkdir -p "$OUTPUT_PATH"
|
| 89 |
+
|
| 90 |
+
cmd="LM_LAYERS='last' CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_multilayer_AOP_attn_pooling.py \
|
| 91 |
+
--pooling eos \
|
| 92 |
+
--normalize true \
|
| 93 |
+
--per_device_eval_batch_size $BATCH_SIZE \
|
| 94 |
+
--model_backbone \"$MODEL_BACKBONE\" \
|
| 95 |
+
--model_name \"$MODEL_NAME\" \
|
| 96 |
+
--dataset_config \"$DATA_CONFIG_PATH\" \
|
| 97 |
+
--encode_output_path \"$OUTPUT_PATH\" \
|
| 98 |
+
--data_basedir \"$DATA_BASEDIR\""
|
| 99 |
+
|
| 100 |
+
echo " - Executing command..."
|
| 101 |
+
# echo "$cmd" # Uncomment for debugging the exact command
|
| 102 |
+
eval "$cmd"
|
| 103 |
+
echo " - Done."
|
| 104 |
+
echo "-------------------------------------------------"
|
| 105 |
+
done
|
| 106 |
+
done
|
| 107 |
+
|
| 108 |
+
echo "✅ All jobs completed."
|
experiments/public/eval/eval_1gpu_multilayer_AOP_new.sh
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
echo "==> Environment"
|
| 3 |
+
echo "conda location: $(which conda)"
|
| 4 |
+
echo "Python location: $(which python)"
|
| 5 |
+
echo "Python version: $(python --version)"
|
| 6 |
+
echo ""
|
| 7 |
+
|
| 8 |
+
cd VLM2Vec/ || exit
|
| 9 |
+
# ==============================================================================
|
| 10 |
+
# Configuration
|
| 11 |
+
# ==============================================================================
|
| 12 |
+
CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
|
| 13 |
+
BATCH_SIZE=64
|
| 14 |
+
MODALITIES=("image_retrival")
|
| 15 |
+
DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
|
| 16 |
+
# OUTPUT_BASEDIR="~/exps/vlm2vec_bsz128"
|
| 17 |
+
# OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/VLM2Vec_AOP/t_0.5_8_i_0.5_16_both_l12_bsz64" #_qry_cand_diff_ratio
|
| 18 |
+
OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_7B_multilayer_distill_aop_10_pooling_i_ret_12_30_h100_2_node/checkpoint-5000/aop_t_0.5_8_both_l6_bsz64" #_qry_cand_diff_ratio
|
| 19 |
+
# OUTPUT_BASEDIR="/home/v-menggao/code/vlmvector_qwen25vl_train_multi_layer_distill_AOP_pooling_layer8_ablation_1230/result" #_qry_cand_diff_ratio
|
| 20 |
+
|
| 21 |
+
# ==== AOP + Vision Token Pooling(启用)====
|
| 22 |
+
export VPOOL_ENABLED=1
|
| 23 |
+
export VPOOL_APPLY=both # qry|cand|both
|
| 24 |
+
export VPOOL_LAYER=1 # 进入第 1 层前做 pooling
|
| 25 |
+
export VPOOL_KERNEL=2 # 2x2 pooling
|
| 26 |
+
export VPOOL_STRIDE=2
|
| 27 |
+
export VPOOL_METHOD=avg # avg|max|linear|conv
|
| 28 |
+
export VPOOL_PROTECT_CLS=1
|
| 29 |
+
export VPOOL_ONLY_VISION=1
|
| 30 |
+
export VPOOL_MONITOR=0 # 打印 pooling 前后长度
|
| 31 |
+
|
| 32 |
+
export AOP_ENABLED=1
|
| 33 |
+
export AOP_APPLY=both # qry|cand|both
|
| 34 |
+
export AOP_LAYER=10
|
| 35 |
+
export AOP_MODE=ratio
|
| 36 |
+
export AOP_SELECTION=aop #attention
|
| 37 |
+
export AOP_ATTENTION_AGG=mean
|
| 38 |
+
|
| 39 |
+
export AOP_PRUNE_TEXT=1
|
| 40 |
+
export AOP_PRUNE_VISION=0
|
| 41 |
+
export AOP_KEEP_RATIO_TEXT=0.5
|
| 42 |
+
export AOP_KEEP_RATIO_VISION=0.5
|
| 43 |
+
export AOP_MIN_KEEP_TEXT=8
|
| 44 |
+
export AOP_MIN_KEEP_VISION=8
|
| 45 |
+
export AOP_PROTECT_TEXT_LAST=8
|
| 46 |
+
export AOP_PROTECT_SPECIAL=1
|
| 47 |
+
export AOP_RANDOM_SEED=42
|
| 48 |
+
export AOP_MONITOR=0
|
| 49 |
+
|
| 50 |
+
export EE_SKIP_LM_HEAD=1
|
| 51 |
+
|
| 52 |
+
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
|
| 53 |
+
|
| 54 |
+
# ==> Define models and their base output paths here
|
| 55 |
+
# Format: "MODEL_NAME;BASE_OUTPUT_PATH"
|
| 56 |
+
declare -a MODEL_SPECS
|
| 57 |
+
# MODEL_SPECS+=( "VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
|
| 58 |
+
# /home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.image/checkpoint-1000
|
| 59 |
+
MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_7B_multilayer_distill_aop_10_pooling_i_ret_12_30_h100_2_node/checkpoint-5000;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
|
| 60 |
+
# MODEL_SPECS+=( "/home/v-menggao/code/vlmvector_qwen25vl_train_multi_layer_distill_AOP_pooling_layer8_ablation_1230/checkpoint-900;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
|
| 61 |
+
# MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-2B-Instruct" )
|
| 62 |
+
# MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-7B-Instruct" )
|
| 63 |
+
|
| 64 |
+
# ==============================================================================
|
| 65 |
+
# Main Execution Loop
|
| 66 |
+
# ==============================================================================
|
| 67 |
+
# Loop through each model specification
|
| 68 |
+
for spec in "${MODEL_SPECS[@]}"; do
|
| 69 |
+
# Parse the model name and base output path from the spec string
|
| 70 |
+
IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"
|
| 71 |
+
|
| 72 |
+
echo "================================================="
|
| 73 |
+
echo "🚀 Processing Model: $MODEL_NAME"
|
| 74 |
+
echo "================================================="
|
| 75 |
+
|
| 76 |
+
# Loop through each modality for the current model
|
| 77 |
+
for MODALITY in "${MODALITIES[@]}"; do
|
| 78 |
+
DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
|
| 79 |
+
OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"
|
| 80 |
+
|
| 81 |
+
echo "-------------------------------------------------"
|
| 82 |
+
echo " - Modality: $MODALITY"
|
| 83 |
+
echo " - Output Path: $OUTPUT_PATH"
|
| 84 |
+
|
| 85 |
+
# Ensure the output directory exists
|
| 86 |
+
mkdir -p "$OUTPUT_PATH"
|
| 87 |
+
|
| 88 |
+
cmd="LM_LAYERS='12,last' CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_multilayer_AOP_new.py \
|
| 89 |
+
--pooling eos \
|
| 90 |
+
--normalize true \
|
| 91 |
+
--per_device_eval_batch_size $BATCH_SIZE \
|
| 92 |
+
--model_backbone \"$MODEL_BACKBONE\" \
|
| 93 |
+
--model_name \"$MODEL_NAME\" \
|
| 94 |
+
--dataset_config \"$DATA_CONFIG_PATH\" \
|
| 95 |
+
--encode_output_path \"$OUTPUT_PATH\" \
|
| 96 |
+
--data_basedir \"$DATA_BASEDIR\""
|
| 97 |
+
|
| 98 |
+
echo " - Executing command..."
|
| 99 |
+
# echo "$cmd" # Uncomment for debugging the exact command
|
| 100 |
+
eval "$cmd"
|
| 101 |
+
echo " - Done."
|
| 102 |
+
echo "-------------------------------------------------"
|
| 103 |
+
done
|
| 104 |
+
done
|
| 105 |
+
|
| 106 |
+
echo "✅ All jobs completed."
|
experiments/public/eval/eval_1gpu_output_attn.sh
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
|
| 3 |
+
echo "==> Environment"
|
| 4 |
+
echo "conda location: $(which conda)"
|
| 5 |
+
echo "Python location: $(which python)"
|
| 6 |
+
echo "Python version: $(python --version)"
|
| 7 |
+
echo ""
|
| 8 |
+
|
| 9 |
+
cd VLM2Vec/ || exit
|
| 10 |
+
|
| 11 |
+
# ==============================================================================
|
| 12 |
+
# Configuration
|
| 13 |
+
# ==============================================================================
|
| 14 |
+
CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
|
| 15 |
+
BATCH_SIZE=64
|
| 16 |
+
MODALITIES=("image_retrival")
|
| 17 |
+
DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
|
| 18 |
+
# OUTPUT_BASEDIR="~/exps/vlm2vec_bsz128"
|
| 19 |
+
# OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2vl_2B.add_mlp_try1/checkpoint-500" #_qry_cand_diff_ratio
|
| 20 |
+
OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/VLM2Vec-output-attn/VLM2Vec-V2.0" #_qry_cand_diff_ratio
|
| 21 |
+
# OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/try_add_mlp_try/stage1" #_qry_cand_diff_ratio
|
| 22 |
+
|
| 23 |
+
# ==> Define models and their base output paths here
|
| 24 |
+
# Format: "MODEL_NAME;BASE_OUTPUT_PATH"
|
| 25 |
+
declare -a MODEL_SPECS
|
| 26 |
+
# MODEL_SPECS+=( "VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
|
| 27 |
+
# /home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.image/checkpoint-1000
|
| 28 |
+
# MODEL_SPECS+=( "code-kunkun/LamRA-Ret-Qwen2.5VL-7b;lamra_qwen25;$OUTPUT_BASEDIR/LamRA-Ret-Qwen2.5VL-7b" )
|
| 29 |
+
MODEL_SPECS+=( "VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
|
| 30 |
+
# MODEL_SPECS+=( "Qwen/Qwen2.5-VL-3B-Instruct;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-Qwen2.5VL-3B" )
|
| 31 |
+
# MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.add_mlp_try1/checkpoint-500;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-Qwen2VL-2B" )
|
| 32 |
+
# MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.add_mlp_8000_16000/checkpoint-7200;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-Qwen2VL-2B" )
|
| 33 |
+
# MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.add_mlp/stage1;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
|
| 34 |
+
# MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-2B-Instruct" )
|
| 35 |
+
# MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-7B-Instruct" )
|
| 36 |
+
# MODEL_SPECS+=( "code-kunkun/LamRA-Ret;lamra;$OUTPUT_BASEDIR/LamRA-Ret" )lamra_qwen25
|
| 37 |
+
# MODEL_SPECS+=( "code-kunkun/LamRA-Ret-Qwen2.5VL-7b;lamra_qwen25;$OUTPUT_BASEDIR/LamRA-Ret-Qwen2.5VL-7b" )
|
| 38 |
+
# MODEL_SPECS+=( "vidore/colpali-v1.3;colpali;$OUTPUT_BASEDIR/colpali-v1.3" )
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
# ==============================================================================
|
| 42 |
+
# Main Execution Loop
|
| 43 |
+
# ==============================================================================
|
| 44 |
+
# Loop through each model specification
|
| 45 |
+
for spec in "${MODEL_SPECS[@]}"; do
|
| 46 |
+
# Parse the model name and base output path from the spec string
|
| 47 |
+
IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"
|
| 48 |
+
|
| 49 |
+
echo "================================================="
|
| 50 |
+
echo "🚀 Processing Model: $MODEL_NAME"
|
| 51 |
+
echo "================================================="
|
| 52 |
+
|
| 53 |
+
# Loop through each modality for the current model
|
| 54 |
+
for MODALITY in "${MODALITIES[@]}"; do
|
| 55 |
+
DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
|
| 56 |
+
OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"
|
| 57 |
+
|
| 58 |
+
echo "-------------------------------------------------"
|
| 59 |
+
echo " - Modality: $MODALITY"
|
| 60 |
+
echo " - Output Path: $OUTPUT_PATH"
|
| 61 |
+
|
| 62 |
+
# Ensure the output directory exists
|
| 63 |
+
mkdir -p "$OUTPUT_PATH"
|
| 64 |
+
|
| 65 |
+
cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time.py \
|
| 66 |
+
--pooling eos \
|
| 67 |
+
--normalize true \
|
| 68 |
+
--per_device_eval_batch_size $BATCH_SIZE \
|
| 69 |
+
--model_backbone \"$MODEL_BACKBONE\" \
|
| 70 |
+
--model_name \"$MODEL_NAME\" \
|
| 71 |
+
--dataset_config \"$DATA_CONFIG_PATH\" \
|
| 72 |
+
--encode_output_path \"$OUTPUT_PATH\" \
|
| 73 |
+
--data_basedir \"$DATA_BASEDIR\""
|
| 74 |
+
|
| 75 |
+
echo " - Executing command..."
|
| 76 |
+
# echo "$cmd" # Uncomment for debugging the exact command
|
| 77 |
+
eval "$cmd"
|
| 78 |
+
echo " - Done."
|
| 79 |
+
echo "-------------------------------------------------"
|
| 80 |
+
done
|
| 81 |
+
done
|
| 82 |
+
|
| 83 |
+
echo "✅ All jobs completed."
|
experiments/public/eval/eval_vlm2vecv1_8gpu.sh
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
|
| 3 |
+
echo "==> Environment"
|
| 4 |
+
echo "conda location: $(which conda)"
|
| 5 |
+
echo "Python location: $(which python)"
|
| 6 |
+
echo "Python version: $(python --version)"
|
| 7 |
+
echo ""
|
| 8 |
+
|
| 9 |
+
cd projects/VLM2Vec/ || exit
|
| 10 |
+
|
| 11 |
+
# ==============================================================================
|
| 12 |
+
# Configuration
|
| 13 |
+
# ==============================================================================
|
| 14 |
+
CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
|
| 15 |
+
BATCH_SIZE=32
|
| 16 |
+
MODALITIES=("image" "video" "visdoc")
|
| 17 |
+
DATA_BASEDIR="~/data/vlm2vec_eval"
|
| 18 |
+
OUTPUT_BASEDIR="~/exps/vlm2vec/"
|
| 19 |
+
|
| 20 |
+
# ==> Define models and their base output paths here
|
| 21 |
+
# Format: "MODEL_NAME;BASE_OUTPUT_PATH"
|
| 22 |
+
declare -a MODEL_SPECS
|
| 23 |
+
MODEL_SPECS+=( "TIGER-Lab/VLM2Vec-Qwen2VL-2B;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V1-Qwen2VL-2B" )
|
| 24 |
+
MODEL_SPECS+=( "TIGER-Lab/VLM2Vec-Qwen2VL-7B;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V1-Qwen2VL-7B" )
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ==============================================================================
|
| 28 |
+
# Main Execution Loop
|
| 29 |
+
# ==============================================================================
|
| 30 |
+
# Loop through each model specification
|
| 31 |
+
for spec in "${MODEL_SPECS[@]}"; do
|
| 32 |
+
# Parse the model name and base output path from the spec string
|
| 33 |
+
IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"
|
| 34 |
+
|
| 35 |
+
echo "================================================="
|
| 36 |
+
echo "🚀 Processing Model: $MODEL_NAME"
|
| 37 |
+
echo "================================================="
|
| 38 |
+
|
| 39 |
+
# Loop through each modality for the current model
|
| 40 |
+
for MODALITY in "${MODALITIES[@]}"; do
|
| 41 |
+
DATA_CONFIG_PATH="experiments/release/eval/$MODALITY.yaml"
|
| 42 |
+
OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"
|
| 43 |
+
|
| 44 |
+
echo "-------------------------------------------------"
|
| 45 |
+
echo " - Modality: $MODALITY"
|
| 46 |
+
echo " - Output Path: $OUTPUT_PATH"
|
| 47 |
+
|
| 48 |
+
# Ensure the output directory exists
|
| 49 |
+
mkdir -p "$OUTPUT_PATH"
|
| 50 |
+
|
| 51 |
+
cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES torchrun --nproc_per_node=8 --master_port=2233 --max_restarts=0 eval.py \
|
| 52 |
+
--pooling eos \
|
| 53 |
+
--normalize true \
|
| 54 |
+
--per_device_eval_batch_size $BATCH_SIZE \
|
| 55 |
+
--model_backbone \"$MODEL_BACKBONE\" \
|
| 56 |
+
--model_name \"$MODEL_NAME\" \
|
| 57 |
+
--resize_use_processor false \
|
| 58 |
+
--image_resolution high \
|
| 59 |
+
--dataset_config \"$DATA_CONFIG_PATH\" \
|
| 60 |
+
--encode_output_path \"$OUTPUT_PATH\" \
|
| 61 |
+
--data_basedir \"$DATA_BASEDIR\""
|
| 62 |
+
|
| 63 |
+
echo " - Executing command..."
|
| 64 |
+
# echo "$cmd" # Uncomment for debugging the exact command
|
| 65 |
+
eval "$cmd"
|
| 66 |
+
echo " - Done."
|
| 67 |
+
echo "-------------------------------------------------"
|
| 68 |
+
done
|
| 69 |
+
done
|
| 70 |
+
|
| 71 |
+
echo "✅ All jobs completed."
|
experiments/public/eval/image_retrival.yaml
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# RET i -> i
|
| 2 |
+
CIRR:
|
| 3 |
+
dataset_parser: image_i2i_vg
|
| 4 |
+
dataset_name: CIRR
|
| 5 |
+
dataset_split: test
|
| 6 |
+
image_root: image-tasks/MMEB
|
| 7 |
+
eval_type: local
|
| 8 |
+
NIGHTS:
|
| 9 |
+
dataset_parser: image_i2i_vg
|
| 10 |
+
dataset_name: NIGHTS
|
| 11 |
+
dataset_split: test
|
| 12 |
+
image_root: image-tasks/MMEB
|
| 13 |
+
eval_type: local
|
| 14 |
+
OVEN:
|
| 15 |
+
dataset_parser: image_i2i_vg
|
| 16 |
+
dataset_name: OVEN
|
| 17 |
+
dataset_split: test
|
| 18 |
+
image_root: image-tasks/MMEB
|
| 19 |
+
eval_type: local
|
| 20 |
+
FashionIQ:
|
| 21 |
+
dataset_parser: image_i2i_vg
|
| 22 |
+
dataset_name: FashionIQ
|
| 23 |
+
dataset_split: test
|
| 24 |
+
image_root: image-tasks/MMEB
|
| 25 |
+
eval_type: local
|
| 26 |
+
|
| 27 |
+
# RET i -> t
|
| 28 |
+
MSCOCO_i2t:
|
| 29 |
+
dataset_parser: image_i2t
|
| 30 |
+
dataset_name: MSCOCO_i2t
|
| 31 |
+
dataset_split: test
|
| 32 |
+
image_root: image-tasks/MMEB
|
| 33 |
+
eval_type: local
|
| 34 |
+
VisualNews_i2t:
|
| 35 |
+
dataset_parser: image_i2t
|
| 36 |
+
dataset_name: VisualNews_i2t
|
| 37 |
+
dataset_split: test
|
| 38 |
+
image_root: image-tasks/MMEB
|
| 39 |
+
eval_type: local
|
| 40 |
+
# RET t -> i
|
| 41 |
+
VisDial:
|
| 42 |
+
dataset_parser: image_t2i
|
| 43 |
+
dataset_name: VisDial
|
| 44 |
+
dataset_split: test
|
| 45 |
+
image_root: image-tasks/MMEB
|
| 46 |
+
eval_type: local
|
| 47 |
+
MSCOCO_t2i:
|
| 48 |
+
dataset_parser: image_t2i
|
| 49 |
+
dataset_name: MSCOCO_t2i
|
| 50 |
+
dataset_split: test
|
| 51 |
+
image_root: image-tasks/MMEB
|
| 52 |
+
eval_type: local
|
| 53 |
+
VisualNews_t2i:
|
| 54 |
+
dataset_parser: image_t2i
|
| 55 |
+
dataset_name: VisualNews_t2i
|
| 56 |
+
dataset_split: test
|
| 57 |
+
image_root: image-tasks/MMEB
|
| 58 |
+
eval_type: local
|
| 59 |
+
WebQA:
|
| 60 |
+
dataset_parser: image_t2i
|
| 61 |
+
dataset_name: WebQA
|
| 62 |
+
dataset_split: test
|
| 63 |
+
image_root: image-tasks/MMEB
|
| 64 |
+
eval_type: local
|
| 65 |
+
EDIS:
|
| 66 |
+
dataset_parser: image_t2i
|
| 67 |
+
dataset_name: EDIS
|
| 68 |
+
dataset_split: test
|
| 69 |
+
image_root: image-tasks/MMEB
|
| 70 |
+
eval_type: local
|
| 71 |
+
Wiki-SS-NQ:
|
| 72 |
+
dataset_parser: image_t2i
|
| 73 |
+
dataset_name: Wiki-SS-NQ
|
| 74 |
+
dataset_split: test
|
| 75 |
+
image_root: image-tasks/MMEB
|
| 76 |
+
eval_type: local
|
| 77 |
+
# # RET i -> i
|
| 78 |
+
# CIRR:
|
| 79 |
+
# dataset_parser: image_i2i_vg
|
| 80 |
+
# dataset_name: CIRR
|
| 81 |
+
# dataset_split: test
|
| 82 |
+
# image_root: image-tasks/MMEB
|
| 83 |
+
# eval_type: local
|
| 84 |
+
# NIGHTS:
|
| 85 |
+
# dataset_parser: image_i2i_vg
|
| 86 |
+
# dataset_name: NIGHTS
|
| 87 |
+
# dataset_split: test
|
| 88 |
+
# image_root: image-tasks/MMEB
|
| 89 |
+
# eval_type: local
|
| 90 |
+
# OVEN:
|
| 91 |
+
# dataset_parser: image_i2i_vg
|
| 92 |
+
# dataset_name: OVEN
|
| 93 |
+
# dataset_split: test
|
| 94 |
+
# image_root: image-tasks/MMEB
|
| 95 |
+
# eval_type: local
|
| 96 |
+
# FashionIQ:
|
| 97 |
+
# dataset_parser: image_i2i_vg
|
| 98 |
+
# dataset_name: FashionIQ
|
| 99 |
+
# dataset_split: test
|
| 100 |
+
# image_root: image-tasks/MMEB
|
| 101 |
+
# eval_type: local
|
experiments/public/eval/mieb_any2any_retrieval_lite.yaml
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CIRRIT2IRetrieval:
|
| 2 |
+
# dataset_parser: mieb_any2any_retrieval
|
| 3 |
+
# dataset_name: CIRRIT2IRetrieval
|
| 4 |
+
# dataset_split: test
|
| 5 |
+
# eval_type: global
|
| 6 |
+
# CUB200I2IRetrieval:
|
| 7 |
+
# dataset_parser: mieb_any2any_retrieval
|
| 8 |
+
# dataset_name: CUB200I2IRetrieval
|
| 9 |
+
# dataset_split: test
|
| 10 |
+
# eval_type: global
|
| 11 |
+
# Fashion200kI2TRetrieval:
|
| 12 |
+
# dataset_parser: mieb_any2any_retrieval
|
| 13 |
+
# dataset_name: Fashion200kI2TRetrieval
|
| 14 |
+
# dataset_split: test
|
| 15 |
+
# eval_type: global
|
| 16 |
+
# HatefulMemesI2TRetrieval:
|
| 17 |
+
# dataset_parser: mieb_any2any_retrieval
|
| 18 |
+
# dataset_name: HatefulMemesI2TRetrieval
|
| 19 |
+
# dataset_split: test
|
| 20 |
+
# eval_type: global
|
| 21 |
+
InfoSeekIT2TRetrieval:
|
| 22 |
+
dataset_parser: mieb_any2any_retrieval
|
| 23 |
+
dataset_name: InfoSeekIT2TRetrieval
|
| 24 |
+
dataset_split: test
|
| 25 |
+
eval_type: global
|
| 26 |
+
NIGHTSI2IRetrieval:
|
| 27 |
+
dataset_parser: mieb_any2any_retrieval
|
| 28 |
+
dataset_name: NIGHTSI2IRetrieval
|
| 29 |
+
dataset_split: test
|
| 30 |
+
eval_type: global
|
| 31 |
+
OVENIT2TRetrieval:
|
| 32 |
+
dataset_parser: mieb_any2any_retrieval
|
| 33 |
+
dataset_name: OVENIT2TRetrieval
|
| 34 |
+
dataset_split: test
|
| 35 |
+
eval_type: global
|
| 36 |
+
RP2kI2IRetrieval:
|
| 37 |
+
dataset_parser: mieb_any2any_retrieval
|
| 38 |
+
dataset_name: RP2kI2IRetrieval
|
| 39 |
+
dataset_split: test
|
| 40 |
+
eval_type: global
|
| 41 |
+
VisualNewsI2TRetrieval:
|
| 42 |
+
dataset_parser: mieb_any2any_retrieval
|
| 43 |
+
dataset_name: VisualNewsI2TRetrieval
|
| 44 |
+
dataset_split: test
|
| 45 |
+
eval_type: global
|
| 46 |
+
VQA2IT2TRetrieval:
|
| 47 |
+
dataset_parser: mieb_any2any_retrieval
|
| 48 |
+
dataset_name: VQA2IT2TRetrieval
|
| 49 |
+
dataset_split: test
|
| 50 |
+
eval_type: global
|
| 51 |
+
WebQAT2ITRetrieval:
|
| 52 |
+
dataset_parser: mieb_any2any_retrieval
|
| 53 |
+
dataset_name: WebQAT2ITRetrieval
|
| 54 |
+
dataset_split: test
|
| 55 |
+
eval_type: global
|
experiments/public/eval/mieb_any2any_retrieval_lite2.yaml
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CIRRIT2IRetrieval:
|
| 2 |
+
# dataset_parser: mieb_any2any_retrieval
|
| 3 |
+
# dataset_name: CIRRIT2IRetrieval
|
| 4 |
+
# dataset_split: test
|
| 5 |
+
# eval_type: global
|
| 6 |
+
# CUB200I2IRetrieval:
|
| 7 |
+
# dataset_parser: mieb_any2any_retrieval
|
| 8 |
+
# dataset_name: CUB200I2IRetrieval
|
| 9 |
+
# dataset_split: test
|
| 10 |
+
# eval_type: global
|
| 11 |
+
# Fashion200kI2TRetrieval:
|
| 12 |
+
# dataset_parser: mieb_any2any_retrieval
|
| 13 |
+
# dataset_name: Fashion200kI2TRetrieval
|
| 14 |
+
# dataset_split: test
|
| 15 |
+
# eval_type: global
|
| 16 |
+
# HatefulMemesI2TRetrieval:
|
| 17 |
+
# dataset_parser: mieb_any2any_retrieval
|
| 18 |
+
# dataset_name: HatefulMemesI2TRetrieval
|
| 19 |
+
# dataset_split: test
|
| 20 |
+
# eval_type: global
|
| 21 |
+
# InfoSeekIT2TRetrieval:
|
| 22 |
+
# dataset_parser: mieb_any2any_retrieval
|
| 23 |
+
# dataset_name: InfoSeekIT2TRetrieval
|
| 24 |
+
# dataset_split: test
|
| 25 |
+
# eval_type: global
|
| 26 |
+
NIGHTSI2IRetrieval:
|
| 27 |
+
dataset_parser: mieb_any2any_retrieval
|
| 28 |
+
dataset_name: NIGHTSI2IRetrieval
|
| 29 |
+
dataset_split: test
|
| 30 |
+
eval_type: global
|
| 31 |
+
OVENIT2TRetrieval:
|
| 32 |
+
dataset_parser: mieb_any2any_retrieval
|
| 33 |
+
dataset_name: OVENIT2TRetrieval
|
| 34 |
+
dataset_split: test
|
| 35 |
+
eval_type: global
|
| 36 |
+
RP2kI2IRetrieval:
|
| 37 |
+
dataset_parser: mieb_any2any_retrieval
|
| 38 |
+
dataset_name: RP2kI2IRetrieval
|
| 39 |
+
dataset_split: test
|
| 40 |
+
eval_type: global
|
| 41 |
+
VisualNewsI2TRetrieval:
|
| 42 |
+
dataset_parser: mieb_any2any_retrieval
|
| 43 |
+
dataset_name: VisualNewsI2TRetrieval
|
| 44 |
+
dataset_split: test
|
| 45 |
+
eval_type: global
|
| 46 |
+
VQA2IT2TRetrieval:
|
| 47 |
+
dataset_parser: mieb_any2any_retrieval
|
| 48 |
+
dataset_name: VQA2IT2TRetrieval
|
| 49 |
+
dataset_split: test
|
| 50 |
+
eval_type: global
|
| 51 |
+
WebQAT2ITRetrieval:
|
| 52 |
+
dataset_parser: mieb_any2any_retrieval
|
| 53 |
+
dataset_name: WebQAT2ITRetrieval
|
| 54 |
+
dataset_split: test
|
| 55 |
+
eval_type: global
|
experiments/public/eval/run_batch_benchmark.sh
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# =============================================================================
# Batch latency benchmark: baseline (full forward) vs. ours (early exit).
# For every dataset listed below, eval_benchmark_V5.py is run twice (early
# exit off / on), the "[BENCHMARK_RESULT] ... Latency=XXXms" line is parsed
# from its stdout, and a CSV row <dataset,base,ours,speedup,exit_rate> is
# appended to $RESULT_CSV.
# =============================================================================
set -u -o pipefail  # error on unset variables; fail pipelines on any stage

# ======================= Configuration =======================
CUDA_VISIBLE_DEVICES="0"

# Model checkpoint.
# NOTE(review): the literal "~" path component is kept as-is — confirm the
# directory is really named "~" (a quoted "~" is never tilde-expanded).
MODEL_PATH="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_h100/checkpoint-5000"
# Early-exit classifier checkpoint
CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_Classifier_Layer12_ImgText_V5_i_ret/checkpoint-500"

# Output files
RESULT_CSV="final_speedup_results.csv"
RESULT_LOG="final_benchmark.log"

# Datasets to benchmark
DATASETS=(
    "CIRR"
    "EDIS"
    "FashionIQ"
    "NIGHTS"
    "OVEN"
    "VisDial"
    "MSCOCO_i2t"
    "MSCOCO_t2i"
    "VisualNews_i2t"
    "VisualNews_t2i"
    "WebQA"
    "Wiki-SS-NQ"
)

# Directory holding the per-dataset eval config templates
CONFIG_DIR="/home/v-menggao/code/VLM2Vec/experiments/public/eval"
# ========================================================

# Extract "Latency=<float>ms" from benchmark output passed on stdin.
extract_latency() {
    grep "\[BENCHMARK_RESULT\]" | sed -n 's/.*Latency=\([0-9.]*\)ms.*/\1/p'
}

# Initialise result files (truncate on every run)
echo "Dataset,Baseline_Latency(ms),Ours_Latency(ms),Speedup_Ratio,Exit_Rate" > "$RESULT_CSV"
echo "================= Benchmark Started at $(date) =================" > "$RESULT_LOG"

echo "🚀 Starting Batch Benchmark on ${#DATASETS[@]} datasets..."
echo "📄 Results will be saved to: $RESULT_CSV"

# Iterate over every dataset
for DATASET in "${DATASETS[@]}"; do
    CONFIG_PATH="$CONFIG_DIR/$DATASET.yaml"

    if [ ! -f "$CONFIG_PATH" ]; then
        echo "⚠️ Config not found for $DATASET, skipping..." | tee -a "$RESULT_LOG"
        continue
    fi

    echo ""
    echo "----------------------------------------------------------------"
    echo "📊 Benchmarking Dataset: $DATASET"
    echo "----------------------------------------------------------------"

    # 1. Baseline run (full forward: early exit and token pruning disabled)
    echo "  🐢 Running Baseline..."
    export EE_ENABLED=0
    export AOP_ENABLED=0

    LOG_BASE=$(CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_benchmark_V5.py \
        --model_name "$MODEL_PATH" \
        --dataset_config "$CONFIG_PATH" \
        --per_device_eval_batch_size 64 \
        --dataloader_num_workers 4 \
        2>>"$RESULT_LOG")

    LAT_BASE=$(echo "$LOG_BASE" | extract_latency)

    # 2. Our run (early exit + token pruning enabled)
    echo "  🚀 Running Ours (Ratio=0.5)..."
    export EE_ENABLED=1
    export AOP_ENABLED=1
    export EE_LAYER=12
    export EE_CLASSIFIER_PATH="$CLASSIFIER_PATH"
    export EE_THRESHOLD=0.3  # fixed threshold to measure the theoretical speedup

    LOG_OURS=$(CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_benchmark_V5.py \
        --model_name "$MODEL_PATH" \
        --dataset_config "$CONFIG_PATH" \
        --per_device_eval_batch_size 64 \
        --dataloader_num_workers 4 \
        2>>"$RESULT_LOG")

    LAT_OURS=$(echo "$LOG_OURS" | extract_latency)

    # 3. Compute speedup and record the row
    if [ -z "$LAT_BASE" ] || [ -z "$LAT_OURS" ]; then
        echo "  ❌ Error: Failed to parse latency for $DATASET." | tee -a "$RESULT_LOG"
        echo "$DATASET,ERROR,ERROR,0,0" >> "$RESULT_CSV"
    else
        # Floating-point division via python (more robust than shell bc);
        # report 'inf' instead of crashing when the parsed latency is zero.
        SPEEDUP=$(python -c "b=float('$LAT_BASE'); o=float('$LAT_OURS'); print(f'{b/o:.2f}' if o else 'inf')")

        echo "  ✅ Result: Base=${LAT_BASE}ms | Ours=${LAT_OURS}ms | Speedup=${SPEEDUP}x"

        # NOTE(review): the Exit_Rate column is a hard-coded 0.5, not a
        # measured value — confirm this matches the fixed EE_THRESHOLD run.
        echo "$DATASET,$LAT_BASE,$LAT_OURS,$SPEEDUP,0.5" >> "$RESULT_CSV"
    fi
done

echo ""
echo "================================================================"
echo "🎉 Batch Benchmark Completed!"
echo "📄 Final Data: $RESULT_CSV"
echo "================================================================"

# Pretty-print the final table to the terminal
echo ""
column -s, -t "$RESULT_CSV"
experiments/public/eval/scan_threshold.sh
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# =============================================================================
# Threshold Scanning Script for Early Exit Classifier
# Sweeps EE_THRESHOLD over a list of values, runs the classifier-based
# early-exit evaluation for each value, parses the per-dataset score JSONs
# and the exit-rate line from the run log, and aggregates everything into
# one summary table.
# =============================================================================
set -o pipefail

echo "==> Environment"
echo "conda location: $(which conda)"
echo "Python location: $(which python)"
echo "Python version: $(python --version)"
echo ""

cd VLM2Vec/ || exit

# ==============================================================================
# Configuration
# ==============================================================================
CUDA_VISIBLE_DEVICES="0"
BATCH_SIZE=32

# [AOP] token pruning — disabled to match the baseline
export AOP_ENABLED=0

# [EE] early exit via classifier
export EE_ENABLED=1
export EE_LAYER=12
export EE_METHOD=classifier
export EE_DEBUG_MODE=0  # debug mode off for speed
export EE_TOPK=200

# Classifier checkpoint.
# NOTE(review): the literal "~" path component is kept as-is — confirm the
# directory is really named "~" (a quoted "~" is never tilde-expanded).
export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/~/experiments/checkpoint-600"

# Dataset configuration.
# Use $HOME explicitly: a "~" inside double quotes is NOT tilde-expanded by
# the shell and would otherwise be passed through to python as a literal "~".
MODALITIES=("image_retrival")
DATA_BASEDIR="$HOME/data/vlm2vec_eval/MMEB-V2"

# Model checkpoint
MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_multi_layer_12_-1_0.1_0.9/checkpoint-5000"
MODEL_NAME="qwen2_5_vl"

# Threshold sweep — densely sample the 0.95-1.0 interval
THRESHOLDS=(0.95 0.96 0.97 0.975 0.98 0.985 0.99 0.995 1.0)

# Output base directory
OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_Classifier_ThresholdScan"

echo "================================================="
echo "🚀 Threshold Scanning for Early Exit Classifier"
echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
echo "🚀 Threshold Range: ${THRESHOLDS[*]}"
echo "🚀 Total Runs: ${#THRESHOLDS[@]}"
echo "================================================="
echo ""

# Summary file collecting one row per (threshold, dataset)
SUMMARY_FILE="${OUTPUT_BASEDIR}/threshold_scan_summary.txt"
mkdir -p "$OUTPUT_BASEDIR"

# Write the summary header
echo "=================================================" > "$SUMMARY_FILE"
echo "Threshold Scanning Results" >> "$SUMMARY_FILE"
echo "Date: $(date)" >> "$SUMMARY_FILE"
echo "Classifier: $EE_CLASSIFIER_PATH" >> "$SUMMARY_FILE"
echo "=================================================" >> "$SUMMARY_FILE"
echo "" >> "$SUMMARY_FILE"
printf "%-12s %-15s %-10s %-10s %-10s %-10s\n" "Threshold" "Dataset" "Hit@1" "Hit@5" "Hit@10" "Exit Rate" >> "$SUMMARY_FILE"
echo "--------------------------------------------------------------------------------" >> "$SUMMARY_FILE"

# Scan every threshold value
for THRESHOLD in "${THRESHOLDS[@]}"; do
    echo ""
    echo "=========================================="
    echo "Testing Threshold: $THRESHOLD"
    echo "=========================================="

    # Threshold for the current run
    export EE_THRESHOLD=$THRESHOLD

    # Per-threshold output directory
    OUTPUT_PATH="${OUTPUT_BASEDIR}/threshold_${THRESHOLD}"

    for MODALITY in "${MODALITIES[@]}"; do
        DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"

        # Per-modality output directory
        MODALITY_OUTPUT_PATH="${OUTPUT_PATH}/${MODALITY}"
        mkdir -p "$MODALITY_OUTPUT_PATH"

        echo "  - Running $MODALITY with threshold=$THRESHOLD..."

        # Run the evaluation, teeing the full output into the run log
        CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier.py \
            --pooling eos \
            --normalize true \
            --per_device_eval_batch_size $BATCH_SIZE \
            --model_backbone "$MODEL_NAME" \
            --model_name "$MODEL_CHECKPOINT" \
            --dataset_config "$DATA_CONFIG_PATH" \
            --encode_output_path "$MODALITY_OUTPUT_PATH" \
            --data_basedir "$DATA_BASEDIR" 2>&1 | tee "${OUTPUT_PATH}/${MODALITY}_log.txt"

        LOG_FILE="${OUTPUT_PATH}/${MODALITY}_log.txt"

        # Parse per-dataset results out of the log and score JSONs
        if [ -f "$LOG_FILE" ]; then
            # Inline python does the JSON/regex parsing; its stdout is the
            # summary rows. The heredoc delimiter must stay at column 0.
            python3 << EOF >> "$SUMMARY_FILE"
import json
import re
import sys

log_file = "${LOG_FILE}"
threshold = ${THRESHOLD}

try:
    with open(log_file, 'r') as f:
        content = f.read()

    # Parse the "Early Exit Stats: Exit=<n>/<total>" line from the run log
    exit_match = re.search(r'Early Exit Stats: Exit=(\d+)/(\d+)', content)
    if exit_match:
        exit_count = int(exit_match.group(1))
        total_count = int(exit_match.group(2))
        exit_rate = exit_count / total_count if total_count > 0 else 0.0
    else:
        exit_rate = -1.0

    # Collect every per-dataset score file produced by this run
    import os
    import glob
    result_files = glob.glob("${OUTPUT_PATH}/${MODALITY}/*_score_earlyexit.json")

    for result_file in result_files:
        dataset_name = os.path.basename(result_file).replace("_score_earlyexit.json", "")
        with open(result_file, 'r') as rf:
            results = json.load(rf)

        hit1 = results.get('hit@1', -1)
        hit5 = results.get('hit@5', -1)
        hit10 = results.get('hit@10', -1)

        print(f"{threshold:<12.3f} {dataset_name:<15s} {hit1:<10.4f} {hit5:<10.4f} {hit10:<10.4f} {exit_rate:<10.2%}")

except Exception as e:
    print(f"{threshold:<12.3f} {'ERROR':<15s} {'-':<10s} {'-':<10s} {'-':<10s} {'-':<10s}", file=sys.stderr)
    print(f"Error: {e}", file=sys.stderr)
EOF
        fi
    done

    echo "  ✓ Threshold $THRESHOLD completed"
done

# Summary footer
echo "" >> "$SUMMARY_FILE"
echo "=================================================" >> "$SUMMARY_FILE"
echo "Scan completed at $(date)" >> "$SUMMARY_FILE"
echo "=================================================" >> "$SUMMARY_FILE"

echo ""
echo "=========================================="
echo "✅ All threshold scanning completed!"
echo "=========================================="
echo ""
echo "📊 Results summary saved to:"
echo "  $SUMMARY_FILE"
echo ""
echo "📁 Detailed results in:"
echo "  $OUTPUT_BASEDIR"
echo ""

# Display the aggregated results
echo "📈 Quick Summary:"
cat "$SUMMARY_FILE"
experiments/public/eval/visdoc_retrival.yaml
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Visual-document retrieval evaluation datasets
# (ViDoRe / VisRAG / ViDoSeek / MMLongBench benchmarks).
# Each entry selects a dataset parser, the dataset name, the image root
# directory and the evaluation type.
ViDoRe_arxivqa:
  dataset_parser: vidore
  dataset_name: ViDoRe_arxivqa
  image_root: visdoc-tasks/ViDoRe_arxivqa
  eval_type: global
ViDoRe_docvqa:
  dataset_parser: vidore
  dataset_name: ViDoRe_docvqa
  image_root: visdoc-tasks/ViDoRe_docvqa
  eval_type: global
ViDoRe_infovqa:
  dataset_parser: vidore
  dataset_name: ViDoRe_infovqa
  image_root: visdoc-tasks/ViDoRe_infovqa
  eval_type: global
ViDoRe_tabfquad:
  dataset_parser: vidore
  dataset_name: ViDoRe_tabfquad
  image_root: visdoc-tasks/ViDoRe_tabfquad
  eval_type: global
ViDoRe_tatdqa:
  dataset_parser: vidore
  dataset_name: ViDoRe_tatdqa
  image_root: visdoc-tasks/ViDoRe_tatdqa
  eval_type: global
ViDoRe_shiftproject:
  dataset_parser: vidore
  dataset_name: ViDoRe_shiftproject
  image_root: visdoc-tasks/ViDoRe_shiftproject
  eval_type: global
ViDoRe_syntheticDocQA_artificial_intelligence:
  dataset_parser: vidore
  dataset_name: ViDoRe_syntheticDocQA_artificial_intelligence
  image_root: visdoc-tasks/ViDoRe_syntheticDocQA_artificial_intelligence
  eval_type: global
ViDoRe_syntheticDocQA_energy:
  dataset_parser: vidore
  dataset_name: ViDoRe_syntheticDocQA_energy
  image_root: visdoc-tasks/ViDoRe_syntheticDocQA_energy
  eval_type: global
ViDoRe_syntheticDocQA_government_reports:
  dataset_parser: vidore
  dataset_name: ViDoRe_syntheticDocQA_government_reports
  image_root: visdoc-tasks/ViDoRe_syntheticDocQA_government_reports
  eval_type: global
ViDoRe_syntheticDocQA_healthcare_industry:
  dataset_parser: vidore
  dataset_name: ViDoRe_syntheticDocQA_healthcare_industry
  image_root: visdoc-tasks/ViDoRe_syntheticDocQA_healthcare_industry
  eval_type: global


# ViDoRe v2 / multilingual benchmarks — currently disabled.
#ViDoRe_esg_reports_human_labeled_v2:
# dataset_parser: vidore
# dataset_name: ViDoRe_esg_reports_human_labeled_v2
# image_root: visdoc-tasks/esg_reports_human_labeled_v2
# eval_type: global
#ViDoRe_biomedical_lectures_v2:
# dataset_parser: vidore
# dataset_name: ViDoRe_biomedical_lectures_v2
# image_root: visdoc-tasks/biomedical_lectures_v2
# eval_type: global
#ViDoRe_biomedical_lectures_v2_multilingual:
# dataset_parser: vidore
# dataset_name: ViDoRe_biomedical_lectures_v2_multilingual
# image_root: visdoc-tasks/biomedical_lectures_v2_multilingual
# eval_type: global
#ViDoRe_economics_reports_v2:
# dataset_parser: vidore
# dataset_name: ViDoRe_economics_reports_v2
# image_root: visdoc-tasks/economics_reports_v2
# eval_type: global
#ViDoRe_economics_reports_v2_multilingual:
# dataset_parser: vidore
# dataset_name: ViDoRe_economics_reports_v2_multilingual
# image_root: visdoc-tasks/economics_reports_v2_multilingual
# eval_type: global
#ViDoRe_esg_reports_v2:
# dataset_parser: vidore
# dataset_name: ViDoRe_esg_reports_v2
# image_root: visdoc-tasks/esg_reports_v2
# eval_type: global
#ViDoRe_esg_reports_v2_multilingual:
# dataset_parser: vidore
# dataset_name: ViDoRe_esg_reports_v2_multilingual
# image_root: visdoc-tasks/esg_reports_v2_multilingual
# eval_type: global


# VisRAG retrieval benchmarks
VisRAG_ArxivQA:
  dataset_parser: visrag
  dataset_name: VisRAG_ArxivQA
  image_root: visdoc-tasks/VisRAG_ArxivQA
  eval_type: global
VisRAG_ChartQA:
  dataset_parser: visrag
  dataset_name: VisRAG_ChartQA
  image_root: visdoc-tasks/VisRAG_ChartQA
  eval_type: global
VisRAG_MP-DocVQA:
  dataset_parser: visrag
  dataset_name: VisRAG_MP-DocVQA
  image_root: visdoc-tasks/VisRAG_MP-DocVQA
  eval_type: global
VisRAG_SlideVQA:
  dataset_parser: visrag
  dataset_name: VisRAG_SlideVQA
  image_root: visdoc-tasks/VisRAG_SlideVQA
  eval_type: global
VisRAG_InfoVQA:
  dataset_parser: visrag
  dataset_name: VisRAG_InfoVQA
  image_root: visdoc-tasks/VisRAG_InfoVQA
  eval_type: global
VisRAG_PlotQA:
  dataset_parser: visrag
  dataset_name: VisRAG_PlotQA
  image_root: visdoc-tasks/VisRAG_PlotQA
  eval_type: global

# ViDoSeek benchmarks (page-level and document-level)
ViDoSeek-page:
  dataset_parser: vidore
  dataset_name: ViDoSeek-page
  image_root: visdoc-tasks/ViDoSeek-page
  eval_type: global
ViDoSeek-doc:
  dataset_parser: vidore
  dataset_name: ViDoSeek-doc
  image_root: visdoc-tasks/ViDoSeek-doc
  eval_type: global

# MMLongBench benchmarks (document-level and page-level)
MMLongBench-doc:
  dataset_parser: vidore
  dataset_name: MMLongBench-doc
  image_root: visdoc-tasks/MMLongBench-doc
  eval_type: global
MMLongBench-page:
  dataset_parser: vidore
  dataset_name: MMLongBench-page
  image_root: visdoc-tasks/MMLongBench-page
  eval_type: global
experiments/public/train/train_alltasks.yaml
ADDED
|
@@ -0,0 +1,395 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ImageNet_1K:
|
| 2 |
+
# dataset_parser: mmeb
|
| 3 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 4 |
+
# subset_name: ImageNet_1K
|
| 5 |
+
# dataset_split: original
|
| 6 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 7 |
+
# num_sample_per_subset: 100000
|
| 8 |
+
# weight: 1
|
| 9 |
+
# N24News:
|
| 10 |
+
# dataset_parser: mmeb
|
| 11 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 12 |
+
# subset_name: N24News
|
| 13 |
+
# dataset_split: original
|
| 14 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 15 |
+
# num_sample_per_subset: 50000
|
| 16 |
+
# weight: 1
|
| 17 |
+
# HatefulMemes:
|
| 18 |
+
# dataset_parser: mmeb
|
| 19 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 20 |
+
# subset_name: HatefulMemes
|
| 21 |
+
# dataset_split: original
|
| 22 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 23 |
+
# num_sample_per_subset: 10000
|
| 24 |
+
# weight: 0.5
|
| 25 |
+
# VOC2007:
|
| 26 |
+
# dataset_parser: mmeb
|
| 27 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 28 |
+
# subset_name: VOC2007
|
| 29 |
+
# dataset_split: original
|
| 30 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 31 |
+
# num_sample_per_subset: 10000
|
| 32 |
+
# weight: 0.5
|
| 33 |
+
# SUN397:
|
| 34 |
+
# dataset_parser: mmeb
|
| 35 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 36 |
+
# subset_name: SUN397
|
| 37 |
+
# dataset_split: original
|
| 38 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 39 |
+
# num_sample_per_subset: 20000
|
| 40 |
+
# weight: 0.5
|
| 41 |
+
# OK-VQA:
|
| 42 |
+
# dataset_parser: mmeb
|
| 43 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 44 |
+
# subset_name: OK-VQA
|
| 45 |
+
# dataset_split: original
|
| 46 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 47 |
+
# num_sample_per_subset: 10000
|
| 48 |
+
# weight: 0.5
|
| 49 |
+
# A-OKVQA:
|
| 50 |
+
# dataset_parser: mmeb
|
| 51 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 52 |
+
# subset_name: A-OKVQA
|
| 53 |
+
# dataset_split: original
|
| 54 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 55 |
+
# num_sample_per_subset: 20000
|
| 56 |
+
# weight: 0.5
|
| 57 |
+
# DocVQA:
|
| 58 |
+
# dataset_parser: mmeb
|
| 59 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 60 |
+
# subset_name: DocVQA
|
| 61 |
+
# dataset_split: original
|
| 62 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 63 |
+
# num_sample_per_subset: 40000
|
| 64 |
+
# weight: 1
|
| 65 |
+
# InfographicsVQA:
|
| 66 |
+
# dataset_parser: mmeb
|
| 67 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 68 |
+
# subset_name: InfographicsVQA
|
| 69 |
+
# dataset_split: original
|
| 70 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 71 |
+
# num_sample_per_subset: 25000
|
| 72 |
+
# weight: 0.5
|
| 73 |
+
# ChartQA:
|
| 74 |
+
# dataset_parser: mmeb
|
| 75 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 76 |
+
# subset_name: ChartQA
|
| 77 |
+
# dataset_split: original
|
| 78 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 79 |
+
# num_sample_per_subset: 28000
|
| 80 |
+
# weight: 0.5
|
| 81 |
+
# Visual7W:
|
| 82 |
+
# dataset_parser: mmeb
|
| 83 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 84 |
+
# subset_name: Visual7W
|
| 85 |
+
# dataset_split: original
|
| 86 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 87 |
+
# num_sample_per_subset: 70000
|
| 88 |
+
# weight: 1
|
| 89 |
+
# VisDial:
|
| 90 |
+
# dataset_parser: mmeb
|
| 91 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 92 |
+
# subset_name: VisDial
|
| 93 |
+
# dataset_split: original
|
| 94 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 95 |
+
# num_sample_per_subset: 130000
|
| 96 |
+
# weight: 1
|
| 97 |
+
# CIRR:
|
| 98 |
+
# dataset_parser: mmeb
|
| 99 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 100 |
+
# subset_name: CIRR
|
| 101 |
+
# dataset_split: original
|
| 102 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 103 |
+
# num_sample_per_subset: 30000
|
| 104 |
+
# weight: 0.5
|
| 105 |
+
# VisualNews_t2i:
|
| 106 |
+
# dataset_parser: mmeb
|
| 107 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 108 |
+
# subset_name: VisualNews_t2i
|
| 109 |
+
# dataset_split: original
|
| 110 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 111 |
+
# num_sample_per_subset: 100000
|
| 112 |
+
# weight: 1
|
| 113 |
+
# VisualNews_i2t:
|
| 114 |
+
# dataset_parser: mmeb
|
| 115 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 116 |
+
# subset_name: VisualNews_i2t
|
| 117 |
+
# dataset_split: original
|
| 118 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 119 |
+
# num_sample_per_subset: 100000
|
| 120 |
+
# weight: 1
|
| 121 |
+
# MSCOCO_t2i:
|
| 122 |
+
# dataset_parser: mmeb
|
| 123 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 124 |
+
# subset_name: MSCOCO_t2i
|
| 125 |
+
# dataset_split: original
|
| 126 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 127 |
+
# num_sample_per_subset: 100000
|
| 128 |
+
# weight: 1
|
| 129 |
+
# MSCOCO_i2t:
|
| 130 |
+
# dataset_parser: mmeb
|
| 131 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 132 |
+
# subset_name: MSCOCO_i2t
|
| 133 |
+
# dataset_split: original
|
| 134 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 135 |
+
# num_sample_per_subset: 120000
|
| 136 |
+
# weight: 1
|
| 137 |
+
# NIGHTS:
|
| 138 |
+
# dataset_parser: mmeb
|
| 139 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 140 |
+
# subset_name: NIGHTS
|
| 141 |
+
# dataset_split: original
|
| 142 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 143 |
+
# num_sample_per_subset: 20000
|
| 144 |
+
# weight: 0.5
|
| 145 |
+
# WebQA:
|
| 146 |
+
# dataset_parser: mmeb
|
| 147 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 148 |
+
# subset_name: WebQA
|
| 149 |
+
# dataset_split: original
|
| 150 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 151 |
+
# num_sample_per_subset: 20000
|
| 152 |
+
# weight: 0.5
|
| 153 |
+
# MSCOCO:
|
| 154 |
+
# dataset_parser: mmeb
|
| 155 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 156 |
+
# subset_name: MSCOCO
|
| 157 |
+
# dataset_split: original
|
| 158 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 159 |
+
# num_sample_per_subset: 100000
|
| 160 |
+
# weight: 1
|
| 161 |
+
|
| 162 |
+
# colpali_train_set:
|
| 163 |
+
# dataset_parser: vidore
|
| 164 |
+
# dataset_name: vidore/colpali_train_set
|
| 165 |
+
# weight: 10
|
| 166 |
+
# visrag-indomain:
|
| 167 |
+
# dataset_parser: visrag
|
| 168 |
+
# dataset_name: openbmb/VisRAG-Ret-Train-In-domain-data
|
| 169 |
+
# global_dataset_name: VisRAG-Indomain-data
|
| 170 |
+
# weight: 12
|
| 171 |
+
|
| 172 |
+
# video_caption_300k:
|
| 173 |
+
# dataset_parser: llavahound_caption
|
| 174 |
+
# dataset_name: video_caption_300k
|
| 175 |
+
# dataset_path: vlm2vec_train/train_video_and_instruction/video_instruction/train/sft/video_caption_300k.jsonl
|
| 176 |
+
# video_frame_basedir: vlm2vec_train/train_video_and_instruction/train_300k
|
| 177 |
+
# weight: 5
|
| 178 |
+
# num_rows: 300_000
|
| 179 |
+
# num_frames: 8
|
| 180 |
+
# data_mode: caption_retrieval
|
| 181 |
+
# video_caption_300k-video:
|
| 182 |
+
# dataset_parser: llavahound_caption
|
| 183 |
+
# dataset_name: video_caption_300k
|
| 184 |
+
# dataset_path: vlm2vec_train/train_video_and_instruction/video_instruction/train/sft/video_caption_300k.jsonl
|
| 185 |
+
# video_frame_basedir: vlm2vec_train/train_video_and_instruction/train_300k
|
| 186 |
+
# weight: 5
|
| 187 |
+
# num_rows: 300_000
|
| 188 |
+
# num_frames: 8
|
| 189 |
+
# data_mode: video_retrieval
|
| 190 |
+
# video_qa_240k:
|
| 191 |
+
# dataset_parser: llavahound_qa
|
| 192 |
+
# dataset_name: video_qa_240k
|
| 193 |
+
# dataset_path: vlm2vec_train/train_video_and_instruction/video_instruction/train/sft/video_240k_caption_15k.jsonl
|
| 194 |
+
# video_frame_basedir: vlm2vec_train/train_video_and_instruction/train_300k
|
| 195 |
+
# weight: 5
|
| 196 |
+
# num_rows: 240_000
|
| 197 |
+
# num_frames: 8
|
| 198 |
+
|
| 199 |
+
ImageNet_1K:
|
| 200 |
+
dataset_parser: mmeb
|
| 201 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 202 |
+
subset_name: ImageNet_1K
|
| 203 |
+
dataset_split: original
|
| 204 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 205 |
+
num_sample_per_subset: 100000
|
| 206 |
+
weight: 1
|
| 207 |
+
N24News:
|
| 208 |
+
dataset_parser: mmeb
|
| 209 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 210 |
+
subset_name: N24News
|
| 211 |
+
dataset_split: original
|
| 212 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 213 |
+
num_sample_per_subset: 50000
|
| 214 |
+
weight: 1
|
| 215 |
+
HatefulMemes:
|
| 216 |
+
dataset_parser: mmeb
|
| 217 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 218 |
+
subset_name: HatefulMemes
|
| 219 |
+
dataset_split: original
|
| 220 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 221 |
+
num_sample_per_subset: 10000
|
| 222 |
+
weight: 0.5
|
| 223 |
+
VOC2007:
|
| 224 |
+
dataset_parser: mmeb
|
| 225 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 226 |
+
subset_name: VOC2007
|
| 227 |
+
dataset_split: original
|
| 228 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 229 |
+
num_sample_per_subset: 10000
|
| 230 |
+
weight: 0.5
|
| 231 |
+
SUN397:
|
| 232 |
+
dataset_parser: mmeb
|
| 233 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 234 |
+
subset_name: SUN397
|
| 235 |
+
dataset_split: original
|
| 236 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 237 |
+
num_sample_per_subset: 20000
|
| 238 |
+
weight: 0.5
|
| 239 |
+
OK-VQA:
|
| 240 |
+
dataset_parser: mmeb
|
| 241 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 242 |
+
subset_name: OK-VQA
|
| 243 |
+
dataset_split: original
|
| 244 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 245 |
+
num_sample_per_subset: 10000
|
| 246 |
+
weight: 0.5
|
| 247 |
+
A-OKVQA:
|
| 248 |
+
dataset_parser: mmeb
|
| 249 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 250 |
+
subset_name: A-OKVQA
|
| 251 |
+
dataset_split: original
|
| 252 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 253 |
+
num_sample_per_subset: 20000
|
| 254 |
+
weight: 0.5
|
| 255 |
+
DocVQA:
|
| 256 |
+
dataset_parser: mmeb
|
| 257 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 258 |
+
subset_name: DocVQA
|
| 259 |
+
dataset_split: original
|
| 260 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 261 |
+
num_sample_per_subset: 40000
|
| 262 |
+
weight: 1
|
| 263 |
+
InfographicsVQA:
|
| 264 |
+
dataset_parser: mmeb
|
| 265 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 266 |
+
subset_name: InfographicsVQA
|
| 267 |
+
dataset_split: original
|
| 268 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 269 |
+
num_sample_per_subset: 25000
|
| 270 |
+
weight: 0.5
|
| 271 |
+
ChartQA:
|
| 272 |
+
dataset_parser: mmeb
|
| 273 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 274 |
+
subset_name: ChartQA
|
| 275 |
+
dataset_split: original
|
| 276 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 277 |
+
num_sample_per_subset: 28000
|
| 278 |
+
weight: 0.5
|
| 279 |
+
Visual7W:
|
| 280 |
+
dataset_parser: mmeb
|
| 281 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 282 |
+
subset_name: Visual7W
|
| 283 |
+
dataset_split: original
|
| 284 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 285 |
+
num_sample_per_subset: 70000
|
| 286 |
+
weight: 1
|
| 287 |
+
VisDial:
|
| 288 |
+
dataset_parser: mmeb
|
| 289 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 290 |
+
subset_name: VisDial
|
| 291 |
+
dataset_split: original
|
| 292 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 293 |
+
num_sample_per_subset: 130000
|
| 294 |
+
weight: 1
|
| 295 |
+
CIRR:
|
| 296 |
+
dataset_parser: mmeb
|
| 297 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 298 |
+
subset_name: CIRR
|
| 299 |
+
dataset_split: original
|
| 300 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 301 |
+
num_sample_per_subset: 30000
|
| 302 |
+
weight: 0.5
|
| 303 |
+
VisualNews_t2i:
|
| 304 |
+
dataset_parser: mmeb
|
| 305 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 306 |
+
subset_name: VisualNews_t2i
|
| 307 |
+
dataset_split: original
|
| 308 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 309 |
+
num_sample_per_subset: 100000
|
| 310 |
+
weight: 1
|
| 311 |
+
VisualNews_i2t:
|
| 312 |
+
dataset_parser: mmeb
|
| 313 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 314 |
+
subset_name: VisualNews_i2t
|
| 315 |
+
dataset_split: original
|
| 316 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 317 |
+
num_sample_per_subset: 100000
|
| 318 |
+
weight: 1
|
| 319 |
+
MSCOCO_t2i:
|
| 320 |
+
dataset_parser: mmeb
|
| 321 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 322 |
+
subset_name: MSCOCO_t2i
|
| 323 |
+
dataset_split: original
|
| 324 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 325 |
+
num_sample_per_subset: 100000
|
| 326 |
+
weight: 1
|
| 327 |
+
MSCOCO_i2t:
|
| 328 |
+
dataset_parser: mmeb
|
| 329 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 330 |
+
subset_name: MSCOCO_i2t
|
| 331 |
+
dataset_split: original
|
| 332 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 333 |
+
num_sample_per_subset: 120000
|
| 334 |
+
weight: 1
|
| 335 |
+
NIGHTS:
|
| 336 |
+
dataset_parser: mmeb
|
| 337 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 338 |
+
subset_name: NIGHTS
|
| 339 |
+
dataset_split: original
|
| 340 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 341 |
+
num_sample_per_subset: 20000
|
| 342 |
+
weight: 0.5
|
| 343 |
+
WebQA:
|
| 344 |
+
dataset_parser: mmeb
|
| 345 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 346 |
+
subset_name: WebQA
|
| 347 |
+
dataset_split: original
|
| 348 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 349 |
+
num_sample_per_subset: 20000
|
| 350 |
+
weight: 0.5
|
| 351 |
+
MSCOCO:
|
| 352 |
+
dataset_parser: mmeb
|
| 353 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 354 |
+
subset_name: MSCOCO
|
| 355 |
+
dataset_split: original
|
| 356 |
+
image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
|
| 357 |
+
num_sample_per_subset: 100000
|
| 358 |
+
weight: 1
|
| 359 |
+
|
| 360 |
+
colpali_train_set:
|
| 361 |
+
dataset_parser: vidore
|
| 362 |
+
dataset_name: vidore/colpali_train_set
|
| 363 |
+
weight: 10
|
| 364 |
+
visrag-indomain:
|
| 365 |
+
dataset_parser: visrag
|
| 366 |
+
dataset_name: openbmb/VisRAG-Ret-Train-In-domain-data
|
| 367 |
+
global_dataset_name: VisRAG-Indomain-data
|
| 368 |
+
weight: 12
|
| 369 |
+
|
| 370 |
+
video_caption_300k:
|
| 371 |
+
dataset_parser: llavahound_caption
|
| 372 |
+
dataset_name: video_caption_300k
|
| 373 |
+
dataset_path: /home/v-menggao/code/data/codenew/code/data/train_video_and_instruction/video_instruction/train/sft/video_caption_300k.jsonl
|
| 374 |
+
video_frame_basedir: /home/v-menggao/code/data/codenew/code/data/train_video_and_instruction/train_300k_extracted
|
| 375 |
+
weight: 5
|
| 376 |
+
num_rows: 300_000
|
| 377 |
+
num_frames: 8
|
| 378 |
+
data_mode: caption_retrieval
|
| 379 |
+
video_caption_300k-video:
|
| 380 |
+
dataset_parser: llavahound_caption
|
| 381 |
+
dataset_name: video_caption_300k
|
| 382 |
+
dataset_path: /home/v-menggao/code/data/codenew/code/data/train_video_and_instruction/video_instruction/train/sft/video_caption_300k.jsonl
|
| 383 |
+
video_frame_basedir: /home/v-menggao/code/data/codenew/code/data/train_video_and_instruction/train_300k_extracted
|
| 384 |
+
weight: 5
|
| 385 |
+
num_rows: 300_000
|
| 386 |
+
num_frames: 8
|
| 387 |
+
data_mode: video_retrieval
|
| 388 |
+
video_qa_240k:
|
| 389 |
+
dataset_parser: llavahound_qa
|
| 390 |
+
dataset_name: video_qa_240k
|
| 391 |
+
dataset_path: /home/v-menggao/code/data/codenew/code/data/train_video_and_instruction/video_instruction/train/sft/video_240k_caption_15k.jsonl
|
| 392 |
+
video_frame_basedir: /home/v-menggao/code/data/codenew/code/data/train_video_and_instruction/train_300k_extracted
|
| 393 |
+
weight: 5
|
| 394 |
+
num_rows: 240_000
|
| 395 |
+
num_frames: 8
|
experiments/public/train/train_image.yaml
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ImageNet_1K:
|
| 2 |
+
# dataset_parser: mmeb
|
| 3 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 4 |
+
# subset_name: ImageNet_1K
|
| 5 |
+
# dataset_split: original
|
| 6 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 7 |
+
# num_sample_per_subset: 100000
|
| 8 |
+
# weight: 1
|
| 9 |
+
# N24News:
|
| 10 |
+
# dataset_parser: mmeb
|
| 11 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 12 |
+
# subset_name: N24News
|
| 13 |
+
# dataset_split: original
|
| 14 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 15 |
+
# num_sample_per_subset: 50000
|
| 16 |
+
# weight: 1
|
| 17 |
+
# HatefulMemes:
|
| 18 |
+
# dataset_parser: mmeb
|
| 19 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 20 |
+
# subset_name: HatefulMemes
|
| 21 |
+
# dataset_split: original
|
| 22 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 23 |
+
# num_sample_per_subset: 10000
|
| 24 |
+
# weight: 1
|
| 25 |
+
# VOC2007:
|
| 26 |
+
# dataset_parser: mmeb
|
| 27 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 28 |
+
# subset_name: VOC2007
|
| 29 |
+
# dataset_split: original
|
| 30 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 31 |
+
# num_sample_per_subset: 10000
|
| 32 |
+
# weight: 1
|
| 33 |
+
# SUN397:
|
| 34 |
+
# dataset_parser: mmeb
|
| 35 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 36 |
+
# subset_name: SUN397
|
| 37 |
+
# dataset_split: original
|
| 38 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 39 |
+
# num_sample_per_subset: 20000
|
| 40 |
+
# weight: 1
|
| 41 |
+
# OK-VQA:
|
| 42 |
+
# dataset_parser: mmeb
|
| 43 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 44 |
+
# subset_name: OK-VQA
|
| 45 |
+
# dataset_split: original
|
| 46 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 47 |
+
# num_sample_per_subset: 10000
|
| 48 |
+
# weight: 1
|
| 49 |
+
# A-OKVQA:
|
| 50 |
+
# dataset_parser: mmeb
|
| 51 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 52 |
+
# subset_name: A-OKVQA
|
| 53 |
+
# dataset_split: original
|
| 54 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 55 |
+
# num_sample_per_subset: 20000
|
| 56 |
+
# weight: 1
|
| 57 |
+
# DocVQA:
|
| 58 |
+
# dataset_parser: mmeb
|
| 59 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 60 |
+
# subset_name: DocVQA
|
| 61 |
+
# dataset_split: original
|
| 62 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 63 |
+
# num_sample_per_subset: 40000
|
| 64 |
+
# weight: 1
|
| 65 |
+
# InfographicsVQA:
|
| 66 |
+
# dataset_parser: mmeb
|
| 67 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 68 |
+
# subset_name: InfographicsVQA
|
| 69 |
+
# dataset_split: original
|
| 70 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 71 |
+
# num_sample_per_subset: 25000
|
| 72 |
+
# weight: 1
|
| 73 |
+
# ChartQA:
|
| 74 |
+
# dataset_parser: mmeb
|
| 75 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 76 |
+
# subset_name: ChartQA
|
| 77 |
+
# dataset_split: original
|
| 78 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 79 |
+
# num_sample_per_subset: 28000
|
| 80 |
+
# weight: 1
|
| 81 |
+
# Visual7W:
|
| 82 |
+
# dataset_parser: mmeb
|
| 83 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 84 |
+
# subset_name: Visual7W
|
| 85 |
+
# dataset_split: original
|
| 86 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 87 |
+
# num_sample_per_subset: 70000
|
| 88 |
+
# weight: 1
|
| 89 |
+
VisDial:
|
| 90 |
+
dataset_parser: mmeb
|
| 91 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 92 |
+
subset_name: VisDial
|
| 93 |
+
dataset_split: original
|
| 94 |
+
image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
|
| 95 |
+
num_sample_per_subset: 130000
|
| 96 |
+
weight: 1
|
| 97 |
+
CIRR:
|
| 98 |
+
dataset_parser: mmeb
|
| 99 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 100 |
+
subset_name: CIRR
|
| 101 |
+
dataset_split: original
|
| 102 |
+
image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
|
| 103 |
+
num_sample_per_subset: 30000
|
| 104 |
+
weight: 1
|
| 105 |
+
VisualNews_t2i:
|
| 106 |
+
dataset_parser: mmeb
|
| 107 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 108 |
+
subset_name: VisualNews_t2i
|
| 109 |
+
dataset_split: original
|
| 110 |
+
image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
|
| 111 |
+
num_sample_per_subset: 100000
|
| 112 |
+
weight: 1
|
| 113 |
+
VisualNews_i2t:
|
| 114 |
+
dataset_parser: mmeb
|
| 115 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 116 |
+
subset_name: VisualNews_i2t
|
| 117 |
+
dataset_split: original
|
| 118 |
+
image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
|
| 119 |
+
num_sample_per_subset: 100000
|
| 120 |
+
weight: 1
|
| 121 |
+
MSCOCO_t2i:
|
| 122 |
+
dataset_parser: mmeb
|
| 123 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 124 |
+
subset_name: MSCOCO_t2i
|
| 125 |
+
dataset_split: original
|
| 126 |
+
image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
|
| 127 |
+
num_sample_per_subset: 100000
|
| 128 |
+
weight: 1
|
| 129 |
+
MSCOCO_i2t:
|
| 130 |
+
dataset_parser: mmeb
|
| 131 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 132 |
+
subset_name: MSCOCO_i2t
|
| 133 |
+
dataset_split: original
|
| 134 |
+
image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
|
| 135 |
+
num_sample_per_subset: 120000
|
| 136 |
+
weight: 1
|
| 137 |
+
NIGHTS:
|
| 138 |
+
dataset_parser: mmeb
|
| 139 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 140 |
+
subset_name: NIGHTS
|
| 141 |
+
dataset_split: original
|
| 142 |
+
image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
|
| 143 |
+
num_sample_per_subset: 20000
|
| 144 |
+
weight: 1
|
| 145 |
+
WebQA:
|
| 146 |
+
dataset_parser: mmeb
|
| 147 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 148 |
+
subset_name: WebQA
|
| 149 |
+
dataset_split: original
|
| 150 |
+
image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
|
| 151 |
+
num_sample_per_subset: 20000
|
| 152 |
+
weight: 1
|
| 153 |
+
|
| 154 |
+
# MSCOCO:
|
| 155 |
+
# dataset_parser: mmeb
|
| 156 |
+
# dataset_name: TIGER-Lab/MMEB-train
|
| 157 |
+
# subset_name: MSCOCO
|
| 158 |
+
# dataset_split: original
|
| 159 |
+
# image_dir: vlm2vec_train/MMEB-train/image
|
| 160 |
+
# num_sample_per_subset: 100000
|
| 161 |
+
# weight: 1
|
experiments/public/train/train_image1.yaml
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ImageNet_1K:
|
| 2 |
+
dataset_parser: mmeb
|
| 3 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 4 |
+
subset_name: ImageNet_1K
|
| 5 |
+
dataset_split: original
|
| 6 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 7 |
+
num_sample_per_subset: 100000
|
| 8 |
+
weight: 1
|
| 9 |
+
N24News:
|
| 10 |
+
dataset_parser: mmeb
|
| 11 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 12 |
+
subset_name: N24News
|
| 13 |
+
dataset_split: original
|
| 14 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 15 |
+
num_sample_per_subset: 50000
|
| 16 |
+
weight: 1
|
| 17 |
+
HatefulMemes:
|
| 18 |
+
dataset_parser: mmeb
|
| 19 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 20 |
+
subset_name: HatefulMemes
|
| 21 |
+
dataset_split: original
|
| 22 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 23 |
+
num_sample_per_subset: 10000
|
| 24 |
+
weight: 1
|
| 25 |
+
VOC2007:
|
| 26 |
+
dataset_parser: mmeb
|
| 27 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 28 |
+
subset_name: VOC2007
|
| 29 |
+
dataset_split: original
|
| 30 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 31 |
+
num_sample_per_subset: 10000
|
| 32 |
+
weight: 1
|
| 33 |
+
SUN397:
|
| 34 |
+
dataset_parser: mmeb
|
| 35 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 36 |
+
subset_name: SUN397
|
| 37 |
+
dataset_split: original
|
| 38 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 39 |
+
num_sample_per_subset: 20000
|
| 40 |
+
weight: 1
|
| 41 |
+
OK-VQA:
|
| 42 |
+
dataset_parser: mmeb
|
| 43 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 44 |
+
subset_name: OK-VQA
|
| 45 |
+
dataset_split: original
|
| 46 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 47 |
+
num_sample_per_subset: 10000
|
| 48 |
+
weight: 1
|
| 49 |
+
A-OKVQA:
|
| 50 |
+
dataset_parser: mmeb
|
| 51 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 52 |
+
subset_name: A-OKVQA
|
| 53 |
+
dataset_split: original
|
| 54 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 55 |
+
num_sample_per_subset: 20000
|
| 56 |
+
weight: 1
|
| 57 |
+
DocVQA:
|
| 58 |
+
dataset_parser: mmeb
|
| 59 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 60 |
+
subset_name: DocVQA
|
| 61 |
+
dataset_split: original
|
| 62 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 63 |
+
num_sample_per_subset: 40000
|
| 64 |
+
weight: 1
|
| 65 |
+
InfographicsVQA:
|
| 66 |
+
dataset_parser: mmeb
|
| 67 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 68 |
+
subset_name: InfographicsVQA
|
| 69 |
+
dataset_split: original
|
| 70 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 71 |
+
num_sample_per_subset: 25000
|
| 72 |
+
weight: 1
|
| 73 |
+
ChartQA:
|
| 74 |
+
dataset_parser: mmeb
|
| 75 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 76 |
+
subset_name: ChartQA
|
| 77 |
+
dataset_split: original
|
| 78 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 79 |
+
num_sample_per_subset: 28000
|
| 80 |
+
weight: 1
|
| 81 |
+
Visual7W:
|
| 82 |
+
dataset_parser: mmeb
|
| 83 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 84 |
+
subset_name: Visual7W
|
| 85 |
+
dataset_split: original
|
| 86 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 87 |
+
num_sample_per_subset: 70000
|
| 88 |
+
weight: 1
|
| 89 |
+
VisDial:
|
| 90 |
+
dataset_parser: mmeb
|
| 91 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 92 |
+
subset_name: VisDial
|
| 93 |
+
dataset_split: original
|
| 94 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 95 |
+
num_sample_per_subset: 130000
|
| 96 |
+
weight: 1
|
| 97 |
+
CIRR:
|
| 98 |
+
dataset_parser: mmeb
|
| 99 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 100 |
+
subset_name: CIRR
|
| 101 |
+
dataset_split: original
|
| 102 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 103 |
+
num_sample_per_subset: 30000
|
| 104 |
+
weight: 1
|
| 105 |
+
VisualNews_t2i:
|
| 106 |
+
dataset_parser: mmeb
|
| 107 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 108 |
+
subset_name: VisualNews_t2i
|
| 109 |
+
dataset_split: original
|
| 110 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 111 |
+
num_sample_per_subset: 100000
|
| 112 |
+
weight: 1
|
| 113 |
+
VisualNews_i2t:
|
| 114 |
+
dataset_parser: mmeb
|
| 115 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 116 |
+
subset_name: VisualNews_i2t
|
| 117 |
+
dataset_split: original
|
| 118 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 119 |
+
num_sample_per_subset: 100000
|
| 120 |
+
weight: 1
|
| 121 |
+
MSCOCO_t2i:
|
| 122 |
+
dataset_parser: mmeb
|
| 123 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 124 |
+
subset_name: MSCOCO_t2i
|
| 125 |
+
dataset_split: original
|
| 126 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 127 |
+
num_sample_per_subset: 100000
|
| 128 |
+
weight: 1
|
| 129 |
+
MSCOCO_i2t:
|
| 130 |
+
dataset_parser: mmeb
|
| 131 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 132 |
+
subset_name: MSCOCO_i2t
|
| 133 |
+
dataset_split: original
|
| 134 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 135 |
+
num_sample_per_subset: 120000
|
| 136 |
+
weight: 1
|
| 137 |
+
NIGHTS:
|
| 138 |
+
dataset_parser: mmeb
|
| 139 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 140 |
+
subset_name: NIGHTS
|
| 141 |
+
dataset_split: original
|
| 142 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 143 |
+
num_sample_per_subset: 20000
|
| 144 |
+
weight: 1
|
| 145 |
+
WebQA:
|
| 146 |
+
dataset_parser: mmeb
|
| 147 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 148 |
+
subset_name: WebQA
|
| 149 |
+
dataset_split: original
|
| 150 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 151 |
+
num_sample_per_subset: 20000
|
| 152 |
+
weight: 1
|
| 153 |
+
MSCOCO:
|
| 154 |
+
dataset_parser: mmeb
|
| 155 |
+
dataset_name: TIGER-Lab/MMEB-train
|
| 156 |
+
subset_name: MSCOCO
|
| 157 |
+
dataset_split: original
|
| 158 |
+
image_dir: vlm2vec_train/MMEB-train/image
|
| 159 |
+
num_sample_per_subset: 100000
|
| 160 |
+
weight: 1
|
experiments/public/train/train_v2-gp.sh
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# #!/bin/bash
|
| 2 |
+
# # NOTE: replace ... with actual paths
|
| 3 |
+
# export LD_LIBRARY_PATH=...
|
| 4 |
+
# export PATH=...
|
| 5 |
+
# echo "conda location: $(which conda)"
|
| 6 |
+
# echo "Python location: $(which python)"
|
| 7 |
+
# echo "Python version: $(python --version)"
|
| 8 |
+
|
| 9 |
+
# export HF_DATASETS_CACHE=...
|
| 10 |
+
# export HF_HOME=...
|
| 11 |
+
# export WANDB_DISABLED=false
|
| 12 |
+
# export WANDB_PROJECT=...
|
| 13 |
+
# export WANDB_API_KEY=...
|
| 14 |
+
# export HUGGING_FACE_HUB_TOKEN=...
|
| 15 |
+
# export WANDB_PROJECT=...
|
| 16 |
+
# export WANDB_RUN_GROUP=...
|
| 17 |
+
# export EXP_NAME=Qwen2vl_2B.image+visdoc+video.autoresize.lora16.BS1024.IB64.GCq8p8.NormTemp002.lr5e5.step5kwarm100.8H100
|
| 18 |
+
|
| 19 |
+
# export WANDB_NAME=$EXP_NAME
|
| 20 |
+
# export EXP_DIR=.../$EXP_NAME
|
| 21 |
+
# export WANDB_DIR=$EXP_DIR
|
| 22 |
+
# echo $EXP_DIR
|
| 23 |
+
|
| 24 |
+
# mkdir -p $EXP_DIR/wandb
|
| 25 |
+
# rm -rf $EXP_DIR/wandb/*
|
| 26 |
+
|
| 27 |
+
# cd PATH_TO_VLM2VEC_REPO
|
| 28 |
+
# cmd="CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --nproc_per_node=8 --master_port=2207 --max_restarts=0 train.py --lora --lora_r 16 --model_name Qwen/Qwen2-VL-2B-Instruct --bf16 --pooling eos --normalize True --temperature 0.02 --dataloader_num_workers 8 --dataset_config experiments/release/train/train_image.yaml --run_name $EXP_NAME --output_dir $EXP_DIR --grad_cache True --per_device_train_batch_size 128 --gc_q_chunk_size 8 --gc_p_chunk_size 8 --interleave_batch_size 64 --lr_scheduler_type linear --learning_rate 5e-5 --max_steps 5000 --warmup_steps 100 --save_steps 50 --logging_steps 1 --save_safetensors True --remove_unused_columns False --resume_from auto --report_to wandb 2>&1 | tee $EXP_DIR/train.log"
|
| 29 |
+
|
| 30 |
+
# echo $cmd
|
| 31 |
+
# eval $cmd
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
#!/bin/bash
|
| 35 |
+
|
| 36 |
+
# 1. CUDA 动态库路径
|
| 37 |
+
export LD_LIBRARY_PATH=/usr/local/cuda-12.9/targets/x86_64-linux/lib:/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
|
| 38 |
+
|
| 39 |
+
# 2. PATH 添加 conda 环境 bin 路径
|
| 40 |
+
export PATH=/home/v-menggao/miniconda3/envs/VLMtoVec/bin:/home/v-menggao/miniconda3/condabin:$PATH
|
| 41 |
+
|
| 42 |
+
# 打印当前环境信息
|
| 43 |
+
echo "conda location: $(which conda)"
|
| 44 |
+
echo "Python location: $(which python)"
|
| 45 |
+
echo "Python version: $(python --version)"
|
| 46 |
+
|
| 47 |
+
# 3. HuggingFace 缓存路径
|
| 48 |
+
export HF_DATASETS_CACHE=/home/v-menggao/.cache/huggingface/datasets
|
| 49 |
+
export HF_HOME=/home/v-menggao/.cache/huggingface
|
| 50 |
+
|
| 51 |
+
# 4. W&B 设置(需要你自己填 project、API key)
|
| 52 |
+
timestamp=$(date +%Y%m%d_%H%M%S)
|
| 53 |
+
export WANDB_DISABLED=false
|
| 54 |
+
export WANDB_PROJECT=vlm2vec_gp_${timestamp}
|
| 55 |
+
export WANDB_API_KEY=***REDACTED***  # SECURITY: real API key removed — obtain from https://wandb.ai/settings and supply via environment; never commit secrets
|
| 56 |
+
export HUGGING_FACE_HUB_TOKEN=***REDACTED***  # SECURITY: real token removed — obtain from https://huggingface.co/settings/tokens and supply via environment; never commit secrets
|
| 57 |
+
export WANDB_RUN_GROUP=baseline_test
|
| 58 |
+
|
| 59 |
+
# 5. 实验名字和目录
|
| 60 |
+
export EXP_NAME=Qwen2.5vl_gp_try
|
| 61 |
+
export WANDB_NAME=$EXP_NAME
|
| 62 |
+
export EXP_DIR=/home/v-menggao/code/VLM2Vec/~/experiments_try_gp/$EXP_NAME # /mnt/data 确保是有空间的磁盘目录
|
| 63 |
+
export WANDB_DIR=$EXP_DIR
|
| 64 |
+
echo $EXP_DIR
|
| 65 |
+
|
| 66 |
+
mkdir -p $EXP_DIR/wandb
|
| 67 |
+
rm -rf $EXP_DIR/wandb/*
|
| 68 |
+
|
| 69 |
+
# 6. 切换到代码仓库
|
| 70 |
+
cd /home/v-menggao/code/VLM2Vec
|
| 71 |
+
|
| 72 |
+
# 7. 组装训练命令 # --lora --lora_r 16 \
|
| 73 |
+
cmd="CUDA_VISIBLE_DEVICES=0 torchrun --nproc_per_node=1 --master_port=2207 --max_restarts=0 train_gp.py \
|
| 74 |
+
--model_name /home/v-menggao/code/VLM2Vec/~/experiments/Qwen2.5vl_3B.all/checkpoint-1500 \
|
| 75 |
+
--new_modules_dir ashun989/GlimpsePrune_Qwen2.5-VL-3B-Instruct \
|
| 76 |
+
--gp_do_selection False \
|
| 77 |
+
--gp_use_chat_processing True \
|
| 78 |
+
--gp_aux_config /home/v-menggao/code/GlimpsePrune/dataset_configs/gqa_rand.yaml \
|
| 79 |
+
--bf16 \
|
| 80 |
+
--pooling eos \
|
| 81 |
+
--normalize True \
|
| 82 |
+
--temperature 0.02 \
|
| 83 |
+
--dataloader_num_workers 8 \
|
| 84 |
+
--dataset_config /home/v-menggao/code/VLM2Vec/experiments/public/train/train_image.yaml \
|
| 85 |
+
--run_name $EXP_NAME \
|
| 86 |
+
--output_dir $EXP_DIR \
|
| 87 |
+
--grad_cache True \
|
| 88 |
+
--per_device_train_batch_size 32 \
|
| 89 |
+
--gc_q_chunk_size 4 --gc_p_chunk_size 4 \
|
| 90 |
+
--interleave_batch_size 64 \
|
| 91 |
+
--lr_scheduler_type linear \
|
| 92 |
+
--learning_rate 5e-5 \
|
| 93 |
+
--max_steps 1000 --warmup_steps 20 \
|
| 94 |
+
--save_steps 100 --logging_steps 1 \
|
| 95 |
+
--save_safetensors True \
|
| 96 |
+
--remove_unused_columns False \
|
| 97 |
+
--image_encoder_freeze True \
|
| 98 |
+
--loc_weight 1.0 --le_weight 1.0 --loc_dice_weight 1.0 --loc_bce_weight 0.1 \
|
| 99 |
+
--resume_from auto \
|
| 100 |
+
--report_to wandb 2>&1 | tee $EXP_DIR/train.log"
|
| 101 |
+
|
| 102 |
+
echo $cmd
|
| 103 |
+
eval $cmd
|
experiments/public/train/train_v2-qwen2vl-2B_imageonly_add_CRD.sh
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# #!/bin/bash
|
| 2 |
+
# # NOTE: replace ... with actual paths
|
| 3 |
+
# export LD_LIBRARY_PATH=...
|
| 4 |
+
# export PATH=...
|
| 5 |
+
# echo "conda location: $(which conda)"
|
| 6 |
+
# echo "Python location: $(which python)"
|
| 7 |
+
# echo "Python version: $(python --version)"
|
| 8 |
+
|
| 9 |
+
# export HF_DATASETS_CACHE=...
|
| 10 |
+
# export HF_HOME=...
|
| 11 |
+
# export WANDB_DISABLED=false
|
| 12 |
+
# export WANDB_PROJECT=...
|
| 13 |
+
# export WANDB_API_KEY=...
|
| 14 |
+
# export HUGGING_FACE_HUB_TOKEN=...
|
| 15 |
+
# export WANDB_PROJECT=...
|
| 16 |
+
# export WANDB_RUN_GROUP=...
|
| 17 |
+
# export EXP_NAME=Qwen2vl_2B.image+visdoc+video.autoresize.lora16.BS1024.IB64.GCq8p8.NormTemp002.lr5e5.step5kwarm100.8H100
|
| 18 |
+
|
| 19 |
+
# export WANDB_NAME=$EXP_NAME
|
| 20 |
+
# export EXP_DIR=.../$EXP_NAME
|
| 21 |
+
# export WANDB_DIR=$EXP_DIR
|
| 22 |
+
# echo $EXP_DIR
|
| 23 |
+
|
| 24 |
+
# mkdir -p $EXP_DIR/wandb
|
| 25 |
+
# rm -rf $EXP_DIR/wandb/*
|
| 26 |
+
|
| 27 |
+
# cd PATH_TO_VLM2VEC_REPO
|
| 28 |
+
# cmd="CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --nproc_per_node=8 --master_port=2207 --max_restarts=0 train.py --lora --lora_r 16 --model_name Qwen/Qwen2-VL-2B-Instruct --bf16 --pooling eos --normalize True --temperature 0.02 --dataloader_num_workers 8 --dataset_config experiments/release/train/train_image.yaml --run_name $EXP_NAME --output_dir $EXP_DIR --grad_cache True --per_device_train_batch_size 128 --gc_q_chunk_size 8 --gc_p_chunk_size 8 --interleave_batch_size 64 --lr_scheduler_type linear --learning_rate 5e-5 --max_steps 5000 --warmup_steps 100 --save_steps 50 --logging_steps 1 --save_safetensors True --remove_unused_columns False --resume_from auto --report_to wandb 2>&1 | tee $EXP_DIR/train.log"
|
| 29 |
+
|
| 30 |
+
# echo $cmd
|
| 31 |
+
# eval $cmd
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
#!/bin/bash
|
| 35 |
+
|
| 36 |
+
# 获取脚本所在目录(保证无论从哪里运行都能找到代码路径)
|
| 37 |
+
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
| 38 |
+
CODE_DIR=$(realpath "$SCRIPT_DIR") # 代码仓库路径
|
| 39 |
+
HOME_DIR=$HOME # 当前用户 HOME 目录
|
| 40 |
+
EXP_ROOT="$CODE_DIR/experiments" # 实验主目录
|
| 41 |
+
|
| 42 |
+
# 1. CUDA 动态库路径
|
| 43 |
+
export LD_LIBRARY_PATH=/usr/local/cuda-12.9/targets/x86_64-linux/lib:/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
|
| 44 |
+
|
| 45 |
+
# 2. PATH 添加 conda 环境 bin 路径
|
| 46 |
+
export PATH="$HOME_DIR/miniconda3/envs/VLMtoVec/bin:$HOME_DIR/miniconda3/condabin:$PATH"
|
| 47 |
+
|
| 48 |
+
# 打印当前环境信息
|
| 49 |
+
echo "conda location: $(which conda)"
|
| 50 |
+
echo "Python location: $(which python)"
|
| 51 |
+
echo "Python version: $(python --version)"
|
| 52 |
+
|
| 53 |
+
# 3. HuggingFace 缓存路径
|
| 54 |
+
export HF_DATASETS_CACHE="$HOME_DIR/.cache/huggingface/datasets"
|
| 55 |
+
export HF_HOME="$HOME_DIR/.cache/huggingface"
|
| 56 |
+
|
| 57 |
+
# 4. W&B 设置(需要你自己填 project、API key)
|
| 58 |
+
export WANDB_DISABLED=false
|
| 59 |
+
export WANDB_PROJECT=vlm2vec_layer_prune
|
| 60 |
+
export WANDB_API_KEY=***REDACTED***  # SECURITY: real API key removed — obtain from https://wandb.ai/settings and supply via environment; never commit secrets
|
| 61 |
+
export HUGGING_FACE_HUB_TOKEN=***REDACTED***  # SECURITY: real token removed — obtain from https://huggingface.co/settings/tokens and supply via environment; never commit secrets
|
| 62 |
+
export WANDB_RUN_GROUP=baseline_test
|
| 63 |
+
|
| 64 |
+
# 5. 实验名字和目录
|
| 65 |
+
export EXP_NAME=Qwen2vl_2B.add_CRD_try
|
| 66 |
+
export WANDB_NAME=$EXP_NAME
|
| 67 |
+
export EXP_DIR="$EXP_ROOT/$EXP_NAME"
|
| 68 |
+
export WANDB_DIR=$EXP_DIR
|
| 69 |
+
|
| 70 |
+
echo "实验目录: $EXP_DIR"
|
| 71 |
+
mkdir -p "$EXP_DIR/wandb"
|
| 72 |
+
rm -rf "$EXP_DIR/wandb/*"
|
| 73 |
+
|
| 74 |
+
# 6. 组装训练命令
|
| 75 |
+
cmd="CUDA_VISIBLE_DEVICES=0 torchrun --nproc_per_node=1 --master_port=2207 --max_restarts=0 train_add_CRD_warmup.py \
|
| 76 |
+
--lora --lora_r 16 \
|
| 77 |
+
--model_name VLM2Vec/VLM2Vec-V2.0 \
|
| 78 |
+
--supervise_layers "20,-1" \
|
| 79 |
+
--supervise_weights "1,0" \
|
| 80 |
+
--use_crd true \
|
| 81 |
+
--crd_weight 0.2 \
|
| 82 |
+
--crd_temperature 0.07 \
|
| 83 |
+
--crd_layers 0,1 \
|
| 84 |
+
--crd_warmup_steps 200 \
|
| 85 |
+
--crd_detach_teacher true \
|
| 86 |
+
--crd_debug_every 0 \
|
| 87 |
+
--bf16 \
|
| 88 |
+
--pooling eos \
|
| 89 |
+
--normalize True \
|
| 90 |
+
--temperature 0.02 \
|
| 91 |
+
--dataloader_num_workers 8 \
|
| 92 |
+
--dataset_config experiments/public/train/train_image.yaml \
|
| 93 |
+
--run_name $EXP_NAME \
|
| 94 |
+
--output_dir $EXP_DIR \
|
| 95 |
+
--grad_cache True \
|
| 96 |
+
--per_device_train_batch_size 128 \
|
| 97 |
+
--gc_q_chunk_size 1 --gc_p_chunk_size 1 \
|
| 98 |
+
--interleave_batch_size 64 \
|
| 99 |
+
--lr_scheduler_type linear \
|
| 100 |
+
--learning_rate 5e-5 \
|
| 101 |
+
--max_steps 500 --warmup_steps 10 \
|
| 102 |
+
--save_steps 100 --logging_steps 1 \
|
| 103 |
+
--save_safetensors True \
|
| 104 |
+
--remove_unused_columns False \
|
| 105 |
+
--resume_from auto \
|
| 106 |
+
--report_to wandb 2>&1 | tee $EXP_DIR/train.log"
|
| 107 |
+
|
| 108 |
+
echo $cmd
|
| 109 |
+
eval $cmd
|
experiments/public/train/train_v2-qwen2vl-2B_imageonly_layer_prune.sh
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# #!/bin/bash
|
| 2 |
+
# # NOTE: replace ... with actual paths
|
| 3 |
+
# export LD_LIBRARY_PATH=...
|
| 4 |
+
# export PATH=...
|
| 5 |
+
# echo "conda location: $(which conda)"
|
| 6 |
+
# echo "Python location: $(which python)"
|
| 7 |
+
# echo "Python version: $(python --version)"
|
| 8 |
+
|
| 9 |
+
# export HF_DATASETS_CACHE=...
|
| 10 |
+
# export HF_HOME=...
|
| 11 |
+
# export WANDB_DISABLED=false
|
| 12 |
+
# export WANDB_PROJECT=...
|
| 13 |
+
# export WANDB_API_KEY=...
|
| 14 |
+
# export HUGGING_FACE_HUB_TOKEN=...
|
| 15 |
+
# export WANDB_PROJECT=...
|
| 16 |
+
# export WANDB_RUN_GROUP=...
|
| 17 |
+
# export EXP_NAME=Qwen2vl_2B.image+visdoc+video.autoresize.lora16.BS1024.IB64.GCq8p8.NormTemp002.lr5e5.step5kwarm100.8H100
|
| 18 |
+
|
| 19 |
+
# export WANDB_NAME=$EXP_NAME
|
| 20 |
+
# export EXP_DIR=.../$EXP_NAME
|
| 21 |
+
# export WANDB_DIR=$EXP_DIR
|
| 22 |
+
# echo $EXP_DIR
|
| 23 |
+
|
| 24 |
+
# mkdir -p $EXP_DIR/wandb
|
| 25 |
+
# rm -rf $EXP_DIR/wandb/*
|
| 26 |
+
|
| 27 |
+
# cd PATH_TO_VLM2VEC_REPO
|
| 28 |
+
# cmd="CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --nproc_per_node=8 --master_port=2207 --max_restarts=0 train.py --lora --lora_r 16 --model_name Qwen/Qwen2-VL-2B-Instruct --bf16 --pooling eos --normalize True --temperature 0.02 --dataloader_num_workers 8 --dataset_config experiments/release/train/train_image.yaml --run_name $EXP_NAME --output_dir $EXP_DIR --grad_cache True --per_device_train_batch_size 128 --gc_q_chunk_size 8 --gc_p_chunk_size 8 --interleave_batch_size 64 --lr_scheduler_type linear --learning_rate 5e-5 --max_steps 5000 --warmup_steps 100 --save_steps 50 --logging_steps 1 --save_safetensors True --remove_unused_columns False --resume_from auto --report_to wandb 2>&1 | tee $EXP_DIR/train.log"
|
| 29 |
+
|
| 30 |
+
# echo $cmd
|
| 31 |
+
# eval $cmd
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
#!/bin/bash
|
| 35 |
+
|
| 36 |
+
# 1. CUDA 动态库路径
|
| 37 |
+
export LD_LIBRARY_PATH=/usr/local/cuda-12.9/targets/x86_64-linux/lib:/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
|
| 38 |
+
|
| 39 |
+
# 2. PATH 添加 conda 环境 bin 路径
|
| 40 |
+
export PATH=/home/v-menggao/miniconda3/envs/VLMtoVec/bin:/home/v-menggao/miniconda3/condabin:$PATH
|
| 41 |
+
|
| 42 |
+
# 打印当前环境信息
|
| 43 |
+
echo "conda location: $(which conda)"
|
| 44 |
+
echo "Python location: $(which python)"
|
| 45 |
+
echo "Python version: $(python --version)"
|
| 46 |
+
|
| 47 |
+
# 3. HuggingFace 缓存路径
|
| 48 |
+
export HF_DATASETS_CACHE=/home/v-menggao/.cache/huggingface/datasets
|
| 49 |
+
export HF_HOME=/home/v-menggao/.cache/huggingface
|
| 50 |
+
|
| 51 |
+
# 4. W&B 设置(需要你自己填 project、API key)
|
| 52 |
+
export WANDB_DISABLED=false
|
| 53 |
+
export WANDB_PROJECT=vlm2vec_layer_prune
|
| 54 |
+
export WANDB_API_KEY=4d73ec74bcbb8dfa92520641573bd6ce93ad829a # 从 https://wandb.ai/settings 获取
|
| 55 |
+
export HUGGING_FACE_HUB_TOKEN=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 # 从 https://huggingface.co/settings/tokens 获取
|
| 56 |
+
export WANDB_RUN_GROUP=baseline_test
|
| 57 |
+
|
| 58 |
+
# 5. 实验名字和目录
|
| 59 |
+
export EXP_NAME=Qwen2vl_2B.image_qry_16_none+cand_16_none_0.1_try
|
| 60 |
+
export WANDB_NAME=$EXP_NAME
|
| 61 |
+
export EXP_DIR=/home/v-menggao/code/VLM2Vec/~/experiments/$EXP_NAME # /mnt/data 确保是有空间的磁盘目录
|
| 62 |
+
export WANDB_DIR=$EXP_DIR
|
| 63 |
+
echo $EXP_DIR
|
| 64 |
+
|
| 65 |
+
mkdir -p $EXP_DIR/wandb
|
| 66 |
+
rm -rf $EXP_DIR/wandb/*
|
| 67 |
+
|
| 68 |
+
# 6. 切换到代码仓库
|
| 69 |
+
cd /home/v-menggao/code/VLM2Vec
|
| 70 |
+
|
| 71 |
+
# 7. 组装训练命令
|
| 72 |
+
cmd="CUDA_VISIBLE_DEVICES=0 torchrun --nproc_per_node=1 --master_port=2207 --max_restarts=0 train_layer_prune.py \
|
| 73 |
+
--lora --lora_r 16 \
|
| 74 |
+
--model_name VLM2Vec/VLM2Vec-V2.0 \
|
| 75 |
+
--dual_layer_idx 16 \
|
| 76 |
+
--dual_alpha 0.1 \
|
| 77 |
+
--bf16 \
|
| 78 |
+
--pooling eos \
|
| 79 |
+
--normalize True \
|
| 80 |
+
--temperature 0.02 \
|
| 81 |
+
--dataloader_num_workers 8 \
|
| 82 |
+
--dataset_config /home/v-menggao/code/VLM2Vec/experiments/public/train/train_image.yaml \
|
| 83 |
+
--run_name $EXP_NAME \
|
| 84 |
+
--output_dir $EXP_DIR \
|
| 85 |
+
--grad_cache True \
|
| 86 |
+
--per_device_train_batch_size 128 \
|
| 87 |
+
--gc_q_chunk_size 8 --gc_p_chunk_size 8 \
|
| 88 |
+
--interleave_batch_size 64 \
|
| 89 |
+
--lr_scheduler_type linear \
|
| 90 |
+
--learning_rate 7e-5 \
|
| 91 |
+
--max_steps 500 --warmup_steps 10 \
|
| 92 |
+
--save_steps 100 --logging_steps 1 \
|
| 93 |
+
--save_safetensors True \
|
| 94 |
+
--remove_unused_columns False \
|
| 95 |
+
--resume_from auto \
|
| 96 |
+
--report_to wandb 2>&1 | tee $EXP_DIR/train.log"
|
| 97 |
+
|
| 98 |
+
echo $cmd
|
| 99 |
+
eval $cmd
|