ZebangCheng committed
Commit c75b625
1 Parent(s): 4a87f94
minigpt4/configs/datasets/firstface/featureface.yaml ADDED
@@ -0,0 +1,11 @@
+ datasets:
+   feature_face_caption:
+     data_type: images
+
+     build_info:
+       # image_path: /home/user/selected_face/first_face/images
+       image_path: /home/user/selected_face/first_face/first_frames
+
+       ann_path: /home/user/selected_face/face_emotion/relative_train_NCEV.txt
+       # ann_path: /home/user/selected_face/face_emotion/all_label_NCEV.txt
+       # ann_path: /home/user/selected_face/face_emotion/target_label_list_0512_smp.txt
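For orientation, the new dataset config can be loaded with OmegaConf (the configuration library used throughout minigpt4) to confirm which paths it points at. A minimal sketch, not part of this commit, assuming it is run from the repository root:

```python
# Sketch: load featureface.yaml and check that its paths exist locally.
import os
from omegaconf import OmegaConf

cfg = OmegaConf.load("minigpt4/configs/datasets/firstface/featureface.yaml")
info = cfg.datasets.feature_face_caption.build_info

for key in ("image_path", "ann_path"):
    print(f"{key}: {info[key]} (exists: {os.path.exists(info[key])})")
```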
minigpt4/configs/datasets/flickr/caption_to_phrase.yaml ADDED
@@ -0,0 +1,6 @@
+ datasets:
+   flickr_CaptionToPhrase:
+     data_type: images
+     build_info:
+       image_path: /path/to/filtered_flikcr/images
+       ann_path: /path/to/filtered_flickr/captiontobbox.json
minigpt4/configs/datasets/flickr/default.yaml ADDED
@@ -0,0 +1,6 @@
+ datasets:
+   flickr_grounded_caption:
+     data_type: images
+     build_info:
+       image_path: /path/to/filtered_flikcr/images
+       ann_path: /path/to/filtered_flikcr/groundedcaption.json
minigpt4/configs/datasets/flickr/object_to_phrase.yaml ADDED
@@ -0,0 +1,6 @@
+ datasets:
+   flickr_ObjectToPhrase:
+     data_type: images
+     build_info:
+       image_path: /path/to/filtered_flikcr/images
+       ann_path: /path/to/filtered_flikcr/phrasetobbox.json
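The three Flickr configs above share one image folder and differ only in the annotation JSON (grounded captions, caption-to-phrase, object-to-phrase). A rough sketch of iterating them to count annotation entries; the JSON schemas themselves are not part of this commit, so only the top-level length is inspected:

```python
# Sketch: count entries in each Flickr annotation file referenced by the configs.
import json
from omegaconf import OmegaConf

flickr_cfgs = [
    "minigpt4/configs/datasets/flickr/default.yaml",
    "minigpt4/configs/datasets/flickr/caption_to_phrase.yaml",
    "minigpt4/configs/datasets/flickr/object_to_phrase.yaml",
]

for cfg_path in flickr_cfgs:
    for name, ds in OmegaConf.load(cfg_path).datasets.items():
        with open(ds.build_info.ann_path) as f:
            print(name, "->", len(json.load(f)), "entries")
```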
minigpt4/configs/datasets/gqa/balanced_val.yaml ADDED
@@ -0,0 +1,21 @@
+ # Copyright (c) 2022, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+ datasets:
+   gqa:
+     # data_dir: ${env.data_dir}/datasets
+     data_type: images # [images|videos|features]
+
+     build_info:
+       # Be careful not to append minus sign (-) before split to avoid itemizing
+       annotations:
+         train:
+           url:
+             - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/gqa/train_balanced_questions.json
+           storage:
+             - /path/to/gqa/train_balanced_questions.json
+
+       images:
+         storage: /path/to/gqa/images
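This LAVIS-style config lists both a download `url` and a local `storage` path for the balanced GQA questions. A small sketch of fetching the file into place; `/path/to/gqa` is the placeholder from the config and must be replaced:

```python
# Sketch: download the GQA annotation listed under `url` to its `storage` path.
import os
import urllib.request

url = ("https://storage.googleapis.com/sfr-vision-language-research/"
       "LAVIS/datasets/gqa/train_balanced_questions.json")
storage = "/path/to/gqa/train_balanced_questions.json"  # placeholder from the config

os.makedirs(os.path.dirname(storage), exist_ok=True)
urllib.request.urlretrieve(url, storage)
```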
minigpt4/configs/datasets/laion/defaults.yaml ADDED
@@ -0,0 +1,5 @@
+ datasets:
+   laion:
+     data_type: images
+     build_info:
+       storage: /path/to/laion_dataset/{00000..10488}.tar
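The `storage` value is a brace-expanded range of tar shards rather than a single file. Shards like these are commonly read with a streaming loader such as webdataset; the sketch below is an assumption about how they might be consumed (the .jpg/.txt sample layout is not specified by this commit):

```python
# Sketch: stream LAION tar shards with webdataset; the brace pattern is
# expanded by webdataset itself. The "jpg"/"txt" keys are an assumed layout.
import webdataset as wds

dataset = wds.WebDataset("/path/to/laion_dataset/{00000..10488}.tar")
for sample in dataset:
    image_bytes, caption = sample["jpg"], sample["txt"]
    break  # inspect a single sample
```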
minigpt4/configs/datasets/llava/conversation.yaml ADDED
@@ -0,0 +1,7 @@
+ datasets:
+
+   llava_conversation:
+     data_type: images
+     build_info:
+       image_path: /path/to/coco/images
+       ann_path: /path/to/llava/conversation_58k.json
minigpt4/configs/datasets/llava/detail.yaml ADDED
@@ -0,0 +1,6 @@
+ datasets:
+   llava_detail:
+     data_type: images
+     build_info:
+       image_path: /path/to/coco/images
+       ann_path: /path/to/llava/detail_23k.json
minigpt4/configs/datasets/llava/reason.yaml ADDED
@@ -0,0 +1,7 @@
+ datasets:
+
+   llava_reason:
+     data_type: images
+     build_info:
+       image_path: /path/to/coco/images
+       ann_path: /path/to/llava/complex_reasoning_77k.json
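The three LLaVA configs (conversation, detail, reason) all point at the same COCO image folder and differ only in the annotation file. A sketch of joining one annotation record to its image, assuming the public LLaVA record layout with an `image` field; the schema is not defined in this commit:

```python
# Sketch: resolve the image file for one LLaVA annotation record.
# Assumes records carry an "image" filename, as in the public LLaVA data.
import json
import os

image_root = "/path/to/coco/images"          # placeholder from the configs
ann_path = "/path/to/llava/detail_23k.json"  # placeholder from the configs

with open(ann_path) as f:
    records = json.load(f)

first = records[0]
print(os.path.join(image_root, first["image"]))
```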
minigpt4/configs/datasets/multitask_conversation/default.yaml ADDED
@@ -0,0 +1,7 @@
+ datasets:
+   multitask_conversation:
+     data_type: images
+     build_info:
+
+       image_path: /path/to/coco/images
+       ann_path: /path/to/multitask_conversation/multi_task_conversation.json
minigpt4/configs/datasets/nlp/unnatural_instruction.yaml ADDED
@@ -0,0 +1,5 @@
+ datasets:
+   unnatural_instruction:
+     data_type: text
+     build_info:
+       ann_path: /path/to/unnatural_instructions/filtered_unnatural_instruction.json
minigpt4/configs/datasets/ocrvqa/ocrvqa.yaml ADDED
@@ -0,0 +1,6 @@
+ datasets:
+   ocrvqa:
+     data_type: images
+     build_info:
+       image_path: /path/to/ocrvqa/images
+       ann_path: /path/to/ocrvqa/dataset.json
minigpt4/configs/datasets/okvqa/defaults.yaml ADDED
@@ -0,0 +1,21 @@
+ # Copyright (c) 2022, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+ datasets:
+   ok_vqa:
+     # data_dir: ${env.data_dir}/datasets
+     data_type: images # [images|videos|features]
+
+     build_info:
+       # Be careful not to append minus sign (-) before split to avoid itemizing
+       annotations:
+         train:
+           url:
+             # TODO make this order insensitive
+             - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/okvqa/okvqa_train.json
+           storage:
+             - /path/to/okvqa/okvqa_train.json
+       images:
+         storage: /path/to/coco/images
minigpt4/configs/datasets/textcaps/caption.yaml ADDED
@@ -0,0 +1,9 @@
+ datasets:
+   textcaps_caption:
+     data_type: images
+
+     build_info:
+       image_path: /path/to/textcaps/train_images
+       ann_path: /path/to/textcaps/TextCaps_0.1_train.json
+
+
minigpt4/configs/datasets/vg/ref.yaml ADDED
@@ -0,0 +1,5 @@
+ datasets:
+   refvg:
+     data_type: images
+     build_info:
+       data_dir: /path/to/visual_genome
minigpt4/configs/default.yaml ADDED
@@ -0,0 +1,5 @@
+ env:
+   # For default users
+   # cache_root: "cache"
+   # For internal use with persistent storage
+   cache_root: "/export/home/.cache/minigpt4"
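The commented-out value is a relative `cache` directory for default users, while the active value points at persistent storage. If editing the file is undesirable, the value can also be overridden at load time; a small illustrative sketch:

```python
# Sketch: override cache_root without editing default.yaml.
from omegaconf import OmegaConf

cfg = OmegaConf.load("minigpt4/configs/default.yaml")
cfg = OmegaConf.merge(cfg, OmegaConf.create({"env": {"cache_root": "cache"}}))
print(cfg.env.cache_root)  # cache
```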
minigpt4/configs/models/minigpt4_llama2.yaml ADDED
@@ -0,0 +1,29 @@
+ model:
+   arch: minigpt4
+
+   # vit encoder
+   image_size: 224
+   drop_path_rate: 0
+   use_grad_checkpoint: False
+   vit_precision: "fp16"
+   freeze_vit: True
+   has_qformer: False
+
+   # generation configs
+   prompt: ""
+
+   llama_model: "please set this value to the path of llama2-chat-7b"
+
+ preprocess:
+   vis_processor:
+     train:
+       name: "blip2_image_train"
+       image_size: 224
+     eval:
+       name: "blip2_image_eval"
+       image_size: 224
+   text_processor:
+     train:
+       name: "blip_caption"
+     eval:
+       name: "blip_caption"
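`llama_model` ships as a placeholder string and must be pointed at a local Llama-2-7b-chat checkpoint before this model config is usable. One way to set it programmatically (a sketch; the checkpoint path below is an example, not a path from this commit):

```python
# Sketch: fill in the llama_model placeholder at load time.
from omegaconf import OmegaConf

cfg = OmegaConf.load("minigpt4/configs/models/minigpt4_llama2.yaml")
cfg.model.llama_model = "/data/checkpoints/Llama-2-7b-chat-hf"  # example path
print(OmegaConf.to_yaml(cfg.model))
```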
minigpt4/configs/models/minigpt4_vicuna0.yaml ADDED
@@ -0,0 +1,32 @@
+ model:
+   arch: minigpt4
+
+   # vit encoder
+   image_size: 224
+   drop_path_rate: 0
+   use_grad_checkpoint: False
+   vit_precision: "fp16"
+   freeze_vit: True
+   freeze_qformer: True
+
+   # Q-Former
+   num_query_token: 32
+
+   # generation configs
+   prompt: ""
+
+   llama_model: "please set this value to the path of vicuna model"
+
+ preprocess:
+   vis_processor:
+     train:
+       name: "blip2_image_train"
+       image_size: 224
+     eval:
+       name: "blip2_image_eval"
+       image_size: 224
+   text_processor:
+     train:
+       name: "blip_caption"
+     eval:
+       name: "blip_caption"
minigpt4/configs/models/minigpt_v2.yaml ADDED
@@ -0,0 +1,32 @@
+ model:
+   arch: minigpt_v2
+
+   # vit encoder
+   image_size: 448
+   drop_path_rate: 0
+   use_grad_checkpoint: False
+   vit_precision: "fp16"
+   freeze_vit: True
+
+   # generation configs
+   prompt: ""
+
+   llama_model: /home/czb/project/MiniGPT-4-main/checkpoints/Llama-2-7b-chat-hf
+   # llama_model: "/home/user/project/Emotion-LLaMA/checkpoints/Llama-2-7b-chat-hf"
+   lora_r: 64
+   lora_alpha: 16
+
+
+ preprocess:
+   vis_processor:
+     train:
+       name: "blip2_image_train"
+       image_size: 448
+     eval:
+       name: "blip2_image_eval"
+       image_size: 448
+   text_processor:
+     train:
+       name: "blip_caption"
+     eval:
+       name: "blip_caption"
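Unlike the other two model configs, minigpt_v2.yaml enables LoRA adaptation of the language model via `lora_r` and `lora_alpha`. For reference, those values would map onto a PEFT `LoraConfig` roughly as below; the target modules, dropout, and task type are illustrative assumptions, not values from this commit:

```python
# Sketch: how lora_r / lora_alpha from minigpt_v2.yaml translate to a PEFT config.
from peft import LoraConfig

lora_config = LoraConfig(
    r=64,           # lora_r in the YAML
    lora_alpha=16,  # lora_alpha in the YAML
    target_modules=["q_proj", "v_proj"],  # assumption: typical attention projections
    lora_dropout=0.05,                    # assumption
    bias="none",
    task_type="CAUSAL_LM",
)
```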