diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..a4dcc1a4edfef39d31630d176273c1f65ed3983f 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +ChatUniVi/eval/questions/scienceqa/problems.json filter=lfs diff=lfs merge=lfs -text +examples/image0.jpg filter=lfs diff=lfs merge=lfs -text +examples/video0.mp4 filter=lfs diff=lfs merge=lfs -text +examples/video1.mp4 filter=lfs diff=lfs merge=lfs -text +examples/video2.mp4 filter=lfs diff=lfs merge=lfs -text +examples/video3.mp4 filter=lfs diff=lfs merge=lfs -text +examples/video4.mp4 filter=lfs diff=lfs merge=lfs -text diff --git a/ChatUniVi/__init__.py b/ChatUniVi/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a18750570d0e036417ec9c0751d7ca5e4f31b528 --- /dev/null +++ b/ChatUniVi/__init__.py @@ -0,0 +1 @@ +from .model import ChatUniViLlamaForCausalLM diff --git a/ChatUniVi/__pycache__/__init__.cpython-310.pyc b/ChatUniVi/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..06e6dd86be718ef778e061cf1979000a88eb1155 Binary files /dev/null and b/ChatUniVi/__pycache__/__init__.cpython-310.pyc differ diff --git a/ChatUniVi/__pycache__/constants.cpython-310.pyc b/ChatUniVi/__pycache__/constants.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2dad91308ffa4505c92691bfc498f6f17b6f4ed8 Binary files /dev/null and b/ChatUniVi/__pycache__/constants.cpython-310.pyc differ diff --git a/ChatUniVi/__pycache__/conversation.cpython-310.pyc b/ChatUniVi/__pycache__/conversation.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12d0aa390993e69ca9e382f8c7478feaaf949565 Binary files /dev/null and b/ChatUniVi/__pycache__/conversation.cpython-310.pyc differ diff --git a/ChatUniVi/__pycache__/mm_utils.cpython-310.pyc b/ChatUniVi/__pycache__/mm_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..13f043d58a68041a992b7381a0627745ba4472cf Binary files /dev/null and b/ChatUniVi/__pycache__/mm_utils.cpython-310.pyc differ diff --git a/ChatUniVi/__pycache__/utils.cpython-310.pyc b/ChatUniVi/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..336d9534ce7791126609747bffca159a4c936783 Binary files /dev/null and b/ChatUniVi/__pycache__/utils.cpython-310.pyc differ diff --git a/ChatUniVi/config/__init__.py b/ChatUniVi/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..359780c6c5acc8f62c6f74c12c6806e8858341f5 --- /dev/null +++ b/ChatUniVi/config/__init__.py @@ -0,0 +1,15 @@ +from .dataset_config import * +from .mode_config import * + + +ModelConfig = { + "PRETUNE": model_config_pretune, + "FINETUNE": model_config_finetune, +} + + +DataConfig = { + "Pretrain": [Pretrain, COCO_CAP, COCO_REG, COCO_REC], + "SQA": [SQA], + "FINETUNE": [VIT, MIMIC_imageonly, VIDEO], +} \ No newline at end of file diff --git a/ChatUniVi/config/dataset_config.py b/ChatUniVi/config/dataset_config.py new file mode 100644 index 0000000000000000000000000000000000000000..1a410c61a767f3f00a8eed230e6d3fc4d96ba98f --- /dev/null +++ b/ChatUniVi/config/dataset_config.py @@ -0,0 +1,41 @@ +Pretrain = { + "chat_path": "${PATH}/CC3M-595K/chat.json", + "CC3M": "${PATH}/CC3M-595K", +} + +VIT = { + "chat_path": "${PATH}/llava_instruct_150k.json", + "COCO2017": "${PATH}/COCO2017/train2017", +} + +MIMIC_imageonly = { + "chat_path": "${PATH}/MIMIC-IT-imageonly.json", + "CDG": "${PATH}/CGD/images", + "LA": "${PATH}/LA/images", + "SD": "${PATH}/SD/images", +} + +COCO_CAP = { + "chat_path": "${PATH}/COCO/coco_cap_chat.json", + "COCO2014": "${PATH}/COCO2014/train2014", +} + +COCO_REG = { + "chat_path": "${PATH}/COCO/coco_reg_chat.json", + "COCO2014": "${PATH}/COCO2014/train2014", +} + +COCO_REC = { + "chat_path": "${PATH}/COCO/coco_rec_chat.json", + "COCO2014": "${PATH}/COCO2014/train2014", +} + +VIDEO = { + "chat_path": "${PATH}/video_chat.json", + "VIDEO": "${PATH}/Activity_Videos", +} + +SQA = { + "chat_path": "${PATH}/llava_train_QCM-LEA.json", + "ScienceQA": "${PATH}/scienceqa/train", +} \ No newline at end of file diff --git a/ChatUniVi/config/mode_config.py b/ChatUniVi/config/mode_config.py new file mode 100644 index 0000000000000000000000000000000000000000..a7cb19b51fc130d8863335a350ababf7fa987d7a --- /dev/null +++ b/ChatUniVi/config/mode_config.py @@ -0,0 +1,24 @@ +model_config_pretune = { + "use_cluster": True, + "freeze": False, + "vision_tune": False, + + "spatial_cluster_rate0": 64, # 0.25 + "spatial_cluster_rate1": 32, # 0.5 + "spatial_cluster_rate2": 16, # 0.5 + + "temporal_cluster_rate": 1/16, +} + +model_config_finetune = { + "use_cluster": True, + "freeze": False, + "mm_tune": True, + "vision_tune": False, + + "spatial_cluster_rate0": 64, # 0.25 + "spatial_cluster_rate1": 32, # 0.5 + "spatial_cluster_rate2": 16, # 0.5 + + "temporal_cluster_rate": 1/16, +} \ No newline at end of file diff --git a/ChatUniVi/constants.py b/ChatUniVi/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..e16cb76517deec92529580e49273f0e8173974e7 --- /dev/null +++ b/ChatUniVi/constants.py @@ -0,0 +1,19 @@ +CONTROLLER_HEART_BEAT_EXPIRATION = 30 +WORKER_HEART_BEAT_INTERVAL = 15 + +LOGDIR = "." + +# Model Constants +MAX_IMAGE_LENGTH = 64 +IGNORE_INDEX = -100 +IMAGE_TOKEN_INDEX = -200 +DEFAULT_IMAGE_TOKEN = "" +DEFAULT_VIDEO_TOKEN = "