wenkai committed
Commit
994daed
1 Parent(s): 309ab27

Upload 37 files

Files changed (37)
  1. FAPM_inference.py +76 -0
  2. LICENSE +21 -0
  3. README.md +70 -3
  4. blip2_eval_example.py +27 -0
  5. evaluate.py +92 -0
  6. requirements.txt +29 -0
  7. run_scripts/blip2/eval/eval_cap_coco_opt2.7b.sh +1 -0
  8. run_scripts/blip2/eval/eval_cap_protein.sh +16 -0
  9. run_scripts/blip2/eval/eval_cap_protein_opt2.7b.sh +1 -0
  10. run_scripts/blip2/eval/eval_ret_coco.sh +2 -0
  11. run_scripts/blip2/eval/eval_ret_flickr.sh +2 -0
  12. run_scripts/blip2/train/gptProcessed_protein_pretrain_stage1.sh +16 -0
  13. run_scripts/blip2/train/mol_instruction_stage1.sh +2 -0
  14. run_scripts/blip2/train/mol_instruction_stage2.sh +2 -0
  15. run_scripts/blip2/train/pretrain_stage1.sh +1 -0
  16. run_scripts/blip2/train/pretrain_stage2.sh +1 -0
  17. run_scripts/blip2/train/protein_evaluate_stage2.sh +16 -0
  18. run_scripts/blip2/train/protein_finetune_stage1.sh +16 -0
  19. run_scripts/blip2/train/protein_finetune_stage2.sh +16 -0
  20. run_scripts/blip2/train/protein_pretrain_domain_stage1.sh +2 -0
  21. run_scripts/blip2/train/protein_pretrain_domain_stage2.sh +2 -0
  22. run_scripts/blip2/train/protein_pretrain_stage1.sh +16 -0
  23. run_scripts/blip2/train/protein_pretrain_stage2.sh +16 -0
  24. run_scripts/blip2/train/train_caption_coco.sh +1 -0
  25. run_scripts/blip2/train/train_function_reviewed.sh +16 -0
  26. run_scripts/blip2/train/train_get_eval.sh +16 -0
  27. run_scripts/blip2/train/train_retrieval_coco.sh +1 -0
  28. run_scripts/blip2/train/union_stage1.sh +16 -0
  29. run_scripts/blip2/train/union_stage2.sh +16 -0
  30. salesforce_lavis.egg-info/PKG-INFO +336 -0
  31. salesforce_lavis.egg-info/SOURCES.txt +715 -0
  32. salesforce_lavis.egg-info/dependency_links.txt +1 -0
  33. salesforce_lavis.egg-info/not-zip-safe +1 -0
  34. salesforce_lavis.egg-info/requires.txt +29 -0
  35. salesforce_lavis.egg-info/top_level.txt +10 -0
  36. setup.py +36 -0
  37. train.py +103 -0
FAPM_inference.py ADDED
@@ -0,0 +1,76 @@
+ import torch
+ import torch.nn as nn
+ import pandas as pd
+ import torch.nn.functional as F
+ from lavis.models.protein_models.protein_function_opt import Blip2ProteinMistral
+ # from lavis.models.base_model import FAPMConfig
+ # from lavis.models.blip2_models.blip2_opt import Blip2ProteinOPT
+ import random
+ from lavis.models.base_model import FAPMConfig
+ 
+ prop = True
+ 
+ # model = Blip2ProteinOPT(config=FAPMConfig(), esm_size='3b')
+ # model.load_checkpoint('/cluster/home/wenkai/LAVIS/lavis/output/BLIP2/Pretrain_stage2/20240327081/checkpoint_2.pth')
+ model = Blip2ProteinMistral(config=FAPMConfig(), esm_size='3b')
+ model.load_checkpoint('model/checkpoint_mf2.pth')
+ # model.from_pretrained('/cluster/home/wenkai/FAPM_model/mf')
+ model.to('cuda')
+ 
+ # Load the per-token ESM-2 (3B) embedding and pad the sequence dimension to 1024.
+ # esm_emb = torch.load('/cluster/home/wenkai/LAVIS/data/pretrain/ipr_domain_emb_esm2_3b/Gp49.pt')['representations'][36]
+ esm_emb = torch.load('data/emb_esm2_3b/P18281.pt')['representations'][36]
+ esm_emb = F.pad(esm_emb.t(), (0, 1024 - len(esm_emb))).t().to('cuda')
+ samples = {'name': ['P18281'],
+            'image': torch.unsqueeze(esm_emb, dim=0),
+            'text_input': ['actin monomer binding'],
+            'prompt': ['Acanthamoeba']}
+ prediction = model.generate(samples, length_penalty=0., num_beams=15, num_captions=10, temperature=1., repetition_penalty=1.0)
+ print(f"Text Prediction: {prediction}")
+ 
+ 
+ if prop:
+     from data.evaluate_data.utils import Ontology
+     import difflib
+     import re
+ 
+     # godb = Ontology('/cluster/home/wenkai/LAVIS/data/go1.4-basic.obo', with_rels=True)
+     godb = Ontology('data/go1.4-basic.obo', with_rels=True)
+ 
+     # Mappings between GO ids and their lower-cased textual descriptions.
+     go_des = pd.read_csv('data/go_descriptions1.4.txt', sep='|', header=None)
+     go_des.columns = ['id', 'text']
+     go_des = go_des.dropna()
+     go_des['id'] = go_des['id'].apply(lambda x: re.sub('_', ':', x))
+     go_obo_set = set(go_des['id'].tolist())
+     go_des['text'] = go_des['text'].apply(lambda x: x.lower())
+     GO_dict = dict(zip(go_des['text'], go_des['id']))
+     Func_dict = dict(zip(go_des['id'], go_des['text']))
+ 
+     # terms_mf = pd.read_pickle('/cluster/home/wenkai/deepgo2/data/mf/terms.pkl')
+     terms_mf = pd.read_pickle('data/terms/mf_terms.pkl')
+     choices_mf = [Func_dict[i] for i in list(set(terms_mf['gos']))]
+     choices = {x.lower(): x for x in choices_mf}
+ 
+     # Fuzzy-match each generated description to the closest known MF term,
+     # then propagate the matched GO ids to their ancestors in the ontology.
+     pred_terms_list = []
+     pred_go_list = []
+     prop_annotations = []
+     for x in prediction:
+         x = [eval(i) for i in x.split('; ')]
+         pred_terms = []
+         pred_go = []
+         annot_set = set()
+         for i in x:
+             txt = i[0]
+             prob = i[1]
+             sim_list = difflib.get_close_matches(txt.lower(), choices, n=1, cutoff=0.9)
+             if len(sim_list) > 0:
+                 pred_terms.append((sim_list[0], prob))
+                 pred_go.append((GO_dict[sim_list[0]], prob))
+                 annot_set |= godb.get_anchestors(GO_dict[sim_list[0]])
+         pred_terms_list.append(pred_terms)
+         pred_go_list.append(pred_go)
+         annots = list(annot_set)
+         prop_annotations.append(annots)
+ 
+     print(f"Predictions of GO terms: \n{pred_terms_list} \nPredictions of GO id: \n{pred_go_list} \nPredictions of GO id propagated: \n{prop_annotations}")
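The post-processing above rebuilds (term, probability) tuples from each generated string with `eval`. A minimal, hedged alternative, assuming the same `"('term', prob); ('term', prob)"` output format the loop splits on, is `ast.literal_eval`, which only accepts plain Python literals (the helper name is illustrative, not part of the repository):

```python
import ast

def parse_prediction(pred_str):
    """Parse "('term', prob); ('term', prob); ..." into (term, probability) tuples."""
    pairs = []
    for chunk in pred_str.split('; '):
        try:
            term, prob = ast.literal_eval(chunk)
            pairs.append((term, float(prob)))
        except (ValueError, SyntaxError):
            continue  # skip chunks that are not well-formed tuples
    return pairs

# parse_prediction("('actin monomer binding', 0.93); ('protein binding', 0.41)")
# -> [('actin monomer binding', 0.93), ('protein binding', 0.41)]
```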
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+ 
+ Copyright (c) 2024 xiangwenkai
+ 
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+ 
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md CHANGED
@@ -1,3 +1,70 @@
- ---
- license: mit
- ---
+ ## Introduction
+ <p align="center">
+ <br>
+ <img src="assets/FAPM.png"/>
+ <br>
+ <p>
+ 
+ ## Installation
+ 
+ 1. (Optional) Create a conda environment
+ 
+ ```bash
+ conda create -n lavis python=3.8
+ conda activate lavis
+ ```
+ 
+ 2. For development, build from source
+ 
+ ```bash
+ git clone https://github.com/xiangwenkai/FAPM.git
+ cd FAPM
+ pip install -e .
+ 
+ pip install Biopython
+ pip install fair-esm
+ ```
+ 
+ ### Datasets
+ #### 1. Raw dataset
+ Raw data are available at *https://ftp.uniprot.org/pub/databases/uniprot/previous_releases/release-2023_04/knowledgebase/*. The file is very large and needs to be processed to extract each protein's name, sequence, GO label, function description, and prompt.
+ The domain-level protein dataset we used is available at *https://ftp.ebi.ac.uk/pub/databases/interpro/releases/95.0/protein2ipr.dat.gz*.
+ In this repository, we provide the experimental train/val/test splits of Swiss-Prot, available at data/swissprot_exp.
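Since the Swiss-Prot flat file has to be parsed before training, here is a minimal, hedged sketch of one way to pull those fields out with Biopython (installed in the steps above). The file name `uniprot_sprot.dat.gz` and the selected fields are assumptions; the exact preprocessing and prompt construction used for FAPM may differ.

```python
import gzip
from Bio import SwissProt

records = []
with gzip.open('uniprot_sprot.dat.gz', 'rt') as handle:  # assumed local copy of the release
    for record in SwissProt.parse(handle):
        # GO labels appear in the cross-references as ('GO', 'GO:XXXXXXX', ...) tuples
        go_ids = [ref[1] for ref in record.cross_references if ref[0] == 'GO']
        records.append({
            'name': record.accessions[0],
            'sequence': record.sequence,
            'go_labels': go_ids,
            'description': record.description,
        })
```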
+ #### 2. ESM2 embeddings
+ ESM2 embedding generation code: *https://github.com/facebookresearch/esm*
+ The generation command:
+ ```bash
+ git clone https://github.com/facebookresearch/esm.git
+ python scripts/extract.py esm2_t36_3B_UR50D your_path/protein.fasta your_path_to_save_embedding_files --repr_layers 36 --truncation_seq_length 1024 --include per_tok
+ ```
+ The default path for saved embedding files in this repository is **data/emb_esm2_3b**.
+ 
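The inference script consumes these embedding files directly: it loads the layer-36 per-token representation and pads the sequence dimension to a fixed length of 1024. A minimal sketch of that step, using the example accession P18281 that ships with the repository:

```python
import torch
import torch.nn.functional as F

# One per-token embedding written by extract.py; shape (seq_len, hidden_dim).
esm_emb = torch.load('data/emb_esm2_3b/P18281.pt')['representations'][36]
# Pad the sequence dimension to 1024, as expected by the model.
esm_emb = F.pad(esm_emb.t(), (0, 1024 - len(esm_emb))).t()  # shape (1024, hidden_dim)
```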
+ ## Pretraining language models
+ Source: *https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B*
+ 
+ ## Training
+ - data config: lavis/configs/datasets/protein/GO_defaults_cap.yaml
+ - stage 1 config: lavis/projects/blip2/train/protein_pretrain_stage1.yaml
+ - stage 1 training command: run_scripts/blip2/train/protein_pretrain_domain_stage1.sh
+ - stage 2 config: lavis/projects/blip2/train/protein_pretrain_stage2.yaml
+ - stage 2 training/finetuning command: run_scripts/blip2/train/protein_pretrain_domain_stage2.sh
+ 
+ ## Trained models
+ You can download our trained models from Google Drive: *https://drive.google.com/drive/folders/1aA0eSYxNw3DvrU5GU1Cu-4q2kIxxAGSE?usp=drive_link*
+ 
+ ## Testing
+ - config: lavis/projects/blip2/eval/caption_protein_eval.yaml
+ - command: run_scripts/blip2/eval/eval_cap_protein.sh
+ 
+ ## Inference example
+ We provide an example in **FAPM_inference.py**. You can adapt it to your own protein, as sketched below.
+ 
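A minimal, hedged sketch of that adaptation, assuming `model` is loaded as in FAPM_inference.py: the accession `MYPROT`, its embedding file, and the taxonomy prompt are placeholders for your own protein, and the generation arguments mirror the example script.

```python
import torch
import torch.nn.functional as F

esm_emb = torch.load('data/emb_esm2_3b/MYPROT.pt')['representations'][36]  # your own embedding file
esm_emb = F.pad(esm_emb.t(), (0, 1024 - len(esm_emb))).t().to('cuda')
samples = {'name': ['MYPROT'],
           'image': esm_emb.unsqueeze(0),     # (1, 1024, hidden_dim)
           'text_input': ['unknown'],         # FAPM_inference.py passes the known description here
           'prompt': ['Homo sapiens']}        # taxonomy prompt
prediction = model.generate(samples, length_penalty=0., num_beams=15, num_captions=10,
                            temperature=1., repetition_penalty=1.0)
```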
blip2_eval_example.py ADDED
@@ -0,0 +1,27 @@
+ import torch
+ from PIL import Image
+ import requests
+ from lavis.models import load_model_and_preprocess
+ 
+ img_url = 'https://storage.googleapis.com/sfr-vision-language-research/LAVIS/assets/merlion.png'
+ raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
+ display(raw_image.resize((596, 437)))  # display() is available in IPython/Jupyter only
+ 
+ device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
+ 
+ model, vis_processors, _ = load_model_and_preprocess(
+     name="blip2_opt", model_type="caption_coco_opt2.7b", is_eval=True, device=device
+ )
+ 
+ image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
+ 
+ model.generate({"image": image})
+ 
+ # due to the non-deterministic nature of nucleus sampling, you may get different captions.
+ model.generate({"image": image}, use_nucleus_sampling=True, num_captions=3)
+ 
+ model.generate({"image": image, "prompt": "Question: which city is this? Answer:"})
+ 
+ model.generate({
+     "image": image,
+     "prompt": "Question: which city is this? Answer: singapore. Question: why?"})
evaluate.py ADDED
@@ -0,0 +1,92 @@
+ """
+  Copyright (c) 2022, salesforce.com, inc.
+  All rights reserved.
+  SPDX-License-Identifier: BSD-3-Clause
+  For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+ 
+ import argparse
+ import random
+ 
+ import numpy as np
+ import torch
+ import torch.backends.cudnn as cudnn
+ 
+ import lavis.tasks as tasks
+ from lavis.common.config import Config
+ from lavis.common.dist_utils import get_rank, init_distributed_mode
+ from lavis.common.logger import setup_logger
+ from lavis.common.optims import (
+     LinearWarmupCosineLRScheduler,
+     LinearWarmupStepLRScheduler,
+ )
+ from lavis.common.utils import now
+ 
+ # imports modules for registration
+ from lavis.datasets.builders import *
+ from lavis.models import *
+ from lavis.processors import *
+ from lavis.runners.runner_base import RunnerBase
+ from lavis.tasks import *
+ 
+ 
+ def parse_args():
+     parser = argparse.ArgumentParser(description="Training")
+ 
+     parser.add_argument("--cfg-path", required=True, help="path to configuration file.")
+     parser.add_argument(
+         "--options",
+         nargs="+",
+         help="override some settings in the used config, the key-value pair "
+         "in xxx=yyy format will be merged into config file (deprecate), "
+         "change to --cfg-options instead.",
+     )
+ 
+     args = parser.parse_args()
+     # if 'LOCAL_RANK' not in os.environ:
+     #     os.environ['LOCAL_RANK'] = str(args.local_rank)
+ 
+     return args
+ 
+ 
+ def setup_seeds(config):
+     seed = config.run_cfg.seed + get_rank()
+ 
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+ 
+     cudnn.benchmark = False
+     cudnn.deterministic = True
+ 
+ 
+ def main():
+     # allow auto-dl completes on main process without timeout when using NCCL backend.
+     # os.environ["NCCL_BLOCKING_WAIT"] = "1"
+ 
+     # set before init_distributed_mode() to ensure the same job_id shared across all ranks.
+     job_id = now()
+ 
+     cfg = Config(parse_args())
+ 
+     init_distributed_mode(cfg.run_cfg)
+ 
+     setup_seeds(cfg)
+ 
+     # set after init_distributed_mode() to only log on master.
+     setup_logger()
+ 
+     cfg.pretty_print()
+ 
+     task = tasks.setup_task(cfg)
+     datasets = task.build_datasets(cfg)
+     model = task.build_model(cfg)
+ 
+     # model.generate({"image": ['MMSKLGVLLTICLLLFPLTAVPLDGDQPADQPAERKQNEQHPLFDQKRGCCRWPCPSRCGMARCCSS','MMSKQPAERKQNEQHPLFDQKRGCCRWPCPSRCGMARCCSS']})
+ 
+     runner = RunnerBase(cfg=cfg, job_id=job_id, task=task, model=model, datasets=datasets)
+     runner.evaluate(skip_reload=True)
+ 
+ 
+ if __name__ == "__main__":
+     main()
requirements.txt ADDED
@@ -0,0 +1,29 @@
+ contexttimer
+ decord
+ diffusers<=0.16.0
+ einops>=0.4.1
+ fairscale==0.4.4
+ ftfy
+ iopath
+ ipython
+ omegaconf
+ opencv-python-headless==4.5.5.64
+ opendatasets
+ packaging
+ pandas
+ plotly
+ pre-commit
+ pycocoevalcap
+ pycocotools
+ python-magic
+ scikit-image
+ sentencepiece
+ spacy
+ streamlit
+ timm==0.4.12
+ torch>=1.10.0
+ torchvision
+ tqdm
+ transformers>=4.28.0
+ webdataset
+ wheel
run_scripts/blip2/eval/eval_cap_coco_opt2.7b.sh ADDED
@@ -0,0 +1 @@
+ python -m torch.distributed.run --nproc_per_node=2 evaluate.py --cfg-path lavis/projects/blip2/eval/caption_coco_opt2.7b_eval.yaml
run_scripts/blip2/eval/eval_cap_protein.sh ADDED
@@ -0,0 +1,16 @@
+ #!/bin/bash
+ #SBATCH -J get_eval1
+ #SBATCH -p nvidia
+ #SBATCH -N 1
+ #SBATCH -w node[87]
+ #SBATCH --mem 40G
+ #SBATCH --gres=gpu:2
+ #SBATCH --mail-type=ALL
+ #SBATCH --mail-user=icoxia@gmail.com
+ #SBATCH --output=log_eval1.out
+ #SBATCH --error=log_eval1.err
+ #SBATCH --cpus-per-task=4
+ module load anaconda3/2021.05
+ source activate LAVIS
+ 
+ python -m torch.distributed.run --nproc_per_node=2 evaluate.py --cfg-path lavis/projects/blip2/eval/caption_protein_eval.yaml
run_scripts/blip2/eval/eval_cap_protein_opt2.7b.sh ADDED
@@ -0,0 +1 @@
+ python -m torch.distributed.run --nproc_per_node=2 evaluate.py --cfg-path lavis/projects/blip2/eval/caption_protein_opt2.7b_eval.yaml
run_scripts/blip2/eval/eval_ret_coco.sh ADDED
@@ -0,0 +1,2 @@
+ python -m torch.distributed.run --nproc_per_node=8 evaluate.py --cfg-path lavis/projects/blip2/eval/ret_coco_eval.yaml
+ # python -m torch.distributed.run --nproc_per_node=16 evaluate.py --cfg-path lavis/projects/blip2/eval/ret_coco_eval.yaml
run_scripts/blip2/eval/eval_ret_flickr.sh ADDED
@@ -0,0 +1,2 @@
+ # python -m torch.distributed.run --nproc_per_node=16 evaluate.py --cfg-path lavis/projects/blip2/eval/ret_flickr_eval.yaml
+ python -m torch.distributed.run --nproc_per_node=8 evaluate.py --cfg-path lavis/projects/blip2/eval/ret_flickr_eval.yaml
run_scripts/blip2/train/gptProcessed_protein_pretrain_stage1.sh ADDED
@@ -0,0 +1,16 @@
+ #!/bin/bash
+ #SBATCH -J xwk_GPTstage1
+ #SBATCH -p Jupyter
+ #SBATCH -N 1
+ #SBATCH -w node[37]
+ #SBATCH --mem 220G
+ #SBATCH --gres=gpu:8
+ #SBATCH --mail-type=ALL
+ #SBATCH --mail-user=icoxia@gmail.com
+ #SBATCH --output=log.%j.out
+ #SBATCH --error=log.%j.err
+ #SBATCH --cpus-per-task=8
+ module load anaconda3/2021.05
+ source activate lavis
+ 
+ python -m torch.distributed.run --nproc_per_node=8 train.py --cfg-path lavis/projects/blip2/train/gptProcessed_test_stage1.yaml
run_scripts/blip2/train/mol_instruction_stage1.sh ADDED
@@ -0,0 +1,2 @@
+ #!/bin/bash
+ python -m torch.distributed.run --nproc_per_node=2 train.py --cfg-path lavis/projects/blip2/train/mol_instruction_stage1.yaml
run_scripts/blip2/train/mol_instruction_stage2.sh ADDED
@@ -0,0 +1,2 @@
+ #!/bin/bash
+ python -m torch.distributed.run --nproc_per_node=2 train.py --cfg-path lavis/projects/blip2/train/union_stage2.yaml
run_scripts/blip2/train/pretrain_stage1.sh ADDED
@@ -0,0 +1 @@
+ python -m torch.distributed.run --nproc_per_node=16 train.py --cfg-path lavis/projects/blip2/train/pretrain_stage1.yaml
run_scripts/blip2/train/pretrain_stage2.sh ADDED
@@ -0,0 +1 @@
+ python -m torch.distributed.run --nproc_per_node=16 train.py --cfg-path lavis/projects/blip2/train/pretrain_stage2.yaml
run_scripts/blip2/train/protein_evaluate_stage2.sh ADDED
@@ -0,0 +1,16 @@
+ #!/bin/bash
+ #SBATCH -J case
+ #SBATCH -p gpu1
+ #SBATCH -N 1
+ # #SBATCH -w node[85]
+ #SBATCH --mem 50G
+ #SBATCH --gres=gpu:1
+ #SBATCH --mail-type=ALL
+ #SBATCH --mail-user=icoxia@gmail.com
+ #SBATCH --output=log_eval.out
+ #SBATCH --error=log_eval.err
+ #SBATCH --cpus-per-task=8
+ module load anaconda3/2021.05
+ source activate LAVIS
+ 
+ python -m torch.distributed.run --nproc_per_node=1 train.py --cfg-path lavis/projects/blip2/train/test_stage2_evaluate.yaml
run_scripts/blip2/train/protein_finetune_stage1.sh ADDED
@@ -0,0 +1,16 @@
+ #!/bin/bash
+ #SBATCH -J fin
+ #SBATCH -p gpu1
+ #SBATCH -N 1
+ #SBATCH -w node[83]
+ #SBATCH --mem 100G
+ #SBATCH --gres=gpu:4
+ #SBATCH --mail-type=ALL
+ #SBATCH --mail-user=icoxia@gmail.com
+ #SBATCH --output=log.%j.out
+ #SBATCH --error=log.%j.err
+ #SBATCH --cpus-per-task=8
+ module load anaconda3/2021.05
+ source activate LAVIS
+ 
+ python -m torch.distributed.run --nproc_per_node=4 train.py --cfg-path lavis/projects/blip2/train/finetune_stage1.yaml
run_scripts/blip2/train/protein_finetune_stage2.sh ADDED
@@ -0,0 +1,16 @@
+ #!/bin/bash
+ #SBATCH -J ft
+ #SBATCH -p gpu1
+ #SBATCH -N 1
+ #SBATCH -w node[83]
+ #SBATCH --mem 200G
+ #SBATCH --gres=gpu:4
+ #SBATCH --mail-type=ALL
+ #SBATCH --mail-user=icoxia@gmail.com
+ #SBATCH --output=log.%j.out
+ #SBATCH --error=log.%j.err
+ #SBATCH --cpus-per-task=8
+ module load anaconda3/2021.05
+ source activate LAVIS
+ 
+ python -m torch.distributed.run --nproc_per_node=4 train.py --cfg-path lavis/projects/blip2/train/finetune_stage2.yaml
run_scripts/blip2/train/protein_pretrain_domain_stage1.sh ADDED
@@ -0,0 +1,2 @@
+ #!/bin/bash
+ python -m torch.distributed.run --nproc_per_node=4 train.py --cfg-path lavis/projects/blip2/train/protein_pretrain_stage1.yaml
run_scripts/blip2/train/protein_pretrain_domain_stage2.sh ADDED
@@ -0,0 +1,2 @@
+ #!/bin/bash
+ python -m torch.distributed.run --nproc_per_node=4 train.py --cfg-path lavis/projects/blip2/train/protein_pretrain_stage2.yaml
run_scripts/blip2/train/protein_pretrain_stage1.sh ADDED
@@ -0,0 +1,16 @@
+ #!/bin/bash
+ #SBATCH -J pre
+ #SBATCH -p gpu1
+ #SBATCH -N 1
+ #SBATCH -w node[82]
+ #SBATCH --mem 100G
+ #SBATCH --gres=gpu:2
+ #SBATCH --mail-type=ALL
+ #SBATCH --mail-user=icoxia@gmail.com
+ #SBATCH --output=log.%j.out
+ #SBATCH --error=log.%j.err
+ #SBATCH --cpus-per-task=8
+ module load anaconda3/2021.05
+ source activate LAVIS
+ 
+ python -m torch.distributed.run --nproc_per_node=2 train.py --cfg-path lavis/projects/blip2/train/test_stage1.yaml
run_scripts/blip2/train/protein_pretrain_stage2.sh ADDED
@@ -0,0 +1,16 @@
+ #!/bin/bash
+ #SBATCH -J te
+ #SBATCH -p gpu1
+ #SBATCH -N 1
+ #SBATCH -w node[83]
+ #SBATCH --mem 400G
+ #SBATCH --gres=gpu:4
+ #SBATCH --mail-type=ALL
+ #SBATCH --mail-user=icoxia@gmail.com
+ #SBATCH --output=log.%j.out
+ #SBATCH --error=log.%j.err
+ #SBATCH --cpus-per-task=8
+ module load anaconda3/2021.05
+ source activate LAVIS
+ 
+ python -m torch.distributed.run --nproc_per_node=4 train.py --cfg-path lavis/projects/blip2/train/test_stage2.yaml
run_scripts/blip2/train/train_caption_coco.sh ADDED
@@ -0,0 +1 @@
+ python -m torch.distributed.run --nproc_per_node=16 train.py --cfg-path lavis/projects/blip2/train/caption_coco_ft.yaml
run_scripts/blip2/train/train_function_reviewed.sh ADDED
@@ -0,0 +1,16 @@
+ #!/bin/bash
+ #SBATCH -J finetune_function
+ #SBATCH -p Jupyter
+ #SBATCH -N 1
+ #SBATCH -w node[39]
+ #SBATCH --mem 260G
+ #SBATCH --gres=gpu:7
+ #SBATCH --mail-type=ALL
+ #SBATCH --mail-user=icoxia@gmail.com
+ #SBATCH --output=log.%j.out
+ #SBATCH --error=log.%j.err
+ #SBATCH --cpus-per-task=8
+ module load anaconda3/2021.05
+ source activate LAVIS
+ 
+ python -m torch.distributed.run --nproc_per_node=7 train.py --cfg-path lavis/projects/blip2/train/reviewed_function_ft.yaml
run_scripts/blip2/train/train_get_eval.sh ADDED
@@ -0,0 +1,16 @@
+ #!/bin/bash
+ #SBATCH -J get_eval1
+ #SBATCH -p nvidia
+ #SBATCH -N 1
+ #SBATCH -w node[87]
+ #SBATCH --mem 20G
+ #SBATCH --gres=gpu:2
+ #SBATCH --mail-type=ALL
+ #SBATCH --mail-user=icoxia@gmail.com
+ #SBATCH --output=log_eval1.out
+ #SBATCH --error=log_eval1.err
+ #SBATCH --cpus-per-task=4
+ module load anaconda3/2021.05
+ source activate LAVIS
+ 
+ python -m torch.distributed.run --nproc_per_node=2 train.py --cfg-path /cluster/home/wenkai/LAVIS/lavis/projects/blip2/train/test_stage2_evaluate.yaml
run_scripts/blip2/train/train_retrieval_coco.sh ADDED
@@ -0,0 +1 @@
+ python -m torch.distributed.run --nproc_per_node=16 train.py --cfg-path lavis/projects/blip2/train/retrieval_coco_ft.yaml
run_scripts/blip2/train/union_stage1.sh ADDED
@@ -0,0 +1,16 @@
+ #!/bin/bash
+ #SBATCH -J secc1
+ #SBATCH -p gpu1
+ #SBATCH -N 1
+ #SBATCH -w node[83]
+ #SBATCH --mem 60G
+ #SBATCH --gres=gpu:4
+ #SBATCH --mail-type=ALL
+ #SBATCH --mail-user=icoxia@gmail.com
+ #SBATCH --output=log.%j.out
+ #SBATCH --error=log.%j.err
+ #SBATCH --cpus-per-task=8
+ module load anaconda3/2021.05
+ source activate LAVIS
+ 
+ python -m torch.distributed.run --nproc_per_node=4 train.py --cfg-path lavis/projects/blip2/train/union_stage1.yaml
run_scripts/blip2/train/union_stage2.sh ADDED
@@ -0,0 +1,16 @@
+ #!/bin/bash
+ #SBATCH -J secc2
+ #SBATCH -p gpu1
+ #SBATCH -N 1
+ #SBATCH -w node[84]
+ #SBATCH --mem 80G
+ #SBATCH --gres=gpu:8
+ #SBATCH --mail-type=ALL
+ #SBATCH --mail-user=icoxia@gmail.com
+ #SBATCH --output=log.%j.out
+ #SBATCH --error=log.%j.err
+ #SBATCH --cpus-per-task=8
+ module load anaconda3/2021.05
+ source activate LAVIS
+ 
+ python -m torch.distributed.run --nproc_per_node=8 train.py --cfg-path lavis/projects/blip2/train/union_stage2.yaml
salesforce_lavis.egg-info/PKG-INFO ADDED
@@ -0,0 +1,336 @@
+ Metadata-Version: 2.1
+ Name: salesforce-lavis
+ Version: 1.0.1
+ Summary: LAVIS - A One-stop Library for Language-Vision Intelligence
+ Author: Dongxu Li, Junnan Li, Hung Le, Guangsen Wang, Silvio Savarese, Steven C.H. Hoi
+ License: 3-Clause BSD
+ Keywords: Vision-Language,Multimodal,Image Captioning,Generative AI,Deep Learning,Library,PyTorch
+ Requires-Python: >=3.7.0
+ Description-Content-Type: text/markdown
+ License-File: LICENSE.txt
+ 
+ <p align="center">
+ <br>
+ <img src="docs/_static/logo_final.png" width="400"/>
+ <br>
+ <p>
+ 
+ <div align="center">
+ <a href="https://github.com/salesforce/LAVIS/releases"><img alt="Latest Release" src="https://img.shields.io/github/release/salesforce/LAVIS.svg" /></a>
+ <a href="https://opensource.salesforce.com/LAVIS/index.html">
+ <img alt="docs" src="https://github.com/salesforce/LAVIS/actions/workflows/docs.yaml/badge.svg"/>
+ <a href="https://opensource.org/licenses/BSD-3-Clause">
+ <img alt="license" src="https://img.shields.io/badge/License-BSD_3--Clause-blue.svg"/>
+ </a>
+ <a href="https://pepy.tech/project/salesforce-lavis">
+ <img alt="Downloads" src="https://pepy.tech/badge/salesforce-lavis">
+ </a>
+ </div>
+ 
+ <div align="center">
+ <a href="https://opensource.salesforce.com/LAVIS//latest/benchmark.html">Benchmark</a>,
+ <a href="https://arxiv.org/abs/2209.09019">Technical Report</a>,
+ <a href="https://opensource.salesforce.com/LAVIS//latest/index.html">Documentation</a>,
+ <a href="https://github.com/salesforce/LAVIS/tree/main/examples">Jupyter Notebook Examples</a>,
+ <a href="https://blog.salesforceairesearch.com/lavis-language-vision-library/">Blog</a>
+ </div>
+ 
+ # LAVIS - A Library for Language-Vision Intelligence
+ 
+ ## What's New: 🎉
+ * [Model Release] July 2023, released implementation of **BLIP-Diffusion** <br>
+ [Paper](https://arxiv.org/abs/2305.06500), [Project Page](https://github.com/salesforce/LAVIS/tree/main/projects/blip-diffusion), [Website](https://dxli94.github.io/BLIP-Diffusion-website/)
+ > A text-to-image generation model that trains 20x faster than DreamBooth. Also facilitates zero-shot subject-driven generation and editing.
+ * [Model Release] May 2023, released implementation of **InstructBLIP** <br>
+ [Paper](https://arxiv.org/abs/2305.06500), [Project Page](https://github.com/salesforce/LAVIS/tree/main/projects/instructblip)
+ > A new vision-language instruction-tuning framework using BLIP-2 models, achieving state-of-the-art zero-shot generalization performance on a wide range of vision-language tasks.
+ * [Model Release] Jan 2023, released implementation of **BLIP-2** <br>
+ [Paper](https://arxiv.org/abs/2301.12597), [Project Page](https://github.com/salesforce/LAVIS/tree/main/projects/blip2), [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/salesforce/LAVIS/blob/main/examples/blip2_instructed_generation.ipynb)
+ > A generic and efficient pre-training strategy that easily harvests development of pretrained vision models and large language models (LLMs) for vision-language pretraining. BLIP-2 beats Flamingo on zero-shot VQAv2 (**65.0** vs **56.3**), establishing new state-of-the-art on zero-shot captioning (on NoCaps **121.6** CIDEr score vs previous best **113.2**). In addition, equipped with powerful LLMs (e.g. OPT, FlanT5), BLIP-2 also unlocks the new **zero-shot instructed vision-to-language generation** capabilities for various interesting applications!
+ * Jan 2023, LAVIS is now available on [PyPI](https://pypi.org/project/salesforce-lavis/) for installation!
+ * [Model Release] Dec 2022, released implementation of **Img2LLM-VQA** (**CVPR 2023**, _"From Images to Textual Prompts: Zero-shot VQA with Frozen Large Language Models"_, by Jiaxian Guo et al) <br>
+ [Paper](https://arxiv.org/pdf/2212.10846.pdf), [Project Page](https://github.com/salesforce/LAVIS/tree/main/projects/img2llm-vqa), [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/salesforce/LAVIS/blob/main/projects/img2llm-vqa/img2llm_vqa.ipynb)
+ > A plug-and-play module that enables off-the-shelf use of Large Language Models (LLMs) for visual question answering (VQA). Img2LLM-VQA surpasses Flamingo on zero-shot VQA on VQAv2 (61.9 vs 56.3), while in contrast requiring no end-to-end training!
+ * [Model Release] Oct 2022, released implementation of **PNP-VQA** (**EMNLP Findings 2022**, _"Plug-and-Play VQA: Zero-shot VQA by Conjoining Large Pretrained Models with Zero Training"_, by Anthony T.M.H. et al), <br>
+ [Paper](https://arxiv.org/abs/2210.08773), [Project Page](https://github.com/salesforce/LAVIS/tree/main/projects/pnp-vqa), [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/salesforce/LAVIS/blob/main/projects/pnp-vqa/pnp_vqa.ipynb)
+ > A modular zero-shot VQA framework that requires no PLMs training, achieving SoTA zero-shot VQA performance.
+ 
+ ## Table of Contents
+ - [Introduction](#introduction)
+ - [Installation](#installation)
+ - [Getting Started](#getting-started)
+ - [Model Zoo](#model-zoo)
+ - [Image Captioning](#image-captioning)
+ - [Visual question answering (VQA)](#visual-question-answering-vqa)
+ - [Unified Feature Extraction Interface](#unified-feature-extraction-interface)
+ - [Load Datasets](#load-datasets)
+ - [Jupyter Notebook Examples](#jupyter-notebook-examples)
+ - [Resources and Tools](#resources-and-tools)
+ - [Documentations](#documentations)
+ - [Ethical and Responsible Use](#ethical-and-responsible-use)
+ - [Technical Report and Citing LAVIS](#technical-report-and-citing-lavis)
+ - [License](#license)
+ 
+ ## Introduction
+ LAVIS is a Python deep learning library for LAnguage-and-VISion intelligence research and applications. This library aims to provide engineers and researchers with a one-stop solution to rapidly develop models for their specific multimodal scenarios, and benchmark them across standard and customized datasets.
+ It features a unified interface design to access
+ - **10+** tasks
+ (retrieval, captioning, visual question answering, multimodal classification etc.);
+ - **20+** datasets (COCO, Flickr, Nocaps, Conceptual
+ Commons, SBU, etc.);
+ - **30+** pretrained weights of state-of-the-art foundation language-vision models and their task-specific adaptations, including [ALBEF](https://arxiv.org/pdf/2107.07651.pdf),
+ [BLIP](https://arxiv.org/pdf/2201.12086.pdf), [ALPRO](https://arxiv.org/pdf/2112.09583.pdf), [CLIP](https://arxiv.org/pdf/2103.00020.pdf).
+ <p align="center">
+ <br>
+ <img src="assets/demo-6.png"/>
+ <br>
+ <p>
+ 
+ Key features of LAVIS include:
+ 
+ - **Unified and Modular Interface**: facilitating to easily leverage and repurpose existing modules (datasets, models, preprocessors), also to add new modules.
+ 
+ - **Easy Off-the-shelf Inference and Feature Extraction**: readily available pre-trained models let you take advantage of state-of-the-art multimodal understanding and generation capabilities on your own data.
+ 
+ - **Reproducible Model Zoo and Training Recipes**: easily replicate and extend state-of-the-art models on existing and new tasks.
+ 
+ - **Dataset Zoo and Automatic Downloading Tools**: it can be a hassle to prepare the many language-vision datasets. LAVIS provides automatic downloading scripts to help prepare a large variety of datasets and their annotations.
+ 
+ 
+ The following table shows the supported tasks, datasets and models in our library. This is a continuing effort and we are working on further growing the list.
+ 
+ | Tasks | Supported Models | Supported Datasets |
+ | :--------------------------------------: | :----------------------: | :----------------------------------------: |
+ | Image-text Pre-training | ALBEF, BLIP | COCO, VisualGenome, SBU ConceptualCaptions |
+ | Image-text Retrieval | ALBEF, BLIP, CLIP | COCO, Flickr30k |
+ | Text-image Retrieval | ALBEF, BLIP, CLIP | COCO, Flickr30k |
+ | Visual Question Answering | ALBEF, BLIP | VQAv2, OKVQA, A-OKVQA |
+ | Image Captioning | BLIP | COCO, NoCaps |
+ | Image Classification | CLIP | ImageNet |
+ | Natural Language Visual Reasoning (NLVR) | ALBEF, BLIP | NLVR2 |
+ | Visual Entailment (VE) | ALBEF | SNLI-VE |
+ | Visual Dialogue | BLIP | VisDial |
+ | Video-text Retrieval | BLIP, ALPRO | MSRVTT, DiDeMo |
+ | Text-video Retrieval | BLIP, ALPRO | MSRVTT, DiDeMo |
+ | Video Question Answering (VideoQA) | BLIP, ALPRO | MSRVTT, MSVD |
+ | Video Dialogue | VGD-GPT | AVSD |
+ | Multimodal Feature Extraction | ALBEF, CLIP, BLIP, ALPRO | customized |
+ | Text-to-image Generation | [COMING SOON] | |
+ 
+ ## Installation
+ 
+ 1. (Optional) Creating conda environment
+ 
+ ```bash
+ conda create -n lavis python=3.8
+ conda activate lavis
+ ```
+ 
+ 2. install from [PyPI](https://pypi.org/project/salesforce-lavis/)
+ ```bash
+ pip install salesforce-lavis
+ ```
+ 
+ 3. Or, for development, you may build from source
+ 
+ ```bash
+ git clone https://github.com/salesforce/LAVIS.git
+ cd LAVIS
+ pip install -e .
+ ```
+ 
+ ## Getting Started
+ ### Model Zoo
+ Model zoo summarizes supported models in LAVIS, to view:
+ ```python
+ from lavis.models import model_zoo
+ print(model_zoo)
+ # ==================================================
+ # Architectures                  Types
+ # ==================================================
+ # albef_classification           ve
+ # albef_feature_extractor        base
+ # albef_nlvr                     nlvr
+ # albef_pretrain                 base
+ # albef_retrieval                coco, flickr
+ # albef_vqa                      vqav2
+ # alpro_qa                       msrvtt, msvd
+ # alpro_retrieval                msrvtt, didemo
+ # blip_caption                   base_coco, large_coco
+ # blip_classification            base
+ # blip_feature_extractor         base
+ # blip_nlvr                      nlvr
+ # blip_pretrain                  base
+ # blip_retrieval                 coco, flickr
+ # blip_vqa                       vqav2, okvqa, aokvqa
+ # clip_feature_extractor         ViT-B-32, ViT-B-16, ViT-L-14, ViT-L-14-336, RN50
+ # clip                           ViT-B-32, ViT-B-16, ViT-L-14, ViT-L-14-336, RN50
+ # gpt_dialogue                   base
+ ```
+ 
+ Let’s see how to use models in LAVIS to perform inference on example data. We first load a sample image from local.
+ 
+ ```python
+ import torch
+ from PIL import Image
+ # setup device to use
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ # load sample image
+ raw_image = Image.open("docs/_static/merlion.png").convert("RGB")
+ ```
+ 
+ This example image shows [Merlion park](https://en.wikipedia.org/wiki/Merlion) ([source](https://theculturetrip.com/asia/singapore/articles/what-exactly-is-singapores-merlion-anyway/)), a landmark in Singapore.
+ 
+ 
+ ### Image Captioning
+ In this example, we use the BLIP model to generate a caption for the image. To make inference even easier, we also associate each
+ pre-trained model with its preprocessors (transforms), accessed via ``load_model_and_preprocess()``.
+ 
+ ```python
+ import torch
+ from lavis.models import load_model_and_preprocess
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ # loads BLIP caption base model, with finetuned checkpoints on MSCOCO captioning dataset.
+ # this also loads the associated image processors
+ model, vis_processors, _ = load_model_and_preprocess(name="blip_caption", model_type="base_coco", is_eval=True, device=device)
+ # preprocess the image
+ # vis_processors stores image transforms for "train" and "eval" (validation / testing / inference)
+ image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
+ # generate caption
+ model.generate({"image": image})
+ # ['a large fountain spewing water into the air']
+ ```
+ 
+ ### Visual question answering (VQA)
+ BLIP model is able to answer free-form questions about images in natural language.
+ To access the VQA model, simply replace the ``name`` and ``model_type`` arguments
+ passed to ``load_model_and_preprocess()``.
+ 
+ ```python
+ from lavis.models import load_model_and_preprocess
+ model, vis_processors, txt_processors = load_model_and_preprocess(name="blip_vqa", model_type="vqav2", is_eval=True, device=device)
+ # ask a random question.
+ question = "Which city is this photo taken?"
+ image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
+ question = txt_processors["eval"](question)
+ model.predict_answers(samples={"image": image, "text_input": question}, inference_method="generate")
+ # ['singapore']
+ ```
+ 
+ ### Unified Feature Extraction Interface
+ 
+ LAVIS provides a unified interface to extract features from each architecture.
+ To extract features, we load the feature extractor variants of each model.
+ The multimodal feature can be used for multimodal classification.
+ The low-dimensional unimodal features can be used to compute cross-modal similarity.
+ 
+ 
+ ```python
+ from lavis.models import load_model_and_preprocess
+ model, vis_processors, txt_processors = load_model_and_preprocess(name="blip_feature_extractor", model_type="base", is_eval=True, device=device)
+ caption = "a large fountain spewing water into the air"
+ image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
+ text_input = txt_processors["eval"](caption)
+ sample = {"image": image, "text_input": [text_input]}
+ 
+ features_multimodal = model.extract_features(sample)
+ print(features_multimodal.multimodal_embeds.shape)
+ # torch.Size([1, 12, 768]), use features_multimodal[:,0,:] for multimodal classification tasks
+ 
+ features_image = model.extract_features(sample, mode="image")
+ features_text = model.extract_features(sample, mode="text")
+ print(features_image.image_embeds.shape)
+ # torch.Size([1, 197, 768])
+ print(features_text.text_embeds.shape)
+ # torch.Size([1, 12, 768])
+ 
+ # low-dimensional projected features
+ print(features_image.image_embeds_proj.shape)
+ # torch.Size([1, 197, 256])
+ print(features_text.text_embeds_proj.shape)
+ # torch.Size([1, 12, 256])
+ similarity = features_image.image_embeds_proj[:,0,:] @ features_text.text_embeds_proj[:,0,:].t()
+ print(similarity)
+ # tensor([[0.2622]])
+ ```
+ 
+ ### Load Datasets
+ LAVIS inherently supports a wide variety of common language-vision datasets by providing [automatic download tools](https://opensource.salesforce.com/LAVIS//latest/benchmark) to help download and organize these datasets. After downloading, to load the datasets, use the following code:
+ 
+ ```python
+ from lavis.datasets.builders import dataset_zoo
+ dataset_names = dataset_zoo.get_names()
+ print(dataset_names)
+ # ['aok_vqa', 'coco_caption', 'coco_retrieval', 'coco_vqa', 'conceptual_caption_12m',
+ #  'conceptual_caption_3m', 'didemo_retrieval', 'flickr30k', 'imagenet', 'laion2B_multi',
+ #  'msrvtt_caption', 'msrvtt_qa', 'msrvtt_retrieval', 'msvd_caption', 'msvd_qa', 'nlvr',
+ #  'nocaps', 'ok_vqa', 'sbu_caption', 'snli_ve', 'vatex_caption', 'vg_caption', 'vg_vqa']
+ ```
+ After downloading the images, we can use ``load_dataset()`` to obtain the dataset.
+ ```python
+ from lavis.datasets.builders import load_dataset
+ coco_dataset = load_dataset("coco_caption")
+ print(coco_dataset.keys())
+ # dict_keys(['train', 'val', 'test'])
+ print(len(coco_dataset["train"]))
+ # 566747
+ print(coco_dataset["train"][0])
+ # {'image': <PIL.Image.Image image mode=RGB size=640x480>,
+ #  'text_input': 'A woman wearing a net on her head cutting a cake. ',
+ #  'image_id': 0}
+ ```
+ 
+ If you already host a local copy of the dataset, you can pass in the ``vis_path`` argument to change the default location to load images.
+ 
+ ```python
+ coco_dataset = load_dataset("coco_caption", vis_path=YOUR_LOCAL_PATH)
+ ```
+ 
+ ## Jupyter Notebook Examples
+ See [examples](https://github.com/salesforce/LAVIS/tree/main/examples) for more inference examples, e.g. captioning, feature extraction, VQA, GradCam, zero-shot classification.
+ 
+ ## Resources and Tools
+ - **Benchmarks**: see [Benchmark](https://opensource.salesforce.com/LAVIS//latest/benchmark) for instructions to evaluate and train supported models.
+ - **Dataset Download and Browsing**: see [Dataset Download](https://opensource.salesforce.com/LAVIS//latest/benchmark) for instructions and automatic tools for downloading common language-vision datasets.
+ - **GUI Demo**: to run the demo locally, run ```bash run_scripts/run_demo.sh``` and then follow the instruction on the prompts to view in browser. A web demo is coming soon.
+ 
+ 
+ ## Documentations
+ For more details and advanced usages, please refer to
+ [documentation](https://opensource.salesforce.com/LAVIS//latest/index.html#).
+ 
+ ## Ethical and Responsible Use
+ We note that models in LAVIS provide no guarantees on their multimodal abilities; incorrect or biased predictions may be observed. In particular, the datasets and pretrained models utilized in LAVIS may contain socioeconomic biases which could result in misclassification and other unwanted behaviors such as offensive or inappropriate speech. We strongly recommend that users review the pre-trained models and overall system in LAVIS before practical adoption. We plan to improve the library by investigating and mitigating these potential biases and
+ inappropriate behaviors in the future.
+ 
+ 
+ ## Technical Report and Citing LAVIS
+ You can find more details in our [technical report](https://arxiv.org/abs/2209.09019).
+ 
+ If you're using LAVIS in your research or applications, please cite using this BibTeX:
+ ```bibtex
+ @inproceedings{li-etal-2023-lavis,
+     title = "{LAVIS}: A One-stop Library for Language-Vision Intelligence",
+     author = "Li, Dongxu and
+       Li, Junnan and
+       Le, Hung and
+       Wang, Guangsen and
+       Savarese, Silvio and
+       Hoi, Steven C.H.",
+     booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)",
+     month = jul,
+     year = "2023",
+     address = "Toronto, Canada",
+     publisher = "Association for Computational Linguistics",
+     url = "https://aclanthology.org/2023.acl-demo.3",
+     pages = "31--41",
+     abstract = "We introduce LAVIS, an open-source deep learning library for LAnguage-VISion research and applications. LAVIS aims to serve as a one-stop comprehensive library that brings recent advancements in the language-vision field accessible for researchers and practitioners, as well as fertilizing future research and development. It features a unified interface to easily access state-of-the-art image-language, video-language models and common datasets. LAVIS supports training, evaluation and benchmarking on a rich variety of tasks, including multimodal classification, retrieval, captioning, visual question answering, dialogue and pre-training. In the meantime, the library is also highly extensible and configurable, facilitating future development and customization. In this technical report, we describe design principles, key components and functionalities of the library, and also present benchmarking results across common language-vision tasks.",
+ }
+ ```
+ 
+ ## Contact us
+ If you have any questions, comments or suggestions, please do not hesitate to contact us at lavis@salesforce.com.
+ 
+ ## License
+ [BSD 3-Clause License](LICENSE.txt)
salesforce_lavis.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,715 @@
+ LICENSE.txt
+ MANIFEST.in
+ README.md
+ pyproject.toml
+ requirements.txt
+ setup.py
+ app/__init__.py
+ app/calculate_coco_features.py
+ app/caption.py
+ app/classification.py
+ app/dataset_browser.py
+ app/image_text_match.py
+ app/main.py
+ app/multimodal_search.py
+ app/multipage.py
+ app/text_localization.py
+ app/utils.py
+ app/vqa.py
+ data/prepare_go_cls_protein_function_data.py
+ data/prepare_go_cls_protein_function_data_reviewed.py
+ docs/conf.py
+ lavis/__init__.py
+ lavis/common/config.py
+ lavis/common/dist_utils.py
+ lavis/common/gradcam.py
+ lavis/common/logger.py
+ lavis/common/optims.py
+ lavis/common/registry.py
+ lavis/common/utils.py
+ lavis/common/annotator/util.py
+ lavis/common/annotator/canny/__init__.py
+ lavis/common/annotator/hed/__init__.py
+ lavis/common/annotator/midas/__init__.py
+ lavis/common/annotator/midas/api.py
+ lavis/common/annotator/midas/utils.py
+ lavis/common/annotator/midas/midas/__init__.py
+ lavis/common/annotator/midas/midas/base_model.py
+ lavis/common/annotator/midas/midas/blocks.py
+ lavis/common/annotator/midas/midas/dpt_depth.py
+ lavis/common/annotator/midas/midas/midas_net.py
+ lavis/common/annotator/midas/midas/midas_net_custom.py
+ lavis/common/annotator/midas/midas/transforms.py
+ lavis/common/annotator/midas/midas/vit.py
+ lavis/common/annotator/mlsd/__init__.py
+ lavis/common/annotator/mlsd/utils.py
+ lavis/common/annotator/mlsd/models/mbv2_mlsd_large.py
+ lavis/common/annotator/mlsd/models/mbv2_mlsd_tiny.py
+ lavis/common/annotator/openpose/__init__.py
+ lavis/common/annotator/openpose/body.py
+ lavis/common/annotator/openpose/hand.py
+ lavis/common/annotator/openpose/model.py
+ lavis/common/annotator/openpose/util.py
+ lavis/common/annotator/uniformer/__init__.py
+ lavis/common/annotator/uniformer/configs/_base_/default_runtime.py
+ lavis/common/annotator/uniformer/configs/_base_/datasets/ade20k.py
+ lavis/common/annotator/uniformer/configs/_base_/datasets/chase_db1.py
+ lavis/common/annotator/uniformer/configs/_base_/datasets/cityscapes.py
+ lavis/common/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py
+ lavis/common/annotator/uniformer/configs/_base_/datasets/drive.py
+ lavis/common/annotator/uniformer/configs/_base_/datasets/hrf.py
+ lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_context.py
+ lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py
+ lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py
+ lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py
+ lavis/common/annotator/uniformer/configs/_base_/datasets/stare.py
+ lavis/common/annotator/uniformer/configs/_base_/models/ann_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/cgnet.py
+ lavis/common/annotator/uniformer/configs/_base_/models/danet_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py
+ lavis/common/annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/dnl_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/emanet_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/encnet_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/fast_scnn.py
+ lavis/common/annotator/uniformer/configs/_base_/models/fcn_hr18.py
+ lavis/common/annotator/uniformer/configs/_base_/models/fcn_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py
+ lavis/common/annotator/uniformer/configs/_base_/models/fpn_r50.py
+ lavis/common/annotator/uniformer/configs/_base_/models/fpn_uniformer.py
+ lavis/common/annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/ocrnet_hr18.py
+ lavis/common/annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/pointrend_r50.py
+ lavis/common/annotator/uniformer/configs/_base_/models/psanet_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py
+ lavis/common/annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py
+ lavis/common/annotator/uniformer/configs/_base_/models/upernet_r50.py
+ lavis/common/annotator/uniformer/configs/_base_/models/upernet_uniformer.py
+ lavis/common/annotator/uniformer/configs/_base_/schedules/schedule_160k.py
+ lavis/common/annotator/uniformer/configs/_base_/schedules/schedule_20k.py
+ lavis/common/annotator/uniformer/configs/_base_/schedules/schedule_40k.py
+ lavis/common/annotator/uniformer/configs/_base_/schedules/schedule_80k.py
+ lavis/common/annotator/uniformer/exp/upernet_global_small/config.py
+ lavis/common/annotator/uniformer/exp/upernet_global_small/test_config_g.py
+ lavis/common/annotator/uniformer/exp/upernet_global_small/test_config_h32.py
+ lavis/common/annotator/uniformer/exp/upernet_global_small/test_config_w32.py
+ lavis/common/annotator/uniformer/mmcv/__init__.py
+ lavis/common/annotator/uniformer/mmcv/version.py
+ lavis/common/annotator/uniformer/mmcv/arraymisc/__init__.py
+ lavis/common/annotator/uniformer/mmcv/arraymisc/quantization.py
+ lavis/common/annotator/uniformer/mmcv/cnn/__init__.py
+ lavis/common/annotator/uniformer/mmcv/cnn/alexnet.py
+ lavis/common/annotator/uniformer/mmcv/cnn/builder.py
+ lavis/common/annotator/uniformer/mmcv/cnn/resnet.py
+ lavis/common/annotator/uniformer/mmcv/cnn/vgg.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/__init__.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/activation.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/context_block.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/conv.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/conv_module.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/conv_ws.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/drop.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/generalized_attention.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/hsigmoid.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/hswish.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/non_local.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/norm.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/padding.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/plugin.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/registry.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/scale.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/swish.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/transformer.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/upsample.py
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/wrappers.py
+ lavis/common/annotator/uniformer/mmcv/cnn/utils/__init__.py
+ lavis/common/annotator/uniformer/mmcv/cnn/utils/flops_counter.py
+ lavis/common/annotator/uniformer/mmcv/cnn/utils/fuse_conv_bn.py
+ lavis/common/annotator/uniformer/mmcv/cnn/utils/sync_bn.py
+ lavis/common/annotator/uniformer/mmcv/cnn/utils/weight_init.py
+ lavis/common/annotator/uniformer/mmcv/engine/__init__.py
+ lavis/common/annotator/uniformer/mmcv/engine/test.py
+ lavis/common/annotator/uniformer/mmcv/fileio/__init__.py
+ lavis/common/annotator/uniformer/mmcv/fileio/file_client.py
+ lavis/common/annotator/uniformer/mmcv/fileio/io.py
+ lavis/common/annotator/uniformer/mmcv/fileio/parse.py
+ lavis/common/annotator/uniformer/mmcv/fileio/handlers/__init__.py
+ lavis/common/annotator/uniformer/mmcv/fileio/handlers/base.py
+ lavis/common/annotator/uniformer/mmcv/fileio/handlers/json_handler.py
+ lavis/common/annotator/uniformer/mmcv/fileio/handlers/pickle_handler.py
+ lavis/common/annotator/uniformer/mmcv/fileio/handlers/yaml_handler.py
+ lavis/common/annotator/uniformer/mmcv/image/__init__.py
+ lavis/common/annotator/uniformer/mmcv/image/colorspace.py
+ lavis/common/annotator/uniformer/mmcv/image/geometric.py
+ lavis/common/annotator/uniformer/mmcv/image/io.py
+ lavis/common/annotator/uniformer/mmcv/image/misc.py
+ lavis/common/annotator/uniformer/mmcv/image/photometric.py
+ lavis/common/annotator/uniformer/mmcv/ops/__init__.py
+ lavis/common/annotator/uniformer/mmcv/ops/assign_score_withk.py
+ lavis/common/annotator/uniformer/mmcv/ops/ball_query.py
+ lavis/common/annotator/uniformer/mmcv/ops/bbox.py
+ lavis/common/annotator/uniformer/mmcv/ops/border_align.py
+ lavis/common/annotator/uniformer/mmcv/ops/box_iou_rotated.py
+ lavis/common/annotator/uniformer/mmcv/ops/carafe.py
+ lavis/common/annotator/uniformer/mmcv/ops/cc_attention.py
+ lavis/common/annotator/uniformer/mmcv/ops/contour_expand.py
+ lavis/common/annotator/uniformer/mmcv/ops/corner_pool.py
+ lavis/common/annotator/uniformer/mmcv/ops/correlation.py
+ lavis/common/annotator/uniformer/mmcv/ops/deform_conv.py
+ lavis/common/annotator/uniformer/mmcv/ops/deform_roi_pool.py
+ lavis/common/annotator/uniformer/mmcv/ops/deprecated_wrappers.py
+ lavis/common/annotator/uniformer/mmcv/ops/focal_loss.py
+ lavis/common/annotator/uniformer/mmcv/ops/furthest_point_sample.py
+ lavis/common/annotator/uniformer/mmcv/ops/fused_bias_leakyrelu.py
+ lavis/common/annotator/uniformer/mmcv/ops/gather_points.py
+ lavis/common/annotator/uniformer/mmcv/ops/group_points.py
+ lavis/common/annotator/uniformer/mmcv/ops/info.py
+ lavis/common/annotator/uniformer/mmcv/ops/iou3d.py
+ lavis/common/annotator/uniformer/mmcv/ops/knn.py
+ lavis/common/annotator/uniformer/mmcv/ops/masked_conv.py
+ lavis/common/annotator/uniformer/mmcv/ops/merge_cells.py
+ lavis/common/annotator/uniformer/mmcv/ops/modulated_deform_conv.py
+ lavis/common/annotator/uniformer/mmcv/ops/multi_scale_deform_attn.py
+ lavis/common/annotator/uniformer/mmcv/ops/nms.py
+ lavis/common/annotator/uniformer/mmcv/ops/pixel_group.py
+ lavis/common/annotator/uniformer/mmcv/ops/point_sample.py
+ lavis/common/annotator/uniformer/mmcv/ops/points_in_boxes.py
+ lavis/common/annotator/uniformer/mmcv/ops/points_sampler.py
+ lavis/common/annotator/uniformer/mmcv/ops/psa_mask.py
+ lavis/common/annotator/uniformer/mmcv/ops/roi_align.py
+ lavis/common/annotator/uniformer/mmcv/ops/roi_align_rotated.py
+ lavis/common/annotator/uniformer/mmcv/ops/roi_pool.py
+ lavis/common/annotator/uniformer/mmcv/ops/roiaware_pool3d.py
+ lavis/common/annotator/uniformer/mmcv/ops/roipoint_pool3d.py
+ lavis/common/annotator/uniformer/mmcv/ops/saconv.py
+ lavis/common/annotator/uniformer/mmcv/ops/scatter_points.py
+ lavis/common/annotator/uniformer/mmcv/ops/sync_bn.py
+ lavis/common/annotator/uniformer/mmcv/ops/three_interpolate.py
+ lavis/common/annotator/uniformer/mmcv/ops/three_nn.py
+ lavis/common/annotator/uniformer/mmcv/ops/tin_shift.py
+ lavis/common/annotator/uniformer/mmcv/ops/upfirdn2d.py
+ lavis/common/annotator/uniformer/mmcv/ops/voxelize.py
+ lavis/common/annotator/uniformer/mmcv/parallel/__init__.py
+ lavis/common/annotator/uniformer/mmcv/parallel/_functions.py
+ lavis/common/annotator/uniformer/mmcv/parallel/collate.py
+ lavis/common/annotator/uniformer/mmcv/parallel/data_container.py
+ lavis/common/annotator/uniformer/mmcv/parallel/data_parallel.py
+ lavis/common/annotator/uniformer/mmcv/parallel/distributed.py
+ lavis/common/annotator/uniformer/mmcv/parallel/distributed_deprecated.py
+ lavis/common/annotator/uniformer/mmcv/parallel/registry.py
+ lavis/common/annotator/uniformer/mmcv/parallel/scatter_gather.py
+ lavis/common/annotator/uniformer/mmcv/parallel/utils.py
+ lavis/common/annotator/uniformer/mmcv/runner/__init__.py
+ lavis/common/annotator/uniformer/mmcv/runner/base_module.py
+ lavis/common/annotator/uniformer/mmcv/runner/base_runner.py
+ lavis/common/annotator/uniformer/mmcv/runner/builder.py
+ lavis/common/annotator/uniformer/mmcv/runner/checkpoint.py
+ lavis/common/annotator/uniformer/mmcv/runner/default_constructor.py
+ lavis/common/annotator/uniformer/mmcv/runner/dist_utils.py
+ lavis/common/annotator/uniformer/mmcv/runner/epoch_based_runner.py
+ lavis/common/annotator/uniformer/mmcv/runner/fp16_utils.py
+ lavis/common/annotator/uniformer/mmcv/runner/iter_based_runner.py
+ lavis/common/annotator/uniformer/mmcv/runner/log_buffer.py
+ lavis/common/annotator/uniformer/mmcv/runner/priority.py
+ lavis/common/annotator/uniformer/mmcv/runner/utils.py
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/__init__.py
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/checkpoint.py
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/closure.py
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/ema.py
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/evaluation.py
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/hook.py
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/iter_timer.py
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/lr_updater.py
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/memory.py
233
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/momentum_updater.py
234
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/optimizer.py
235
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/profiler.py
236
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/sampler_seed.py
237
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/sync_buffer.py
238
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/__init__.py
239
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/base.py
240
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/dvclive.py
241
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/mlflow.py
242
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/neptune.py
243
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/pavi.py
244
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/tensorboard.py
245
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/text.py
246
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/wandb.py
247
+ lavis/common/annotator/uniformer/mmcv/runner/optimizer/__init__.py
248
+ lavis/common/annotator/uniformer/mmcv/runner/optimizer/builder.py
249
+ lavis/common/annotator/uniformer/mmcv/runner/optimizer/default_constructor.py
250
+ lavis/common/annotator/uniformer/mmcv/utils/__init__.py
251
+ lavis/common/annotator/uniformer/mmcv/utils/config.py
252
+ lavis/common/annotator/uniformer/mmcv/utils/env.py
253
+ lavis/common/annotator/uniformer/mmcv/utils/ext_loader.py
254
+ lavis/common/annotator/uniformer/mmcv/utils/logging.py
255
+ lavis/common/annotator/uniformer/mmcv/utils/misc.py
256
+ lavis/common/annotator/uniformer/mmcv/utils/parrots_jit.py
257
+ lavis/common/annotator/uniformer/mmcv/utils/parrots_wrapper.py
258
+ lavis/common/annotator/uniformer/mmcv/utils/path.py
259
+ lavis/common/annotator/uniformer/mmcv/utils/progressbar.py
260
+ lavis/common/annotator/uniformer/mmcv/utils/registry.py
261
+ lavis/common/annotator/uniformer/mmcv/utils/testing.py
262
+ lavis/common/annotator/uniformer/mmcv/utils/timer.py
263
+ lavis/common/annotator/uniformer/mmcv/utils/trace.py
264
+ lavis/common/annotator/uniformer/mmcv/utils/version_utils.py
265
+ lavis/common/annotator/uniformer/mmcv/video/__init__.py
266
+ lavis/common/annotator/uniformer/mmcv/video/io.py
267
+ lavis/common/annotator/uniformer/mmcv/video/optflow.py
268
+ lavis/common/annotator/uniformer/mmcv/video/processing.py
269
+ lavis/common/annotator/uniformer/mmcv/visualization/__init__.py
270
+ lavis/common/annotator/uniformer/mmcv/visualization/color.py
271
+ lavis/common/annotator/uniformer/mmcv/visualization/image.py
272
+ lavis/common/annotator/uniformer/mmcv/visualization/optflow.py
273
+ lavis/common/annotator/uniformer/mmcv_custom/__init__.py
274
+ lavis/common/annotator/uniformer/mmcv_custom/checkpoint.py
275
+ lavis/common/annotator/uniformer/mmseg/apis/__init__.py
276
+ lavis/common/annotator/uniformer/mmseg/apis/inference.py
277
+ lavis/common/annotator/uniformer/mmseg/apis/test.py
278
+ lavis/common/annotator/uniformer/mmseg/apis/train.py
279
+ lavis/common/annotator/uniformer/mmseg/core/__init__.py
280
+ lavis/common/annotator/uniformer/mmseg/core/evaluation/__init__.py
281
+ lavis/common/annotator/uniformer/mmseg/core/evaluation/class_names.py
282
+ lavis/common/annotator/uniformer/mmseg/core/evaluation/eval_hooks.py
283
+ lavis/common/annotator/uniformer/mmseg/core/evaluation/metrics.py
284
+ lavis/common/annotator/uniformer/mmseg/core/seg/__init__.py
285
+ lavis/common/annotator/uniformer/mmseg/core/seg/builder.py
286
+ lavis/common/annotator/uniformer/mmseg/core/seg/sampler/__init__.py
287
+ lavis/common/annotator/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py
288
+ lavis/common/annotator/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py
289
+ lavis/common/annotator/uniformer/mmseg/core/utils/__init__.py
290
+ lavis/common/annotator/uniformer/mmseg/core/utils/misc.py
291
+ lavis/common/annotator/uniformer/mmseg/datasets/__init__.py
292
+ lavis/common/annotator/uniformer/mmseg/datasets/ade.py
293
+ lavis/common/annotator/uniformer/mmseg/datasets/builder.py
294
+ lavis/common/annotator/uniformer/mmseg/datasets/chase_db1.py
295
+ lavis/common/annotator/uniformer/mmseg/datasets/cityscapes.py
296
+ lavis/common/annotator/uniformer/mmseg/datasets/custom.py
297
+ lavis/common/annotator/uniformer/mmseg/datasets/dataset_wrappers.py
298
+ lavis/common/annotator/uniformer/mmseg/datasets/drive.py
299
+ lavis/common/annotator/uniformer/mmseg/datasets/hrf.py
300
+ lavis/common/annotator/uniformer/mmseg/datasets/pascal_context.py
301
+ lavis/common/annotator/uniformer/mmseg/datasets/stare.py
302
+ lavis/common/annotator/uniformer/mmseg/datasets/voc.py
303
+ lavis/common/annotator/uniformer/mmseg/datasets/pipelines/__init__.py
304
+ lavis/common/annotator/uniformer/mmseg/datasets/pipelines/compose.py
305
+ lavis/common/annotator/uniformer/mmseg/datasets/pipelines/formating.py
306
+ lavis/common/annotator/uniformer/mmseg/datasets/pipelines/loading.py
307
+ lavis/common/annotator/uniformer/mmseg/datasets/pipelines/test_time_aug.py
308
+ lavis/common/annotator/uniformer/mmseg/datasets/pipelines/transforms.py
309
+ lavis/common/annotator/uniformer/mmseg/models/__init__.py
310
+ lavis/common/annotator/uniformer/mmseg/models/builder.py
311
+ lavis/common/annotator/uniformer/mmseg/models/backbones/__init__.py
312
+ lavis/common/annotator/uniformer/mmseg/models/backbones/cgnet.py
313
+ lavis/common/annotator/uniformer/mmseg/models/backbones/fast_scnn.py
314
+ lavis/common/annotator/uniformer/mmseg/models/backbones/hrnet.py
315
+ lavis/common/annotator/uniformer/mmseg/models/backbones/mobilenet_v2.py
316
+ lavis/common/annotator/uniformer/mmseg/models/backbones/mobilenet_v3.py
317
+ lavis/common/annotator/uniformer/mmseg/models/backbones/resnest.py
318
+ lavis/common/annotator/uniformer/mmseg/models/backbones/resnet.py
319
+ lavis/common/annotator/uniformer/mmseg/models/backbones/resnext.py
320
+ lavis/common/annotator/uniformer/mmseg/models/backbones/unet.py
321
+ lavis/common/annotator/uniformer/mmseg/models/backbones/uniformer.py
322
+ lavis/common/annotator/uniformer/mmseg/models/backbones/vit.py
323
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/__init__.py
324
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/ann_head.py
325
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/apc_head.py
326
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/aspp_head.py
327
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/cascade_decode_head.py
328
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/cc_head.py
329
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/da_head.py
330
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/decode_head.py
331
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/dm_head.py
332
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/dnl_head.py
333
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/ema_head.py
334
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/enc_head.py
335
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/fcn_head.py
336
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/fpn_head.py
337
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/gc_head.py
338
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/lraspp_head.py
339
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/nl_head.py
340
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/ocr_head.py
341
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/point_head.py
342
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/psa_head.py
343
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/psp_head.py
344
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/sep_aspp_head.py
345
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/sep_fcn_head.py
346
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/uper_head.py
347
+ lavis/common/annotator/uniformer/mmseg/models/losses/__init__.py
348
+ lavis/common/annotator/uniformer/mmseg/models/losses/accuracy.py
349
+ lavis/common/annotator/uniformer/mmseg/models/losses/cross_entropy_loss.py
350
+ lavis/common/annotator/uniformer/mmseg/models/losses/dice_loss.py
351
+ lavis/common/annotator/uniformer/mmseg/models/losses/lovasz_loss.py
352
+ lavis/common/annotator/uniformer/mmseg/models/losses/utils.py
353
+ lavis/common/annotator/uniformer/mmseg/models/necks/__init__.py
354
+ lavis/common/annotator/uniformer/mmseg/models/necks/fpn.py
355
+ lavis/common/annotator/uniformer/mmseg/models/necks/multilevel_neck.py
356
+ lavis/common/annotator/uniformer/mmseg/models/segmentors/__init__.py
357
+ lavis/common/annotator/uniformer/mmseg/models/segmentors/base.py
358
+ lavis/common/annotator/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py
359
+ lavis/common/annotator/uniformer/mmseg/models/segmentors/encoder_decoder.py
360
+ lavis/common/annotator/uniformer/mmseg/models/utils/__init__.py
361
+ lavis/common/annotator/uniformer/mmseg/models/utils/drop.py
362
+ lavis/common/annotator/uniformer/mmseg/models/utils/inverted_residual.py
363
+ lavis/common/annotator/uniformer/mmseg/models/utils/make_divisible.py
364
+ lavis/common/annotator/uniformer/mmseg/models/utils/res_layer.py
365
+ lavis/common/annotator/uniformer/mmseg/models/utils/se_layer.py
366
+ lavis/common/annotator/uniformer/mmseg/models/utils/self_attention_block.py
367
+ lavis/common/annotator/uniformer/mmseg/models/utils/up_conv_block.py
368
+ lavis/common/annotator/uniformer/mmseg/models/utils/weight_init.py
369
+ lavis/common/annotator/uniformer/mmseg/ops/__init__.py
370
+ lavis/common/annotator/uniformer/mmseg/ops/encoding.py
371
+ lavis/common/annotator/uniformer/mmseg/ops/wrappers.py
372
+ lavis/common/annotator/uniformer/mmseg/utils/__init__.py
373
+ lavis/common/annotator/uniformer/mmseg/utils/collect_env.py
374
+ lavis/common/annotator/uniformer/mmseg/utils/logger.py
375
+ lavis/common/vqa_tools/__init__.py
376
+ lavis/common/vqa_tools/vqa.py
377
+ lavis/common/vqa_tools/vqa_eval.py
378
+ lavis/configs/default.yaml
379
+ lavis/configs/datasets/aokvqa/defaults.yaml
380
+ lavis/configs/datasets/avsd/defaults_dial.yaml
381
+ lavis/configs/datasets/blip_diffusion_datasets/defaults.yaml
382
+ lavis/configs/datasets/coco/defaults_cap.yaml
383
+ lavis/configs/datasets/coco/defaults_ret.yaml
384
+ lavis/configs/datasets/coco/defaults_vqa.yaml
385
+ lavis/configs/datasets/coco/eval_vqa.yaml
386
+ lavis/configs/datasets/conceptual_caption/defaults_12m.yaml
387
+ lavis/configs/datasets/conceptual_caption/defaults_3m.yaml
388
+ lavis/configs/datasets/didemo/defaults_ret.yaml
389
+ lavis/configs/datasets/flickr30k/defaults.yaml
390
+ lavis/configs/datasets/gqa/balanced_testdev.yaml
391
+ lavis/configs/datasets/gqa/balanced_val.yaml
392
+ lavis/configs/datasets/gqa/defaults.yaml
393
+ lavis/configs/datasets/imagenet/defaults.yaml
394
+ lavis/configs/datasets/laion/defaults_2B_multi.yaml
395
+ lavis/configs/datasets/msrvtt/defaults_cap.yaml
396
+ lavis/configs/datasets/msrvtt/defaults_qa.yaml
397
+ lavis/configs/datasets/msrvtt/defaults_ret.yaml
398
+ lavis/configs/datasets/msvd/defaults_cap.yaml
399
+ lavis/configs/datasets/msvd/defaults_qa.yaml
400
+ lavis/configs/datasets/nlvr/defaults.yaml
401
+ lavis/configs/datasets/nocaps/defaults.yaml
402
+ lavis/configs/datasets/okvqa/defaults.yaml
403
+ lavis/configs/datasets/protein/GO_defaults_cap.yaml
404
+ lavis/configs/datasets/protein/defaults_cap.yaml
405
+ lavis/configs/datasets/sbu_caption/defaults.yaml
406
+ lavis/configs/datasets/snli_ve/defaults.yaml
407
+ lavis/configs/datasets/vatex/defaults_cap.yaml
408
+ lavis/configs/datasets/vg/defaults_caption.yaml
409
+ lavis/configs/datasets/vg/defaults_vqa.yaml
410
+ lavis/configs/models/albef_classification_ve.yaml
411
+ lavis/configs/models/albef_feature_extractor.yaml
412
+ lavis/configs/models/albef_nlvr.yaml
413
+ lavis/configs/models/albef_pretrain_base.yaml
414
+ lavis/configs/models/albef_retrieval_coco.yaml
415
+ lavis/configs/models/albef_retrieval_flickr.yaml
416
+ lavis/configs/models/albef_vqav2.yaml
417
+ lavis/configs/models/alpro_qa_msrvtt.yaml
418
+ lavis/configs/models/alpro_qa_msvd.yaml
419
+ lavis/configs/models/alpro_retrieval_didemo.yaml
420
+ lavis/configs/models/alpro_retrieval_msrvtt.yaml
421
+ lavis/configs/models/bert_config.json
422
+ lavis/configs/models/bert_config_alpro.json
423
+ lavis/configs/models/blip_caption_base_coco.yaml
424
+ lavis/configs/models/blip_caption_large_coco.yaml
425
+ lavis/configs/models/blip_classification_base.yaml
426
+ lavis/configs/models/blip_feature_extractor_base.yaml
427
+ lavis/configs/models/blip_itm_base.yaml
428
+ lavis/configs/models/blip_itm_large.yaml
429
+ lavis/configs/models/blip_nlvr.yaml
430
+ lavis/configs/models/blip_pretrain_base.yaml
431
+ lavis/configs/models/blip_pretrain_large.yaml
432
+ lavis/configs/models/blip_retrieval_coco.yaml
433
+ lavis/configs/models/blip_retrieval_flickr.yaml
434
+ lavis/configs/models/blip_vqa_aokvqa.yaml
435
+ lavis/configs/models/blip_vqa_okvqa.yaml
436
+ lavis/configs/models/blip_vqav2.yaml
437
+ lavis/configs/models/clip_resnet50.yaml
438
+ lavis/configs/models/clip_vit_base16.yaml
439
+ lavis/configs/models/clip_vit_base32.yaml
440
+ lavis/configs/models/clip_vit_large14.yaml
441
+ lavis/configs/models/clip_vit_large14_336.yaml
442
+ lavis/configs/models/gpt_dialogue_base.yaml
443
+ lavis/configs/models/med_config.json
444
+ lavis/configs/models/med_config_albef.json
445
+ lavis/configs/models/med_large_config.json
446
+ lavis/configs/models/blip-diffusion/blip_diffusion_base.yaml
447
+ lavis/configs/models/blip-diffusion/blip_diffusion_controlnet_canny.yaml
448
+ lavis/configs/models/blip-diffusion/blip_diffusion_controlnet_depth.yaml
449
+ lavis/configs/models/blip-diffusion/blip_diffusion_controlnet_hed.yaml
450
+ lavis/configs/models/blip2/blip2_caption_flant5xl.yaml
451
+ lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml
452
+ lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml
453
+ lavis/configs/models/blip2/blip2_coco.yaml
454
+ lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml
455
+ lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml
456
+ lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml
457
+ lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml
458
+ lavis/configs/models/blip2/blip2_pretrain.yaml
459
+ lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml
460
+ lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml
461
+ lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml
462
+ lavis/configs/models/blip2/blip2_pretrain_llama7b.yaml
463
+ lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml
464
+ lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml
465
+ lavis/configs/models/blip2/blip2_pretrain_vitL.yaml
466
+ lavis/configs/models/blip2/pretrain_protein_opt2.7b.yaml
467
+ lavis/configs/models/blip2/pretrain_protein_opt350m.yaml
468
+ lavis/configs/models/clip/RN101-quickgelu.json
469
+ lavis/configs/models/clip/RN101.json
470
+ lavis/configs/models/clip/RN50-quickgelu.json
471
+ lavis/configs/models/clip/RN50.json
472
+ lavis/configs/models/clip/RN50x16.json
473
+ lavis/configs/models/clip/RN50x4.json
474
+ lavis/configs/models/clip/ViT-B-16-plus-240.json
475
+ lavis/configs/models/clip/ViT-B-16-plus.json
476
+ lavis/configs/models/clip/ViT-B-16.json
477
+ lavis/configs/models/clip/ViT-B-32-plus-256.json
478
+ lavis/configs/models/clip/ViT-B-32-quickgelu.json
479
+ lavis/configs/models/clip/ViT-B-32.json
480
+ lavis/configs/models/clip/ViT-H-14.json
481
+ lavis/configs/models/clip/ViT-H-16.json
482
+ lavis/configs/models/clip/ViT-L-14-280.json
483
+ lavis/configs/models/clip/ViT-L-14-336.json
484
+ lavis/configs/models/clip/ViT-L-14.json
485
+ lavis/configs/models/clip/ViT-L-16-320.json
486
+ lavis/configs/models/clip/ViT-L-16.json
487
+ lavis/configs/models/clip/ViT-g-14.json
488
+ lavis/configs/models/clip/timm-efficientnetv2_rw_s.json
489
+ lavis/configs/models/clip/timm-resnet50d.json
490
+ lavis/configs/models/clip/timm-resnetaa50d.json
491
+ lavis/configs/models/clip/timm-resnetblur50.json
492
+ lavis/configs/models/clip/timm-swin_base_patch4_window7_224.json
493
+ lavis/configs/models/clip/timm-vit_base_patch16_224.json
494
+ lavis/configs/models/clip/timm-vit_base_patch32_224.json
495
+ lavis/configs/models/clip/timm-vit_small_patch16_224.json
496
+ lavis/configs/models/img2prompt-vqa/img2prompt_vqa_base.yaml
497
+ lavis/configs/models/pnp-vqa/pnp_vqa_3b.yaml
498
+ lavis/configs/models/pnp-vqa/pnp_vqa_base.yaml
499
+ lavis/configs/models/pnp-vqa/pnp_vqa_large.yaml
500
+ lavis/configs/models/pnp-vqa/unifiedqav2_3b_config.json
501
+ lavis/configs/models/pnp-vqa/unifiedqav2_base_config.json
502
+ lavis/configs/models/pnp-vqa/unifiedqav2_large_config.json
503
+ lavis/datasets/data_utils.py
504
+ lavis/datasets/builders/__init__.py
505
+ lavis/datasets/builders/base_dataset_builder.py
506
+ lavis/datasets/builders/caption_builder.py
507
+ lavis/datasets/builders/classification_builder.py
508
+ lavis/datasets/builders/dialogue_builder.py
509
+ lavis/datasets/builders/image_text_pair_builder.py
510
+ lavis/datasets/builders/imagefolder_builder.py
511
+ lavis/datasets/builders/retrieval_builder.py
512
+ lavis/datasets/builders/text_to_image_generation_builder.py
513
+ lavis/datasets/builders/video_qa_builder.py
514
+ lavis/datasets/builders/vqa_builder.py
515
+ lavis/datasets/datasets/aok_vqa_datasets.py
516
+ lavis/datasets/datasets/avsd_dialogue_datasets.py
517
+ lavis/datasets/datasets/base_dataset.py
518
+ lavis/datasets/datasets/caption_datasets.py
519
+ lavis/datasets/datasets/coco_caption_datasets.py
520
+ lavis/datasets/datasets/coco_vqa_datasets.py
521
+ lavis/datasets/datasets/dataloader_utils.py
522
+ lavis/datasets/datasets/dialogue_datasets.py
523
+ lavis/datasets/datasets/gqa_datasets.py
524
+ lavis/datasets/datasets/image_text_pair_datasets.py
525
+ lavis/datasets/datasets/imagefolder_dataset.py
526
+ lavis/datasets/datasets/laion_dataset.py
527
+ lavis/datasets/datasets/multimodal_classification_datasets.py
528
+ lavis/datasets/datasets/nlvr_datasets.py
529
+ lavis/datasets/datasets/retrieval_datasets.py
530
+ lavis/datasets/datasets/snli_ve_datasets.py
531
+ lavis/datasets/datasets/subject_driven_t2i_dataset.py
532
+ lavis/datasets/datasets/vg_vqa_datasets.py
533
+ lavis/datasets/datasets/video_caption_datasets.py
534
+ lavis/datasets/datasets/video_vqa_datasets.py
535
+ lavis/datasets/datasets/vqa_datasets.py
536
+ lavis/models/__init__.py
537
+ lavis/models/base_model.py
538
+ lavis/models/clip_vit.py
539
+ lavis/models/eva_vit.py
540
+ lavis/models/med.py
541
+ lavis/models/vit.py
542
+ lavis/models/albef_models/__init__.py
543
+ lavis/models/albef_models/albef_classification.py
544
+ lavis/models/albef_models/albef_feature_extractor.py
545
+ lavis/models/albef_models/albef_nlvr.py
546
+ lavis/models/albef_models/albef_outputs.py
547
+ lavis/models/albef_models/albef_pretrain.py
548
+ lavis/models/albef_models/albef_retrieval.py
549
+ lavis/models/albef_models/albef_vqa.py
550
+ lavis/models/alpro_models/__init__.py
551
+ lavis/models/alpro_models/alpro_outputs.py
552
+ lavis/models/alpro_models/alpro_qa.py
553
+ lavis/models/alpro_models/alpro_retrieval.py
554
+ lavis/models/blip2_models/Qformer.py
555
+ lavis/models/blip2_models/__init__.py
556
+ lavis/models/blip2_models/blip2.py
557
+ lavis/models/blip2_models/blip2_image_text_matching.py
558
+ lavis/models/blip2_models/blip2_opt.py
559
+ lavis/models/blip2_models/blip2_qformer.py
560
+ lavis/models/blip2_models/blip2_t5.py
561
+ lavis/models/blip2_models/blip2_t5_instruct.py
562
+ lavis/models/blip2_models/blip2_vicuna_instruct.py
563
+ lavis/models/blip2_models/modeling_llama.py
564
+ lavis/models/blip2_models/modeling_opt.py
565
+ lavis/models/blip2_models/modeling_t5.py
566
+ lavis/models/blip_diffusion_models/__init__.py
567
+ lavis/models/blip_diffusion_models/blip_diffusion.py
568
+ lavis/models/blip_diffusion_models/modeling_ctx_clip.py
569
+ lavis/models/blip_diffusion_models/ptp_utils.py
570
+ lavis/models/blip_diffusion_models/utils.py
571
+ lavis/models/blip_models/__init__.py
572
+ lavis/models/blip_models/blip.py
573
+ lavis/models/blip_models/blip_caption.py
574
+ lavis/models/blip_models/blip_classification.py
575
+ lavis/models/blip_models/blip_feature_extractor.py
576
+ lavis/models/blip_models/blip_image_text_matching.py
577
+ lavis/models/blip_models/blip_nlvr.py
578
+ lavis/models/blip_models/blip_outputs.py
579
+ lavis/models/blip_models/blip_pretrain.py
580
+ lavis/models/blip_models/blip_retrieval.py
581
+ lavis/models/blip_models/blip_vqa.py
582
+ lavis/models/blip_models/nlvr_encoder.py
583
+ lavis/models/clip_models/__init__.py
584
+ lavis/models/clip_models/bpe_simple_vocab_16e6.txt.gz
585
+ lavis/models/clip_models/clip_outputs.py
586
+ lavis/models/clip_models/loss.py
587
+ lavis/models/clip_models/model.py
588
+ lavis/models/clip_models/pretrained.py
589
+ lavis/models/clip_models/timm_model.py
590
+ lavis/models/clip_models/tokenizer.py
591
+ lavis/models/clip_models/transform.py
592
+ lavis/models/clip_models/utils.py
593
+ lavis/models/gpt_models/gpt_dialogue.py
594
+ lavis/models/img2prompt_models/__init__.py
595
+ lavis/models/img2prompt_models/img2prompt_vqa.py
596
+ lavis/models/pnp_vqa_models/__init__.py
597
+ lavis/models/pnp_vqa_models/pnp_unifiedqav2_fid.py
598
+ lavis/models/pnp_vqa_models/pnp_vqa.py
599
+ lavis/models/timesformer/__init__.py
600
+ lavis/models/timesformer/conv2d_same.py
601
+ lavis/models/timesformer/features.py
602
+ lavis/models/timesformer/helpers.py
603
+ lavis/models/timesformer/linear.py
604
+ lavis/models/timesformer/vit.py
605
+ lavis/models/timesformer/vit_utils.py
606
+ lavis/processors/__init__.py
607
+ lavis/processors/alpro_processors.py
608
+ lavis/processors/base_processor.py
609
+ lavis/processors/blip_diffusion_processors.py
610
+ lavis/processors/blip_processors.py
611
+ lavis/processors/clip_processors.py
612
+ lavis/processors/functional_video.py
613
+ lavis/processors/gpt_processors.py
614
+ lavis/processors/randaugment.py
615
+ lavis/processors/transforms_video.py
616
+ lavis/projects/albef/eval/nlvr_eval.yaml
617
+ lavis/projects/albef/eval/ret_coco_eval.yaml
618
+ lavis/projects/albef/eval/ret_flickr30k_eval.yaml
619
+ lavis/projects/albef/eval/snli_ve_eval.yaml
620
+ lavis/projects/albef/eval/vqa_test.yaml
621
+ lavis/projects/albef/eval/vqa_val.yaml
622
+ lavis/projects/albef/train/aokvqa_ft.yaml
623
+ lavis/projects/albef/train/nlvr_ft.yaml
624
+ lavis/projects/albef/train/okvqa_ft.yaml
625
+ lavis/projects/albef/train/pretrain.yaml
626
+ lavis/projects/albef/train/ret_coco_ft.yaml
627
+ lavis/projects/albef/train/ret_flickr30k_ft.yaml
628
+ lavis/projects/albef/train/snli_ve_ft.yaml
629
+ lavis/projects/albef/train/vqa_ft.yaml
630
+ lavis/projects/alpro/eval/didemo_ret_eval.yaml
631
+ lavis/projects/alpro/eval/msrvtt_qa_eval.yaml
632
+ lavis/projects/alpro/eval/msrvtt_ret_eval.yaml
633
+ lavis/projects/alpro/eval/msvd_qa_eval.yaml
634
+ lavis/projects/alpro/train/didemo_ret_ft.yaml
635
+ lavis/projects/alpro/train/msrvtt_qa_ft.yaml
636
+ lavis/projects/alpro/train/msrvtt_retrieval_ft.yaml
637
+ lavis/projects/alpro/train/msvd_qa_ft.yaml
638
+ lavis/projects/blip/coco_cap_ft_iter.yaml
639
+ lavis/projects/blip/eval/aokvqa_eval.yaml
640
+ lavis/projects/blip/eval/caption_coco_eval.yaml
641
+ lavis/projects/blip/eval/caption_coco_eval_large.yaml
642
+ lavis/projects/blip/eval/nlvr_eval.yaml
643
+ lavis/projects/blip/eval/nocaps_eval.yaml
644
+ lavis/projects/blip/eval/okvqa_eval.yaml
645
+ lavis/projects/blip/eval/ret_coco_eval.yaml
646
+ lavis/projects/blip/eval/ret_flickr_eval.yaml
647
+ lavis/projects/blip/eval/vqav2_eval.yaml
648
+ lavis/projects/blip/train/aokvqa_ft.yaml
649
+ lavis/projects/blip/train/caption_coco_ft.yaml
650
+ lavis/projects/blip/train/caption_coco_large_ft.yaml
651
+ lavis/projects/blip/train/nlvr_ft.yaml
652
+ lavis/projects/blip/train/okvqa_ft.yaml
653
+ lavis/projects/blip/train/pretrain_14m.yaml
654
+ lavis/projects/blip/train/retrieval_coco_ft.yaml
655
+ lavis/projects/blip/train/retrieval_flickr_ft.yaml
656
+ lavis/projects/blip/train/vqav2_ft.yaml
657
+ lavis/projects/blip2/eval/caption_coco_flant5xl_eval.yaml
658
+ lavis/projects/blip2/eval/caption_coco_opt2.7b_eval.yaml
659
+ lavis/projects/blip2/eval/caption_coco_opt6.7b_eval.yaml
660
+ lavis/projects/blip2/eval/caption_protein_opt2.7b_eval.yaml
661
+ lavis/projects/blip2/eval/gqa_zeroshot_flant5xl_eval.yaml
662
+ lavis/projects/blip2/eval/okvqa_zeroshot_flant5xl_eval.yaml
663
+ lavis/projects/blip2/eval/ret_coco_eval.yaml
664
+ lavis/projects/blip2/eval/ret_flickr_eval.yaml
665
+ lavis/projects/blip2/eval/vqav2_zeroshot_flant5xl_eval.yaml
666
+ lavis/projects/blip2/eval/vqav2_zeroshot_opt_eval.yaml
667
+ lavis/projects/blip2/train/caption_coco_ft.yaml
668
+ lavis/projects/blip2/train/gptProcessed_test_stage1.yaml
669
+ lavis/projects/blip2/train/pretrain_stage1.yaml
670
+ lavis/projects/blip2/train/pretrain_stage2.yaml
671
+ lavis/projects/blip2/train/retrieval_coco_ft.yaml
672
+ lavis/projects/blip2/train/test_stage1.yaml
673
+ lavis/projects/blip2/train/test_stage2.yaml
674
+ lavis/projects/blip_diffusion/finetune-db-dog.yaml
675
+ lavis/projects/blip_diffusion/finetune-db-pink-dress.yaml
676
+ lavis/projects/blip_diffusion/finetune-db-shein-jacket.yaml
677
+ lavis/projects/blip_diffusion/finetune-db-template.yaml
678
+ lavis/projects/clip/exp_coco_ret_eval.yaml
679
+ lavis/projects/clip/exp_flickr_ret_eval.yaml
680
+ lavis/projects/clip/exp_imnet_zs_eval.yaml
681
+ lavis/projects/gpt/eval/dialogue_avsd_eval.yaml
682
+ lavis/projects/gpt/train/dialogue_avsd_ft.yaml
683
+ lavis/projects/pnp-vqa/eval/gqa_eval.yaml
684
+ lavis/projects/pnp-vqa/eval/gqa_eval_3b.yaml
685
+ lavis/projects/pnp-vqa/eval/gqa_eval_large.yaml
686
+ lavis/projects/pnp-vqa/eval/okvqa_eval.yaml
687
+ lavis/projects/pnp-vqa/eval/okvqa_eval_3b.yaml
688
+ lavis/projects/pnp-vqa/eval/okvqa_eval_large.yaml
689
+ lavis/projects/pnp-vqa/eval/vqav2_eval.yaml
690
+ lavis/projects/pnp-vqa/eval/vqav2_eval_3b.yaml
691
+ lavis/projects/pnp-vqa/eval/vqav2_eval_large.yaml
692
+ lavis/projects/pnp-vqa/eval/vqav2_test_eval.yaml
693
+ lavis/projects/pnp-vqa/eval/vqav2_test_eval_3b.yaml
694
+ lavis/projects/pnp-vqa/eval/vqav2_test_eval_large.yaml
695
+ lavis/runners/__init__.py
696
+ lavis/runners/runner_base.py
697
+ lavis/runners/runner_iter.py
698
+ lavis/tasks/__init__.py
699
+ lavis/tasks/base_task.py
700
+ lavis/tasks/captioning.py
701
+ lavis/tasks/dialogue.py
702
+ lavis/tasks/image_text_pretrain.py
703
+ lavis/tasks/multimodal_classification.py
704
+ lavis/tasks/retrieval.py
705
+ lavis/tasks/text_to_image_generation.py
706
+ lavis/tasks/vqa.py
707
+ lavis/tasks/vqa_reading_comprehension.py
708
+ projects/img2llm-vqa/img2llm_vqa.py
709
+ projects/instructblip/run_demo.py
710
+ salesforce_lavis.egg-info/PKG-INFO
711
+ salesforce_lavis.egg-info/SOURCES.txt
712
+ salesforce_lavis.egg-info/dependency_links.txt
713
+ salesforce_lavis.egg-info/not-zip-safe
714
+ salesforce_lavis.egg-info/requires.txt
715
+ salesforce_lavis.egg-info/top_level.txt
salesforce_lavis.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
+
salesforce_lavis.egg-info/not-zip-safe ADDED
@@ -0,0 +1 @@
+
salesforce_lavis.egg-info/requires.txt ADDED
@@ -0,0 +1,29 @@
+ contexttimer
+ decord
+ diffusers<=0.16.0
+ einops>=0.4.1
+ fairscale==0.4.4
+ ftfy
+ iopath
+ ipython
+ omegaconf
+ opencv-python-headless==4.5.5.64
+ opendatasets
+ packaging
+ pandas
+ plotly
+ pre-commit
+ pycocoevalcap
+ pycocotools
+ python-magic
+ scikit-image
+ sentencepiece
+ spacy
+ streamlit
+ timm==0.4.12
+ torch>=1.10.0
+ torchvision
+ tqdm
+ transformers>=4.28.0
+ webdataset
+ wheel
salesforce_lavis.egg-info/top_level.txt ADDED
@@ -0,0 +1,10 @@
+ app
+ assets
+ data
+ dataset_card
+ docs
+ examples
+ lavis
+ projects
+ protein
+ run_scripts
setup.py ADDED
@@ -0,0 +1,36 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ from setuptools import setup, find_namespace_packages
+ import platform
+
+ DEPENDENCY_LINKS = []
+ if platform.system() == "Windows":
+     DEPENDENCY_LINKS.append("https://download.pytorch.org/whl/torch_stable.html")
+
+
+ def fetch_requirements(filename):
+     with open(filename) as f:
+         return [ln.strip() for ln in f.read().split("\n")]
+
+
+ setup(
+     name="salesforce-lavis",
+     version="1.0.1",
+     author="Dongxu Li, Junnan Li, Hung Le, Guangsen Wang, Silvio Savarese, Steven C.H. Hoi",
+     description="LAVIS - A One-stop Library for Language-Vision Intelligence",
+     long_description=open("README.md", "r", encoding="utf-8").read(),
+     long_description_content_type="text/markdown",
+     keywords="Vision-Language, Multimodal, Image Captioning, Generative AI, Deep Learning, Library, PyTorch",
+     license="3-Clause BSD",
+     packages=find_namespace_packages(include="lavis.*"),
+     install_requires=fetch_requirements("requirements.txt"),
+     python_requires=">=3.7.0",
+     include_package_data=True,
+     dependency_links=DEPENDENCY_LINKS,
+     zip_safe=False,
+ )
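
Note that install_requires is read straight from requirements.txt, so the pins shown in requires.txt above mirror that file. A minimal sketch of the standard setuptools workflow for a checkout of this repo (illustrative only, not a command taken from the repo's docs; assumes you run it from the repo root):

# Hedged example: plain pip/setuptools usage against the setup.py above.
pip install -r requirements.txt   # install the pinned dependencies
pip install -e .                  # editable install driven by setup.py
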
train.py ADDED
@@ -0,0 +1,103 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import argparse
+ import os
+ import random
+
+ import numpy as np
+ import torch
+ import torch.backends.cudnn as cudnn
+
+ import lavis.tasks as tasks
+ from lavis.common.config import Config
+ from lavis.common.dist_utils import get_rank, init_distributed_mode
+ from lavis.common.logger import setup_logger
+ from lavis.common.optims import (
+     LinearWarmupCosineLRScheduler,
+     LinearWarmupStepLRScheduler,
+ )
+ from lavis.common.registry import registry
+ from lavis.common.utils import now
+
+ # imports modules for registration
+ from lavis.datasets.builders import *
+ from lavis.models import *
+ from lavis.processors import *
+ from lavis.runners import *
+ from lavis.tasks import *
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(description="Training")
+
+     parser.add_argument("--cfg-path", required=True, help="path to configuration file.")
+     parser.add_argument(
+         "--options",
+         nargs="+",
+         help="override some settings in the used config, the key-value pair "
+         "in xxx=yyy format will be merged into config file (deprecate), "
+         "change to --cfg-options instead.",
+     )
+
+     args = parser.parse_args()
+     # if 'LOCAL_RANK' not in os.environ:
+     #     os.environ['LOCAL_RANK'] = str(args.local_rank)
+
+     return args
+
+
+ def setup_seeds(config):
+     seed = config.run_cfg.seed + get_rank()
+
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+
+     cudnn.benchmark = False
+     cudnn.deterministic = True
+
+
+ def get_runner_class(cfg):
+     """
+     Get runner class from config. Default to epoch-based runner.
+     """
+     runner_cls = registry.get_runner_class(cfg.run_cfg.get("runner", "runner_base"))
+
+     return runner_cls
+
+
+ def main():
+     # allow auto-dl completes on main process without timeout when using NCCL backend.
+     # os.environ["NCCL_BLOCKING_WAIT"] = "1"
+
+     # set before init_distributed_mode() to ensure the same job_id shared across all ranks.
+     job_id = now()
+
+     cfg = Config(parse_args())
+
+     init_distributed_mode(cfg.run_cfg)
+
+     setup_seeds(cfg)
+
+     # set after init_distributed_mode() to only log on master.
+     setup_logger()
+
+     cfg.pretty_print()
+
+     task = tasks.setup_task(cfg)
+     datasets = task.build_datasets(cfg)
+     model = task.build_model(cfg)
+
+     runner = get_runner_class(cfg)(
+         cfg=cfg, job_id=job_id, task=task, model=model, datasets=datasets
+     )
+     runner.train()
+
+
+ if __name__ == "__main__":
+     main()
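
train.py is the generic entry point that the shell wrappers under run_scripts/blip2/ drive: it reads a YAML config via --cfg-path, builds the task, datasets, and model through the registry, and hands them to a runner. A minimal launch sketch follows (illustrative only: the actual launcher, GPU count, and config file for each experiment live in the corresponding run_scripts/*.sh, and pretrain_stage1.yaml is just one of the configs shipped in this upload):

# Hedged example; adjust --nproc_per_node and the config path to your setup.
python -m torch.distributed.run --nproc_per_node=4 train.py \
    --cfg-path lavis/projects/blip2/train/pretrain_stage1.yaml
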