File size: 1,234 Bytes
85ab89d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
import logging
import warnings

from minigpt4.common.registry import registry
from minigpt4.datasets.builders.pdb_base_dataset_builder import PDB_BaseDatasetBuilder
from minigpt4.datasets.datasets.pdb_dataset import ESMDataset

@registry.register_builder("pdb")
class PDBBuilder(PDB_BaseDatasetBuilder):
    """Dataset builder registered with the registry under the name ``"pdb"``.

    The training split is created with ``ESMDataset``; the default dataset
    configuration is read from ``configs/datasets/pdb/pdb.yaml``.
    """

    train_dataset_cls = ESMDataset

    DATASET_CONFIG_DICT = {
        "default": "configs/datasets/pdb/pdb.yaml",
    }

    def build_datasets(self):
        """Construct the training dataset from the configured storage directory.

        The storage directory is taken from ``self.config.build_info.storage``.
        Inside it, ``filter_cap.json`` is passed as the single annotation file
        and the ``pdb`` sub-directory is passed as ``pdb_root``.

        Nothing is downloaded here; the files are expected to be in place
        already. A missing storage directory only produces a warning, and
        dataset construction is still attempted afterwards.

        Returns:
            A dict with a single key, ``'train'``, mapped to the dataset
            instance created from ``train_dataset_cls``.
        """
        logging.info("Building datasets...")
        # NOTE(review): presumably this is what populates
        # ``self.text_processors`` used below -- confirm in the base class.
        self.build_processors()

        root_dir = self.config.build_info.storage

        # Report, but do not abort on, a storage directory that is absent.
        root_missing = not os.path.exists(root_dir)
        if root_missing:
            warnings.warn("storage path {} does not exist.".format(root_dir))

        train_split = self.train_dataset_cls(
            text_processor=self.text_processors["train"],
            ann_paths=[os.path.join(root_dir, 'filter_cap.json')],
            pdb_root=os.path.join(root_dir, 'pdb'),
        )

        return {'train': train_split}