shikunl committed
Commit 3cce9aa • 1 Parent(s): bd0d673
prismer/dataset/__init__.py CHANGED
@@ -6,18 +6,12 @@
 
 from torch.utils.data import DataLoader
 
-from dataset.pretrain_dataset import Pretrain
 from dataset.vqa_dataset import VQA
 from dataset.caption_dataset import Caption
-from dataset.classification_dataset import Classification
 
 
 def create_dataset(dataset, config):
-    if dataset == 'pretrain':
-        dataset = Pretrain(config)
-        return dataset
-
-    elif dataset == 'vqa':
+    if dataset == 'vqa':
         train_dataset = VQA(config, train=True)
         test_dataset = VQA(config, train=False)
         return train_dataset, test_dataset
@@ -26,11 +20,6 @@ def create_dataset(dataset, config):
         train_dataset = Caption(config, train=True)
         test_dataset = Caption(config, train=False)
         return train_dataset, test_dataset
-
-    elif dataset == 'classification':
-        train_dataset = Classification(config, train=True)
-        test_dataset = Classification(config, train=False)
-        return train_dataset, test_dataset
 
 
 def create_loader(dataset, batch_size, num_workers, train, collate_fn=None):
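With pretraining and classification gone, the factory now only builds the VQA and captioning splits. A minimal usage sketch follows; the config keys and values are assumptions inferred from the dataset classes, not part of this commit, and create_loader is presumed to wrap a split in a torch DataLoader.

from prismer.dataset import create_dataset, create_loader

# Hypothetical config -- keys inferred from the Caption/VQA constructors, values are placeholders.
config = {
    'data_path': 'data', 'label_path': 'data/labels', 'experts': ['depth', 'edge', 'normal'],
    'dataset': 'demo', 'prefix': '', 'image_resolution': 480,
}
train_set, test_set = create_dataset('caption', config)
test_loader = create_loader(test_set, batch_size=8, num_workers=4, train=False)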
prismer/dataset/caption_dataset.py CHANGED
@@ -7,7 +7,7 @@
 import glob
 
 from torch.utils.data import Dataset
-from dataset.utils import *
+from prismer.dataset.utils import *
 from PIL import ImageFile
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 
@@ -50,7 +50,7 @@ class Caption(Dataset):
         elif self.dataset == 'demo':
             img_path_split = self.data_list[index]['image'].split('/')
             img_name = img_path_split[-2] + '/' + img_path_split[-1]
-            image, labels, labels_info = get_expert_labels('', self.label_path, img_name, 'helpers', self.experts)
+            image, labels, labels_info = get_expert_labels('prismer', self.label_path, img_name, 'helpers', self.experts)
 
             experts = self.transform(image, labels)
             experts = post_label_process(experts, labels_info)
prismer/dataset/classification_dataset.py DELETED
@@ -1,72 +0,0 @@
-# Copyright (c) 2023, NVIDIA Corporation & Affiliates. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://github.com/NVlabs/prismer/blob/main/LICENSE
-
-import glob
-from torch.utils.data import Dataset
-from dataset.utils import *
-
-
-class Classification(Dataset):
-    def __init__(self, config, train):
-        self.data_path = config['data_path']
-        self.label_path = config['label_path']
-        self.experts = config['experts']
-        self.dataset = config['dataset']
-        self.shots = config['shots']
-        self.prefix = config['prefix']
-
-        self.train = train
-        self.transform = Transform(resize_resolution=config['image_resolution'], scale_size=[0.5, 1.0], train=True)
-
-        if train:
-            data_folders = glob.glob(f'{self.data_path}/imagenet_train/*/')
-            self.data_list = [{'image': data} for f in data_folders for data in glob.glob(f + '*.JPEG')[:self.shots]]
-            self.answer_list = json.load(open(f'{self.data_path}/imagenet/' + 'imagenet_answer.json'))
-            self.class_list = json.load(open(f'{self.data_path}/imagenet/' + 'imagenet_class.json'))
-        else:
-            data_folders = glob.glob(f'{self.data_path}/imagenet/*/')
-            self.data_list = [{'image': data} for f in data_folders for data in glob.glob(f + '*.JPEG')]
-            self.answer_list = json.load(open(f'{self.data_path}/imagenet/' + 'imagenet_answer.json'))
-            self.class_list = json.load(open(f'{self.data_path}/imagenet/' + 'imagenet_class.json'))
-
-    def __len__(self):
-        return len(self.data_list)
-
-    def __getitem__(self, index):
-        img_path = self.data_list[index]['image']
-        if self.train:
-            img_path_split = img_path.split('/')
-            img_name = img_path_split[-2] + '/' + img_path_split[-1]
-            class_name = img_path_split[-2]
-            image, labels, labels_info = get_expert_labels(self.data_path, self.label_path, img_name, 'imagenet_train', self.experts)
-        else:
-            img_path_split = img_path.split('/')
-            img_name = img_path_split[-2] + '/' + img_path_split[-1]
-            class_name = img_path_split[-2]
-            image, labels, labels_info = get_expert_labels(self.data_path, self.label_path, img_name, 'imagenet', self.experts)
-
-        experts = self.transform(image, labels)
-        experts = post_label_process(experts, labels_info)
-
-        if self.train:
-            caption = self.prefix + ' ' + self.answer_list[int(self.class_list[class_name])].lower()
-            return experts, caption
-        else:
-            return experts, self.class_list[class_name]
-
-
-
-
-
-# import os
-# import glob
-#
-# data_path = '/Users/shikunliu/Documents/dataset/mscoco/mscoco'
-#
-# data_folders = glob.glob(f'{data_path}/*/')
-# data_list = [data for f in data_folders for data in glob.glob(f + '*.jpg')]
-
-
prismer/dataset/pretrain_dataset.py DELETED
@@ -1,73 +0,0 @@
-# Copyright (c) 2023, NVIDIA Corporation & Affiliates. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://github.com/NVlabs/prismer/blob/main/LICENSE
-
-import glob
-
-from torch.utils.data import Dataset
-from dataset.utils import *
-
-
-class Pretrain(Dataset):
-    def __init__(self, config):
-        self.cc12m_data_path = config['cc12m_data_path']
-        self.cc3m_data_path = config['cc3m_data_path']
-        self.coco_data_path = config['coco_data_path']
-        self.vg_data_path = config['vg_data_path']
-        self.label_path = config['label_path']
-        self.experts = config['experts']
-
-        self.data_list = []
-        if 'cc12m' in config['datasets']:
-            data_folders = glob.glob(f'{self.cc12m_data_path}/cc12m/*/')
-            self.data_list += [{'image': data} for f in data_folders for data in glob.glob(f + '*.jpg')]
-        if 'cc3m_sgu' in config['datasets']:
-            data_folders = glob.glob(f'{self.cc3m_data_path}/cc3m_sgu/*/')
-            self.data_list += [{'image': data} for f in data_folders for data in glob.glob(f + '*.jpg')]
-        if 'coco' in config['datasets']:
-            self.data_list += json.load(open(os.path.join(self.coco_data_path, 'coco_karpathy_train.json'), 'r'))
-        if 'vg' in config['datasets']:
-            self.data_list += json.load(open(os.path.join(self.vg_data_path, 'vg_caption.json'), 'r'))
-
-        self.transform = Transform(resize_resolution=config['image_resolution'], scale_size=[0.5, 1.5], train=True)
-
-    def __len__(self):
-        return len(self.data_list)
-
-    def __getitem__(self, index):
-        img_path = self.data_list[index]['image']
-
-        if 'cc12m' in img_path:
-            img_path_split = img_path.split('/')
-            img_name = img_path_split[-2] + '/' + img_path_split[-1]
-            image, labels, labels_info = get_expert_labels(self.cc12m_data_path, self.label_path, img_name, 'cc12m', self.experts)
-
-            caption_path = img_path.replace('.jpg', '.txt')
-            with open(caption_path) as f:
-                caption = f.readlines()[0]
-
-        elif 'cc3m_sgu' in img_path:
-            img_path_split = img_path.split('/')
-            img_name = img_path_split[-2] + '/' + img_path_split[-1]
-            image, labels, labels_info = get_expert_labels(self.cc3m_data_path, self.label_path, img_name, 'cc3m_sgu', self.experts)
-
-            caption_path = img_path.replace('.jpg', '.txt')
-            with open(caption_path) as f:
-                caption = f.readlines()[0]
-
-        elif 'train2014' in img_path or 'val2014' in img_path:
-            image, labels, labels_info = get_expert_labels(self.coco_data_path, self.label_path, img_path, 'vqav2', self.experts)
-            caption = self.data_list[index]['caption']
-
-        elif 'visual-genome' in img_path:
-            img_path_split = img_path.split('/')
-            img_name = img_path_split[-2] + '/' + img_path_split[-1]
-            image, labels, labels_info = get_expert_labels(self.vg_data_path, self.label_path, img_name, 'vg', self.experts)
-            caption = self.data_list[index]['caption']
-
-        experts = self.transform(image, labels)
-        experts = post_label_process(experts, labels_info)
-        caption = pre_caption(caption, max_words=30)
-        return experts, caption
prismer/dataset/utils.py CHANGED
@@ -12,12 +12,16 @@ import PIL.Image as Image
 import numpy as np
 import torchvision.transforms as transforms
 import torchvision.transforms.functional as transforms_f
-from dataset.randaugment import RandAugment
+import pathlib
+from prismer.dataset.randaugment import RandAugment
 
-COCO_FEATURES = torch.load('dataset/coco_features.pt')['features']
-ADE_FEATURES = torch.load('dataset/ade_features.pt')['features']
-DETECTION_FEATURES = torch.load('dataset/detection_features.pt')['features']
-BACKGROUND_FEATURES = torch.load('dataset/background_features.pt')
+
+cur_dir = pathlib.Path(__file__).parent
+
+COCO_FEATURES = torch.load(cur_dir / 'coco_features.pt')['features']
+ADE_FEATURES = torch.load(cur_dir / 'ade_features.pt')['features']
+DETECTION_FEATURES = torch.load(cur_dir / 'detection_features.pt')['features']
+BACKGROUND_FEATURES = torch.load(cur_dir / 'background_features.pt')
 
 
 class Transform:
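The change above anchors the feature files to the module's own directory instead of the current working directory. The same pattern in isolation (the file name below is a placeholder, not one of the real feature files):

import pathlib

import torch

# Resolve a resource that sits next to this module; works regardless of os.getcwd().
cur_dir = pathlib.Path(__file__).parent
features = torch.load(cur_dir / 'example_features.pt')

Together with the prismer.* import rename, this lets the module be imported from the repository root without first changing into the prismer/ submodule.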
prismer/model/prismer.py CHANGED
@@ -5,12 +5,15 @@
 # https://github.com/NVlabs/prismer/blob/main/LICENSE
 
 import json
+import pathlib
 import torch.nn as nn
 
 from model.modules.vit import load_encoder
 from model.modules.roberta import load_decoder
 from transformers import RobertaTokenizer, RobertaConfig
 
+cur_dir = pathlib.Path(__file__).parent
+
 
 class Prismer(nn.Module):
     def __init__(self, config):
@@ -26,7 +29,7 @@ class Prismer(nn.Module):
            elif exp in ['obj_detection', 'ocr_detection']:
                self.experts[exp] = 64
 
-        prismer_config = json.load(open('configs/prismer.json', 'r'))[config['prismer_model']]
+        prismer_config = json.load(open(f'{cur_dir.parent}/configs/prismer.json', 'r'))[config['prismer_model']]
         roberta_config = RobertaConfig.from_dict(prismer_config['roberta_model'])
 
         self.tokenizer = RobertaTokenizer.from_pretrained(prismer_config['roberta_model']['model_name'])
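The patched line resolves configs/prismer.json relative to the model package before handing its contents to transformers. A standalone sketch of that flow; the 'prismer_base' key is an assumption, only the transformers calls are taken from the code above:

import json
import pathlib

from transformers import RobertaConfig, RobertaTokenizer

cur_dir = pathlib.Path(__file__).parent                    # assumed to be <repo>/prismer/model
with open(cur_dir.parent / 'configs' / 'prismer.json') as f:
    prismer_config = json.load(f)['prismer_base']          # 'prismer_base' is an assumed key name
roberta_config = RobertaConfig.from_dict(prismer_config['roberta_model'])
tokenizer = RobertaTokenizer.from_pretrained(prismer_config['roberta_model']['model_name'])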
prismer_model.py CHANGED
@@ -7,12 +7,12 @@ import sys
 import cv2
 import torch
 
+from prismer.dataset import create_dataset, create_loader
+from prismer.model.prismer_caption import PrismerCaption
+
+
 repo_dir = pathlib.Path(__file__).parent
 submodule_dir = repo_dir / 'prismer'
-sys.path.insert(0, submodule_dir.as_posix())
-
-from dataset import create_dataset, create_loader
-from model.prismer_caption import PrismerCaption
 
 
 def download_models() -> None:
@@ -50,11 +50,11 @@ def run_experts(image_path: str) -> tuple[str | None, ...]:
     for expert_name in expert_names:
         env = os.environ.copy()
         if 'PYTHONPATH' in env:
-            env['PYTHONPATH'] = f'{submodule_dir.as_posix()}:{env["PYTHONPATH"]}'
+            env['PYTHONPATH'] = f'{repo_dir.as_posix()}:{env["PYTHONPATH"]}'
         else:
-            env['PYTHONPATH'] = submodule_dir.as_posix()
+            env['PYTHONPATH'] = repo_dir.as_posix()
 
-        subprocess.run(shlex.split(f'python experts/generate_{expert_name}.py'), cwd='prismer', env=env, check=True)
+        subprocess.run(shlex.split(f'python prismer/experts/generate_{expert_name}.py'), env=env, check=True)
 
     # keys = ['depth', 'edge', 'normal', 'seg_coco', 'obj_detection', 'ocr_detection']
     keys = ['depth', 'edge', 'normal']
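The expert scripts are now launched from the repository root with that root prepended to PYTHONPATH, so their prismer.* imports resolve without changing the working directory. A reduced sketch of the launch logic for a single expert ('depth' is one of the keys listed above; the full expert_names list is not shown in this hunk):

import os
import pathlib
import shlex
import subprocess

repo_dir = pathlib.Path(__file__).parent

# Prepend the repo root so `import prismer.*` works inside the child process.
env = os.environ.copy()
env['PYTHONPATH'] = f'{repo_dir.as_posix()}:{env["PYTHONPATH"]}' if 'PYTHONPATH' in env else repo_dir.as_posix()
subprocess.run(shlex.split('python prismer/experts/generate_depth.py'), env=env, check=True)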