zamborg committed
Commit a4c3b59
1 Parent(s): 2f0a51c

I think this works

app.py CHANGED
@@ -42,11 +42,14 @@ if uploaded_image is None and submitted:
 else:
     image_file = sample_image if sample_image is not None else random_image

-    image = uploaded_image if uploaded_image is not None else Image.open()
+    image = uploaded_image if uploaded_image is not None else Image.open(image_file)

     image_dict = imageLoader.transform(image)

-    show.image(st.image(image_dict["image"]), "Target Image")
+    image = imageLoader.to_image(image_dict["image"].squeeze(0))
+
+    show = st.image(image)
+    show.image(image, "Your Image")

     with st.spinner("Generating Caption"):
         subreddit, caption = virtexModel.predict(image_dict)
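Note: a minimal sketch of how the updated display path in app.py fits together, assuming `imageLoader`, `image_file`, and the Streamlit setup are defined earlier in the file (outside this hunk):

from PIL import Image
import streamlit as st

image = Image.open(image_file)                                  # PIL image from the chosen file
image_dict = imageLoader.transform(image)                       # {"image": 1xCxHxW float tensor}
image = imageLoader.to_image(image_dict["image"].squeeze(0))    # drop batch dim, back to PIL
show = st.image(image)                                          # render the image
show.image(image, "Your Image")                                 # re-render with a caption

The `.squeeze(0)` matters because `ImageLoader.transform` returns a batched tensor, while the new `to_image` helper (added in model.py below) expects a CxHxW tensor.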
model.py CHANGED
@@ -30,6 +30,8 @@ class ImageLoader():
     def transform(self, image):
         im = torch.FloatTensor(self.transformer(image)).unsqueeze(0)
         return {"image": im}
+    def to_image(self, tensor):
+        return torchvision.transforms.ToPILImage()(tensor)

 class VirTexModel():
     def __init__(self):
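Note: a small hedged sketch of the round trip the new `to_image` helper enables, assuming model.py already imports `torchvision` (ToPILImage lives in `torchvision.transforms`) and that `ImageLoader()` can be constructed as elsewhere in model.py; the file name is a hypothetical placeholder:

from PIL import Image

loader = ImageLoader()                                        # constructor not shown in this hunk
image_dict = loader.transform(Image.open("example.jpg"))      # hypothetical input file
pil_image = loader.to_image(image_dict["image"].squeeze(0))   # CxHxW tensor back to a PIL image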
virtex/virtex/data/__init__.py CHANGED
@@ -10,7 +10,6 @@ from .datasets.downstream import (
     VOC07ClassificationDataset,
     ImageDirectoryDataset,
 )
-from .datasets.redcaps import TarfileDataset


 __all__ = [
virtex/virtex/data/datasets/redcaps.py CHANGED
@@ -1,129 +0,0 @@
-import glob
-import os
-import random
-from typing import Callable
-
-import numpy as np
-import torch
-from torch.utils.data import IterableDataset
-import webdataset as wds
-import wordsegment as ws
-
-from virtex.data.tokenizers import SentencePieceBPETokenizer
-from virtex.data import transforms as T
-import virtex.utils.distributed as dist
-
-ws.load()
-
-
-class TarfileDataset(IterableDataset):
-    def __init__(
-        self,
-        data_root: str,
-        batch_size: int,
-        tokenizer: SentencePieceBPETokenizer,
-        image_transform: Callable = T.DEFAULT_IMAGE_TRANSFORM,
-        shuffle_buffer_size: int = 3000,  # Set -1 to turn off shuffle.
-        max_caption_length: int = 50,
-    ):
-        super().__init__()
-
-        self.tokenizer = tokenizer
-        self.image_transform = image_transform
-        self.max_caption_length = max_caption_length
-
-        self.padding_idx = tokenizer.token_to_id("<unk>")
-        self.sos_idx = tokenizer.token_to_id("[SOS]")
-        self.eos_idx = tokenizer.token_to_id("[EOS]")
-        self.sep_idx = tokenizer.token_to_id("[SEP]")
-
-        # Glob expand all paths in data root.
-        all_data_paths = []
-        for dr in data_root.split(" "):
-            all_data_paths.extend(glob.glob(dr))
-
-        # Deterministic shuffle across GPU process.
-        all_data_paths = sorted(all_data_paths)
-        random.Random(0).shuffle(all_data_paths)
-
-        # Shard the data paths as per gpu process.
-        all_data_paths = all_data_paths[dist.get_rank()::dist.get_world_size()]
-
-        self._dset = (
-            wds.WebDataset(all_data_paths)
-            .shuffle(shuffle_buffer_size, initial=shuffle_buffer_size)
-            .decode("rgb8", handler=wds.warn_and_continue)
-            .map(self._preprocess)
-            .batched(batch_size)
-        )
-        # Perform word-segmentation of all subreddit names (that's how the
-        # tokenizer was prepared). Subreddit names can be obtained from
-        # TAR file names: `{subreddit}_{year}_{index}.tar`.
-        if "redcaps" in data_root:
-            self.subreddit_segs = {
-                sub: " ".join(ws.segment(ws.clean(sub))) for sub in
-                set([os.path.basename(p).split("_")[0] for p in all_data_paths])
-            }
-
-    def _preprocess(self, annotation):
-        image, caption = annotation["jpg"], annotation["json"]["caption"]
-
-        # Transform image-caption pair and convert image from HWC to CHW format.
-        # Pass in caption to image_transform due to paired horizontal flip.
-        # Caption won't be tokenized/processed here.
-        image_caption = self.image_transform(image=image, caption=caption)
-        image, caption = image_caption["image"], image_caption["caption"]
-        image = np.transpose(image, (2, 0, 1))
-
-        # Tokenize caption.
-        _caption_tokens = self.tokenizer.encode(caption)
-
-        # Get subreddit name if it exists, and tokenize it. Only for RedCaps.
-        if "subreddit" in annotation["json"]:
-            subreddit = annotation["json"]["subreddit"].lower()
-            subreddit = self.subreddit_segs[subreddit]
-
-            # Add special [SEP] token after subreddit.
-            _subreddit_tokens = self.tokenizer.encode(subreddit) + [self.sep_idx]
-        else:
-            _subreddit_tokens = []
-
-        # Create forward and backward caption with subreddit token at the start.
-        caption_tokens = (
-            [self.sos_idx] + _subreddit_tokens + _caption_tokens + [self.eos_idx]
-        )[: self.max_caption_length]
-
-        noitpac_tokens = (
-            [self.eos_idx] + _subreddit_tokens + _caption_tokens[::-1] + [self.sos_idx]
-        )[: self.max_caption_length]
-
-        return image, caption_tokens, noitpac_tokens, len(caption_tokens)
-
-    def __len__(self):
-        raise NotImplementedError
-
-    def __iter__(self):
-
-        for batch in iter(self._dset):
-            # Collate the batch properly here. `image` and `caption_lengths`
-            # are already tensors.
-            image, caption_tokens, noitpac_tokens, caption_lengths = batch
-
-            # Pad `caption_tokens` and `masked_labels` up to this length.
-            caption_tokens = torch.nn.utils.rnn.pad_sequence(
-                [torch.tensor(c, dtype=torch.long) for c in caption_tokens],
-                batch_first=True,
-                padding_value=self.padding_idx,
-            )
-            noitpac_tokens = torch.nn.utils.rnn.pad_sequence(
-                [torch.tensor(c, dtype=torch.long) for c in noitpac_tokens],
-                batch_first=True,
-                padding_value=self.padding_idx,
-            )
-            caption_lengths = torch.tensor(caption_lengths, dtype=torch.long)
-            yield {
-                "image": torch.tensor(image, dtype=torch.float),
-                "caption_tokens": caption_tokens,
-                "noitpac_tokens": noitpac_tokens,
-                "caption_lengths": caption_lengths,
-            }
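Note: for reference, a hedged sketch of the forward and reversed ("noitpac") token sequences the removed `TarfileDataset._preprocess` built; the token ids below are illustrative placeholders, not real vocabulary ids:

sos_idx, eos_idx, sep_idx = 1, 2, 3
_subreddit_tokens = [10, sep_idx]        # word-segmented subreddit + [SEP]
_caption_tokens = [20, 21, 22]           # tokenized caption
max_caption_length = 50

caption_tokens = ([sos_idx] + _subreddit_tokens + _caption_tokens + [eos_idx])[:max_caption_length]
noitpac_tokens = ([eos_idx] + _subreddit_tokens + _caption_tokens[::-1] + [sos_idx])[:max_caption_length]

print(caption_tokens)   # [1, 10, 3, 20, 21, 22, 2]
print(noitpac_tokens)   # [2, 10, 3, 22, 21, 20, 1]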
virtex/virtex/factories.py CHANGED
@@ -194,8 +194,6 @@ class PretrainingDatasetFactory(Factory):
         "masked_lm": vdata.MaskedLmDataset,
         "token_classification": vdata.TokenClassificationDataset,
         "multilabel_classification": vdata.MultiLabelClassificationDataset,
-        "virtex_web": vdata.TarfileDataset,
-        "miniclip_web": vdata.TarfileDataset,
     }

     @classmethod