Spaces:

umichVision
/

virtex-redcaps

Runtime error

App Files Files Community

zamborg committed on Nov 13, 2021

Commit

49c0315

1 Parent(s): a5f8a35

app start

Browse files

Files changed (3) hide show

.gitignore +3 -0
app.py +96 -2
samples/test.jpg +0 -0

.gitignore CHANGED Viewed

	@@ -1 +1,4 @@
1	.ipynb_checkpoints/*

 .ipynb_checkpoints/*
+*.pth
+*.yaml
+*ipynb_checkpoints

app.py CHANGED Viewed

@@ -1,4 +1,98 @@
 import streamlit as st
-x = st.slider("Select a value")
-st.write(x, "squared is", x * x)

 import streamlit as st
+from huggingface_hub import snapshot_download
+from PIL import Image
+import argparse
+import json
+import os
+from typing import Any, Dict, List
+from loguru import logger
+import torch
+import torchvision
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+import wordsegment as ws
+from virtex.config import Config
+from virtex.data import ImageDirectoryDataset
+from virtex.factories import TokenizerFactory, PretrainingModelFactory
+from virtex.utils.checkpointing import CheckpointManager
+from virtex.utils.common import common_parser
+CONFIG_PATH = "config.yaml"  # config file name: downloaded from the Hub below and passed to Config() in VirTexModel
+MODEL_PATH = "checkpoint_last5.pth"  # checkpoint file name: downloaded from the Hub below into the working directory
+# x = st.slider("Select a value")
+# st.write(x, "squared is", x * x)
+class ImageLoader():
+    """Load an image file and package it as a single-image model input.
+
+    The transform pipeline resizes to 256, center-crops to 224 and converts
+    the PIL image to a tensor.
+    """
+    def __init__(self):
+        self.transformer = torchvision.transforms.Compose([
+            torchvision.transforms.Resize(256),
+            torchvision.transforms.CenterCrop(224),
+            torchvision.transforms.ToTensor(),
+        ])
+    def load(self, im_path, prompt):
+        """Read ``im_path`` and return the input dict consumed by the model.
+
+        Args:
+            im_path: Path (or file object) accepted by ``PIL.Image.open``.
+            prompt: Value stored unchanged under ``"decode_prompt"``.
+
+        Returns:
+            Dict with ``"image"`` (the transformed image with a leading
+            batch dimension of 1) and ``"decode_prompt"`` (``prompt``).
+        """
+        # Force three channels: grayscale, palette and RGBA files would
+        # otherwise produce 1 or 4 channels instead of the 3 the caller
+        # expects. The context manager also releases the file handle.
+        with Image.open(im_path) as raw:
+            rgb = raw.convert("RGB")
+        # ToTensor already yields a float tensor for an 8-bit RGB image, so no
+        # extra FloatTensor wrapping is needed before adding the batch axis.
+        im = self.transformer(rgb).unsqueeze(0)
+        return {"image": im, "decode_prompt": prompt}
+class VirTexModel():
+    """CPU inference wrapper: builds the model from ``CONFIG_PATH``, loads the
+    checkpoint at ``MODEL_PATH`` and predicts a (subreddit, caption) pair.
+    """
+    def __init__(self):
+        self.config = Config(CONFIG_PATH)
+        ws.load()
+        self.device = 'cpu'
+        self.tokenizer = TokenizerFactory.from_config(self.config)
+        self.model = PretrainingModelFactory.from_config(self.config).to(self.device)
+        # Use the shared constant so the file that is downloaded and the file
+        # that is loaded cannot drift apart.
+        CheckpointManager(model=self.model).load(MODEL_PATH)
+        self.model.eval()
+        self.loader = ImageLoader()
+    def predict(self, im_path):
+        """Caption the image at ``im_path``.
+
+        Args:
+            im_path: Path of the image file to caption.
+
+        Returns:
+            Tuple ``(subreddit, rest_of_caption)`` of strings; ``subreddit``
+            is ``""`` when the decoded text contains no separator marker.
+        """
+        subreddit_tokens = torch.tensor([self.model.sos_index], device=self.device).long()
+        image = self.loader.load(im_path, subreddit_tokens)  # should be of shape 1, 3, 224, 224
+        # Inference only: skip autograd bookkeeping for the forward pass.
+        with torch.no_grad():
+            output_dict = self.model(image)
+        caption = output_dict["predictions"][0].tolist()  # only one prediction
+        # Swap the first [SEP] token for "::" so the subreddit can be split
+        # off after decoding.
+        sep_id = self.tokenizer.token_to_id("[SEP]")
+        if sep_id in caption:
+            caption[caption.index(sep_id)] = self.tokenizer.token_to_id("::")
+        caption = self.tokenizer.decode(caption)
+        # Separate out subreddit from the rest of caption.
+        if "⁇" in caption:  # "⁇" is the token decode equivalent of "::"
+            # partition() splits on the first marker only, so a caption with
+            # several "⁇" no longer fails on tuple unpacking.
+            subreddit, _, rest_of_caption = caption.partition("⁇")
+            subreddit = "".join(subreddit.split())
+            rest_of_caption = rest_of_caption.strip()
+        else:
+            subreddit, rest_of_caption = "", caption
+        return subreddit, rest_of_caption
+def load_models():
+    """Download the config and checkpoint files into the working directory.
+
+    Each file named by ``CONFIG_PATH`` and ``MODEL_PATH`` is fetched from the
+    ``zamborg/redcaps`` Hub repository and copied next to this script under
+    the same name.
+
+    Raises:
+        OSError: If a downloaded file cannot be copied.
+    """
+    # Local import keeps this block self-contained.
+    import shutil
+    for f in (CONFIG_PATH, MODEL_PATH):
+        fp = cached_download(hf_hub_url("zamborg/redcaps", filename=f))
+        # Copy in-process instead of shelling out to `cp`: this is portable,
+        # needs no quoting of the path, and a failed copy raises rather than
+        # being ignored.
+        shutil.copyfile(fp, f"./{f}")
+# load a virtex model
+from huggingface_hub import hf_hub_url, cached_download
+# Download the model files through the helper instead of repeating its loop.
+load_models()
+# Inference on the sample image, which this commit adds under samples/.
+virtexModel = VirTexModel()
+subreddit, caption = virtexModel.predict("samples/test.jpg")
+print(subreddit)
+print(caption)

samples/test.jpg ADDED Viewed