ryparmar committed
Commit 10a6818
1 Parent(s): 355ebb4

add fashion-aggregator test app

app.py CHANGED
@@ -1,7 +1,148 @@
 import gradio as gr
 
-def greet(name):
-    return "Hello " + name + "!!"
 
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()
+"""Provide a text query describing what you are looking for and get back images with links!"""
+import argparse
+import logging
+import os
+import wandb
 import gradio as gr
 
+from pathlib import Path
+from typing import Callable, Dict, List, Tuple
+from PIL.Image import Image
 
+print(__file__)
+import fashion_aggregator.fashion_aggregator as fa
+
+
+os.environ["CUDA_VISIBLE_DEVICES"] = ""  # do not use GPU
+
+logging.basicConfig(level=logging.INFO)
+DEFAULT_APPLICATION_NAME = "fashion-aggregator"
+
+APP_DIR = Path(__file__).resolve().parent  # what is the directory for this application?
+FAVICON = APP_DIR / "t-shirt_1f455.png"  # path to a small image for display in browser tab and social media
+README = APP_DIR / "README.md"  # path to an app readme file in HTML/markdown
+
+DEFAULT_PORT = 11700
+
+# Download image embeddings
+api = wandb.Api()
+artifact = api.artifact("ryparmar/fashion-aggregator/unimoda-images:v0")
+artifact.download("fashion_aggregator/artifacts/img-embeddings")
+
+
+def main(args):
+    predictor = PredictorBackend(url=args.model_url)
+    frontend = make_frontend(predictor.run, flagging=args.flagging, gantry=args.gantry, app_name=args.application)
+    frontend.launch(
+        server_name="0.0.0.0",  # make server accessible, binding all interfaces  # noqa: S104
+        server_port=args.port,  # set a port to bind to, failing if unavailable
+        share=True,  # should we create a (temporary) public link on https://gradio.app?
+        favicon_path=FAVICON,  # what icon should we display in the address bar?
+    )
+
+
+def make_frontend(
+    fn: Callable[[Image], str], flagging: bool = False, gantry: bool = False, app_name: str = "fashion-aggregator"
+):
+    """Creates a gradio.Interface frontend for a text-to-image search function."""
+
+    allow_flagging = "never"
+    readme = _load_readme(with_logging=allow_flagging == "manual")
+
+    # build a basic browser interface to a Python function
+    frontend = gr.Interface(
+        fn=fn,  # which Python function are we interacting with?
+        outputs=gr.Gallery(label="Relevant Items"),
+        # what input widgets does it need? we configure a text box widget
+        inputs=gr.components.Textbox(label="Item Description"),
+        title="📝 Text2Image 👕",  # what should we display at the top of the page?
+        thumbnail=FAVICON,  # what should we display when the link is shared, e.g. on social media?
+        description=__doc__,  # what should we display just above the interface?
+        article=readme,  # what long-form content should we display below the interface?
+        cache_examples=False,  # should we cache those inputs for faster inference? slows down start
+        allow_flagging=allow_flagging,  # should we show users the option to "flag" outputs?
+        flagging_options=["incorrect", "offensive", "other"],  # what options do users have for feedback?
+    )
+    return frontend
+
+
+class PredictorBackend:
+    """Interface to a backend that serves predictions.
+
+    To communicate with a backend accessible via a URL, provide the url kwarg.
+
+    Otherwise, runs a predictor locally.
+    """
+
+    def __init__(self, url=None):
+        if url is not None:
+            self.url = url
+            self._predict = self._predict_from_endpoint
+        else:
+            model = fa.Retriever()
+            self._predict = model.predict
+            self._search_images = model.search_images
+
+    def run(self, text: str):
+        pred, metrics = self._predict_with_metrics(text)
+        self._log_inference(pred, metrics)
+        return pred
+
+    def _predict_with_metrics(self, text: str) -> Tuple[List[str], Dict[str, float]]:
+        paths_and_scores = self._search_images(text)
+        metrics = {"mean_score": sum(paths_and_scores["score"]) / len(paths_and_scores["score"])}
+        return paths_and_scores["path"], metrics
+
+    def _log_inference(self, pred, metrics):
+        for key, value in metrics.items():
+            logging.info(f"METRIC {key} {value}")
+        logging.info(f"PRED >begin\n{pred}\nPRED >end")
+
+
+def _load_readme(with_logging=False):
+    with open(README) as f:
+        lines = f.readlines()
+        if not with_logging:
+            lines = lines[: lines.index("<!-- logging content below -->\n")]
+
+    readme = "".join(lines)
+    return readme
+
+
+def _make_parser():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--model_url",
+        default=None,
+        type=str,
+        help="Identifies a URL to which to send image data. Data is base64-encoded, converted to a utf-8 string, and then sent via a POST request as JSON with the key 'image'. Default is None, which instead sends the data to a model running locally.",
+    )
+    parser.add_argument(
+        "--port",
+        default=DEFAULT_PORT,
+        type=int,
+        help=f"Port on which to expose this server. Default is {DEFAULT_PORT}.",
+    )
+    parser.add_argument(
+        "--flagging",
+        action="store_true",
+        help="Pass this flag to allow users to 'flag' model behavior and provide feedback.",
+    )
+    parser.add_argument(
+        "--gantry",
+        action="store_true",
+        help="Pass --flagging and this flag to log user feedback to Gantry. Requires GANTRY_API_KEY to be defined as an environment variable.",
+    )
+    parser.add_argument(
+        "--application",
+        default=DEFAULT_APPLICATION_NAME,
+        type=str,
+        help=f"Name of the Gantry application to which feedback should be logged, if --gantry and --flagging are passed. Default is {DEFAULT_APPLICATION_NAME}.",
+    )
+    return parser
+
+
+if __name__ == "__main__":
+    parser = _make_parser()
+    args = parser.parse_args()
+    main(args)
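For reference, a minimal sketch of how the pieces added above fit together when driven from Python rather than the CLI. This is an illustration only: it assumes importing app is acceptable (the module-level wandb artifact download runs on import) and that README.md contains the logging marker that _load_readme expects.

# Sketch: build the backend and frontend with default arguments, without launching the server.
from app import PredictorBackend, make_frontend, _make_parser

args = _make_parser().parse_args([])              # defaults: no --model_url, port 11700
predictor = PredictorBackend(url=args.model_url)  # no URL given, so the local fa.Retriever() is used
demo = make_frontend(predictor.run, flagging=args.flagging, gantry=args.gantry, app_name=args.application)
# demo.launch(server_port=args.port)              # uncomment to serve the Gradio UI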
fashion_aggregator/__init__.py ADDED
@@ -0,0 +1 @@
+"""Modules for creating and running a fashion aggregator."""
fashion_aggregator/fashion_aggregator.py ADDED
@@ -0,0 +1,125 @@
+"""Retrieves fashion images relevant to a given text query.
+
+Example usage as a script:
+
+  python fashion_aggregator/fashion_aggregator.py \
+    "Two dogs playing in the snow"
+"""
+import os
+import argparse
+import pickle
+from pathlib import Path
+from typing import List, Any, Dict
+from PIL import Image
+
+from transformers import AutoTokenizer
+from sentence_transformers import SentenceTransformer, util
+from multilingual_clip import pt_multilingual_clip
+import torch
+
+
+STAGED_TEXT_ENCODER_MODEL_DIRNAME = Path(__file__).resolve().parent / "artifacts" / "text-encoder"
+STAGED_TEXT_TOKENIZER_DIRNAME = Path(__file__).resolve().parent / "artifacts" / "text-tokenizer"
+STAGED_IMG_ENCODER_MODEL_DIRNAME = Path(__file__).resolve().parent / "artifacts" / "img-encoder"
+STAGED_IMG_EMBEDDINGS_DIRNAME = Path(__file__).resolve().parent / "artifacts" / "img-embeddings"
+RAW_PHOTOS_DIR = Path(__file__).resolve().parent / "data" / "photos"
+MODEL_FILE = "model.pt"
+EMBEDDINGS_FILE = "embeddings.pkl"
+
+
+class TextEncoder:
+    """Encodes the given text."""
+
+    def __init__(self, model_path='M-CLIP/XLM-Roberta-Large-Vit-B-32'):
+        if model_path is None:
+            model_path = STAGED_TEXT_ENCODER_MODEL_DIRNAME / MODEL_FILE
+        self.model = pt_multilingual_clip.MultilingualCLIP.from_pretrained(model_path)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+
+    @torch.no_grad()
+    def encode(self, query: str) -> torch.Tensor:
+        """Predict/infer text embedding for a given query."""
+        query_emb = self.model.forward([query], self.tokenizer)
+        return query_emb
+
+
+class ImageEncoder:
+    """Encodes the given image."""
+
+    def __init__(self, model_path='clip-ViT-B-32'):
+        if model_path is None:
+            model_path = STAGED_IMG_ENCODER_MODEL_DIRNAME / MODEL_FILE
+        self.model = SentenceTransformer(model_path)
+
+    @torch.no_grad()
+    def encode(self, image: Image.Image) -> torch.Tensor:
+        """Predict/infer image embedding for a given image."""
+        image_emb = self.model.encode([image], convert_to_tensor=True, show_progress_bar=False)
+        return image_emb
+
+
+class Retriever:
+    """Retrieves relevant images for a given text embedding."""
+
+    def __init__(self, image_embeddings_path=None):
+        if image_embeddings_path is None:
+            image_embeddings_path = STAGED_IMG_EMBEDDINGS_DIRNAME / EMBEDDINGS_FILE
+
+        self.text_encoder = TextEncoder()
+        self.image_encoder = ImageEncoder()
+
+        with open(image_embeddings_path, 'rb') as file:
+            self.image_names, self.image_embeddings = pickle.load(file)
+        print("Images:", len(self.image_names))
+
+    @torch.no_grad()
+    def predict(self, text_query: str, k: int = 10) -> List[Any]:
+        """Return the top-k relevant items for a given text query."""
+        query_emb = self.text_encoder.encode(text_query)
+        relevant_images = util.semantic_search(query_emb, self.image_embeddings, top_k=k)[0]
+        return relevant_images
+
+    @torch.no_grad()
+    def search_images(self, text_query: str, k: int = 6) -> Dict[str, List[Any]]:
+        """Return paths and scores of the top-k relevant images for a given text query."""
+        images = self.predict(text_query, k)
+        paths_and_scores = {"path": [], "score": []}
+        for img in images:
+            paths_and_scores["path"].append(os.path.join(RAW_PHOTOS_DIR, self.image_names[img["corpus_id"]]))
+            paths_and_scores["score"].append(img["score"])
+        return paths_and_scores
+
+    @torch.no_grad()
+    def save(self, output_dir: str = None):
+        """Create the staging directories for the encoder and tokenizer artifacts."""
+        if output_dir:
+            Path(output_dir).mkdir(parents=True, exist_ok=True)
+            text_encoder_path = Path(output_dir) / "text-encoder"
+            text_tokenizer_path = Path(output_dir) / "text-tokenizer"
+            img_encoder_path = Path(output_dir) / "img-encoder"
+
+            text_encoder_path.mkdir(parents=True, exist_ok=True)
+            text_tokenizer_path.mkdir(parents=True, exist_ok=True)
+            img_encoder_path.mkdir(parents=True, exist_ok=True)
+        else:
+            Path(STAGED_TEXT_ENCODER_MODEL_DIRNAME).mkdir(parents=True, exist_ok=True)
+            Path(STAGED_TEXT_TOKENIZER_DIRNAME).mkdir(parents=True, exist_ok=True)
+            Path(STAGED_IMG_ENCODER_MODEL_DIRNAME).mkdir(parents=True, exist_ok=True)
+
+
+def main():
+    parser = argparse.ArgumentParser(description=__doc__.split("\n")[0])
+    parser.add_argument(
+        "text_query",
+        type=str,
+        help="Text query",
+    )
+    args = parser.parse_args()
+
+    retriever = Retriever()
+    print(f"Given query: {args.text_query}")
+    print(retriever.predict(args.text_query))
+
+
+if __name__ == "__main__":
+    main()
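Informally, the Retriever added above can also be exercised on its own. A sketch, assuming embeddings.pkl has been staged under fashion_aggregator/artifacts/img-embeddings and the photos it references exist under fashion_aggregator/data/photos (the query string is made up for illustration):

# Sketch: query the retriever directly and print the top matches with their scores.
from fashion_aggregator.fashion_aggregator import Retriever

retriever = Retriever()  # loads the text/image encoders and the pickled image embeddings
results = retriever.search_images("red floral summer dress", k=3)
for path, score in zip(results["path"], results["score"]):
    print(f"{score:.3f}  {path}")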
fashion_aggregator/util.py ADDED
File without changes
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 sentence-transformers==2.2.2
 clip @ git+https://github.com/openai/CLIP.git@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1
-multilingual-clip==1.0.10
+multilingual-clip==1.0.10
+wandb