ydshieh
/

vit-gpt2-coco-en-ckpts

Image Classification

vision-encoder-decoder

Model card | Files and versions | Training metrics | Community

ydshieh HF staff committed on Oct 24, 2021

Commit

3077814

·

1 Parent(s): fdc844c

Update pipeline.py

Files changed (1) hide show

pipeline.py +33 -29

pipeline.py CHANGED Viewed

@@ -1,40 +1,44 @@
-from typing import Dict, List, Any
-from PIL import Image
 import os
-import json
-import numpy as np
-from fastai.learner import load_learner
-from helpers import is_cat
 class PreTrainedPipeline():
     def __init__(self, path=""):
-        # IMPLEMENT_THIS
-        # Preload all the elements you are going to need at inference.
-        # For instance your model, processors, tokenizer that might be needed.
-        # This function is only called once, so do all the heavy processing I/O here"""
-        self.model = load_learner(os.path.join(path, "model.pkl"))
-        with open(os.path.join(path, "config.json")) as config:
-            config = json.load(config)
-        self.id2label = config["id2label"]
     def __call__(self, inputs: "Image.Image") -> List[Dict[str, Any]]:
         """
         Args:
-            inputs (:obj:`PIL.Image`):
-                The raw image representation as PIL.
-                No transformation made whatsoever from the input. Make all necessary transformations here.
         Return:
-            A :obj:`list`:. The list contains items that are dicts should be liked {"label": "XXX", "score": 0.82}
-                It is preferred if the returned list is in decreasing `score` order
         """
-        # IMPLEMENT_THIS
-        # FastAI expects a np array, not a PIL Image.
-        _, _, preds = self.model.predict(np.array(inputs))
-        preds = preds.tolist()
-        labels = [
-            {"label": str(self.id2label["0"]), "score": preds[0]},
-            {"label": str(self.id2label["1"]), "score": preds[1]},
-        ]
-        return labels

 import os
+from PIL import Image
+from transformers import ViTFeatureExtractor, AutoTokenizer, FlaxVisionEncoderDecoderModel
 class PreTrainedPipeline():
     def __init__(self, path=""):
+        model_dir = os.path.join(path, "ckpt_epoch_3_step_6900")
+        self.model = FlaxVisionEncoderDecoderModel.from_pretrained(model_dir)
+        self.feature_extractor = ViTFeatureExtractor.from_pretrained(model_dir)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
+        max_length = 16
+        num_beams = 4
+        self.gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
+        # compile the model
+        image_path = os.path.join(path, 'val_000000039769.jpg')
+        image = Image.open(image_path)
+        self(image)
+        image.close()
+    @jax.jit
+    def generate(self, pixel_values):
+        output_ids = self.model.generate(pixel_values, **self.gen_kwargs).sequences
+        return output_ids
     def __call__(self, inputs: "Image.Image") -> List[Dict[str, Any]]:
         """
         Args:
         Return:
         """
+        pixel_values = self.feature_extractor(images=inputs, return_tensors="np").pixel_values
+        output_ids = self.generate(pixel_values)
+        preds = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)
+        preds = [pred.strip() for pred in preds]
+        return preds[0]