florentgbelidji
/

blip_captioning

image-captioning

endpoints-template

Inference Endpoints

Model card Files Files and versions Community

florentgbelidji HF staff committed on Aug 16, 2022

Commit

cb05228

·

1 Parent(s): ecd409c

Decoding image in input

Files changed (1) hide show

pipeline.py +5 -1

pipeline.py CHANGED Viewed

@@ -2,6 +2,8 @@ from typing import  Dict, List, Any
 from PIL import Image
 import requests
 import torch
 from blip import blip_decoder
 from torchvision import transforms
 from torchvision.transforms.functional import InterpolationMode
@@ -36,9 +38,11 @@ class PreTrainedPipeline():
                 - "label": A string representing what the label/class is. There can be multiple labels.
                 - "score": A score between 0 and 1 describing how confident the model is for this label/class.
         """
-        image = data.pop("inputs", data)
         parameters = data.pop("parameters", None)
         image = transform(image).unsqueeze(0).to(device)
         with torch.no_grad():
             caption = self.model.generate(image, sample=True, top_p=0.9, max_length=20, min_length=5)

 from PIL import Image
 import requests
 import torch
+import base64
+from io import BytesIO
 from blip import blip_decoder
 from torchvision import transforms
 from torchvision.transforms.functional import InterpolationMode
                 - "label": A string representing what the label/class is. There can be multiple labels.
                 - "score": A score between 0 and 1 describing how confident the model is for this label/class.
         """
+        inputs = data.pop("inputs", data)
         parameters = data.pop("parameters", None)
+        # decode base64 image to PIL
+        image = Image.open(BytesIO(base64.b64decode(inputs['image'])))
         image = transform(image).unsqueeze(0).to(device)
         with torch.no_grad():
             caption = self.model.generate(image, sample=True, top_p=0.9, max_length=20, min_length=5)