|
OpenCLIP-ViT-g image encoder, extracted from the [Stable Diffusion 2.1-unclip `image_encoder` subfolder](https://huggingface.co/stabilityai/stable-diffusion-2-1-unclip/tree/main/image_encoder). |
|
|
|
```python |
|
from PIL import Image
import requests

from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection

# Load the OpenCLIP-ViT-g image encoder and its matching preprocessor.
model_name = "Jiayi-Pan/SD-v2-1-Image-Encoder"
model = CLIPVisionModelWithProjection.from_pretrained(model_name)
# Fix: the original snippet called `transformers.CLIPImageProcessor`, but the
# `transformers` module itself was never imported (only names from it were),
# which raises NameError. Use the directly imported class instead.
processor = CLIPImageProcessor.from_pretrained(model_name)

# Fetch an example image (COCO val2017 sample).
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Preprocess to model inputs (pixel_values) as PyTorch tensors, then run the
# vision tower plus projection head.
inputs = processor(images=image, return_tensors="pt")
outputs = model(**inputs)

# Projected image embeddings — one embedding vector per input image.
image_embeds = outputs.image_embeds
|
``` |