# image_variation / app.py
# mrLarry's picture
# Create app.py
# 6719685
from pathlib import Path

import torch
from PIL import Image
from torchvision import transforms
from transformers import CLIPProcessor, CLIPModel
# Script: embed one image with CLIP, then try to turn that embedding into
# image variations and write them to disk.

# load the CLIP model (use the GPU when one is available)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
model.to(device)

# load the CLIP processor
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# load the image; force RGB so grayscale / palette / RGBA files still yield
# the three channels the vision tower expects, and close the file handle
image_path = "path/to/image.jpg"
with Image.open(image_path) as source_image:
    image = source_image.convert("RGB")

# let the processor do the resizing, cropping and normalisation instead of a
# bare Resize + ToTensor, which skips the normalisation step entirely
inputs = processor(images=image, return_tensors="pt")
pixel_values = inputs["pixel_values"].to(device)

# get the image features using the CLIP model
# (CLIPModel exposes get_image_features; it has no encode_image method)
with torch.no_grad():
    features = model.get_image_features(pixel_values=pixel_values)

# generate variations of the image
# NOTE(review): CLIPModel is a contrastive image/text encoder and does not
# define generate_images, so the call below cannot work with the model loaded
# above. Fail with an actionable message instead of a bare AttributeError.
# The keyword arguments are kept as originally written; their meaning depends
# on whatever generator ends up providing generate_images - TODO confirm.
if not hasattr(model, "generate_images"):
    raise NotImplementedError(
        "CLIPModel has no generate_images(); CLIP only encodes images and "
        "text. Use an image-generation model (for example a diffusion "
        "image-variation pipeline) to produce variations from the features."
    )

with torch.no_grad():
    outputs = model.generate_images(
        features=features,
        num_images=5,  # number of different variations to generate
        max_length=50,  # maximum length of the generated caption for the variation
        clip=processor,
        temperature=1.0,  # temperature of the sampling process
        top_p=0.9,  # top-p probability for the sampling process
        batch_size=1,
        device=device,
    )

# save the generated images; create the target directory first so the save
# does not fail on a fresh checkout
output_dir = Path("output")
output_dir.mkdir(parents=True, exist_ok=True)
for i, output in enumerate(outputs):
    # assumes output[0] is an image tensor accepted by to_pil_image - TODO confirm
    generated_image = transforms.functional.to_pil_image(output[0])
    generated_image.save(output_dir / f"image_variation_{i}.jpg")