"""Upscale an image with Swin2SR, first via the pipeline API, then manually.

The script downloads a sample image, runs it through the ``image-to-image``
pipeline, and then repeats the inference step by step with the model and its
image processor to show how the raw output tensor is turned back into a
PIL image.
"""

import numpy as np
import requests
import torch
from PIL import Image
from transformers import (
    Swin2SRForImageSuperResolution,
    Swin2SRImageProcessor,
    pipeline,
)

MODEL_ID = "caidas/swin2SR-lightweight-x2-64"

# `torch` must be imported before this line; use the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- 1. High-level API: the image-to-image pipeline --------------------------
pipe = pipeline(task="image-to-image", model=MODEL_ID, device=device)

url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/cat.jpg"
image = Image.open(requests.get(url, stream=True).raw)
print(image.size)

upscaled = pipe(image)
print(upscaled.size)

# --- 2. Manual inference: model + image processor ----------------------------
model = Swin2SRForImageSuperResolution.from_pretrained(MODEL_ID).to(device)
# Load the preprocessing config that ships with the checkpoint. Passing the
# repo id straight to the constructor would not load that config.
processor = Swin2SRImageProcessor.from_pretrained(MODEL_ID)

pixel_values = processor(image, return_tensors="pt").pixel_values
print(pixel_values.shape)

# Inputs must live on the same device as the model.
pixel_values = pixel_values.to(device)

# Inference only: no gradients are needed.
with torch.no_grad():
    outputs = model(pixel_values)

# Example of the returned output object (recorded from a CUDA run):
# (loss=None, reconstruction=tensor([[[[0.8270, 0.8269, 0.8275, ..., 0.7463, 0.7446, 0.7453],
#   [0.8287, 0.8278, 0.8283, ..., 0.7451, 0.7448, 0.7457],
#   [0.8280, 0.8273, 0.8269, ..., 0.7447, 0.7446, 0.7452],
#   ...,
#   [0.5923, 0.5933, 0.5924, ..., 0.0697, 0.0695, 0.0706],
#   [0.5926, 0.5932, 0.5926, ..., 0.0673, 0.0687, 0.0705],
#   [0.5927, 0.5914, 0.5922, ..., 0.0664, 0.0694, 0.0718]]]],
#   device='cuda:0'), hidden_states=None, attentions=None)

outputs.reconstruction.data.shape
# torch.Size([1, 3, 880, 1072])

# --- 3. Post-process the reconstruction into an image -------------------------
# squeeze, take to CPU and clip the values
output = outputs.reconstruction.data.squeeze().cpu().clamp_(0, 1).numpy()
# rearrange the axes: move axis 0 to the end (presumably channels-first ->
# channels-last, as the recorded shape above suggests)
output = np.moveaxis(output, source=0, destination=-1)
# bring values back to pixel values range
output = (output * 255.0).round().astype(np.uint8)
# Bare expression: renders the image when run in a notebook/REPL.
Image.fromarray(output)