Spaces:
Runtime error
Runtime error
from torchvision.io import read_image, ImageReadMode | |
import torch | |
import numpy as np | |
from torchvision.transforms import CenterCrop, ConvertImageDtype, Normalize, Resize | |
from torchvision.transforms.functional import InterpolationMode | |
from transformers import MBart50TokenizerFast | |
import json | |
from PIL import Image | |
class Transform(torch.nn.Module):
    """Image preprocessing pipeline packaged as a ``torch.nn.Module``.

    The pipeline applies, in order: ``Resize`` (bicubic interpolation),
    ``CenterCrop``, ``ConvertImageDtype`` to ``torch.float`` and
    ``Normalize`` with fixed per-channel mean / std constants.
    """

    def __init__(self, image_size):
        """Assemble the preprocessing steps.

        Args:
            image_size: Passed as ``[image_size]`` to ``Resize`` and as-is
                to ``CenterCrop``.
        """
        super().__init__()
        channel_mean = (0.48145466, 0.4578275, 0.40821073)
        channel_std = (0.26862954, 0.26130258, 0.27577711)
        steps = [
            Resize([image_size], interpolation=InterpolationMode.BICUBIC),
            CenterCrop(image_size),
            ConvertImageDtype(torch.float),
            Normalize(channel_mean, channel_std),
        ]
        self.transforms = torch.nn.Sequential(*steps)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the pipeline with gradient tracking disabled."""
        with torch.no_grad():
            return self.transforms(x)
# Shared preprocessing pipeline, constructed once at import time.
transform = Transform(image_size=224)
def get_transformed_image(image):
    """Preprocess ``image`` with the module-level ``transform``.

    A NumPy array whose last axis has length 3 is first transposed with axes
    ``(2, 0, 1)`` and wrapped in a ``torch.tensor``; any other input is
    handed to ``transform`` unchanged.

    Returns:
        The transformed result after ``unsqueeze(0)``, a ``(0, 2, 3, 1)``
        permutation and conversion via ``.numpy()``.
    """
    # The shape test deliberately runs before the isinstance test, exactly
    # as in the original expression.
    needs_conversion = image.shape[-1] == 3 and isinstance(image, np.ndarray)
    if needs_conversion:
        # NOTE(review): the source's indentation was lost; the tensor
        # conversion is assumed to belong to this branch - confirm.
        image = torch.tensor(image.transpose(2, 0, 1))

    transformed = transform(image)
    batched = transformed.unsqueeze(0)
    return batched.permute(0, 2, 3, 1).numpy()
# Tokenizer loaded once at import time from the "facebook/mbart-large-50"
# checkpoint.
tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50")

# Lower-case language name -> language code string; generate_sequence uses
# the code to index tokenizer.lang_code_to_id.
language_mapping = {
    "english": "en_XX",
    "german": "de_DE",
    "french": "fr_XX",
    "spanish": "es_XX",
}
def generate_sequence(model, pixel_values, lang_code):
    """Generate output for ``pixel_values`` and decode it to text.

    Args:
        model: Object exposing a ``generate(...)`` method.
        pixel_values: Forwarded to ``model.generate`` as ``input_ids``.
        lang_code: A key of ``language_mapping`` (e.g. ``"english"``).

    Returns:
        Whatever ``tokenizer.batch_decode`` returns for element ``0`` of the
        generation output, with special tokens skipped.

    Raises:
        KeyError: If ``lang_code`` is not a key of ``language_mapping``.
    """
    mapped_code = language_mapping[lang_code]
    start_token_id = tokenizer.lang_code_to_id[mapped_code]
    generated = model.generate(
        input_ids=pixel_values,
        decoder_start_token_id=start_token_id,
        max_length=64,
        num_beams=4,
    )
    # Element 0 of the generation output is what gets decoded.
    return tokenizer.batch_decode(
        generated[0], skip_special_tokens=True, max_length=64
    )