# Preprocessing walkthrough for Hugging Face `datasets`.
#
# The text and audio experiments are kept below as disabled (commented-out)
# code; only the image pipeline at the bottom actually runs.

from transformers import AutoFeatureExtractor, AutoTokenizer
from datasets import Audio, load_dataset

# --- Text example (disabled) -------------------------------------------------
# tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# dataset = load_dataset("rotten_tomatoes", split="train")
# print(tokenizer(dataset[0]["text"]))
#
# def tokenization(example):
#     return tokenizer(example["text"])
#
# dataset = dataset.map(tokenization, batched=True)

# --- Audio example (disabled) ------------------------------------------------
# feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
# dataset = load_dataset("PolyAI/minds14", "en-US", split="train")
# print(dataset[0]["audio"])
# dataset = dataset.cast_column("audio", Audio(sampling_rate=16_000))
# print(dataset[0]["audio"])
#
# def preprocess_function(examples):
#     audio_arrays = [x["array"] for x in examples["audio"]]
#     inputs = feature_extractor(
#         audio_arrays, sampling_rate=feature_extractor.sampling_rate, max_length=16000, truncation=True
#     )
#     return inputs
#
# dataset = dataset.map(preprocess_function, batched=True)

# --- Image example (active) --------------------------------------------------
# Load the feature extractor for the ViT checkpoint. It is not referenced by
# the active code in this section after being created.
feature_extractor = AutoFeatureExtractor.from_pretrained(
    "google/vit-base-patch16-224-in21k"
)

# Train split of the "beans" dataset; show the raw image of the first example.
dataset = load_dataset("beans", split="train")
first_example = dataset[0]
print(first_example["image"])

from torchvision.transforms import RandomRotation

# Rotation transform configured with the degree range (0, 90).
rotate = RandomRotation(degrees=(0, 90))


def transforms(examples):
    """Add a ``"pixel_values"`` entry holding rotated RGB copies of the images.

    Args:
        examples: Mapping with an ``"image"`` entry that yields objects
            exposing ``convert`` (presumably PIL images -- confirm against
            the dataset's features).

    Returns:
        The same ``examples`` object, with ``"pixel_values"`` set to a list
        containing ``rotate(image.convert("RGB"))`` for every input image.
    """
    # Lazy generator keeps the original per-image order: convert, then rotate.
    rgb_images = (img.convert("RGB") for img in examples["image"])
    examples["pixel_values"] = [rotate(rgb) for rgb in rgb_images]
    return examples


# Attach the transform to the dataset. Per the `datasets` documentation it is
# applied on the fly when rows are accessed -- confirm for the installed version.
dataset.set_transform(transforms)

# Fetch the first example again and show the value produced by `transforms`.
transformed_example = dataset[0]
print(transformed_example["pixel_values"])