File size: 1,470 Bytes
b9a8411
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from datasets import load_dataset, Audio
from torchvision.transforms import RandomRotation
from transformers import AutoTokenizer, AutoFeatureExtractor

# tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# dataset = load_dataset("rotten_tomatoes", split="train")

# print(tokenizer(dataset[0]["text"]))

# def tokenization(example):
#     return tokenizer(example["text"])

# dataset = dataset.map(tokenization, batched=True)

# feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
# dataset = load_dataset("PolyAI/minds14", "en-US", split="train")
# print(dataset[0]["audio"])
# dataset = dataset.cast_column("audio", Audio(sampling_rate=16_000))
# print(dataset[0]["audio"])

# def preprocess_function(examples):
#     audio_arrays = [x["array"] for x in examples["audio"]]
#     inputs = feature_extractor(
#         audio_arrays, sampling_rate=feature_extractor.sampling_rate, max_length=16000, truncation=True
#     )
#     return inputs
# dataset = dataset.map(preprocess_function, batched=True)

# Image example: load the feature extractor for the
# "google/vit-base-patch16-224-in21k" checkpoint and the "beans" train split.
# NOTE(review): `feature_extractor` is not referenced by any of the visible
# code after this line - confirm whether it is still needed before removing it.
feature_extractor = AutoFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
dataset = load_dataset("beans", split="train")

# Print the first example's "image" entry before any transform is attached.
print(dataset[0]["image"])

# Augmentation applied by `transforms`: per the torchvision documentation,
# `degrees=(0, 90)` is the (min, max) range the rotation angle is drawn from.
# `RandomRotation` is imported with the other file-level imports.
rotate = RandomRotation(degrees=(0, 90))
def transforms(examples):
    """Add a "pixel_values" entry holding a rotated RGB copy of every image.

    Args:
        examples: Batch mapping with an "image" key whose value is an
            iterable of objects exposing ``.convert("RGB")`` (presumably
            PIL images - confirm against the dataset's features).

    Returns:
        The same ``examples`` object, updated in place with a
        "pixel_values" list containing one ``rotate(...)`` result per
        input image, in the original order.
    """
    rotated_images = []
    for picture in examples["image"]:
        # Convert to RGB before handing the image to the rotation transform.
        rgb_picture = picture.convert("RGB")
        rotated_images.append(rotate(rgb_picture))

    examples["pixel_values"] = rotated_images
    return examples

# Register `transforms` on the dataset; per the `datasets` documentation the
# function is applied on the fly when rows are accessed, not eagerly here.
dataset.set_transform(transforms)
# Accessing row 0 now goes through `transforms`, so the "pixel_values" entry
# it adds is available; print it to inspect the result for the first example.
print(dataset[0]["pixel_values"])