from transformers import ViTConfig, ViTForImageClassification
from transformers import ViTFeatureExtractor
from PIL import Image
import requests
import matplotlib.pyplot as plt
import gradio as gr


# Script setup: build a Vision Transformer (ViT) image classifier together
# with the preprocessor that prepares images for it.
#
# Option 1: construct the model from a config object only, so its weights are
# randomly initialized (the model has to be trained from scratch).

# The number of hidden layers and the hidden size are set explicitly; every
# other field keeps the ViTConfig default.
config = ViTConfig(num_hidden_layers=12, hidden_size=768)
model = ViTForImageClassification(config)

# Print the full configuration so the values filled in by default are visible.
print(config)

# Preprocessor created with no arguments, i.e. with the library's defaults.
# NOTE(review): ViTFeatureExtractor is reported as deprecated in newer
# transformers releases in favor of ViTImageProcessor -- confirm against the
# installed version before upgrading.
feature_extractor = ViTFeatureExtractor()

# Alternatively, load the preprocessor settings that correspond to a
# checkpoint on the Hub:
# feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")


# Filename of the input image. At this point it is only a string; nothing in
# the lines above opens or decodes the file.
image = "cats.jpg"