{"model_type": "vit", "architectures": [
    "ViTForImageClassification"
  ], "input_size": 256, "num_classes": 5, "drop_path_rate": 0.2, "layer_decay": 0.65, "weight_decay": 0.05, "base_learning_rate": 0.005, "batch_size": 10, "epochs": 50, "remove_background": true}