DKatheesrupan committed on
Commit 79562ec · verified · 1 Parent(s): 9ebc59b

Upload 4 files

Files changed (4)
  1. app.py +125 -0
  2. evaluate_clip_openai.ipynb +694 -0
  3. requirements.txt +11 -0
  4. train_cat_vit.ipynb +842 -0
app.py ADDED
@@ -0,0 +1,125 @@
+ import os
+ from pathlib import Path
+
+ import gradio as gr
+ from transformers import pipeline
+
+
+ # ----------------------------
+ # Paths
+ # ----------------------------
+
+ BASE_DIR = Path(__file__).resolve().parent
+
+ # Adjust the model folder here if necessary
+ MODEL_PATH = BASE_DIR.parent / "cat-vit"
+
+ EXAMPLE_DIR = BASE_DIR / "example_images"
+
+
+ # ----------------------------
+ # Labels
+ # ----------------------------
+
+ CAT_LABELS = ["cheetah", "leopard", "lion", "puma", "tiger"]
+
+
+ # ----------------------------
+ # Load models
+ # ----------------------------
+
+ print("Loading custom model...")
+ vit_classifier = pipeline(
+     "image-classification",
+     model=str(MODEL_PATH)
+ )
+
+ print("Loading CLIP model...")
+ clip_classifier = pipeline(
+     task="zero-shot-image-classification",
+     model="openai/clip-vit-base-patch32"
+ )
+
+
+ # ----------------------------
+ # Helper functions
+ # ----------------------------
+
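+ # Note: if the fine-tuned checkpoint's config lacks an id2label map, the
+ # pipeline returns generic ids (LABEL_0 ... LABEL_4); map them back to names.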
+ def normalize_custom_labels(results):
+     id2label = {
+         "LABEL_0": "cheetah",
+         "LABEL_1": "leopard",
+         "LABEL_2": "lion",
+         "LABEL_3": "puma",
+         "LABEL_4": "tiger",
+     }
+
+     output = {}
+
+     for r in results:
+         label = r["label"]
+         score = float(r["score"])
+
+         if label in id2label:
+             label = id2label[label]
+         else:
+             label = label.lower()
+
+         output[label] = score
+
+     return output
+
+
+ # ----------------------------
+ # Main function
+ # ----------------------------
+
+ def classify_cat(image):
+     # Custom Model
+     vit_results = vit_classifier(image)
+     vit_output = normalize_custom_labels(vit_results)
+
+     # CLIP
+     clip_labels = [f"a photo of a {label}" for label in CAT_LABELS]
+     clip_results = clip_classifier(image, candidate_labels=clip_labels)
+
+     clip_output = {}
+     for r in clip_results:
+         label = r["label"].replace("a photo of a ", "").lower()
+         score = float(r["score"])
+         clip_output[label] = score
+
+     return vit_output, clip_output
+
+
+ # ----------------------------
+ # Example images
+ # ----------------------------
+
+ example_images = [
+     [str(EXAMPLE_DIR / "Cheetah_032.jpg")],
+     [str(EXAMPLE_DIR / "Leopard_001.jpg")],
+     [str(EXAMPLE_DIR / "Lion_003.jpg")],
+     [str(EXAMPLE_DIR / "Puma_001.jpg")],
+     [str(EXAMPLE_DIR / "Tiger_001.jpg")]
+ ]
+
+
+ # ----------------------------
+ # Interface
+ # ----------------------------
+
+ iface = gr.Interface(
+     fn=classify_cat,
+     inputs=gr.Image(type="filepath"),
+     outputs=[
+         gr.Label(label="Custom Model"),
+         gr.Label(label="CLIP")
+     ],
+     title="Big Cat Classification",
+     description="Compare Custom Model vs CLIP",
+     examples=example_images
+ )
+
+ if __name__ == "__main__":
+     iface.launch()
evaluate_clip_openai.ipynb ADDED
@@ -0,0 +1,694 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: transformers in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (5.5.0)\n",
+ "Requirement already satisfied: torch in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (2.11.0)\n",
+ "Requirement already satisfied: pillow in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (12.1.1)\n",
+ "Requirement already satisfied: openai in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (2.30.0)\n",
+ "Requirement already satisfied: huggingface-hub<2.0,>=1.5.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from transformers) (1.6.0)\n",
+ "Requirement already satisfied: numpy>=1.17 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from transformers) (2.4.2)\n",
+ "Requirement already satisfied: packaging>=20.0 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from transformers) (26.0)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from transformers) (6.0.3)\n",
+ "Requirement already satisfied: regex>=2025.10.22 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from transformers) (2026.4.4)\n",
+ "Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from transformers) (0.22.2)\n",
+ "Requirement already satisfied: typer in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from transformers) (0.24.1)\n",
+ "Requirement already satisfied: safetensors>=0.4.3 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from transformers) (0.7.0)\n",
+ "Requirement already satisfied: tqdm>=4.27 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from transformers) (4.67.3)\n",
+ "Requirement already satisfied: filelock>=3.10.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from huggingface-hub<2.0,>=1.5.0->transformers) (3.25.0)\n",
+ "Requirement already satisfied: fsspec>=2023.5.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from huggingface-hub<2.0,>=1.5.0->transformers) (2026.2.0)\n",
+ "Requirement already satisfied: hf-xet<2.0.0,>=1.3.2 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from huggingface-hub<2.0,>=1.5.0->transformers) (1.3.2)\n",
+ "Requirement already satisfied: httpx<1,>=0.23.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from huggingface-hub<2.0,>=1.5.0->transformers) (0.28.1)\n",
+ "Requirement already satisfied: typing-extensions>=4.1.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from huggingface-hub<2.0,>=1.5.0->transformers) (4.15.0)\n",
+ "Requirement already satisfied: anyio in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from httpx<1,>=0.23.0->huggingface-hub<2.0,>=1.5.0->transformers) (4.12.1)\n",
+ "Requirement already satisfied: certifi in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from httpx<1,>=0.23.0->huggingface-hub<2.0,>=1.5.0->transformers) (2026.2.25)\n",
+ "Requirement already satisfied: httpcore==1.* in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from httpx<1,>=0.23.0->huggingface-hub<2.0,>=1.5.0->transformers) (1.0.9)\n",
+ "Requirement already satisfied: idna in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from httpx<1,>=0.23.0->huggingface-hub<2.0,>=1.5.0->transformers) (3.11)\n",
+ "Requirement already satisfied: h11>=0.16 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from httpcore==1.*->httpx<1,>=0.23.0->huggingface-hub<2.0,>=1.5.0->transformers) (0.16.0)\n",
+ "Requirement already satisfied: setuptools<82 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from torch) (81.0.0)\n",
+ "Requirement already satisfied: sympy>=1.13.3 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from torch) (1.14.0)\n",
+ "Requirement already satisfied: networkx>=2.5.1 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from torch) (3.6.1)\n",
+ "Requirement already satisfied: jinja2 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from torch) (3.1.6)\n",
+ "Requirement already satisfied: distro<2,>=1.7.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from openai) (1.9.0)\n",
+ "Requirement already satisfied: jiter<1,>=0.10.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from openai) (0.13.0)\n",
+ "Requirement already satisfied: pydantic<3,>=1.9.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from openai) (2.12.5)\n",
+ "Requirement already satisfied: sniffio in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from openai) (1.3.1)\n",
+ "Requirement already satisfied: annotated-types>=0.6.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (0.7.0)\n",
+ "Requirement already satisfied: pydantic-core==2.41.5 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (2.41.5)\n",
+ "Requirement already satisfied: typing-inspection>=0.4.2 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (0.4.2)\n",
+ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from sympy>=1.13.3->torch) (1.3.0)\n",
+ "Requirement already satisfied: colorama in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from tqdm>=4.27->transformers) (0.4.6)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from jinja2->torch) (3.0.3)\n",
+ "Requirement already satisfied: click>=8.2.1 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from typer->transformers) (8.3.1)\n",
+ "Requirement already satisfied: shellingham>=1.3.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from typer->transformers) (1.5.4)\n",
+ "Requirement already satisfied: rich>=12.3.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from typer->transformers) (14.3.3)\n",
+ "Requirement already satisfied: annotated-doc>=0.0.2 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from typer->transformers) (0.0.4)\n",
+ "Requirement already satisfied: markdown-it-py>=2.2.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from rich>=12.3.0->typer->transformers) (4.0.0)\n",
+ "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from rich>=12.3.0->typer->transformers) (2.19.2)\n",
+ "Requirement already satisfied: mdurl~=0.1 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from markdown-it-py>=2.2.0->rich>=12.3.0->typer->transformers) (0.1.2)\n",
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "[notice] A new release of pip is available: 25.3 -> 26.0.1\n",
+ "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
+ ]
+ }
+ ],
+ "source": [
+ "%pip install transformers torch pillow openai\n",
+ "from transformers import pipeline\n",
+ "from PIL import Image\n",
+ "import os\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model path exists: True\n",
+ "Image folder exists: True\n",
+ "Images: ['Cheetah_032.jpg', 'Leopard_001.jpg', 'Lion_003.jpg', 'Puma_001.jpg', 'Tiger_001.jpg']\n"
+ ]
+ }
+ ],
+ "source": [
+ "MODEL_PATH = \"./cat-vit\"\n",
+ "IMAGE_FOLDER = \"./Cats-classification-app/example_images\"\n",
+ "\n",
+ "labels = [\"cheetah\", \"leopard\", \"lion\", \"puma\", \"tiger\"]\n",
+ "clip_labels = [f\"a photo of a {label}\" for label in labels]\n",
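+ "# CLIP zero-shot classification tends to score better with natural-language prompts\n",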
+ "\n",
+ "print(\"Model path exists:\", os.path.exists(MODEL_PATH))\n",
+ "print(\"Image folder exists:\", os.path.exists(IMAGE_FOLDER))\n",
+ "print(\"Images:\", [f for f in os.listdir(IMAGE_FOLDER) if f.lower().endswith((\".jpg\", \".jpeg\", \".png\"))])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "1bf87a05bbc346c9b3f30eb950c1f3a5",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Loading weights: 0%| | 0/200 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "fad1554b05bf40d7b31480f8daa8ad35",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Loading weights: 0%| | 0/398 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[1mCLIPModel LOAD REPORT\u001b[0m from: openai/clip-vit-base-patch32\n",
+ "Key | Status | | \n",
+ "-------------------------------------+------------+--+-\n",
+ "text_model.embeddings.position_ids | UNEXPECTED | | \n",
+ "vision_model.embeddings.position_ids | UNEXPECTED | | \n",
+ "\n",
+ "Notes:\n",
+ "- UNEXPECTED:\tcan be ignored when loading from different task/architecture; not ok if you expect identical arch.\n"
+ ]
+ }
+ ],
+ "source": [
+ "custom_model = pipeline(\"image-classification\", model=MODEL_PATH)\n",
+ "\n",
+ "clip_model = pipeline(\n",
+ "    \"zero-shot-image-classification\",\n",
+ "    model=\"openai/clip-vit-base-patch32\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "bc19664791384aceb2502dfe76b5dd1d",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Loading weights: 0%| | 0/398 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[1mCLIPModel LOAD REPORT\u001b[0m from: openai/clip-vit-base-patch32\n",
+ "Key | Status | | \n",
+ "-------------------------------------+------------+--+-\n",
+ "text_model.embeddings.position_ids | UNEXPECTED | | \n",
+ "vision_model.embeddings.position_ids | UNEXPECTED | | \n",
+ "\n",
+ "Notes:\n",
+ "- UNEXPECTED:\tcan be ignored when loading from different task/architecture; not ok if you expect identical arch.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CLIP model loaded!\n"
+ ]
+ }
+ ],
+ "source": [
+ "clip_model = pipeline(\n",
+ "    \"zero-shot-image-classification\",\n",
+ "    model=\"openai/clip-vit-base-patch32\"\n",
+ ")\n",
+ "\n",
+ "print(\"CLIP model loaded!\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_true_label(filename):\n",
+ "    name = filename.lower()\n",
+ "\n",
+ "    if name.startswith(\"cheetah\"):\n",
+ "        return \"cheetah\"\n",
+ "    elif name.startswith(\"leopard\"):\n",
+ "        return \"leopard\"\n",
+ "    elif name.startswith(\"lion\"):\n",
+ "        return \"lion\"\n",
+ "    elif name.startswith(\"puma\"):\n",
+ "        return \"puma\"\n",
+ "    elif name.startswith(\"tiger\"):\n",
+ "        return \"tiger\"\n",
+ "    else:\n",
+ "        return \"unknown\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Found images: ['Cheetah_032.jpg', 'Leopard_001.jpg', 'Lion_003.jpg', 'Puma_001.jpg', 'Tiger_001.jpg']\n",
+ "results length: 5\n",
+ " image true_label custom_pred custom_score clip_pred clip_score \\\n",
+ "0 Cheetah_032.jpg cheetah cheetah 0.5264 cheetah 0.8319 \n",
+ "1 Leopard_001.jpg leopard leopard 0.5127 leopard 0.9232 \n",
+ "2 Lion_003.jpg lion lion 0.5408 lion 0.9949 \n",
+ "3 Puma_001.jpg puma puma 0.6112 puma 0.9986 \n",
+ "4 Tiger_001.jpg tiger tiger 0.6976 tiger 0.9892 \n",
+ "\n",
+ " custom_correct clip_correct \n",
+ "0 True True \n",
+ "1 True True \n",
+ "2 True True \n",
+ "3 True True \n",
+ "4 True True \n",
+ "columns: ['image', 'true_label', 'custom_pred', 'custom_score', 'clip_pred', 'clip_score', 'custom_correct', 'clip_correct']\n"
+ ]
+ }
+ ],
+ "source": [
+ "results = []\n",
+ "\n",
+ "id2label = {\n",
+ "    0: \"cheetah\",\n",
+ "    1: \"leopard\",\n",
+ "    2: \"lion\",\n",
+ "    3: \"puma\",\n",
+ "    4: \"tiger\"\n",
+ "}\n",
+ "\n",
+ "image_files = sorted([\n",
+ "    f for f in os.listdir(IMAGE_FOLDER)\n",
+ "    if f.lower().endswith((\".jpg\", \".jpeg\", \".png\"))\n",
+ "])\n",
+ "\n",
+ "print(\"Found images:\", image_files)\n",
+ "\n",
+ "for img_file in image_files:\n",
+ "    image_path = os.path.join(IMAGE_FOLDER, img_file)\n",
+ "    image = Image.open(image_path).convert(\"RGB\")\n",
+ "    true_label = get_true_label(img_file)\n",
+ "\n",
+ "    custom_result = custom_model(image)[0]\n",
+ "    raw_custom_label = custom_result[\"label\"]\n",
+ "    custom_score = float(custom_result[\"score\"])\n",
+ "\n",
+ "    if raw_custom_label.startswith(\"LABEL_\"):\n",
+ "        label_id = int(raw_custom_label.split(\"_\")[1])\n",
+ "        custom_pred = id2label[label_id]\n",
+ "    else:\n",
+ "        custom_pred = raw_custom_label.lower()\n",
+ "\n",
+ "    clip_result = clip_model(image, candidate_labels=clip_labels)[0]\n",
+ "    clip_pred = clip_result[\"label\"].replace(\"a photo of a \", \"\").lower()\n",
+ "    clip_score = float(clip_result[\"score\"])\n",
+ "\n",
+ "    results.append({\n",
+ "        \"image\": img_file,\n",
+ "        \"true_label\": true_label,\n",
+ "        \"custom_pred\": custom_pred,\n",
+ "        \"custom_score\": round(custom_score, 4),\n",
+ "        \"clip_pred\": clip_pred,\n",
+ "        \"clip_score\": round(clip_score, 4),\n",
+ "        \"custom_correct\": custom_pred == true_label,\n",
+ "        \"clip_correct\": clip_pred == true_label,\n",
+ "    })\n",
+ "\n",
+ "print(\"results length:\", len(results))\n",
+ "\n",
+ "df = pd.DataFrame(results)\n",
+ "print(df)\n",
+ "print(\"columns:\", df.columns.tolist())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Custom accuracy: 1.0\n",
+ "CLIP accuracy: 1.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "custom_accuracy = df[\"custom_correct\"].mean()\n",
+ "clip_accuracy = df[\"clip_correct\"].mean()\n",
+ "\n",
+ "print(\"Custom accuracy:\", round(custom_accuracy, 4))\n",
+ "print(\"CLIP accuracy:\", round(clip_accuracy, 4))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved to comparison_results.csv\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.to_csv(\"comparison_results.csv\", index=False)\n",
+ "print(\"Saved to comparison_results.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "MODEL_PATH = \"./cat-vit\"\n",
+ "IMAGE_FOLDER = \"./Cats-classification-app/example_images\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import base64\n",
+ "import os\n",
+ "\n",
+ "from openai import OpenAI\n",
+ "\n",
+ "# Never commit a real API key; use a placeholder or set it in the environment\n",
+ "os.environ[\"OPENAI_API_KEY\"] = \"YOUR_OPENAI_API_KEY\"\n",
+ "\n",
+ "client = OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def predict_openai_label(image_path):\n",
+ "    with open(image_path, \"rb\") as image_file:\n",
+ "        image_base64 = base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
+ "\n",
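+ "    # Send the image inline as a base64 data URL (Responses API input_image)\n",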
+ "    response = client.responses.create(\n",
+ "        model=\"gpt-4.1-mini\",\n",
+ "        input=[\n",
+ "            {\n",
+ "                \"role\": \"user\",\n",
+ "                \"content\": [\n",
+ "                    {\n",
+ "                        \"type\": \"input_text\",\n",
+ "                        \"text\": \"Classify this image as exactly one of these labels: cheetah, leopard, lion, puma, tiger. Return only one label in lowercase.\"\n",
+ "                    },\n",
+ "                    {\n",
+ "                        \"type\": \"input_image\",\n",
+ "                        \"image_url\": f\"data:image/jpeg;base64,{image_base64}\"\n",
+ "                    }\n",
+ "                ]\n",
+ "            }\n",
+ "        ]\n",
+ "    )\n",
+ "\n",
+ "    return response.output_text.strip().lower()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>image</th>\n",
+ " <th>true_label</th>\n",
+ " <th>custom_pred</th>\n",
+ " <th>custom_score</th>\n",
+ " <th>clip_pred</th>\n",
+ " <th>clip_score</th>\n",
+ " <th>openai_pred</th>\n",
+ " <th>custom_correct</th>\n",
+ " <th>clip_correct</th>\n",
+ " <th>openai_correct</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>Cheetah_032.jpg</td>\n",
+ " <td>cheetah</td>\n",
+ " <td>cheetah</td>\n",
+ " <td>0.5264</td>\n",
+ " <td>cheetah</td>\n",
+ " <td>0.8319</td>\n",
+ " <td>ERROR: name 'base64' is not defined</td>\n",
+ " <td>True</td>\n",
+ " <td>True</td>\n",
+ " <td>False</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>Leopard_001.jpg</td>\n",
+ " <td>leopard</td>\n",
+ " <td>leopard</td>\n",
+ " <td>0.5127</td>\n",
+ " <td>leopard</td>\n",
+ " <td>0.9232</td>\n",
+ " <td>ERROR: name 'base64' is not defined</td>\n",
+ " <td>True</td>\n",
+ " <td>True</td>\n",
+ " <td>False</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>Lion_003.jpg</td>\n",
+ " <td>lion</td>\n",
+ " <td>lion</td>\n",
+ " <td>0.5408</td>\n",
+ " <td>lion</td>\n",
+ " <td>0.9949</td>\n",
+ " <td>ERROR: name 'base64' is not defined</td>\n",
+ " <td>True</td>\n",
+ " <td>True</td>\n",
+ " <td>False</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>Puma_001.jpg</td>\n",
+ " <td>puma</td>\n",
+ " <td>puma</td>\n",
+ " <td>0.6112</td>\n",
+ " <td>puma</td>\n",
+ " <td>0.9986</td>\n",
+ " <td>ERROR: name 'base64' is not defined</td>\n",
+ " <td>True</td>\n",
+ " <td>True</td>\n",
+ " <td>False</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>Tiger_001.jpg</td>\n",
+ " <td>tiger</td>\n",
+ " <td>tiger</td>\n",
+ " <td>0.6976</td>\n",
+ " <td>tiger</td>\n",
+ " <td>0.9892</td>\n",
+ " <td>ERROR: name 'base64' is not defined</td>\n",
+ " <td>True</td>\n",
+ " <td>True</td>\n",
+ " <td>False</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " image true_label custom_pred custom_score clip_pred clip_score \\\n",
+ "0 Cheetah_032.jpg cheetah cheetah 0.5264 cheetah 0.8319 \n",
+ "1 Leopard_001.jpg leopard leopard 0.5127 leopard 0.9232 \n",
+ "2 Lion_003.jpg lion lion 0.5408 lion 0.9949 \n",
+ "3 Puma_001.jpg puma puma 0.6112 puma 0.9986 \n",
+ "4 Tiger_001.jpg tiger tiger 0.6976 tiger 0.9892 \n",
+ "\n",
+ " openai_pred custom_correct clip_correct \\\n",
+ "0 ERROR: name 'base64' is not defined True True \n",
+ "1 ERROR: name 'base64' is not defined True True \n",
+ "2 ERROR: name 'base64' is not defined True True \n",
+ "3 ERROR: name 'base64' is not defined True True \n",
+ "4 ERROR: name 'base64' is not defined True True \n",
+ "\n",
+ " openai_correct \n",
+ "0 False \n",
+ "1 False \n",
+ "2 False \n",
+ "3 False \n",
+ "4 False "
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "results = []\n",
+ "\n",
+ "image_files = sorted([\n",
+ "    f for f in os.listdir(IMAGE_FOLDER)\n",
+ "    if f.lower().endswith((\".jpg\", \".jpeg\", \".png\"))\n",
+ "])\n",
+ "\n",
+ "for img_file in image_files:\n",
+ "    image_path = os.path.join(IMAGE_FOLDER, img_file)\n",
+ "    image = Image.open(image_path).convert(\"RGB\")\n",
+ "    true_label = get_true_label(img_file)\n",
+ "\n",
+ "    # Custom model\n",
+ "    custom_result = custom_model(image)[0]\n",
+ "    custom_pred = custom_result[\"label\"].lower()\n",
+ "    custom_score = float(custom_result[\"score\"])\n",
+ "\n",
+ "    # CLIP model\n",
+ "    clip_result = clip_model(image, candidate_labels=clip_labels)[0]\n",
+ "    clip_pred = clip_result[\"label\"].replace(\"a photo of a \", \"\").lower()\n",
+ "    clip_score = float(clip_result[\"score\"])\n",
+ "\n",
+ "    # OpenAI model\n",
+ "    try:\n",
+ "        openai_pred = predict_openai_label(image_path)\n",
+ "        openai_correct = openai_pred == true_label\n",
+ "    except Exception as e:\n",
+ "        openai_pred = f\"ERROR: {e}\"\n",
+ "        openai_correct = False\n",
+ "\n",
+ "    results.append({\n",
+ "        \"image\": img_file,\n",
+ "        \"true_label\": true_label,\n",
+ "        \"custom_pred\": custom_pred,\n",
+ "        \"custom_score\": round(custom_score, 4),\n",
+ "        \"clip_pred\": clip_pred,\n",
+ "        \"clip_score\": round(clip_score, 4),\n",
+ "        \"openai_pred\": openai_pred,\n",
+ "        \"custom_correct\": custom_pred == true_label,\n",
+ "        \"clip_correct\": clip_pred == true_label,\n",
+ "        \"openai_correct\": openai_correct,\n",
+ "    })\n",
+ "\n",
+ "df = pd.DataFrame(results)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Custom accuracy: 1.0\n",
+ "CLIP accuracy: 1.0\n",
+ "OpenAI accuracy: 0.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "custom_accuracy = df[\"custom_correct\"].mean()\n",
+ "clip_accuracy = df[\"clip_correct\"].mean()\n",
+ "openai_accuracy = df[\"openai_correct\"].mean()\n",
+ "\n",
+ "print(\"Custom accuracy:\", round(custom_accuracy, 4))\n",
+ "print(\"CLIP accuracy:\", round(clip_accuracy, 4))\n",
+ "print(\"OpenAI accuracy:\", round(openai_accuracy, 4))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved to ../comparison_results_with_openai.csv\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.to_csv(\"../comparison_results_with_openai.csv\", index=False)\n",
+ "print(\"Saved to ../comparison_results_with_openai.csv\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.14.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ transformers
+ torch
+ torchvision
+ datasets
+ evaluate
+ accelerate
+ scikit-learn
+ pillow
+ gradio
+ openai
+ huggingface_hub
train_cat_vit.ipynb ADDED
@@ -0,0 +1,842 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "a0c0c143",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "[notice] A new release of pip is available: 25.3 -> 26.0.1\n",
+ "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: matplotlib in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (3.10.8)\n",
+ "Requirement already satisfied: ipywidgets in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (8.1.8)\n",
+ "Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from matplotlib) (1.3.3)\n",
+ "Requirement already satisfied: cycler>=0.10 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from matplotlib) (0.12.1)\n",
+ "Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from matplotlib) (4.62.1)\n",
+ "Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from matplotlib) (1.5.0)\n",
+ "Requirement already satisfied: numpy>=1.23 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from matplotlib) (2.4.2)\n",
+ "Requirement already satisfied: packaging>=20.0 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from matplotlib) (26.0)\n",
+ "Requirement already satisfied: pillow>=8 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from matplotlib) (12.1.1)\n",
+ "Requirement already satisfied: pyparsing>=3 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from matplotlib) (3.3.2)\n",
+ "Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from matplotlib) (2.9.0.post0)\n",
+ "Requirement already satisfied: comm>=0.1.3 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from ipywidgets) (0.2.3)\n",
+ "Requirement already satisfied: ipython>=6.1.0 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from ipywidgets) (9.11.0)\n",
+ "Requirement already satisfied: traitlets>=4.3.1 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from ipywidgets) (5.14.3)\n",
+ "Requirement already satisfied: widgetsnbextension~=4.0.14 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from ipywidgets) (4.0.15)\n",
+ "Requirement already satisfied: jupyterlab_widgets~=3.0.15 in c:\\users\\kathe\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from ipywidgets) (3.0.16)\n",
+ "Requirement already satisfied: colorama>=0.4.4 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from ipython>=6.1.0->ipywidgets) (0.4.6)\n",
+ "Requirement already satisfied: decorator>=5.1.0 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from ipython>=6.1.0->ipywidgets) (5.2.1)\n",
+ "Requirement already satisfied: ipython-pygments-lexers>=1.0.0 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from ipython>=6.1.0->ipywidgets) (1.1.1)\n",
+ "Requirement already satisfied: jedi>=0.18.2 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from ipython>=6.1.0->ipywidgets) (0.19.2)\n",
+ "Requirement already satisfied: matplotlib-inline>=0.1.6 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from ipython>=6.1.0->ipywidgets) (0.2.1)\n",
+ "Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from ipython>=6.1.0->ipywidgets) (3.0.52)\n",
+ "Requirement already satisfied: pygments>=2.14.0 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from ipython>=6.1.0->ipywidgets) (2.19.2)\n",
+ "Requirement already satisfied: stack_data>=0.6.0 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from ipython>=6.1.0->ipywidgets) (0.6.3)\n",
+ "Requirement already satisfied: wcwidth in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets) (0.6.0)\n",
+ "Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from jedi>=0.18.2->ipython>=6.1.0->ipywidgets) (0.8.6)\n",
+ "Requirement already satisfied: six>=1.5 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from python-dateutil>=2.7->matplotlib) (1.17.0)\n",
+ "Requirement already satisfied: executing>=1.2.0 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (2.2.1)\n",
+ "Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (3.0.1)\n",
+ "Requirement already satisfied: pure-eval in c:\\users\\kathe\\appdata\\roaming\\python\\python314\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (0.2.3)\n",
+ "Note: you may need to restart the kernel to use updated packages.\n",
+ "5.5.0\n",
+ "1.13.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Install packages\n",
+ "%pip install matplotlib ipywidgets\n",
+ "\n",
+ "# Imports\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import torch\n",
+ "\n",
+ "from datasets import load_dataset, DatasetDict\n",
+ "from transformers import AutoImageProcessor, ViTForImageClassification\n",
+ "from transformers import Trainer, TrainingArguments\n",
+ "\n",
+ "import evaluate\n",
+ "import transformers\n",
+ "import accelerate\n",
+ "\n",
+ "\n",
+ "print(transformers.__version__)\n",
+ "print(accelerate.__version__)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "3e3aa822",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "339917e702894b88b0e14dd328b3c811",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Resolving data files: 0%| | 0/241 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['image', 'label'],\n",
+ " num_rows: 241\n",
+ " })\n",
+ "})"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load the dataset\n",
+ "dataset = load_dataset(\"imagefolder\", data_dir=\"Cats\")\n",
+ "dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "63ecc9fb",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Label names: ['Cheetah', 'Leopard', 'Lion', 'Puma', 'Tiger']\n",
+ "Label ids: [0, 1, 2, 3, 4]\n",
+ "Number of classes: 5\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Inspect the labels\n",
+ "label_names = dataset[\"train\"].features[\"label\"].names\n",
+ "labels = dataset[\"train\"].unique(\"label\")\n",
+ "\n",
+ "print(\"Label names:\", label_names)\n",
+ "print(\"Label ids:\", labels)\n",
+ "print(\"Number of classes:\", len(label_names))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "bb4293e4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['image', 'label'],\n",
+ " num_rows: 192\n",
+ " })\n",
+ " validation: Dataset({\n",
+ " features: ['image', 'label'],\n",
+ " num_rows: 24\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['image', 'label'],\n",
+ " num_rows: 25\n",
+ " })\n",
+ "})"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Split into train / validation / test\n",
+ "split_dataset = dataset[\"train\"].train_test_split(test_size=0.2, seed=42)\n",
+ "eval_dataset = split_dataset[\"test\"].train_test_split(test_size=0.5, seed=42)\n",
+ "\n",
+ "our_dataset = DatasetDict({\n",
+ "    \"train\": split_dataset[\"train\"],\n",
+ "    \"validation\": eval_dataset[\"train\"],\n",
+ "    \"test\": eval_dataset[\"test\"]\n",
+ "})\n",
+ "\n",
+ "our_dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "a5d24190",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'Cheetah': '0', 'Leopard': '1', 'Lion': '2', 'Puma': '3', 'Tiger': '4'}\n",
+ "{'0': 'Cheetah', '1': 'Leopard', '2': 'Lion', '3': 'Puma', '4': 'Tiger'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Label mappings\n",
+ "label2id = {label: str(i) for i, label in enumerate(label_names)}\n",
+ "id2label = {str(i): label for i, label in enumerate(label_names)}\n",
+ "\n",
+ "print(label2id)\n",
+ "print(id2label)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "dc887218",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "ViTImageProcessor {\n",
+ " \"do_normalize\": true,\n",
+ " \"do_rescale\": true,\n",
+ " \"do_resize\": true,\n",
+ " \"image_mean\": [\n",
+ " 0.5,\n",
+ " 0.5,\n",
+ " 0.5\n",
+ " ],\n",
+ " \"image_processor_type\": \"ViTImageProcessor\",\n",
+ " \"image_std\": [\n",
+ " 0.5,\n",
+ " 0.5,\n",
+ " 0.5\n",
+ " ],\n",
+ " \"resample\": 2,\n",
+ " \"rescale_factor\": 0.00392156862745098,\n",
+ " \"size\": {\n",
+ " \"height\": 224,\n",
+ " \"width\": 224\n",
+ " }\n",
+ "}"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Image Processor\n",
+ "processor = AutoImageProcessor.from_pretrained(\"google/vit-base-patch16-224\")\n",
+ "processor"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "ac8ed1d2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Transforms\n",
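+ "# with_transform applies the preprocessing lazily, per batch, at access time\n",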
+ "def transforms(batch):\n",
+ "    images = [img.convert(\"RGB\") for img in batch[\"image\"]]\n",
+ "    inputs = processor(images, return_tensors=\"pt\")\n",
+ "    inputs[\"labels\"] = batch[\"label\"]\n",
+ "    return inputs\n",
+ "\n",
+ "processed_dataset = our_dataset.with_transform(transforms)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "b566cf1c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Collate Function\n",
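+ "# Stack per-example tensors into the batch dict format the Trainer expects\n",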
+ "def collate_fn(batch):\n",
+ "    return {\n",
+ "        \"pixel_values\": torch.stack([x[\"pixel_values\"] for x in batch]),\n",
+ "        \"labels\": torch.tensor([x[\"labels\"] for x in batch])\n",
+ "    }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "3e90e19f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "1a0b545f019c4a05a49c5b72a517da66",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading builder script: 0.00B [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "483365e5f75c48c9a24e88e1d9a05ef6",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading builder script: 0.00B [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "189c19a1212f46ab95b7e769b441e2d1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading builder script: 0.00B [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Metrics\n",
+ "accuracy_metric = evaluate.load(\"accuracy\")\n",
+ "precision_metric = evaluate.load(\"precision\")\n",
+ "recall_metric = evaluate.load(\"recall\")\n",
+ "f1_metric = evaluate.load(\"f1\")\n",
+ "\n",
+ "def compute_metrics(eval_pred):\n",
+ "    logits, labels = eval_pred\n",
+ "    predictions = np.argmax(logits, axis=1)\n",
+ "\n",
+ "    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)[\"accuracy\"]\n",
+ "    precision = precision_metric.compute(predictions=predictions, references=labels, average=\"weighted\")[\"precision\"]\n",
+ "    recall = recall_metric.compute(predictions=predictions, references=labels, average=\"weighted\")[\"recall\"]\n",
+ "    f1 = f1_metric.compute(predictions=predictions, references=labels, average=\"weighted\")[\"f1\"]\n",
+ "\n",
+ "    return {\n",
+ "        \"accuracy\": accuracy,\n",
+ "        \"precision\": precision,\n",
+ "        \"recall\": recall,\n",
+ "        \"f1\": f1\n",
+ "    }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "87f65a9b",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "You passed `num_labels=5` which is incompatible to the `id2label` map of length `1000`.\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "63d5a7739fff49af855c2f7278a74df7",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Loading weights: 0%| | 0/200 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[1mViTForImageClassification LOAD REPORT\u001b[0m from: google/vit-base-patch16-224\n",
+ "Key | Status | \n",
+ "------------------+----------+------------------------------------------------------------------------------------------\n",
+ "classifier.bias | MISMATCH | Reinit due to size mismatch - ckpt: torch.Size([1000]) vs model:torch.Size([5]) \n",
+ "classifier.weight | MISMATCH | Reinit due to size mismatch - ckpt: torch.Size([1000, 768]) vs model:torch.Size([5, 768])\n",
+ "\n",
+ "Notes:\n",
+ "- MISMATCH:\tckpt weights were loaded, but they did not match the original empty weight shapes.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Load the model\n",
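+ "# ignore_mismatched_sizes=True drops the 1000-class ImageNet head and\n",
+ "# re-initializes a fresh 5-class classifier (hence the MISMATCH load report)\n",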
+ "model = ViTForImageClassification.from_pretrained(\n",
+ "    \"google/vit-base-patch16-224\",\n",
+ "    num_labels=len(label_names),\n",
+ "    id2label={int(k): v for k, v in id2label.items()},\n",
+ "    label2id=label2id,\n",
+ "    ignore_mismatched_sizes=True\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "78883db4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Freeze the backbone\n",
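+ "# Only the classification head stays trainable; every encoder weight is frozen\n",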
+ "for name, param in model.named_parameters():\n",
+ "    if not name.startswith(\"classifier\"):\n",
+ "        param.requires_grad = False"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "2dc7e9f0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#TrainingArguments\n",
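+ "# load_best_model_at_end keeps the checkpoint with the best validation accuracy\n",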
+ "training_args = TrainingArguments(\n",
+ "    output_dir=\"./cat-vit\",\n",
+ "    per_device_train_batch_size=16,\n",
+ "    per_device_eval_batch_size=16,\n",
+ "    eval_strategy=\"epoch\",\n",
+ "    save_strategy=\"epoch\",\n",
+ "    logging_steps=20,\n",
+ "    num_train_epochs=5,\n",
+ "    learning_rate=3e-4,\n",
+ "    save_total_limit=2,\n",
+ "    remove_unused_columns=False,\n",
+ "    push_to_hub=True,\n",
+ "    load_best_model_at_end=True,\n",
+ "    metric_for_best_model=\"accuracy\",\n",
+ "    greater_is_better=True,\n",
+ "    report_to=\"none\",\n",
+ "    disable_tqdm=True,\n",
+ "    run_name=\"cat-vit-transfer-learning\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "1e3d4feb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Trainer\n",
+ "trainer = Trainer(\n",
+ "    model=model,\n",
+ "    args=training_args,\n",
+ "    train_dataset=processed_dataset[\"train\"],\n",
+ "    eval_dataset=processed_dataset[\"validation\"],\n",
+ "    data_collator=collate_fn,\n",
+ "    compute_metrics=compute_metrics,\n",
+ "    processing_class=processor\n",
+ ")\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "2a8b4894",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\kathe\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages\\torch\\utils\\data\\dataloader.py:775: UserWarning: 'pin_memory' argument is set as true but no accelerator is found, then device pinned memory won't be used.\n",
+ " super().__init__(loader)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': '1.082', 'eval_accuracy': '0.875', 'eval_precision': '0.9018', 'eval_recall': '0.875', 'eval_f1': '0.8627', 'eval_runtime': '3.233', 'eval_samples_per_second': '7.423', 'eval_steps_per_second': '0.619', 'epoch': '1'}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "262534a235fb4b32bd6cd2ed35146c98",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Writing model shards: 0%| | 0/1 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\kathe\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages\\torch\\utils\\data\\dataloader.py:775: UserWarning: 'pin_memory' argument is set as true but no accelerator is found, then device pinned memory won't be used.\n",
+ " super().__init__(loader)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': '1.151', 'grad_norm': '5.051', 'learning_rate': '0.000205', 'epoch': '1.667'}\n",
+ "{'eval_loss': '0.7125', 'eval_accuracy': '0.9167', 'eval_precision': '0.9278', 'eval_recall': '0.9167', 'eval_f1': '0.9139', 'eval_runtime': '3.441', 'eval_samples_per_second': '6.976', 'eval_steps_per_second': '0.581', 'epoch': '2'}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "d8e710d66cef43cea6bff1d5e03e76f5",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Writing model shards: 0%| | 0/1 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\kathe\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages\\torch\\utils\\data\\dataloader.py:775: UserWarning: 'pin_memory' argument is set as true but no accelerator is found, then device pinned memory won't be used.\n",
+ " super().__init__(loader)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': '0.5354', 'eval_accuracy': '0.9167', 'eval_precision': '0.9278', 'eval_recall': '0.9167', 'eval_f1': '0.9139', 'eval_runtime': '3.425', 'eval_samples_per_second': '7.006', 'eval_steps_per_second': '0.584', 'epoch': '3'}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "efcce8388767474eac235d4f294c6ed0",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Writing model shards: 0%| | 0/1 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\kathe\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages\\torch\\utils\\data\\dataloader.py:775: UserWarning: 'pin_memory' argument is set as true but no accelerator is found, then device pinned memory won't be used.\n",
+ " super().__init__(loader)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': '0.5336', 'grad_norm': '3.152', 'learning_rate': '0.000105', 'epoch': '3.333'}\n",
+ "{'eval_loss': '0.4571', 'eval_accuracy': '0.9167', 'eval_precision': '0.9278', 'eval_recall': '0.9167', 'eval_f1': '0.9139', 'eval_runtime': '3.065', 'eval_samples_per_second': '7.83', 'eval_steps_per_second': '0.652', 'epoch': '4'}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "05f39cf4209244939ce265accaebc9bf",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Writing model shards: 0%| | 0/1 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\kathe\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages\\torch\\utils\\data\\dataloader.py:775: UserWarning: 'pin_memory' argument is set as true but no accelerator is found, then device pinned memory won't be used.\n",
+ " super().__init__(loader)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': '0.3465', 'grad_norm': '2.518', 'learning_rate': '5e-06', 'epoch': '5'}\n",
+ "{'eval_loss': '0.4346', 'eval_accuracy': '0.9167', 'eval_precision': '0.9219', 'eval_recall': '0.9167', 'eval_f1': '0.9139', 'eval_runtime': '3.323', 'eval_samples_per_second': '7.222', 'eval_steps_per_second': '0.602', 'epoch': '5'}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "d3f7a6a821604a4faa19a99e47d553a4",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Writing model shards: 0%| | 0/1 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'train_runtime': '167.4', 'train_samples_per_second': '5.736', 'train_steps_per_second': '0.358', 'train_loss': '0.6771', 'epoch': '5'}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\kathe\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages\\torch\\utils\\data\\dataloader.py:775: UserWarning: 'pin_memory' argument is set as true but no accelerator is found, then device pinned memory won't be used.\n",
+ " super().__init__(loader)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': '0.6814', 'eval_accuracy': '0.96', 'eval_precision': '0.97', 'eval_recall': '0.96', 'eval_f1': '0.96', 'eval_runtime': '3.285', 'eval_samples_per_second': '7.611', 'eval_steps_per_second': '0.609', 'epoch': '5'}\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'eval_loss': 0.681404709815979,\n",
+ " 'eval_accuracy': 0.96,\n",
+ " 'eval_precision': 0.97,\n",
+ " 'eval_recall': 0.96,\n",
+ " 'eval_f1': 0.96,\n",
+ " 'eval_runtime': 3.2846,\n",
+ " 'eval_samples_per_second': 7.611,\n",
+ " 'eval_steps_per_second': 0.609,\n",
+ " 'epoch': 5.0}"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Train\n",
+ "trainer.train()\n",
+ "test_results = trainer.evaluate(processed_dataset[\"test\"])\n",
+ "test_results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "026d1a8f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Test evaluation (see README)\n",
+ "#trainer.evaluate(processed_dataset['test'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "ca5b4010",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "e12f1e64b3a2438f8479118470a17561",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Writing model shards: 0%| | 0/1 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "2e77b9cf34c34a489fc01d7137cb6714",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Writing model shards: 0%| | 0/1 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "96a93af3c1974290a0439d3d6089b7b5",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Processing Files (0 / 0): | | 0.00B / 0.00B "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "81545cef38fe4403ab0c287fffaf85de",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "New Data Upload: | | 0.00B / 0.00B "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "dcc7dd6286f348ceaa474c41639d3fa1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Writing model shards: 0%| | 0/1 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "8be396ac33304e8aa6d04654792985b9",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Processing Files (0 / 0): | | 0.00B / 0.00B "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ce487973666a465bb4f9111693e49fea",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "New Data Upload: | | 0.00B / 0.00B "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "CommitInfo(commit_url='https://huggingface.co/DKatheesrupan/cat-vit/commit/05e008b778df7e8e7dcbab9ef293490315c2609a', commit_message='cat-vit-classifier', commit_description='', oid='05e008b778df7e8e7dcbab9ef293490315c2609a', pr_url=None, repo_url=RepoUrl('https://huggingface.co/DKatheesrupan/cat-vit', endpoint='https://huggingface.co', repo_type='model', repo_id='DKatheesrupan/cat-vit'), pr_revision=None, pr_num=None)"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Push the model to the Hub\n",
+ "kwargs = {\n",
+ "    \"finetuned_from\": \"google/vit-base-patch16-224\",\n",
+ "    \"dataset\": \"custom cat dataset\",\n",
+ "    \"tasks\": \"image-classification\",\n",
+ "    \"tags\": [\"image-classification\", \"vision-transformer\", \"cats\"]\n",
+ "}\n",
+ "trainer.save_model()\n",
+ "trainer.push_to_hub(\"cat-vit-classifier\", **kwargs)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.14.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }