swiss-landmarks-cnn
Custom CNN for classifying 6 Swiss landmarks. Trained as part of an AI Applications course (HS24).
App: https://huggingface.co/spaces/eceleo/swiss-landmarks
Classes
| Key | Label |
|---|---|
| matterhorn | Matterhorn |
| chapel_bridge | Chapel Bridge, Lucerne |
| zurich_oldtown | Zurich Old Town |
| rhine_falls | Rhine Falls |
| bern_oldtown | Bern Old Town |
| lake_geneva | Lake Geneva |
Architecture
Custom LandmarkNet — MobileNet-style depthwise separable CNN, ~135k parameters, input 96×96 px.
Training
- Dataset: 360 images (1 real photo per class, augmented ×60)
- Optimizer: AdamW, lr=2e-3
- Epochs: 25
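- Val accuracy: 100% (with only one source photo per class, the validation images are augmented variants of the training photos, so this figure does not indicate accuracy on new photos)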
Usage
import torch, json, io
from torch import nn
from torchvision import transforms
from PIL import Image
# Load the class entries from classes.json. The context manager closes the
# file handle promptly, and the explicit encoding avoids depending on the
# platform default (JSON text is UTF-8).
with open("classes.json", encoding="utf-8") as fh:
    classes = json.load(fh)
class DWConv(nn.Module):
    """Depthwise-separable convolution unit.

    Runs a 3x3 depthwise convolution (``groups=ci``, one filter per input
    channel) followed by a 1x1 pointwise convolution, batch normalisation
    and ReLU6.

    Args:
        ci: Number of input channels.
        co: Number of output channels.
        s: Stride of the depthwise convolution (defaults to 1).
    """

    def __init__(self, ci, co, s=1):
        super().__init__()
        depthwise = nn.Conv2d(
            ci, ci, kernel_size=3, stride=s, padding=1, groups=ci, bias=False
        )
        pointwise = nn.Conv2d(ci, co, kernel_size=1, bias=False)
        # The position of each layer inside the Sequential determines its
        # state-dict key (net.0, net.1, ...), so the order must stay fixed.
        self.net = nn.Sequential(
            depthwise,
            pointwise,
            nn.BatchNorm2d(co),
            nn.ReLU6(),
        )

    def forward(self, x):
        """Apply the unit to ``x`` and return the result."""
        return self.net(x)
class LandmarkNet(nn.Module):
    """Compact CNN classifier built from depthwise-separable units.

    Layout: a strided 3x3 stem convolution with batch norm and ReLU6, five
    ``DWConv`` units (two of them strided), global average pooling, dropout
    and a linear layer producing ``n`` outputs.

    Args:
        n: Number of output classes (width of the final linear layer).
    """

    def __init__(self, n):
        super().__init__()
        stem = [
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU6(),
        ]
        # (in_channels, out_channels, stride) of each depthwise-separable unit.
        unit_specs = (
            (32, 64, 1),
            (64, 128, 2),
            (128, 128, 1),
            (128, 256, 2),
            (256, 256, 1),
        )
        units = [DWConv(ci, co, s) for ci, co, s in unit_specs]
        head = [
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Dropout(0.35),
            nn.Linear(256, n),
        ]
        # Module order defines the state-dict keys (net.0 ... net.11).
        self.net = nn.Sequential(*stem, *units, *head)

    def forward(self, x):
        """Return the network output for the image batch ``x``."""
        return self.net(x)
# Build the network with one output per entry in `classes` and restore the
# trained weights on CPU.
model = LandmarkNet(len(classes))
# NOTE(review): the original passed weights_only=False, which lets the
# checkpoint's pickle stream execute arbitrary code. The result is fed straight
# into load_state_dict, so the restricted loader should be sufficient.
state_dict = torch.load("model.pth", map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)
model.eval()  # inference mode: dropout off, batch norm uses running statistics

# Preprocessing: resize to 96x96, convert to a tensor, then normalise each
# channel with mean 0.5 and std 0.5.
tf = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.ToTensor(),
    transforms.Normalize([0.5] * 3, [0.5] * 3),
])

# Open the image inside a context manager so the file handle is released;
# convert() returns a loaded copy that stays valid after the file is closed.
with Image.open("your_image.jpg") as src:
    img = src.convert("RGB")

with torch.no_grad():
    # unsqueeze(0) adds the batch dimension; [0] takes the single result back out.
    logits = model(tf(img).unsqueeze(0))[0]
    probs = torch.softmax(logits, 0)

# Convert the 0-dim tensors to plain Python numbers before indexing/formatting.
confidence, index = probs.max(dim=0)
print(classes[index.item()], f"{confidence.item() * 100:.1f}%")