Spaces:
Sleeping
Sleeping
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
class NosePointRegressor(nn.Module):
    """Small convolutional network that regresses a single 2-D point.

    The encoder is three stride-2 3x3 convolutions (16, 32 and 64 channels),
    each followed by ReLU, and a global average pool. The head is a
    64 -> 32 -> 2 MLP whose output is squashed by a sigmoid.

    Args:
        input_channels: Number of channels of the input image (default 1).
    """

    def __init__(self, input_channels: int = 1) -> None:
        # Zero-argument super() matches the style used by the sibling
        # ResNetNoseRegressor class in this file.
        super().__init__()

        # NOTE: submodule names and Sequential indices are part of the
        # state_dict keys (e.g. "encoder.0.weight", "fc.1.bias"); keep them
        # stable so existing checkpoints continue to load.
        self.encoder = nn.Sequential(
            # -> [B, 16, ceil(H/2), ceil(W/2)]
            nn.Conv2d(input_channels, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            # -> [B, 32, ~H/4, ~W/4]
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            # -> [B, 64, ~H/8, ~W/8]
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            # Global pooling makes the head independent of the input size.
            nn.AdaptiveAvgPool2d((1, 1)),  # -> [B, 64, 1, 1]
        )
        self.fc = nn.Sequential(
            nn.Flatten(),  # -> [B, 64]
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 2),  # predict the (x, y) coordinate
            nn.Sigmoid(),  # squash both outputs into the unit interval
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Predict the point for a batch of images.

        Args:
            x: Batched image tensor of shape [B, input_channels, H, W].

        Returns:
            Tensor of shape [B, 2] holding the sigmoid-activated (x, y)
            prediction, each value in [0, 1].
        """
        # NOTE(review): presumably the targets are pixel coordinates divided
        # by image width/height -- confirm against the training code.
        features = self.encoder(x)
        return self.fc(features)
| import torchvision.models as models | |
| import torch.nn as nn | |
class ResNetNoseRegressor(nn.Module):
    """ResNet-18 backbone with a small regression head for one 2-D point.

    The torchvision ResNet-18 is truncated by dropping its last two child
    modules, re-pooled to 1x1, and followed by a 512 -> 128 -> 2 MLP whose
    output is squashed by a sigmoid.

    Args:
        pretrained: If truthy, initialise the backbone with the ImageNet
            weights shipped by torchvision; otherwise use random init.
    """

    def __init__(self, pretrained: bool = True) -> None:
        super().__init__()

        # The ``pretrained=`` keyword is deprecated since torchvision 0.13 in
        # favour of ``weights=``. Use the new API when it exists and fall
        # back to the legacy keyword on older installations.
        weights_enum = getattr(models, "ResNet18_Weights", None)
        if weights_enum is None:
            resnet = models.resnet18(pretrained=pretrained)
        else:
            # IMAGENET1K_V1 is the checkpoint that ``pretrained=True`` used
            # to select, so the loaded parameters are unchanged.
            weights = weights_enum.IMAGENET1K_V1 if pretrained else None
            resnet = models.resnet18(weights=weights)

        # Drop the last two children (per torchvision's ResNet definition:
        # the global average pool and the classification FC layer), keeping
        # the convolutional feature extractor. Module indices are part of
        # the state_dict keys ("backbone.N..."), so keep this construction.
        self.backbone = nn.Sequential(*list(resnet.children())[:-2])
        self.pool = nn.AdaptiveAvgPool2d((1, 1))  # -> [B, C, 1, 1]
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512, 128),  # 512 = ResNet-18 final feature channels
            nn.ReLU(),
            nn.Linear(128, 2),  # predict the (x, y) coordinate
            nn.Sigmoid(),  # normalised (x, y) in [0, 1]
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Predict the point for a batch of images.

        Args:
            x: Batched image tensor [B, C, H, W].
                NOTE(review): the stock ResNet-18 stem takes 3-channel
                input, so grayscale images presumably need to be expanded
                to 3 channels by the caller -- confirm.

        Returns:
            Tensor of shape [B, 2] with sigmoid-activated (x, y) values.
        """
        features = self.backbone(x)
        pooled = self.pool(features)
        return self.head(pooled)