import os

import cv2
import numpy as np
import patchify
import torch
import torchvision
from PIL import Image
from torch import nn, Tensor
import torch.nn.functional as F
from torchvision import transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class Interpreter(nn.Module):
    """CNN classification head: six conv/pool stages followed by two fully connected layers."""

    def __init__(self,
                 class_count: int,
                 sample_yolo_output: Tensor,
                 device: torch.device,
                 ):
        super().__init__()
        c = 32
        # Six 'same'-padded convolutions; each is followed by a 2x2 max-pool in convs(),
        # so the spatial resolution shrinks by a factor of 64 overall.
        self._conv1 = nn.Conv2d(in_channels=3,      out_channels=2 * c,  kernel_size=5, padding=2)
        self._conv2 = nn.Conv2d(in_channels=2 * c,  out_channels=4 * c,  kernel_size=5, padding=2)
        self._conv3 = nn.Conv2d(in_channels=4 * c,  out_channels=8 * c,  kernel_size=5, padding=2)
        self._conv4 = nn.Conv2d(in_channels=8 * c,  out_channels=16 * c, kernel_size=3, padding=1)
        self._conv5 = nn.Conv2d(in_channels=16 * c, out_channels=32 * c, kernel_size=3, padding=1)
        self._conv6 = nn.Conv2d(in_channels=32 * c, out_channels=64 * c, kernel_size=3, padding=1)
        # Run a sample input through the conv stack once to find the flattened feature size.
        self._linear_size = self.calc_linear(sample_yolo_output)
        self._fc1 = nn.Linear(self._linear_size, 512)
        self._fc2 = nn.Linear(512, class_count)
        self.to(device)
        self.device = device
        self.train()

    def calc_linear(self, sample_yolo_output: Tensor) -> int:
        # The module has not been moved to `device` yet, so probe the conv stack on CPU.
        x = self.convs(sample_yolo_output.to('cpu'))
        return x.shape[-1]

    def convs(self, x: Tensor) -> Tensor:
        x = F.max_pool2d(F.relu(self._conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self._conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self._conv3(x)), (2, 2))
        x = F.max_pool2d(F.relu(self._conv4(x)), (2, 2))
        x = F.max_pool2d(F.relu(self._conv5(x)), (2, 2))
        x = F.max_pool2d(F.relu(self._conv6(x)), (2, 2))
        x = torch.flatten(x, 1)
        return x

    def fc(self, x: Tensor) -> Tensor:
        x = F.relu(self._fc1(x))
        x = self._fc2(x)  # raw logits; argmax/softmax is applied by the caller
        return x

    def forward(self, x: Tensor) -> Tensor:
        x = self.convs(x)
        x = self.fc(x)
        return x
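
# Quick sanity check of the flattened feature size (an illustration added here, not part of
# the original pipeline): with c = 32 the last conv emits 64 * c = 2048 channels, and six
# 2x2 max-pools divide each spatial dimension by 64, so a 320x320 input flattens to
# 2048 * 5 * 5 = 51200 features and a 640x640 input to 2048 * 10 * 10 = 204800.
#   probe = torch.randn(1, 3, 320, 320)
#   Interpreter(class_count=41, sample_yolo_output=probe, device=torch.device('cpu'))._linear_size  # 51200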
class CNN_Model(nn.Module):
    """Wraps an Interpreter with image loading, resizing, and patch-wise inference helpers."""

    def __init__(self,
                 image_size: tuple[int, int],
                 interpreter: Interpreter,
                 ):
        super().__init__()
        self.device = interpreter.device
        self.image_size = image_size
        self.interpreter = interpreter

    def predict(self, img_path: str) -> Tensor:
        # cv2.imread returns a BGR uint8 array; the same convention is kept here so
        # inference matches whatever channel order was used at training time.
        img = cv2.imread(img_path)
        img = Image.fromarray(img)
        img = transforms.ToTensor()(img)
        img = torchvision.transforms.Resize(self.image_size)(img)
        img = img[None]  # add a batch dimension
        img = img.to(self.device)
        preds = self.forward(img)
        _, preds = torch.max(preds, 1)
        return preds

    def forward(self, x: Tensor) -> Tensor:
        x = self.interpreter(x)
        return x

    def predict_large_image(self,
                            img: np.ndarray,
                            patch_size: int = 816,
                            ) -> tuple[Tensor, Tensor]:
        L = patch_size
        # Tile the image into non-overlapping L x L patches and classify each patch.
        patches = patchify.patchify(img, (L, L, 3), L)
        w, h, _ = patches.shape[:3]
        patches = patches.reshape(w * h, *patches.shape[3:]).transpose((0, 3, 1, 2))  # (N, 3, L, L)
        patches = torch.from_numpy(patches)
        patches = patches.float() / 255
        patches = transforms.Resize(self.image_size)(patches)
        patches = patches.to(self.device)
        preds = self.forward(patches)
        _, preds = torch.max(preds, 1)
        ratios = preds                        # per-patch class predictions
        preds = torch.mode(preds, 0).values  # majority vote over all patches
        return ratios, preds
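
# Illustrative note on the tiling above (an assumption about patchify's behaviour; double-check
# against the installed version): patchify.patchify(img, (L, L, 3), step=L) returns an array of
# shape (n_rows, n_cols, 1, L, L, 3), silently dropping any border strip narrower than L.
# For example, with the default patch size:
#   dummy = np.zeros((1632, 1632, 3), dtype=np.uint8)
#   patchify.patchify(dummy, (816, 816, 3), step=816).shape  # expected: (2, 2, 1, 816, 816, 3)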

class_count = 41

def build_interpreter(img_size=(640, 640),
                      device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
                      ) -> Interpreter:
    # Probe the conv stack with a single random image of the target size.
    x = torch.randn(1, 3, *img_size).to(device)
    return Interpreter(class_count=class_count, sample_yolo_output=x, device=device)

def build_model(img_size=(640, 640),
                device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
                ) -> CNN_Model:
    return CNN_Model(image_size=img_size,
                     interpreter=build_interpreter(img_size, device))

if __name__ == "__main__":
    model = build_model(img_size=(320, 320))
    DATA_DIR = "data/image/test"
    class_dir = os.listdir(DATA_DIR)[0]
    img_name = os.listdir(f"{DATA_DIR}/{class_dir}")[0]
    img_path = f"{DATA_DIR}/{class_dir}/{img_name}"
    # predict_large_image expects an ndarray, not a path, so load the image first.
    img = cv2.imread(img_path)
    out = model.predict_large_image(img)
    print(out)