Timber-identification-CNN / S1_CNN_Model.py
Yapp99's picture
Prediction includes ratios
1fb7b02
import torchvision
import torch
import os
from torch import nn, Tensor
import torch.nn.functional as F
import cv2
from PIL import Image
import numpy as np
# Module-wide default compute device: CUDA when PyTorch reports it as
# available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
class Interpreter(nn.Module):
    """Six-stage convolutional classifier with a two-layer linear head.

    Every convolution uses stride 1 with "same" padding and is followed by
    ReLU and a 2x2 max pool, so the spatial size is halved (rounding down)
    six times while the channel count grows from 64 to 2048.  The flattened
    features go through a 512-unit hidden layer and a final linear layer that
    produces one raw (un-normalised) score per class.
    """

    def __init__(self,
                 class_count: int,
                 sample_yolo_output: Tensor,
                 device,
                 ):
        """Build the layers and move the module to ``device``.

        Args:
            class_count: Number of output classes (width of the last layer).
            sample_yolo_output: Example input batch of shape ``(N, 3, H, W)``.
                Only its shape matters: it is pushed through the convolution
                stack once to discover the flattened feature size.
            device: Target device; forwarded to ``nn.Module.to`` and stored
                as ``self.device``.
        """
        super().__init__()
        c = 32  # base width; stage i outputs c * 2**i channels
        self._conv1 = nn.Conv2d(in_channels=3, out_channels=2 * c, kernel_size=5, padding=2)
        self._conv2 = nn.Conv2d(in_channels=2 * c, out_channels=4 * c, kernel_size=5, padding=2)
        self._conv3 = nn.Conv2d(in_channels=4 * c, out_channels=8 * c, kernel_size=5, padding=2)
        self._conv4 = nn.Conv2d(in_channels=8 * c, out_channels=16 * c, kernel_size=3, padding=1)
        self._conv5 = nn.Conv2d(in_channels=16 * c, out_channels=32 * c, kernel_size=3, padding=1)
        self._conv6 = nn.Conv2d(in_channels=32 * c, out_channels=64 * c, kernel_size=3, padding=1)

        # The linear head's input width depends on the image resolution, so
        # it is measured with a dry run instead of being hard-coded.
        self._linear_size = self.calc_linear(sample_yolo_output)
        # Report the flattened feature size on stdout.
        print(self._linear_size)

        self._fc1 = nn.Linear(self._linear_size, 512)
        self._fc2 = nn.Linear(512, class_count)

        self.to(device)
        self.device = device
        # The module is left in training mode after construction.
        self.train()

    def calc_linear(self, sample_yolo_output: Tensor) -> int:
        """Return the per-sample feature count produced by ``convs``.

        The probe runs without gradient tracking, and the sample is moved to
        wherever the convolution weights currently live (the CPU while
        ``__init__`` is running), so the method also works after the module
        has been moved to another device.
        """
        weights_device = self._conv1.weight.device
        with torch.no_grad():
            x = self.convs(sample_yolo_output.to(weights_device))
        return x.shape[-1]

    def convs(self, x: Tensor) -> Tensor:
        """Apply the six conv -> ReLU -> 2x2 max-pool stages and flatten.

        Args:
            x: Input batch of shape ``(N, 3, H, W)``.

        Returns:
            Tensor of shape ``(N, features)``.
        """
        stages = (self._conv1, self._conv2, self._conv3,
                  self._conv4, self._conv5, self._conv6)
        for conv in stages:
            x = F.max_pool2d(F.relu(conv(x)), (2, 2))
        # Keep the batch dimension, collapse channels and spatial dims.
        return torch.flatten(x, 1)

    def fc(self, x: Tensor) -> Tensor:
        """Map flattened features to raw class scores."""
        x = F.relu(self._fc1(x))
        # No activation on the last layer: raw class scores are returned.
        return self._fc2(x)

    def forward(self, x: Tensor) -> Tensor:
        """Return raw class scores of shape ``(N, class_count)`` for ``x``."""
        return self.fc(self.convs(x))
import patchify
from torchvision import transforms
class CNN_Model(nn.Module):
    """Inference wrapper that preprocesses images and feeds an ``Interpreter``."""

    def __init__(self,
                 image_size: tuple[int, int],
                 interpreter: Interpreter,
                 ):
        """Store the network and the size every input is resized to.

        Args:
            image_size: Target size handed to ``transforms.Resize``.
            interpreter: The network that produces class scores; its
                ``device`` attribute decides where inputs are sent.
        """
        super().__init__()
        self.device = interpreter.device
        self.image_size = image_size
        self.interpreter = interpreter

    def predict(self, img_path: str) -> Tensor:
        """Classify the single image stored at ``img_path``.

        Args:
            img_path: Path of an image file readable by OpenCV.

        Returns:
            Tensor of shape ``(1,)`` holding the predicted class index.

        Raises:
            FileNotFoundError: If OpenCV cannot load the file.
        """
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(
                f"Could not read image (missing or unreadable): {img_path!r}")
        # NOTE(review): cv2.imread yields BGR channel order and no conversion
        # is applied here - confirm this matches the training pipeline.
        img = Image.fromarray(img)
        img = transforms.ToTensor()(img)
        img = transforms.Resize(self.image_size)(img)
        img = img.unsqueeze(0).to(self.device)  # add the batch dimension
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            scores = self.forward(img)
        _, preds = torch.max(scores, 1)
        return preds

    def forward(self, x: Tensor) -> Tensor:
        """Return the interpreter's raw class scores for the batch ``x``."""
        return self.interpreter(x)

    def predict_large_image(self,
                            img: np.ndarray,
                            patch_size: int = 816,
                            ) -> tuple[Tensor, Tensor]:
        """Classify a large image by majority vote over square patches.

        The image is cut into non-overlapping ``patch_size`` x ``patch_size``
        patches, each patch is scaled to ``[0, 1]``, resized to
        ``self.image_size`` and classified in one batch.

        Args:
            img: Image array of shape ``(H, W, 3)``; values are divided by
                255, so 8-bit data is presumably expected.
            patch_size: Side length of the patches, also used as the step.

        Returns:
            ``(ratios, preds)``: ``ratios`` holds one predicted class index
            per patch (presumably used by callers to derive class ratios),
            and ``preds`` is the most frequent class among those patches.

        Raises:
            TypeError: If ``img`` is not a NumPy array (for example a path).
        """
        if not isinstance(img, np.ndarray):
            raise TypeError(
                f"img must be a numpy.ndarray, got {type(img).__name__}")

        L = patch_size
        # NOTE(review): patchify appears to keep only full patches, so any
        # remainder at the right/bottom edge is ignored - confirm.
        patches = patchify.patchify(img, (L, L, 3), L)
        rows, cols = patches.shape[:2]
        # (rows, cols, 1, L, L, 3) -> (rows*cols, L, L, 3) -> (rows*cols, 3, L, L)
        patches = patches.reshape(rows * cols, *patches.shape[3:]).transpose((0, 3, 1, 2))
        patches = torch.from_numpy(patches)
        patches = patches.float() / 255
        patches = transforms.Resize(self.image_size)(patches)
        patches = patches.to(self.device)

        # Inference only: avoid keeping activations for the whole patch batch.
        with torch.no_grad():
            scores = self.forward(patches)
        _, per_patch = torch.max(scores, 1)
        ratios = per_patch
        preds = torch.mode(per_patch, 0).values
        return ratios, preds
# Number of output classes; build_interpreter passes it to Interpreter, where
# it sets the width of the final linear layer.
class_count = 41
def build_interpreter(img_size=(640,640),
                      device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
                      ) -> Interpreter:
    """Create an ``Interpreter`` sized for images of ``img_size``.

    A random single-image batch of shape ``(1, 3, *img_size)`` is generated
    and handed to the network so it can work out the input width of its
    linear head.  The class count comes from the module-level ``class_count``.
    """
    spatial_dims = list(img_size)
    # Draw a (3, H, W) sample, then prepend the batch axis.
    sample_batch = torch.randn([3] + spatial_dims).unsqueeze(0)
    sample_batch = sample_batch.to(device)
    return Interpreter(
        class_count=class_count,
        sample_yolo_output=sample_batch,
        device=device,
    )
def build_model(img_size = (640,640),
                device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
                ) -> CNN_Model:
    """Create a ``CNN_Model`` wrapping a freshly built interpreter.

    ``img_size`` is used both to size the interpreter and as the resize
    target of the returned model; ``device`` is where the interpreter lives.
    """
    network = build_interpreter(img_size, device)
    model = CNN_Model(image_size=img_size, interpreter=network)
    return model
if __name__ == "__main__":
    # Smoke test: build an untrained 320x320 model and run patch-wise
    # prediction on the first image of the first sub-directory of DATA_DIR.
    model = build_model(img_size=(320,320))
    DATA_DIR = "data/image/test"
    first_subdir = os.listdir(DATA_DIR)[0]
    img_name = os.listdir(f"{DATA_DIR}/{first_subdir}")[0]
    img_path = f"{DATA_DIR}/{first_subdir}/{img_name}"
    # predict_large_image expects the decoded image array, not a path.
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(
            f"Could not read image (missing or unreadable): {img_path!r}")
    out = model.predict_large_image(img)
    print(out)