import os

import cv2
import numpy as np
import patchify
import torch
import torch.nn.functional as F
from torch import nn, Tensor
from torchvision import transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class Interpreter(nn.Module):
    def __init__(self, 
                 class_count:int,
                 sample_yolo_output,
                 device,
                ):
        super().__init__()

        c = 32

        self._conv1 = nn.Conv2d(in_channels= 3,   out_channels= 2*c,  kernel_size=5, padding=2)
        self._conv2 = nn.Conv2d(in_channels= 2*c, out_channels= 4*c,  kernel_size=5, padding=2)
        self._conv3 = nn.Conv2d(in_channels= 4*c, out_channels= 8*c,  kernel_size=5, padding=2)
        self._conv4 = nn.Conv2d(in_channels= 8*c, out_channels=16*c,  kernel_size=3, padding=1)
        self._conv5 = nn.Conv2d(in_channels=16*c, out_channels=32*c,  kernel_size=3, padding=1)
        self._conv6 = nn.Conv2d(in_channels=32*c, out_channels=64*c,  kernel_size=3, padding=1)

        # infer the flattened conv-output size by running a sample input once
        self._linear_size = self.calc_linear(sample_yolo_output)

        self._fc1 = nn.Linear(self._linear_size,512)
        self._fc2 = nn.Linear(512, class_count)
        
        self.to(device)
        self.device = device
        self.train()

    def calc_linear(self, sample_yolo_output) -> int:
        # the model still lives on the CPU here (`.to(device)` runs after this
        # in __init__), so the sample input is moved to the CPU as well
        x = self.convs(sample_yolo_output.to('cpu'))
        return x.shape[-1]

    def convs(self, x: Tensor) -> Tensor:
        # six conv -> ReLU -> 2x2 max-pool stages; each pool halves H and W
        x = F.max_pool2d(F.relu(self._conv1(x)), (2,2))
        x = F.max_pool2d(F.relu(self._conv2(x)), (2,2))
        x = F.max_pool2d(F.relu(self._conv3(x)), (2,2))
        x = F.max_pool2d(F.relu(self._conv4(x)), (2,2))
        x = F.max_pool2d(F.relu(self._conv5(x)), (2,2))
        x = F.max_pool2d(F.relu(self._conv6(x)), (2,2))
        x = torch.flatten(x,1)
        return x
    
    def fc(self, x: Tensor) -> Tensor:
        x = F.relu(self._fc1(x))
        # the final layer returns raw logits (no activation), to be paired
        # with a loss such as cross-entropy that applies softmax internally
        x = self._fc2(x)
        return x

    def forward(self, x: Tensor) -> Tensor:
        x = self.convs(x)
        x = self.fc(x)
        return x
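

# A minimal sketch (illustration only, not part of the original pipeline) of
# the arithmetic that calc_linear measures empirically: each of the six
# max-pools halves the spatial size, and the last conv emits 64*c channels,
# so a 320x320 input yields 2048 * 5 * 5 = 51200 flattened features. The
# helper assumes the input side divides evenly by 64.
def _expected_linear_size(side: int = 320, c: int = 32) -> int:
    pooled = side // 2 ** 6          # six halving pool stages
    return 64 * c * pooled * pooled  # channels * height * width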


class CNN_Model(nn.Module):
    def __init__(self,
                 image_size: tuple[int,int],
                 interpreter: Interpreter,
    ):
        super().__init__()
        self.device = interpreter.device
        self.image_size = image_size
        self.interpreter = interpreter

    def predict(self, img_path: str) -> Tensor:
        img = cv2.imread(img_path)
        # cv2 loads channels as BGR; convert so inference sees RGB input
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = transforms.ToTensor()(img)
        img = transforms.Resize(self.image_size)(img)
        img = img[None]  # add a batch dimension
        img = img.to(self.device)

        with torch.no_grad():
            preds = self.forward(img)
        _, preds = torch.max(preds, 1)
        return preds
    
    def forward(self, x:Tensor) -> Tensor:
        x = self.interpreter(x)
        return x

    def predict_large_image(self,
                   img: np.ndarray,
                   patch_size: int = 816,
        ) -> tuple[Tensor, Tensor]:

        L = patch_size
        # tile the image into non-overlapping LxL patches; patchify crops any
        # border remainder that does not fill a whole patch
        patches = patchify.patchify(img, (L, L, 3), L)
        w, h, _ = patches.shape[:3]
        # collapse the patch grid into a batch and reorder HWC -> CHW
        patches = patches.reshape(w*h, *patches.shape[3:]).transpose((0, 3, 1, 2))

        patches = torch.from_numpy(patches)

        patches = patches.float() / 255
        patches = transforms.Resize(self.image_size)(patches)
        patches = patches.to(self.device)

        with torch.no_grad():
            preds = self.forward(patches)
        _, preds = torch.max(preds, 1)

        ratios = preds                       # per-patch class predictions
        preds = torch.mode(preds, 0).values  # majority vote across patches

        return ratios, preds
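
# A minimal sketch (the image dimensions below are hypothetical, chosen for
# illustration) of the grid predict_large_image tiles: with the step equal to
# the patch size the patches do not overlap, so e.g. a 2000x1700 image at
# patch_size=816 gives a 2x2 grid and the remaining border is dropped.
def _patch_grid(height: int, width: int, patch_size: int = 816) -> tuple[int, int]:
    return height // patch_size, width // patch_size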
    
class_count = 41  # number of output classes predicted by the classifier head

def build_interpreter(img_size=(640,640),
                      device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    ) -> Interpreter:
    img_size = list(img_size)

    # dummy batch of one image, used only to size the first linear layer
    x = torch.randn([1, 3] + img_size)

    return Interpreter(class_count=class_count, sample_yolo_output=x, device=device)

def build_model(img_size = (640,640),
                device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    ) -> CNN_Model:
    return CNN_Model(image_size=img_size, 
                     interpreter=build_interpreter(img_size, device))
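
# A minimal single-step training sketch, not part of the original file: it
# assumes a cross-entropy objective (fc2 emits raw logits, see Interpreter.fc)
# and that `images` and `labels` are tensors already on the model's device.
# The optimizer is a placeholder argument chosen for illustration.
def _train_step(model: CNN_Model, images: Tensor, labels: Tensor,
                optimizer: torch.optim.Optimizer) -> float:
    optimizer.zero_grad()
    logits = model(images)                  # CNN_Model.forward -> (N, class_count)
    loss = F.cross_entropy(logits, labels)  # softmax + NLL in one call
    loss.backward()
    optimizer.step()
    return loss.item()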

if __name__ == "__main__":
    model = build_model(img_size=(320,320))
    DATA_DIR = "data/image/test"
    class_dir = os.listdir(DATA_DIR)[0]
    img_name = os.listdir(f"{DATA_DIR}/{class_dir}")[0]
    img_path = f"{DATA_DIR}/{class_dir}/{img_name}"

    # predict_large_image expects an image array, not a path
    img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
    ratios, pred = model.predict_large_image(img)
    print(ratios, pred)