# NOTE: removed web-export residue that was captured along with this source
# (page status lines "Spaces: Sleeping", a file-size banner, commit hashes,
# and a line-number gutter). None of it was Python code.
import torchvision
import torch
import os
from torch import nn, Tensor
import torch.nn.functional as F
import cv2
from PIL import Image
import numpy as np
# Default compute device for module-level helpers: CUDA when available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class Interpreter(nn.Module):
    """CNN classification head: six conv+pool stages followed by two FC layers.

    The flattened feature size is discovered at construction time by pushing
    ``sample_yolo_output`` through the conv stack once, so the head adapts to
    whatever spatial size the sample has (it must survive six 2x2 max-pools,
    i.e. both spatial dims must be multiples of 64).
    """

    def __init__(self,
                 class_count: int,
                 sample_yolo_output: Tensor,
                 device: torch.device,
                 ):
        """
        Args:
            class_count: number of output classes (size of the final layer).
            sample_yolo_output: example input batch, shape (N, 3, H, W),
                used only to size the first fully-connected layer.
            device: device the module is moved to after construction.
        """
        super().__init__()
        c = 32  # base channel width; each conv stage doubles it
        self._conv1 = nn.Conv2d(in_channels=3,    out_channels=2 * c,  kernel_size=5, padding=2)
        self._conv2 = nn.Conv2d(in_channels=2 * c,  out_channels=4 * c,  kernel_size=5, padding=2)
        self._conv3 = nn.Conv2d(in_channels=4 * c,  out_channels=8 * c,  kernel_size=5, padding=2)
        self._conv4 = nn.Conv2d(in_channels=8 * c,  out_channels=16 * c, kernel_size=3, padding=1)
        self._conv5 = nn.Conv2d(in_channels=16 * c, out_channels=32 * c, kernel_size=3, padding=1)
        self._conv6 = nn.Conv2d(in_channels=32 * c, out_channels=64 * c, kernel_size=3, padding=1)
        # Size the FC input by running the sample through the conv stack once.
        self._linear_size = self.calc_linear(sample_yolo_output)
        self._fc1 = nn.Linear(self._linear_size, 512)
        self._fc2 = nn.Linear(512, class_count)
        self.to(device)
        self.device = device
        # train() sets self.training = True; no need to assign the flag by hand.
        self.train()

    def calc_linear(self, sample_yolo_output: Tensor) -> int:
        """Return the flattened feature length produced by :meth:`convs`.

        Runs on CPU (the conv layers have not been moved to ``device`` yet at
        the point this is called) and under no_grad, since the probe forward
        pass is only needed for its output shape.
        """
        with torch.no_grad():
            x = self.convs(sample_yolo_output.to('cpu'))
        return x.shape[-1]

    def convs(self, x: Tensor) -> Tensor:
        """Apply the six conv -> ReLU -> 2x2 max-pool stages and flatten."""
        for conv in (self._conv1, self._conv2, self._conv3,
                     self._conv4, self._conv5, self._conv6):
            x = F.max_pool2d(F.relu(conv(x)), (2, 2))
        return torch.flatten(x, 1)

    def fc(self, x: Tensor) -> Tensor:
        """Fully-connected classifier; returns raw logits (no final ReLU)."""
        x = F.relu(self._fc1(x))
        return self._fc2(x)

    def forward(self, x: Tensor) -> Tensor:
        """Return class logits of shape (N, class_count) for a batched image tensor."""
        return self.fc(self.convs(x))
import patchify
from torchvision import transforms
class CNN_Model(nn.Module):
    """Thin inference wrapper around an :class:`Interpreter`.

    Handles image loading/resizing and patch-wise prediction for images
    larger than the interpreter's expected input size.
    """

    def __init__(self,
                 image_size: tuple[int, int],
                 interpreter: Interpreter,
                 ):
        """
        Args:
            image_size: (H, W) every input is resized to before inference.
            interpreter: classifier head; its ``device`` is adopted.
        """
        super().__init__()
        self.device = interpreter.device
        self.image_size = image_size
        self.interpreter = interpreter

    def predict(self, img_path: str) -> Tensor:
        """Load one image from disk and return its predicted class index."""
        img = cv2.imread(img_path)
        # NOTE(review): cv2.imread yields BGR but Image.fromarray assumes RGB;
        # if training data went through the same path this is consistent, but
        # confirm before adding a cvtColor here.
        img = Image.fromarray(img)
        img = transforms.ToTensor()(img)
        img = torchvision.transforms.Resize(self.image_size)(img)
        img = img[None].to(self.device)  # add batch dim
        with torch.no_grad():  # inference only; don't build an autograd graph
            logits = self.forward(img)
        _, preds = torch.max(logits, 1)
        return preds

    def forward(self, x: Tensor) -> Tensor:
        """Delegate straight to the interpreter; returns class logits."""
        return self.interpreter(x)

    def predict_large_image(self,
                            img: np.ndarray,
                            patch_size: int = 816,
                            ) -> tuple[Tensor, Tensor]:
        """Split a large HxWx3 image into patches and classify each patch.

        Args:
            img: decoded image array (H, W, 3); H and W should be
                multiples of ``patch_size`` for full coverage.
            patch_size: side length of the square, non-overlapping patches.

        Returns:
            (per_patch, majority): per-patch predicted class indices, and the
            single most common class across all patches.
        """
        L = patch_size
        patches = patchify.patchify(img, (L, L, 3), L)
        w, h = patches.shape[:2]
        # (w, h, 1, L, L, 3) -> (w*h, 3, L, L) in NCHW order.
        patches = patches.reshape(w * h, *patches.shape[3:]).transpose((0, 3, 1, 2))
        patches = torch.from_numpy(patches).float() / 255
        patches = transforms.Resize(self.image_size)(patches)
        patches = patches.to(self.device)
        with torch.no_grad():  # inference only
            logits = self.forward(patches)
        _, per_patch = torch.max(logits, 1)
        majority = torch.mode(per_patch, 0).values
        return per_patch, majority
# Number of target classes for the classifier head (dataset-specific).
class_count = 41
def build_interpreter(img_size=(640,640),
                      device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
                      ) -> Interpreter:
    """Construct an Interpreter sized for ``img_size`` via a random probe batch."""
    dims = list(img_size)
    # Probe tensor of shape (1, 3, H, W); the Interpreter uses it only to
    # size its first fully-connected layer.
    probe = torch.randn([3] + dims).view([-1, 3] + dims).to(device)
    return Interpreter(class_count=class_count, sample_yolo_output=probe, device=device)
def build_model(img_size = (640,640),
                device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
                ) -> CNN_Model:
    """Build a CNN_Model whose interpreter is sized for ``img_size``."""
    interpreter = build_interpreter(img_size, device)
    return CNN_Model(image_size=img_size, interpreter=interpreter)
if __name__ == "__main__":
model = build_model(img_size=(320,320))
DATA_DIR = "data/image/test"
dir = os.listdir(DATA_DIR)[0]
img_name = os.listdir(f"{DATA_DIR}/{dir}")[0]
img_path = f"{DATA_DIR}/{dir}/{img_name}"
out = model.predict_large_image(img_path)
print(out) |