"""Res2Net v1b image classifier served through a Gradio web interface.

The first part of the module defines the Res2Net v1b architecture
(``Bottle2neck`` blocks assembled by ``Res2Net``) and its factory functions.
The second part builds a Res2Net-50 with a ``category_num``-way head, loads
the weights stored in ``pth_path`` and exposes ``classify_image`` through
``gr.Interface``.
"""

import math

import gradio as gr
import requests
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
import torchvision.transforms as transforms

__all__ = ['Res2Net', 'res2net50_v1b', 'res2net101_v1b']

# Download locations of the ImageNet checkpoints, keyed by architecture name.
model_urls = {
    'res2net50_v1b_26w_4s': 'https://shanghuagao.oss-cn-beijing.aliyuncs.com/res2net/res2net50_v1b_26w_4s-3cf99910.pth',
    'res2net101_v1b_26w_4s': 'https://shanghuagao.oss-cn-beijing.aliyuncs.com/res2net/res2net101_v1b_26w_4s-0812c246.pth',
}


class Bottle2neck(nn.Module):
    """Res2Net bottleneck: 1x1 conv, a group of chained 3x3 convs, 1x1 conv.

    The output of the first 1x1 convolution is split along the channel axis
    into ``scale`` chunks of ``width`` channels. The first ``nums`` chunks each
    go through their own 3x3 convolution; in a ``'normal'`` block every chunk
    after the first is summed with the previous chunk's output beforehand.
    The last chunk is passed through unchanged (``'normal'``) or average
    pooled (``'stage'``) when ``scale != 1``.
    """

    expansion = 4  # output channels = planes * expansion

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 baseWidth=26, scale=4, stype='normal'):
        """Build the block.

        Args:
            inplanes: number of input channels.
            planes: base channel count; the block outputs
                ``planes * expansion`` channels.
            stride: stride of the 3x3 convolutions (and of the pooling layer
                in a ``'stage'`` block).
            downsample: optional module applied to the input to produce the
                residual; ``None`` uses the input itself.
            baseWidth: basic width of the 3x3 convolutions; the per-chunk
                width is ``floor(planes * baseWidth / 64)``.
            scale: number of channel chunks.
            stype: ``'normal'`` for a regular block, ``'stage'`` for the first
                block of a new stage.
        """
        super(Bottle2neck, self).__init__()

        width = int(math.floor(planes * (baseWidth / 64.0)))
        self.conv1 = nn.Conv2d(inplanes, width * scale, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width * scale)

        # One 3x3 conv per processed chunk; the last chunk is not convolved
        # unless there is only a single chunk.
        if scale == 1:
            self.nums = 1
        else:
            self.nums = scale - 1
        if stype == 'stage':
            self.pool = nn.AvgPool2d(kernel_size=3, stride=stride, padding=1)

        convs = []
        bns = []
        for _ in range(self.nums):
            convs.append(nn.Conv2d(width, width, kernel_size=3, stride=stride,
                                   padding=1, bias=False))
            bns.append(nn.BatchNorm2d(width))
        self.convs = nn.ModuleList(convs)
        self.bns = nn.ModuleList(bns)

        self.conv3 = nn.Conv2d(width * scale, planes * self.expansion,
                               kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stype = stype
        self.scale = scale
        self.width = width

    def forward(self, x):
        """Apply the block to ``x`` and return the activated residual sum."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        # Chunks of `width` channels along dimension 1.
        spx = torch.split(out, self.width, 1)
        for i in range(self.nums):
            # A 'stage' block treats every chunk independently; a 'normal'
            # block feeds the previous chunk's output into the next one.
            if i == 0 or self.stype == 'stage':
                sp = spx[i]
            else:
                sp = sp + spx[i]
            sp = self.convs[i](sp)
            sp = self.relu(self.bns[i](sp))
            if i == 0:
                out = sp
            else:
                out = torch.cat((out, sp), 1)

        # Append the remaining chunk, which has no 3x3 conv of its own.
        if self.scale != 1 and self.stype == 'normal':
            out = torch.cat((out, spx[self.nums]), 1)
        elif self.scale != 1 and self.stype == 'stage':
            out = torch.cat((out, self.pool(spx[self.nums])), 1)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Res2Net(nn.Module):
    """Res2Net v1b backbone followed by global pooling and a linear head.

    The stem is three stacked 3x3 convolutions, and shortcut connections that
    change resolution or channel count use average pooling followed by a 1x1
    convolution.
    """

    def __init__(self, block, layers, baseWidth=26, scale=4, num_classes=1000):
        """Build the network.

        Args:
            block: block class used for every stage (e.g. ``Bottle2neck``);
                must expose an ``expansion`` attribute.
            layers: sequence of four ints, the number of blocks per stage.
            baseWidth: forwarded to every block.
            scale: forwarded to every block.
            num_classes: output size of the final linear layer.
        """
        super(Res2Net, self).__init__()
        # Channel count entering the next block; updated by _make_layer.
        self.inplanes = 64
        self.baseWidth = baseWidth
        self.scale = scale
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, 3, 2, 1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, 3, 1, 1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, 3, 1, 1, bias=False)
        )
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Return one stage: a ``'stage'`` block followed by ``blocks - 1`` normal blocks.

        A downsample shortcut (average pool, 1x1 conv, batch norm) is created
        when the stride is not 1 or the channel count changes.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.AvgPool2d(kernel_size=stride, stride=stride,
                             ceil_mode=True, count_include_pad=False),
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=1, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample=downsample,
                            stype='stage', baseWidth=self.baseWidth, scale=self.scale))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes,
                                baseWidth=self.baseWidth, scale=self.scale))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem, the four stages, global pooling and the linear head."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x


def _load_pretrained(model, arch, map_location=None):
    """Download the checkpoint registered for ``arch`` and load it into ``model``.

    Raises:
        KeyError: if ``model_urls`` has no entry for ``arch``.
    """
    if arch not in model_urls:
        raise KeyError(
            'no pretrained weights URL registered for {!r}'.format(arch))
    state_dict = model_zoo.load_url(model_urls[arch], map_location=map_location)
    model.load_state_dict(state_dict)
    return model


def res2net50_v1b(pretrained=False, **kwargs):
    """Constructs a Res2Net-50_v1b model.

    Res2Net-50 refers to the Res2Net-50_v1b_26w_4s.

    Args:
        pretrained (bool): If True, load the checkpoint registered under
            'res2net50_v1b_26w_4s' in ``model_urls``.
        **kwargs: forwarded to ``Res2Net`` (e.g. ``num_classes``).
    """
    model = Res2Net(Bottle2neck, [3, 4, 6, 3], baseWidth=26, scale=4, **kwargs)
    if pretrained:
        _load_pretrained(model, 'res2net50_v1b_26w_4s')
    return model


def res2net101_v1b(pretrained=False, **kwargs):
    """Constructs a Res2Net-101_v1b_26w_4s model.

    Args:
        pretrained (bool): If True, load the checkpoint registered under
            'res2net101_v1b_26w_4s' in ``model_urls``.
        **kwargs: forwarded to ``Res2Net`` (e.g. ``num_classes``).
    """
    model = Res2Net(Bottle2neck, [3, 4, 23, 3], baseWidth=26, scale=4, **kwargs)
    if pretrained:
        _load_pretrained(model, 'res2net101_v1b_26w_4s')
    return model


def res2net50_v1b_26w_4s(pretrained=False, **kwargs):
    """Constructs a Res2Net-50_v1b_26w_4s model.

    Args:
        pretrained (bool): If True, load the checkpoint registered under
            'res2net50_v1b_26w_4s' in ``model_urls`` (mapped to the CPU).
        **kwargs: forwarded to ``Res2Net`` (e.g. ``num_classes``).
    """
    model = Res2Net(Bottle2neck, [3, 4, 6, 3], baseWidth=26, scale=4, **kwargs)
    if pretrained:
        # The previous implementation read an undefined local checkpoint path
        # here; the registered URL is used instead, like the sibling factories.
        _load_pretrained(model, 'res2net50_v1b_26w_4s', map_location='cpu')
    return model


def res2net101_v1b_26w_4s(pretrained=False, **kwargs):
    """Constructs a Res2Net-101_v1b_26w_4s model.

    Args:
        pretrained (bool): If True, load the checkpoint registered under
            'res2net101_v1b_26w_4s' in ``model_urls``.
        **kwargs: forwarded to ``Res2Net`` (e.g. ``num_classes``).
    """
    model = Res2Net(Bottle2neck, [3, 4, 23, 3], baseWidth=26, scale=4, **kwargs)
    if pretrained:
        _load_pretrained(model, 'res2net101_v1b_26w_4s')
    return model


def res2net152_v1b_26w_4s(pretrained=False, **kwargs):
    """Constructs a Res2Net-152_v1b_26w_4s model.

    Args:
        pretrained (bool): If True, load the checkpoint registered under
            'res2net152_v1b_26w_4s' in ``model_urls``.
        **kwargs: forwarded to ``Res2Net`` (e.g. ``num_classes``).

    Raises:
        KeyError: with ``pretrained=True`` while ``model_urls`` has no entry
            for this architecture (currently the case).
    """
    model = Res2Net(Bottle2neck, [3, 8, 36, 3], baseWidth=26, scale=4, **kwargs)
    if pretrained:
        _load_pretrained(model, 'res2net152_v1b_26w_4s')
    return model


class mutil_model(nn.Module):
    """Two-branch classifier: a Res2Net-50 and a model loaded from disk.

    Each branch produces ``category_num`` scores; the two score vectors are
    concatenated and mapped back to ``category_num`` scores by a linear layer.
    """

    def __init__(self, category_num=8):
        """Build both branches and the fusion layer.

        Args:
            category_num: number of output classes.
        """
        super(mutil_model, self).__init__()
        self.model1 = res2net50_v1b_26w_4s(pretrained=False)
        self.model1.fc = nn.Sequential(
            nn.Linear(in_features=2048, out_features=category_num, bias=True),
        )
        # NOTE(review): torch.load unpickles a complete model object here, and
        # unpickling can execute arbitrary code -- only ship a trusted file.
        self.model2 = torch.load('./enet_b2_8' + '.pt', map_location=torch.device('cpu'))
        self.model2.classifier = nn.Sequential(
            nn.Linear(in_features=1408, out_features=category_num, bias=True),
        )
        self.fc = nn.Linear(in_features=category_num * 2,
                            out_features=category_num, bias=True)

    def forward(self, x):
        """Return fused class scores for the batch ``x``."""
        x1 = self.model1(x)
        x2 = self.model2(x)
        x = torch.cat((x1, x2), 1)
        x = self.fc(x)
        return x


pth_path = './chn.pt'
category_num = 2

# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build the Res2Net backbone without downloading ImageNet weights; the
# parameters come from `pth_path` below.
model = res2net50_v1b_26w_4s(pretrained=False)
# Replace the fully connected layer so the output size equals the number of
# predicted categories.
model.fc = nn.Sequential(
    nn.Linear(in_features=2048, out_features=category_num, bias=True),
)

model = model.to(device)
model.device = device
model.load_state_dict(torch.load(pth_path, torch.device('cpu')))
model.eval()

# An alternative setup that adds the face recognition branch used
# mutil_model(category_num=category_num) with the state dict stored in
# './model_8_addsad.pt'.

labels = ['怀旧','伤感','快乐','激励','清新','浪漫','思念','其他']

print(len(labels))

# Preprocessing applied to every request; built once instead of per call.
transform_test = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])


def classify_image(inp):
    """Classify one image and return a ``{label: probability}`` mapping.

    Args:
        inp: the image supplied by the Gradio ``Image(type='pil')`` input.

    Returns:
        dict mapping label text to the softmax probability of that class.
        Only as many labels as the model has outputs are returned.
    """
    inp = transform_test(inp)
    print(inp)
    with torch.no_grad():
        # The model lives on `device`, so the batch must be moved there too.
        batch = torch.unsqueeze(inp, 0).to(device)
        prediction = model(batch).flatten()
        print(prediction)
        prediction = torch.nn.Softmax(dim=0)(prediction)
        print(prediction)
    # NOTE(review): `labels` has more entries than the head has outputs
    # (`category_num`); indexing by len(labels) raised IndexError. zip pairs
    # the outputs with the leading labels -- confirm the label list and its
    # order against the classes the checkpoint was trained on.
    return {label: float(score.item()) for label, score in zip(labels, prediction)}


gr.Interface(
    classify_image,
    gr.inputs.Image(type='pil'),
    outputs='label'
).launch(debug=True)  # share=True