Spaces:

guru001
/

yolov3-coco-torch

Running

File size: 4,882 Bytes

1f77b91

from __future__ import division
import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2
from utils import *
import argparse
import os
import os.path as osp
from darknet import Darknet
# from preprocess import prep_image, inp_to_image
import pandas as pd
import random
import pickle as pkl
import itertools
import os

if __name__ == '__main__':
    # Single-image YOLOv3 demo: load the network, run detection on one image,
    # print the detected class names, draw labelled boxes on the original
    # image and save the annotated copy under det/.

    images = 'victoria.jpg'

    batch_size = 1
    confidence = 0.5   # object-confidence threshold passed to write_results
    nms_thesh = 0.4    # NMS threshold passed to write_results
    reso = 416         # network input resolution (square)
    out_dir = 'det'

    CUDA = torch.cuda.is_available()

    num_classes = 80
    classes = load_classes('data/coco.names')

    # Set up the neural network
    model = Darknet("yolov3.cfg")
    model.load_weights("yolov3.weights")
    print(' [*] Model Loaded Successfuly')

    # Set the model resolution
    model.net_info["height"] = reso
    inp_dim = int(model.net_info["height"])

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if inp_dim % 32 != 0 or inp_dim <= 32:
        raise ValueError(
            "input resolution must be a multiple of 32 and greater than 32, "
            "got {}".format(inp_dim)
        )

    # If there's a GPU available, put the model on GPU
    if CUDA:
        model.cuda()

    # Set the model in evaluation mode
    model.eval()

    imlist = [osp.join(osp.realpath('.'), images)]

    # Fail early with a clear message rather than deep inside prep_image.
    for path in imlist:
        if not osp.isfile(path):
            raise FileNotFoundError("input image not found: {}".format(path))

    # prep_image is used here as returning a 3-tuple per image:
    # (network input tensor, original image, original dimensions).
    batches = [prep_image(path, inp_dim) for path in imlist]
    im_batches = [item[0] for item in batches]
    orig_ims = [item[1] for item in batches]
    im_dim_list = [item[2] for item in batches]

    print( 'im_dim_list : ' , im_dim_list )

    # Repeat the dimensions so each row has one entry per box coordinate
    # (x1, y1, x2, y2).
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)

    print( 'im_dim_list : after' , im_dim_list )

    if CUDA:
        im_dim_list = im_dim_list.cuda()

    print('im_batches' , len(im_batches))

    batch = im_batches[0]

    if CUDA:
        batch = batch.cuda()

    # Start the clock right before inference; the original used `start = 0`,
    # which made the reported duration the seconds since the epoch.
    start = time.time()

    # Inference only: no gradients are needed. The deprecated `Variable`
    # wrapper is a no-op on any torch version that has `torch.no_grad`.
    with torch.no_grad():
        prediction = model(batch, CUDA)

    # Keep the boxes with object confidence > threshold and run NMS on them.
    prediction = write_results(prediction, confidence, num_classes, nms = True, nms_conf = nms_thesh)

    end = time.time()

    # NOTE(review): write_results is assumed to return an int sentinel when
    # no detection survives thresholding (the original carried a
    # commented-out `type(prediction) == int` check) - confirm in utils.
    has_detections = not isinstance(prediction, int)
    output = prediction

    for im_num, image in enumerate( imlist ):
        im_id = im_num
        if has_detections:
            # Column 0 is the image index in the batch, last column the class id.
            objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
        else:
            objs = []
        print("{0:20s} predicted in {1:6.3f} seconds".format(osp.basename(image), (end - start)/batch_size))
        print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
        print("----------------------------------------------------------")

    if not has_detections:
        # Nothing to draw or save; stop cleanly instead of crashing on
        # tensor indexing of an int.
        raise SystemExit(0)

    # Pick, for every detection row, the dimensions of the image it belongs to.
    im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())

    # Map box coordinates from network-input space back to the original image.
    # Presumably this undoes the centred padding applied by prep_image - verify
    # against utils.
    scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1)

    output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
    output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2

    output[:,1:5] /= scaling_factor

    # Clip every box to the bounds of its image.
    for i in range(output.shape[0]):
        output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
        output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])

    # NOTE: pickle must only be loaded from trusted files; "pallete" is a
    # local asset shipped alongside this script.
    with open("pallete", "rb") as palette_file:
        colors = pkl.load(palette_file)

    def write(x, batches, results):
        """Draw one detection (box plus class label) onto its image.

        x       -- one detection row: x[0] is the image index, x[1:5] the box
                   corners (x1, y1, x2, y2), x[-1] the class index.
        batches -- unused; kept so the call signature stays unchanged.
        results -- list of images to draw on, indexed by x[0]; the selected
                   image is modified in place.

        Returns the image that was drawn on.
        """
        # Plain Python ints: recent OpenCV bindings reject tensor elements
        # as point coordinates.
        c1 = tuple(int(v) for v in x[1:3])
        c2 = tuple(int(v) for v in x[3:5])
        img = results[int(x[0])]

        print( 'img' , int( x[0] ) )
        print( 'cls' , int( x[-1] ) )

        cls = int(x[-1])
        label = "{0}".format(classes[cls])
        color = random.choice(colors)
        cv2.rectangle(img, c1, c2,color, 1)
        # Filled rectangle sized to the label text, used as its background.
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2,color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
        return img


    for detection in output:
        write(detection, im_batches, orig_ims)

    # cv2.imwrite does not create missing directories and only signals
    # failure through its return value, so create the folder up front.
    os.makedirs(out_dir, exist_ok=True)
    det_names = ["{}/det_{}".format(out_dir, osp.basename(path)) for path in imlist]

    print('det_names ',det_names)
    print('orig_ims ',orig_ims[0].shape)
    print('output : ',output)

    for det_name, annotated in zip(det_names, orig_ims):
        if not cv2.imwrite(det_name, annotated):
            raise OSError("failed to write annotated image: {}".format(det_name))