| |
| """ |
| detector.py is an out-of-the-box windowed detector |
| callable from the command line. |
| |
| By default it configures and runs the Caffe reference ImageNet model. |
| Note that this model was trained for image classification and not detection, |
| and finetuning for detection can be expected to improve results. |
| |
| The selective_search_ijcv_with_python code required for the selective search |
| proposal mode is available at |
| https://github.com/sergeyk/selective_search_ijcv_with_python |
| |
| TODO: |
| - batch up image filenames as well: don't want to load all of them into memory |
| - come up with a batching scheme that preserves order / keeps a unique ID |
| """ |
| import numpy as np |
| import pandas as pd |
| import os |
| import argparse |
| import time |
|
|
| import caffe |
|
|
# Window-proposal strategies accepted by the --crop_mode option.
CROP_MODES = [
    "list",
    "selective_search",
]

# Column names used to label the stacked detection windows, in this order.
# Kept as a list because it is concatenated with the class column names.
COORD_COLS = [
    "ymin",
    "xmin",
    "ymax",
    "xmax",
]
|
|
|
|
def main(argv):
    """Run windowed detection from the command line.

    Parses the command-line options, builds a ``caffe.Detector``, runs it on
    the windows described by the input file and writes the detections to the
    output file. Progress and timing messages are printed to stdout.

    Args:
        argv: Argument vector in ``sys.argv`` form (program name at index 0).
            If ``None`` or empty, argparse falls back to ``sys.argv[1:]``.

    Raises:
        ValueError: If the input file is neither txt nor csv, or if
            ``--crop_mode list`` is used with an input that is not a csv
            table of windows.
    """
    pycaffe_dir = os.path.dirname(__file__)

    parser = _build_parser(pycaffe_dir)
    # Honor the argv handed in by the caller instead of silently re-reading
    # sys.argv; index 0 is the program name and is not an option.
    args = parser.parse_args(argv[1:] if argv else None)

    mean, channel_swap = None, None
    if args.mean_file:
        mean = np.load(args.mean_file)
        if mean.shape[1:] != (1, 1):
            # Average over axis 1 twice so a 3-D mean array is reduced to
            # one value per entry of axis 0.
            mean = mean.mean(1).mean(1)
    if args.channel_swap:
        channel_swap = [int(s) for s in args.channel_swap.split(',')]

    if args.gpu:
        caffe.set_mode_gpu()
        print("GPU mode")
    else:
        caffe.set_mode_cpu()
        print("CPU mode")

    # Make detector.
    detector = caffe.Detector(args.model_def, args.pretrained_model, mean=mean,
                              input_scale=args.input_scale,
                              raw_scale=args.raw_scale,
                              channel_swap=channel_swap,
                              context_pad=args.context_pad)

    # Load input. The timer deliberately spans loading and detection, as the
    # "Processed ..." message below reports both together.
    t = time.time()
    print("Loading input...")
    inputs = _load_inputs(args.input_file)

    # Detect.
    if args.crop_mode == 'list':
        if not isinstance(inputs, pd.DataFrame):
            # A plain filename list carries no window coordinates; fail with
            # a clear message instead of an AttributeError further down.
            raise ValueError(
                "--crop_mode list requires a .csv input file with window "
                "coordinates.")
        # One (filename, windows) pair per distinct filename, in order of
        # first appearance. groupby does this in a single pass instead of
        # rescanning the whole index once per filename.
        images_windows = [
            (filename, rows[COORD_COLS].values)
            for filename, rows in inputs.groupby(level=0, sort=False)
        ]
        detections = detector.detect_windows(images_windows)
    else:
        # NOTE(review): with a .csv input `inputs` is a DataFrame here, not a
        # list of filenames - confirm what detect_selective_search expects.
        detections = detector.detect_selective_search(inputs)
    print("Processed {} windows in {:.3f} s.".format(len(detections),
                                                     time.time() - t))

    # Collect into dataframe with labeled fields: one row per detection,
    # indexed by filename, with the window split into coordinate columns.
    df = pd.DataFrame(detections)
    df.set_index('filename', inplace=True)
    df[COORD_COLS] = pd.DataFrame(
        data=np.vstack(df['window']), index=df.index, columns=COORD_COLS)
    del df['window']

    # Save results.
    t = time.time()
    _save_detections(df, args.output_file)
    print("Saved to {} in {:.3f} s.".format(args.output_file,
                                            time.time() - t))


def _build_parser(pycaffe_dir):
    """Return the argument parser; default paths are built from *pycaffe_dir*."""
    parser = argparse.ArgumentParser()
    # Required arguments: input and output.
    parser.add_argument(
        "input_file",
        help="Input txt/csv filename. If .txt, must be list of filenames. "
             "If .csv, must be comma-separated file with header "
             "'filename, xmin, ymin, xmax, ymax'"
    )
    parser.add_argument(
        "output_file",
        help="Output h5/csv filename. Format depends on extension."
    )
    # Optional arguments.
    parser.add_argument(
        "--model_def",
        default=os.path.join(
            pycaffe_dir,
            "../models/bvlc_reference_caffenet/deploy.prototxt"),
        help="Model definition file."
    )
    parser.add_argument(
        "--pretrained_model",
        default=os.path.join(
            pycaffe_dir,
            "../models/bvlc_reference_caffenet/"
            "bvlc_reference_caffenet.caffemodel"),
        help="Trained model weights file."
    )
    parser.add_argument(
        "--crop_mode",
        default="selective_search",
        choices=CROP_MODES,
        help="How to generate windows for detection."
    )
    parser.add_argument(
        "--gpu",
        action='store_true',
        help="Switch for gpu computation."
    )
    parser.add_argument(
        "--mean_file",
        default=os.path.join(pycaffe_dir,
                             'caffe/imagenet/ilsvrc_2012_mean.npy'),
        help="Data set image mean of H x W x K dimensions (numpy array). "
             "Set to '' for no mean subtraction."
    )
    parser.add_argument(
        "--input_scale",
        type=float,
        help="Multiply input features by this scale to finish preprocessing."
    )
    parser.add_argument(
        "--raw_scale",
        type=float,
        default=255.0,
        help="Multiply raw input by this scale before preprocessing."
    )
    parser.add_argument(
        "--channel_swap",
        default='2,1,0',
        help="Order to permute input channels. The default converts "
             "RGB -> BGR since BGR is the Caffe default by way of OpenCV."
    )
    parser.add_argument(
        "--context_pad",
        type=int,
        default=16,
        help="Amount of surrounding context to collect in input window."
    )
    return parser


def _load_inputs(input_file):
    """Load the detector input: a filename list (txt) or a window table (csv).

    Returns a list of stripped, non-empty lines for a txt file, or a
    DataFrame indexed by its 'filename' column for a csv file.

    Raises:
        ValueError: If the filename ends in neither 'txt' nor 'csv'.
    """
    lowered = input_file.lower()
    if lowered.endswith('txt'):
        with open(input_file) as f:
            # Skip blank lines (e.g. a trailing newline at end of file) so
            # they are not handed to the detector as empty filenames.
            return [line.strip() for line in f if line.strip()]
    if lowered.endswith('csv'):
        inputs = pd.read_csv(input_file, sep=',', dtype={'filename': str})
        inputs.set_index('filename', inplace=True)
        return inputs
    raise ValueError("Unknown input file type: not in txt or csv.")


def _save_detections(df, output_file):
    """Write the detections frame to *output_file* as csv or, otherwise, HDF5."""
    if output_file.lower().endswith('csv'):
        # csv: expand the per-detection 'feat' vectors into one column per
        # output. The number of outputs is read off the stacked features
        # (assumes each 'feat' is a 1-D vector of equal length - TODO confirm).
        feats = np.vstack(df['feat'])
        class_cols = ['class{}'.format(x) for x in range(feats.shape[1])]
        df[class_cols] = pd.DataFrame(
            data=feats, index=df.index, columns=class_cols)
        df.to_csv(output_file, columns=COORD_COLS + class_cols)
    else:
        # h5: store the frame as-is under the key 'df'.
        df.to_hdf(output_file, key='df', mode='w')
|
|
|
|
if __name__ == "__main__":
    # Executed as a script: hand the raw argument vector to main().
    import sys

    main(argv=sys.argv)
|
|