Duplicate from camenduru/openpose

b7f741c 21 days ago

5.73 kB

	#!/usr/bin/env python
	"""
	detector.py is an out-of-the-box windowed detector
	callable from the command line.

	By default it configures and runs the Caffe reference ImageNet model.
	Note that this model was trained for image classification and not detection,
	and finetuning for detection can be expected to improve results.

	The selective_search_ijcv_with_python code required for the selective search
	proposal mode is available at
	https://github.com/sergeyk/selective_search_ijcv_with_python

	TODO:
	- batch up image filenames as well: don't want to load all of them into memory
	- come up with a batching scheme that preserves order / keeps a unique ID
	"""
	import numpy as np
	import pandas as pd
	import os
	import argparse
	import time

	import caffe

	# Strategies for producing detection windows; offered as --crop_mode choices.
	CROP_MODES = ['list', 'selective_search']
	# Window coordinate columns, in the order they are selected from the input
	# csv and written to the output.
	COORD_COLS = ['ymin', 'xmin', 'ymax', 'xmax']


	def _build_parser(pycaffe_dir):
	    """Build the command-line parser; default paths are relative to pycaffe_dir."""
	    parser = argparse.ArgumentParser()
	    # Required arguments: input and output.
	    parser.add_argument(
	        "input_file",
	        # Implicit concatenation instead of backslash continuations, which
	        # would embed the source indentation in the help text.
	        help="Input txt/csv filename. If .txt, must be list of filenames. "
	             "If .csv, must be comma-separated file with header "
	             "'filename, xmin, ymin, xmax, ymax'"
	    )
	    parser.add_argument(
	        "output_file",
	        help="Output h5/csv filename. Format depends on extension."
	    )
	    # Optional arguments.
	    parser.add_argument(
	        "--model_def",
	        default=os.path.join(
	            pycaffe_dir,
	            "../models/bvlc_reference_caffenet/deploy.prototxt"),
	        help="Model definition file."
	    )
	    parser.add_argument(
	        "--pretrained_model",
	        default=os.path.join(
	            pycaffe_dir,
	            "../models/bvlc_reference_caffenet/"
	            "bvlc_reference_caffenet.caffemodel"),
	        help="Trained model weights file."
	    )
	    parser.add_argument(
	        "--crop_mode",
	        default="selective_search",
	        choices=CROP_MODES,
	        help="How to generate windows for detection."
	    )
	    parser.add_argument(
	        "--gpu",
	        action='store_true',
	        help="Switch for gpu computation."
	    )
	    parser.add_argument(
	        "--mean_file",
	        default=os.path.join(pycaffe_dir,
	                             'caffe/imagenet/ilsvrc_2012_mean.npy'),
	        help="Data set image mean of H x W x K dimensions (numpy array). "
	             "Set to '' for no mean subtraction."
	    )
	    parser.add_argument(
	        "--input_scale",
	        type=float,
	        help="Multiply input features by this scale to finish preprocessing."
	    )
	    parser.add_argument(
	        "--raw_scale",
	        type=float,
	        default=255.0,
	        help="Multiply raw input by this scale before preprocessing."
	    )
	    parser.add_argument(
	        "--channel_swap",
	        default='2,1,0',
	        help="Order to permute input channels. The default converts "
	             "RGB -> BGR since BGR is the Caffe default by way of OpenCV."
	    )
	    parser.add_argument(
	        "--context_pad",
	        type=int,
	        default=16,
	        help="Amount of surrounding context to collect in input window."
	    )
	    return parser


	def _input_kind(input_file):
	    """Classify the input file by extension as 'txt' or 'csv', else raise."""
	    lowered = input_file.lower()
	    if lowered.endswith('txt'):
	        return 'txt'
	    if lowered.endswith('csv'):
	        return 'csv'
	    raise ValueError("Unknown input file type: not in txt or csv.")


	def _load_inputs(input_file, kind):
	    """Load a filename list (txt) or a filename-indexed window table (csv)."""
	    if kind == 'txt':
	        with open(input_file) as f:
	            # Skip blank lines so a trailing newline is not taken as a path.
	            return [line.strip() for line in f if line.strip()]
	    # skipinitialspace lets the documented 'filename, xmin, ...' header
	    # (with a space after each comma) resolve to the expected column names.
	    inputs = pd.read_csv(input_file, sep=',', dtype={'filename': str},
	                         skipinitialspace=True)
	    inputs.set_index('filename', inplace=True)
	    return inputs


	def _detections_to_frame(detections):
	    """Turn detection records into a filename-indexed frame with coord columns."""
	    df = pd.DataFrame(detections)
	    df.set_index('filename', inplace=True)
	    # Expand each record's 'window' into one column per coordinate.
	    df[COORD_COLS] = pd.DataFrame(
	        data=np.vstack(df['window']), index=df.index, columns=COORD_COLS)
	    del df['window']
	    return df


	def _save_detections(df, output_file):
	    """Write the detections frame as csv or HDF5, chosen by file extension."""
	    if output_file.lower().endswith('csv'):
	        # NOTE(review): upstream caffe.Detector appears to store the scores
	        # under 'prediction' rather than 'feat' -- accept either; confirm.
	        score_col = 'feat' if 'feat' in df.columns else 'prediction'
	        scores = np.vstack(df[score_col])
	        # Derive the class count from the data instead of an undefined
	        # NUM_OUTPUT constant.
	        class_cols = ['class{}'.format(i) for i in range(scores.shape[1])]
	        df[class_cols] = pd.DataFrame(
	            data=scores, index=df.index, columns=class_cols)
	        df.to_csv(output_file, columns=COORD_COLS + class_cols)
	    else:
	        # h5
	        df.to_hdf(output_file, key='df', mode='w')


	def main(argv):
	    """Run windowed detection from the command line.

	    Parses the options, builds a caffe.Detector, runs it over the windows
	    described by the input file and saves the results.

	    Args:
	        argv: Argument list in sys.argv form (element 0 is the program
	            name). If empty or None, argparse falls back to sys.argv.

	    Raises:
	        ValueError: If the input file is neither txt nor csv, or if
	            --crop_mode list is combined with a non-csv input.
	        SystemExit: Raised by argparse on invalid command-line arguments.
	    """
	    pycaffe_dir = os.path.dirname(__file__)
	    parser = _build_parser(pycaffe_dir)
	    args = parser.parse_args(argv[1:] if argv else None)

	    # Validate the cheap things first, before the model is loaded.
	    input_kind = _input_kind(args.input_file)
	    if args.crop_mode == 'list' and input_kind != 'csv':
	        raise ValueError(
	            "crop_mode 'list' requires a csv input file with window "
	            "coordinates.")

	    mean, channel_swap = None, None
	    if args.mean_file:
	        mean = np.load(args.mean_file)
	        # Collapse a full mean image to one value per leading-axis entry.
	        if mean.shape[1:] != (1, 1):
	            mean = mean.mean(1).mean(1)
	    if args.channel_swap:
	        channel_swap = [int(s) for s in args.channel_swap.split(',')]

	    if args.gpu:
	        caffe.set_mode_gpu()
	        print("GPU mode")
	    else:
	        caffe.set_mode_cpu()
	        print("CPU mode")

	    # Make detector.
	    detector = caffe.Detector(
	        args.model_def, args.pretrained_model, mean=mean,
	        input_scale=args.input_scale, raw_scale=args.raw_scale,
	        channel_swap=channel_swap,
	        context_pad=args.context_pad)

	    # Load input.
	    t = time.time()
	    print("Loading input...")
	    inputs = _load_inputs(args.input_file, input_kind)

	    # Detect.
	    if args.crop_mode == 'list':
	        # Unpack sequence of (image filename, windows); one pass over the
	        # table, keeping filenames in order of first appearance.
	        images_windows = [
	            (fname, rows[COORD_COLS].values)
	            for fname, rows in inputs.groupby(level=0, sort=False)
	        ]
	        detections = detector.detect_windows(images_windows)
	    else:
	        detections = detector.detect_selective_search(inputs)
	    print("Processed {} windows in {:.3f} s.".format(len(detections),
	                                                     time.time() - t))

	    # Collect into dataframe with labeled fields.
	    df = _detections_to_frame(detections)

	    # Save results.
	    t = time.time()
	    _save_detections(df, args.output_file)
	    print("Saved to {} in {:.3f} s.".format(args.output_file,
	                                            time.time() - t))


	if __name__ == "__main__":
	    # Script entry point: hand the process's argument vector to main().
	    import sys
	    main(sys.argv)