# Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== r"""Constructs model, inputs, and training environment.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import copy import functools import os import tensorflow as tf from object_detection import eval_util from object_detection import exporter as exporter_lib from object_detection import inputs from object_detection.builders import graph_rewriter_builder from object_detection.builders import model_builder from object_detection.builders import optimizer_builder from object_detection.core import standard_fields as fields from object_detection.utils import config_util from object_detection.utils import label_map_util from object_detection.utils import shape_utils from object_detection.utils import variables_helper from object_detection.utils import visualization_utils as vis_utils # A map of names to methods that help build the model. MODEL_BUILD_UTIL_MAP = { 'get_configs_from_pipeline_file': config_util.get_configs_from_pipeline_file, 'create_pipeline_proto_from_configs': config_util.create_pipeline_proto_from_configs, 'merge_external_params_with_configs': config_util.merge_external_params_with_configs, 'create_train_input_fn': inputs.create_train_input_fn, 'create_eval_input_fn': inputs.create_eval_input_fn, 'create_predict_input_fn': inputs.create_predict_input_fn, 'detection_model_fn_base': model_builder.build, } def _prepare_groundtruth_for_eval(detection_model, class_agnostic, max_number_of_boxes): """Extracts groundtruth data from detection_model and prepares it for eval. Args: detection_model: A `DetectionModel` object. class_agnostic: Whether the detections are class_agnostic. max_number_of_boxes: Max number of groundtruth boxes. Returns: A tuple of: groundtruth: Dictionary with the following fields: 'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes, in normalized coordinates. 'groundtruth_classes': [batch_size, num_boxes] int64 tensor of 1-indexed classes. 'groundtruth_masks': 4D float32 tensor of instance masks (if provided in groundtruth) 'groundtruth_is_crowd': [batch_size, num_boxes] bool tensor indicating is_crowd annotations (if provided in groundtruth). 'num_groundtruth_boxes': [batch_size] tensor containing the maximum number of groundtruth boxes per image.. class_agnostic: Boolean indicating whether detections are class agnostic. """ input_data_fields = fields.InputDataFields() groundtruth_boxes = tf.stack( detection_model.groundtruth_lists(fields.BoxListFields.boxes)) groundtruth_boxes_shape = tf.shape(groundtruth_boxes) # For class-agnostic models, groundtruth one-hot encodings collapse to all # ones. if class_agnostic: groundtruth_classes_one_hot = tf.ones( [groundtruth_boxes_shape[0], groundtruth_boxes_shape[1], 1]) else: groundtruth_classes_one_hot = tf.stack( detection_model.groundtruth_lists(fields.BoxListFields.classes)) label_id_offset = 1 # Applying label id offset (b/63711816) groundtruth_classes = ( tf.argmax(groundtruth_classes_one_hot, axis=2) + label_id_offset) groundtruth = { input_data_fields.groundtruth_boxes: groundtruth_boxes, input_data_fields.groundtruth_classes: groundtruth_classes } if detection_model.groundtruth_has_field(fields.BoxListFields.masks): groundtruth[input_data_fields.groundtruth_instance_masks] = tf.stack( detection_model.groundtruth_lists(fields.BoxListFields.masks)) if detection_model.groundtruth_has_field(fields.BoxListFields.is_crowd): groundtruth[input_data_fields.groundtruth_is_crowd] = tf.stack( detection_model.groundtruth_lists(fields.BoxListFields.is_crowd)) groundtruth[input_data_fields.num_groundtruth_boxes] = ( tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]])) return groundtruth def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True): """Unstacks all tensors in `tensor_dict` along 0th dimension. Unstacks tensor from the tensor dict along 0th dimension and returns a tensor_dict containing values that are lists of unstacked, unpadded tensors. Tensors in the `tensor_dict` are expected to be of one of the three shapes: 1. [batch_size] 2. [batch_size, height, width, channels] 3. [batch_size, num_boxes, d1, d2, ... dn] When unpad_groundtruth_tensors is set to true, unstacked tensors of form 3 above are sliced along the `num_boxes` dimension using the value in tensor field.InputDataFields.num_groundtruth_boxes. Note that this function has a static list of input data fields and has to be kept in sync with the InputDataFields defined in core/standard_fields.py Args: tensor_dict: A dictionary of batched groundtruth tensors. unpad_groundtruth_tensors: Whether to remove padding along `num_boxes` dimension of the groundtruth tensors. Returns: A dictionary where the keys are from fields.InputDataFields and values are a list of unstacked (optionally unpadded) tensors. Raises: ValueError: If unpad_tensors is True and `tensor_dict` does not contain `num_groundtruth_boxes` tensor. """ unbatched_tensor_dict = { key: tf.unstack(tensor) for key, tensor in tensor_dict.items() } if unpad_groundtruth_tensors: if (fields.InputDataFields.num_groundtruth_boxes not in unbatched_tensor_dict): raise ValueError('`num_groundtruth_boxes` not found in tensor_dict. ' 'Keys available: {}'.format( unbatched_tensor_dict.keys())) unbatched_unpadded_tensor_dict = {} unpad_keys = set([ # List of input data fields that are padded along the num_boxes # dimension. This list has to be kept in sync with InputDataFields in # standard_fields.py. fields.InputDataFields.groundtruth_instance_masks, fields.InputDataFields.groundtruth_classes, fields.InputDataFields.groundtruth_boxes, fields.InputDataFields.groundtruth_keypoints, fields.InputDataFields.groundtruth_group_of, fields.InputDataFields.groundtruth_difficult, fields.InputDataFields.groundtruth_is_crowd, fields.InputDataFields.groundtruth_area, fields.InputDataFields.groundtruth_weights ]).intersection(set(unbatched_tensor_dict.keys())) for key in unpad_keys: unpadded_tensor_list = [] for num_gt, padded_tensor in zip( unbatched_tensor_dict[fields.InputDataFields.num_groundtruth_boxes], unbatched_tensor_dict[key]): tensor_shape = shape_utils.combined_static_and_dynamic_shape( padded_tensor) slice_begin = tf.zeros([len(tensor_shape)], dtype=tf.int32) slice_size = tf.stack( [num_gt] + [-1 if dim is None else dim for dim in tensor_shape[1:]]) unpadded_tensor = tf.slice(padded_tensor, slice_begin, slice_size) unpadded_tensor_list.append(unpadded_tensor) unbatched_unpadded_tensor_dict[key] = unpadded_tensor_list unbatched_tensor_dict.update(unbatched_unpadded_tensor_dict) return unbatched_tensor_dict def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, postprocess_on_cpu=False): """Creates a model function for `Estimator`. Args: detection_model_fn: Function that returns a `DetectionModel` instance. configs: Dictionary of pipeline config objects. hparams: `HParams` object. use_tpu: Boolean indicating whether model should be constructed for use on TPU. postprocess_on_cpu: When use_tpu and postprocess_on_cpu is true, postprocess is scheduled on the host cpu. Returns: `model_fn` for `Estimator`. """ train_config = configs['train_config'] eval_input_config = configs['eval_input_config'] eval_config = configs['eval_config'] def model_fn(features, labels, mode, params=None): """Constructs the object detection model. Args: features: Dictionary of feature tensors, returned from `input_fn`. labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL, otherwise None. mode: Mode key from tf.estimator.ModeKeys. params: Parameter dictionary passed from the estimator. Returns: An `EstimatorSpec` that encapsulates the model and its serving configurations. """ params = params or {} total_loss, train_op, detections, export_outputs = None, None, None, None is_training = mode == tf.estimator.ModeKeys.TRAIN # Make sure to set the Keras learning phase. True during training, # False for inference. tf.keras.backend.set_learning_phase(is_training) detection_model = detection_model_fn( is_training=is_training, add_summaries=(not use_tpu)) scaffold_fn = None if mode == tf.estimator.ModeKeys.TRAIN: labels = unstack_batch( labels, unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors) elif mode == tf.estimator.ModeKeys.EVAL: # For evaling on train data, it is necessary to check whether groundtruth # must be unpadded. boxes_shape = ( labels[fields.InputDataFields.groundtruth_boxes].get_shape() .as_list()) unpad_groundtruth_tensors = boxes_shape[1] is not None and not use_tpu labels = unstack_batch( labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors) if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL): gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes] gt_classes_list = labels[fields.InputDataFields.groundtruth_classes] gt_masks_list = None if fields.InputDataFields.groundtruth_instance_masks in labels: gt_masks_list = labels[ fields.InputDataFields.groundtruth_instance_masks] gt_keypoints_list = None if fields.InputDataFields.groundtruth_keypoints in labels: gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints] gt_weights_list = None if fields.InputDataFields.groundtruth_weights in labels: gt_weights_list = labels[fields.InputDataFields.groundtruth_weights] gt_confidences_list = None if fields.InputDataFields.groundtruth_confidences in labels: gt_confidences_list = labels[ fields.InputDataFields.groundtruth_confidences] gt_is_crowd_list = None if fields.InputDataFields.groundtruth_is_crowd in labels: gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd] detection_model.provide_groundtruth( groundtruth_boxes_list=gt_boxes_list, groundtruth_classes_list=gt_classes_list, groundtruth_confidences_list=gt_confidences_list, groundtruth_masks_list=gt_masks_list, groundtruth_keypoints_list=gt_keypoints_list, groundtruth_weights_list=gt_weights_list, groundtruth_is_crowd_list=gt_is_crowd_list) preprocessed_images = features[fields.InputDataFields.image] if use_tpu and train_config.use_bfloat16: with tf.contrib.tpu.bfloat16_scope(): prediction_dict = detection_model.predict( preprocessed_images, features[fields.InputDataFields.true_image_shape]) for k, v in prediction_dict.items(): if v.dtype == tf.bfloat16: prediction_dict[k] = tf.cast(v, tf.float32) else: prediction_dict = detection_model.predict( preprocessed_images, features[fields.InputDataFields.true_image_shape]) def postprocess_wrapper(args): return detection_model.postprocess(args[0], args[1]) if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT): if use_tpu and postprocess_on_cpu: detections = tf.contrib.tpu.outside_compilation( postprocess_wrapper, (prediction_dict, features[fields.InputDataFields.true_image_shape])) else: detections = postprocess_wrapper(( prediction_dict, features[fields.InputDataFields.true_image_shape])) if mode == tf.estimator.ModeKeys.TRAIN: if train_config.fine_tune_checkpoint and hparams.load_pretrained: if not train_config.fine_tune_checkpoint_type: # train_config.from_detection_checkpoint field is deprecated. For # backward compatibility, set train_config.fine_tune_checkpoint_type # based on train_config.from_detection_checkpoint. if train_config.from_detection_checkpoint: train_config.fine_tune_checkpoint_type = 'detection' else: train_config.fine_tune_checkpoint_type = 'classification' asg_map = detection_model.restore_map( fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type, load_all_detection_checkpoint_vars=( train_config.load_all_detection_checkpoint_vars)) available_var_map = ( variables_helper.get_variables_available_in_checkpoint( asg_map, train_config.fine_tune_checkpoint, include_global_step=False)) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint, available_var_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint, available_var_map) if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL): losses_dict = detection_model.loss( prediction_dict, features[fields.InputDataFields.true_image_shape]) losses = [loss_tensor for loss_tensor in losses_dict.values()] if train_config.add_regularization_loss: regularization_losses = detection_model.regularization_losses() if regularization_losses: regularization_loss = tf.add_n( regularization_losses, name='regularization_loss') losses.append(regularization_loss) losses_dict['Loss/regularization_loss'] = regularization_loss total_loss = tf.add_n(losses, name='total_loss') losses_dict['Loss/total_loss'] = total_loss if 'graph_rewriter_config' in configs: graph_rewriter_fn = graph_rewriter_builder.build( configs['graph_rewriter_config'], is_training=is_training) graph_rewriter_fn() # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we # can write learning rate summaries on TPU without host calls. global_step = tf.train.get_or_create_global_step() training_optimizer, optimizer_summary_vars = optimizer_builder.build( train_config.optimizer) if mode == tf.estimator.ModeKeys.TRAIN: if use_tpu: training_optimizer = tf.contrib.tpu.CrossShardOptimizer( training_optimizer) # Optionally freeze some layers by setting their gradients to be zero. trainable_variables = None include_variables = ( train_config.update_trainable_variables if train_config.update_trainable_variables else None) exclude_variables = ( train_config.freeze_variables if train_config.freeze_variables else None) trainable_variables = tf.contrib.framework.filter_variables( tf.trainable_variables(), include_patterns=include_variables, exclude_patterns=exclude_variables) clip_gradients_value = None if train_config.gradient_clipping_by_norm > 0: clip_gradients_value = train_config.gradient_clipping_by_norm if not use_tpu: for var in optimizer_summary_vars: tf.summary.scalar(var.op.name, var) summaries = [] if use_tpu else None if train_config.summarize_gradients: summaries = ['gradients', 'gradient_norm', 'global_gradient_norm'] train_op = tf.contrib.layers.optimize_loss( loss=total_loss, global_step=global_step, learning_rate=None, clip_gradients=clip_gradients_value, optimizer=training_optimizer, update_ops=detection_model.updates(), variables=trainable_variables, summaries=summaries, name='') # Preventing scope prefix on all variables. if mode == tf.estimator.ModeKeys.PREDICT: exported_output = exporter_lib.add_output_tensor_nodes(detections) export_outputs = { tf.saved_model.signature_constants.PREDICT_METHOD_NAME: tf.estimator.export.PredictOutput(exported_output) } eval_metric_ops = None scaffold = None if mode == tf.estimator.ModeKeys.EVAL: class_agnostic = ( fields.DetectionResultFields.detection_classes not in detections) groundtruth = _prepare_groundtruth_for_eval( detection_model, class_agnostic, eval_input_config.max_number_of_boxes) use_original_images = fields.InputDataFields.original_image in features if use_original_images: eval_images = features[fields.InputDataFields.original_image] true_image_shapes = tf.slice( features[fields.InputDataFields.true_image_shape], [0, 0], [-1, 3]) original_image_spatial_shapes = features[fields.InputDataFields .original_image_spatial_shape] else: eval_images = features[fields.InputDataFields.image] true_image_shapes = None original_image_spatial_shapes = None eval_dict = eval_util.result_dict_for_batched_example( eval_images, features[inputs.HASH_KEY], detections, groundtruth, class_agnostic=class_agnostic, scale_to_absolute=True, original_image_spatial_shapes=original_image_spatial_shapes, true_image_shapes=true_image_shapes) if class_agnostic: category_index = label_map_util.create_class_agnostic_category_index() else: category_index = label_map_util.create_category_index_from_labelmap( eval_input_config.label_map_path) vis_metric_ops = None if not use_tpu and use_original_images: eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections( category_index, max_examples_to_draw=eval_config.num_visualizations, max_boxes_to_draw=eval_config.max_num_boxes_to_visualize, min_score_thresh=eval_config.min_score_threshold, use_normalized_coordinates=False) vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops( eval_dict) # Eval metrics on a single example. eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators( eval_config, list(category_index.values()), eval_dict) for loss_key, loss_tensor in iter(losses_dict.items()): eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor) for var in optimizer_summary_vars: eval_metric_ops[var.op.name] = (var, tf.no_op()) if vis_metric_ops is not None: eval_metric_ops.update(vis_metric_ops) eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()} if eval_config.use_moving_averages: variable_averages = tf.train.ExponentialMovingAverage(0.0) variables_to_restore = variable_averages.variables_to_restore() keep_checkpoint_every_n_hours = ( train_config.keep_checkpoint_every_n_hours) saver = tf.train.Saver( variables_to_restore, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours) scaffold = tf.train.Scaffold(saver=saver) # EVAL executes on CPU, so use regular non-TPU EstimatorSpec. if use_tpu and mode != tf.estimator.ModeKeys.EVAL: return tf.contrib.tpu.TPUEstimatorSpec( mode=mode, scaffold_fn=scaffold_fn, predictions=detections, loss=total_loss, train_op=train_op, eval_metrics=eval_metric_ops, export_outputs=export_outputs) else: if scaffold is None: keep_checkpoint_every_n_hours = ( train_config.keep_checkpoint_every_n_hours) saver = tf.train.Saver( sharded=True, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, save_relative_paths=True) tf.add_to_collection(tf.GraphKeys.SAVERS, saver) scaffold = tf.train.Scaffold(saver=saver) return tf.estimator.EstimatorSpec( mode=mode, predictions=detections, loss=total_loss, train_op=train_op, eval_metric_ops=eval_metric_ops, export_outputs=export_outputs, scaffold=scaffold) return model_fn def create_estimator_and_inputs(run_config, hparams, pipeline_config_path, config_override=None, train_steps=None, sample_1_of_n_eval_examples=1, sample_1_of_n_eval_on_train_examples=1, model_fn_creator=create_model_fn, use_tpu_estimator=False, use_tpu=False, num_shards=1, params=None, override_eval_num_epochs=True, save_final_config=False, postprocess_on_cpu=False, export_to_tpu=None, **kwargs): """Creates `Estimator`, input functions, and steps. Args: run_config: A `RunConfig`. hparams: A `HParams`. pipeline_config_path: A path to a pipeline config file. config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to override the config from `pipeline_config_path`. train_steps: Number of training steps. If None, the number of training steps is set from the `TrainConfig` proto. sample_1_of_n_eval_examples: Integer representing how often an eval example should be sampled. If 1, will sample all examples. sample_1_of_n_eval_on_train_examples: Similar to `sample_1_of_n_eval_examples`, except controls the sampling of training data for evaluation. model_fn_creator: A function that creates a `model_fn` for `Estimator`. Follows the signature: * Args: * `detection_model_fn`: Function that returns `DetectionModel` instance. * `configs`: Dictionary of pipeline config objects. * `hparams`: `HParams` object. * Returns: `model_fn` for `Estimator`. use_tpu_estimator: Whether a `TPUEstimator` should be returned. If False, an `Estimator` will be returned. use_tpu: Boolean, whether training and evaluation should run on TPU. Only used if `use_tpu_estimator` is True. num_shards: Number of shards (TPU cores). Only used if `use_tpu_estimator` is True. params: Parameter dictionary passed from the estimator. Only used if `use_tpu_estimator` is True. override_eval_num_epochs: Whether to overwrite the number of epochs to 1 for eval_input. save_final_config: Whether to save final config (obtained after applying overrides) to `estimator.model_dir`. postprocess_on_cpu: When use_tpu and postprocess_on_cpu are true, postprocess is scheduled on the host cpu. export_to_tpu: When use_tpu and export_to_tpu are true, `export_savedmodel()` exports a metagraph for serving on TPU besides the one on CPU. **kwargs: Additional keyword arguments for configuration override. Returns: A dictionary with the following fields: 'estimator': An `Estimator` or `TPUEstimator`. 'train_input_fn': A training input function. 'eval_input_fns': A list of all evaluation input functions. 'eval_input_names': A list of names for each evaluation input. 'eval_on_train_input_fn': An evaluation-on-train input function. 'predict_input_fn': A prediction input function. 'train_steps': Number of training steps. Either directly from input or from configuration. """ get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[ 'get_configs_from_pipeline_file'] merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[ 'merge_external_params_with_configs'] create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[ 'create_pipeline_proto_from_configs'] create_train_input_fn = MODEL_BUILD_UTIL_MAP['create_train_input_fn'] create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn'] create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn'] detection_model_fn_base = MODEL_BUILD_UTIL_MAP['detection_model_fn_base'] configs = get_configs_from_pipeline_file( pipeline_config_path, config_override=config_override) kwargs.update({ 'train_steps': train_steps, 'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples, 'use_bfloat16': configs['train_config'].use_bfloat16 and use_tpu }) if override_eval_num_epochs: kwargs.update({'eval_num_epochs': 1}) tf.logging.warning( 'Forced number of epochs for all eval validations to be 1.') configs = merge_external_params_with_configs( configs, hparams, kwargs_dict=kwargs) model_config = configs['model'] train_config = configs['train_config'] train_input_config = configs['train_input_config'] eval_config = configs['eval_config'] eval_input_configs = configs['eval_input_configs'] eval_on_train_input_config = copy.deepcopy(train_input_config) eval_on_train_input_config.sample_1_of_n_examples = ( sample_1_of_n_eval_on_train_examples) if override_eval_num_epochs and eval_on_train_input_config.num_epochs != 1: tf.logging.warning('Expected number of evaluation epochs is 1, but ' 'instead encountered `eval_on_train_input_config' '.num_epochs` = ' '{}. Overwriting `num_epochs` to 1.'.format( eval_on_train_input_config.num_epochs)) eval_on_train_input_config.num_epochs = 1 # update train_steps from config but only when non-zero value is provided if train_steps is None and train_config.num_steps != 0: train_steps = train_config.num_steps detection_model_fn = functools.partial( detection_model_fn_base, model_config=model_config) # Create the input functions for TRAIN/EVAL/PREDICT. train_input_fn = create_train_input_fn( train_config=train_config, train_input_config=train_input_config, model_config=model_config) eval_input_fns = [ create_eval_input_fn( eval_config=eval_config, eval_input_config=eval_input_config, model_config=model_config) for eval_input_config in eval_input_configs ] eval_input_names = [ eval_input_config.name for eval_input_config in eval_input_configs ] eval_on_train_input_fn = create_eval_input_fn( eval_config=eval_config, eval_input_config=eval_on_train_input_config, model_config=model_config) predict_input_fn = create_predict_input_fn( model_config=model_config, predict_input_config=eval_input_configs[0]) # Read export_to_tpu from hparams if not passed. if export_to_tpu is None: export_to_tpu = hparams.get('export_to_tpu', False) tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s', use_tpu, export_to_tpu) model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu, postprocess_on_cpu) if use_tpu_estimator: estimator = tf.contrib.tpu.TPUEstimator( model_fn=model_fn, train_batch_size=train_config.batch_size, # For each core, only batch size 1 is supported for eval. eval_batch_size=num_shards * 1 if use_tpu else 1, use_tpu=use_tpu, config=run_config, export_to_tpu=export_to_tpu, eval_on_tpu=False, # Eval runs on CPU, so disable eval on TPU params=params if params else {}) else: estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config) # Write the as-run pipeline config to disk. if run_config.is_chief and save_final_config: pipeline_config_final = create_pipeline_proto_from_configs(configs) config_util.save_pipeline_config(pipeline_config_final, estimator.model_dir) return dict( estimator=estimator, train_input_fn=train_input_fn, eval_input_fns=eval_input_fns, eval_input_names=eval_input_names, eval_on_train_input_fn=eval_on_train_input_fn, predict_input_fn=predict_input_fn, train_steps=train_steps) def create_train_and_eval_specs(train_input_fn, eval_input_fns, eval_on_train_input_fn, predict_input_fn, train_steps, eval_on_train_data=False, final_exporter_name='Servo', eval_spec_names=None): """Creates a `TrainSpec` and `EvalSpec`s. Args: train_input_fn: Function that produces features and labels on train data. eval_input_fns: A list of functions that produce features and labels on eval data. eval_on_train_input_fn: Function that produces features and labels for evaluation on train data. predict_input_fn: Function that produces features for inference. train_steps: Number of training steps. eval_on_train_data: Whether to evaluate model on training data. Default is False. final_exporter_name: String name given to `FinalExporter`. eval_spec_names: A list of string names for each `EvalSpec`. Returns: Tuple of `TrainSpec` and list of `EvalSpecs`. If `eval_on_train_data` is True, the last `EvalSpec` in the list will correspond to training data. The rest EvalSpecs in the list are evaluation datas. """ train_spec = tf.estimator.TrainSpec( input_fn=train_input_fn, max_steps=train_steps) if eval_spec_names is None: eval_spec_names = [str(i) for i in range(len(eval_input_fns))] eval_specs = [] for index, (eval_spec_name, eval_input_fn) in enumerate( zip(eval_spec_names, eval_input_fns)): # Uses final_exporter_name as exporter_name for the first eval spec for # backward compatibility. if index == 0: exporter_name = final_exporter_name else: exporter_name = '{}_{}'.format(final_exporter_name, eval_spec_name) exporter = tf.estimator.FinalExporter( name=exporter_name, serving_input_receiver_fn=predict_input_fn) eval_specs.append( tf.estimator.EvalSpec( name=eval_spec_name, input_fn=eval_input_fn, steps=None, exporters=exporter)) if eval_on_train_data: eval_specs.append( tf.estimator.EvalSpec( name='eval_on_train', input_fn=eval_on_train_input_fn, steps=None)) return train_spec, eval_specs def continuous_eval(estimator, model_dir, input_fn, train_steps, name): """Perform continuous evaluation on checkpoints written to a model directory. Args: estimator: Estimator object to use for evaluation. model_dir: Model directory to read checkpoints for continuous evaluation. input_fn: Input function to use for evaluation. train_steps: Number of training steps. This is used to infer the last checkpoint and stop evaluation loop. name: Namescope for eval summary. """ def terminate_eval(): tf.logging.info('Terminating eval after 180 seconds of no checkpoints') return True for ckpt in tf.contrib.training.checkpoints_iterator( model_dir, min_interval_secs=180, timeout=None, timeout_fn=terminate_eval): tf.logging.info('Starting Evaluation.') try: eval_results = estimator.evaluate( input_fn=input_fn, steps=None, checkpoint_path=ckpt, name=name) tf.logging.info('Eval results: %s' % eval_results) # Terminate eval job when final checkpoint is reached current_step = int(os.path.basename(ckpt).split('-')[1]) if current_step >= train_steps: tf.logging.info( 'Evaluation finished after training step %d' % current_step) break except tf.errors.NotFoundError: tf.logging.info( 'Checkpoint %s no longer exists, skipping checkpoint' % ckpt) def populate_experiment(run_config, hparams, pipeline_config_path, train_steps=None, eval_steps=None, model_fn_creator=create_model_fn, **kwargs): """Populates an `Experiment` object. EXPERIMENT CLASS IS DEPRECATED. Please switch to tf.estimator.train_and_evaluate. As an example, see model_main.py. Args: run_config: A `RunConfig`. hparams: A `HParams`. pipeline_config_path: A path to a pipeline config file. train_steps: Number of training steps. If None, the number of training steps is set from the `TrainConfig` proto. eval_steps: Number of evaluation steps per evaluation cycle. If None, the number of evaluation steps is set from the `EvalConfig` proto. model_fn_creator: A function that creates a `model_fn` for `Estimator`. Follows the signature: * Args: * `detection_model_fn`: Function that returns `DetectionModel` instance. * `configs`: Dictionary of pipeline config objects. * `hparams`: `HParams` object. * Returns: `model_fn` for `Estimator`. **kwargs: Additional keyword arguments for configuration override. Returns: An `Experiment` that defines all aspects of training, evaluation, and export. """ tf.logging.warning('Experiment is being deprecated. Please use ' 'tf.estimator.train_and_evaluate(). See model_main.py for ' 'an example.') train_and_eval_dict = create_estimator_and_inputs( run_config, hparams, pipeline_config_path, train_steps=train_steps, eval_steps=eval_steps, model_fn_creator=model_fn_creator, save_final_config=True, **kwargs) estimator = train_and_eval_dict['estimator'] train_input_fn = train_and_eval_dict['train_input_fn'] eval_input_fns = train_and_eval_dict['eval_input_fns'] predict_input_fn = train_and_eval_dict['predict_input_fn'] train_steps = train_and_eval_dict['train_steps'] export_strategies = [ tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy( serving_input_fn=predict_input_fn) ] return tf.contrib.learn.Experiment( estimator=estimator, train_input_fn=train_input_fn, eval_input_fn=eval_input_fns[0], train_steps=train_steps, eval_steps=None, export_strategies=export_strategies, eval_delay_secs=120, )