// Copyright (C) 2015 Davis E. King (davis@dlib.net) | |
// License: Boost Software License See LICENSE.txt for the full license. | |
namespace dlib | |
{ | |
// ---------------------------------------------------------------------------------------- | |
template < | |
typename... T | |
> | |
auto tuple_tail( | |
const std::tuple<T...>& item | |
); | |
/*! | |
ensures | |
- returns a tuple that contains everything in item except for tuple_head(item). | |
The items will be in the same order as they are in item, just without | |
tuple_head(item). | |
- This function will correctly handle nested tuples. | |
!*/ | |
template <typename... T> | |
auto tuple_head ( | |
const std::tuple<T...>& item | |
); | |
/*! | |
ensures | |
- returns a copy of the first thing in the tuple that isn't a std::tuple. | |
Essentially, this function calls std::get<0>() recursively on item until | |
a non-std::tuple object is found. | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
template <typename T> | |
double get_learning_rate_multiplier( | |
const T& obj | |
); | |
/*! | |
ensures | |
- if (obj has a get_learning_rate_multiplier() member function) then | |
- returns obj.get_learning_rate_multiplier() | |
- else | |
- returns 1 | |
!*/ | |
template <typename T> | |
void set_learning_rate_multiplier( | |
T& obj, | |
double learning_rate_multiplier | |
); | |
/*! | |
requires | |
- learning_rate_multiplier >= 0 | |
ensures | |
- if (obj has a set_learning_rate_multiplier() member function) then | |
- calls obj.set_learning_rate_multiplier(learning_rate_multiplier) | |
- else | |
- does nothing | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
template <typename T> | |
double get_bias_learning_rate_multiplier( | |
const T& obj | |
); | |
/*! | |
ensures | |
- if (obj has a get_bias_learning_rate_multiplier() member function) then | |
- returns obj.get_bias_learning_rate_multiplier() | |
- else | |
- returns 1 | |
!*/ | |
template <typename T> | |
void set_bias_learning_rate_multiplier( | |
T& obj, | |
double bias_learning_rate_multiplier | |
); | |
/*! | |
requires | |
- bias_learning_rate_multiplier >= 0 | |
ensures | |
- if (obj has a set_bias_learning_rate_multiplier() member function) then | |
- calls obj.set_bias_learning_rate_multiplier(bias_learning_rate_multiplier) | |
- else | |
- does nothing | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
template <typename T> | |
double get_weight_decay_multiplier( | |
const T& obj | |
); | |
/*! | |
ensures | |
- if (obj has a get_weight_decay_multiplier() member function) then | |
- returns obj.get_weight_decay_multiplier() | |
- else | |
- returns 1 | |
!*/ | |
template <typename T> | |
void set_weight_decay_multiplier( | |
T& obj, | |
double weight_decay_multiplier | |
); | |
/*! | |
requires | |
- weight_decay_multiplier >= 0 | |
ensures | |
- if (obj has a set_weight_decay_multiplier() member function) then | |
- calls obj.set_weight_decay_multiplier(weight_decay_multiplier) | |
- else | |
- does nothing | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
template <typename T> | |
double get_bias_weight_decay_multiplier( | |
const T& obj | |
); | |
/*! | |
ensures | |
- if (obj has a get_bias_weight_decay_multiplier() member function) then | |
- returns obj.get_bias_weight_decay_multiplier() | |
- else | |
- returns 1 | |
!*/ | |
template <typename T> | |
void set_bias_weight_decay_multiplier( | |
T& obj, | |
double bias_weight_decay_multiplier | |
); | |
/*! | |
requires: | |
- bias_weight_decay_multiplier >= 0 | |
ensures | |
- if (obj has a set_bias_weight_decay_multiplier() member function) then | |
- calls obj.set_bias_weight_decay_multiplier(bias_weight_decay_multiplier) | |
- else | |
- does nothing | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
template <typename T> | |
void disable_bias( | |
T& obj | |
); | |
/*! | |
ensures | |
- if (obj has a disable_bias() member function) then | |
- calls obj.disable_bias() | |
- else | |
- does nothing | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
bool dnn_prefer_fastest_algorithms( | |
); | |
/*! | |
ensures | |
- If dlib should prefer to use fast algorithms rather than ones that use less | |
RAM then this function returns true and false otherwise. | |
- On program startup this function will default to true. | |
!*/ | |
void set_dnn_prefer_fastest_algorithms( | |
); | |
/*! | |
ensures | |
- #dnn_prefer_fastest_algorithms() == true | |
!*/ | |
void set_dnn_prefer_smallest_algorithms( | |
); | |
/*! | |
ensures | |
- #dnn_prefer_fastest_algorithms() == false | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
template < | |
typename T | |
> | |
class sstack | |
{ | |
/*! | |
WHAT THIS OBJECT REPRESENTS | |
This is a basic stack of T objects. It contains no data itself but simply | |
points to a memory range of T object and allows you to access that block of | |
T objects as a stack. | |
!*/ | |
public: | |
typedef T value_type; | |
sstack() = delete; | |
sstack ( | |
T* data, | |
size_t s | |
); | |
/*! | |
ensures | |
- #size() == s | |
- #top() == *data | |
- #pop(i).top() == data[i] | |
!*/ | |
const T& top( | |
) const; | |
/*! | |
requires | |
- size() != 0 | |
ensures | |
- returns the top element of the stack. | |
!*/ | |
T& top( | |
); | |
/*! | |
requires | |
- size() != 0 | |
ensures | |
- returns the top element of the stack. | |
!*/ | |
size_t size( | |
) const; | |
/*! | |
ensures | |
- returns the number of elements in this stack. | |
!*/ | |
sstack pop( | |
size_t num = 1 | |
); | |
/*! | |
requires | |
- num <= size() | |
ensures | |
- returns a reference to the sub-stack S such that: | |
- S.size() == size()-num. | |
- S.top() is num elements down the stack. | |
!*/ | |
}; | |
template < | |
typename T | |
> | |
sstack<T> make_sstack( | |
std::vector<T>& item | |
) { return sstack<T>(item.data(), item.size()); } | |
/*! | |
ensures | |
- returns a sstack that sits on top of the given std::vector. | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
template < | |
typename LAYER_DETAILS, | |
typename SUBNET | |
> | |
class add_layer | |
{ | |
/*! | |
REQUIREMENTS ON LAYER_DETAILS | |
- Must be a type that implements the EXAMPLE_COMPUTATIONAL_LAYER_ interface | |
defined in layers_abstract.h | |
REQUIREMENTS ON SUBNET | |
- One of the following must be true: | |
- SUBNET implements the EXAMPLE_INPUT_LAYER interface defined in | |
input_abstract.h. | |
- SUBNET is an add_layer object. | |
- SUBNET is an add_tag_layer object. | |
- SUBNET is an add_skip_layer object. | |
- SUBNET is a repeat object. | |
WHAT THIS OBJECT REPRESENTS | |
This object represents a deep neural network. In particular, it is a tool | |
for adding another layer on top of the neural network of type SUBNET, which | |
is specified as a template argument. The specific layer added is defined | |
by the LAYER_DETAILS details template argument. | |
!*/ | |
public: | |
typedef LAYER_DETAILS layer_details_type; | |
typedef SUBNET subnet_type; | |
typedef typename subnet_type::input_type input_type; | |
// num_computational_layers will always give the number of layers in the network | |
// that transform tensors (i.e. layers defined by something that implements the | |
// EXAMPLE_COMPUTATIONAL_LAYER_ interface). This is all the layers except for | |
// loss, tag, and skip layers. | |
const static size_t num_computational_layers = subnet_type::num_computational_layers + 1; | |
// num_layers counts all the layers in the network regardless of their type. | |
const static size_t num_layers = subnet_type::num_layers + 1; | |
add_layer( | |
); | |
/*! | |
ensures | |
- default constructs all the layers in this network. | |
- #sample_expansion_factor() == 0 | |
!*/ | |
add_layer(const add_layer&) = default; | |
add_layer(add_layer&&) = default; | |
add_layer& operator=(add_layer&&) = default; | |
add_layer& operator=(const add_layer&) = default; | |
/*! | |
ensures | |
- this object is copyable and movable. | |
!*/ | |
template <typename T, typename U> | |
add_layer( | |
const add_layer<T,U>& item | |
); | |
/*! | |
ensures | |
- This constructor allows you to copy neural network objects from one to | |
another as long as their corresponding layers can be constructed from | |
each other. | |
- #layer_details() == layer_details_type(item.layer_details()) | |
- #subnet() == subnet_type(item.subnet()) | |
- #sample_expansion_factor() == item.sample_expansion_factor() | |
!*/ | |
template <typename ...T, typename LD, typename ...U> | |
add_layer( | |
const std::tuple<LD,U...>& layer_det, | |
T&& ...args | |
); | |
/*! | |
ensures | |
- #layer_details() == layer_details_type(tuple_head(layer_det)) | |
- #subnet() == subnet_type(tuple_tail(layer_det),args) | |
- #sample_expansion_factor() == 0 | |
!*/ | |
template <typename ...T> | |
add_layer( | |
const layer_details_type& layer_det, | |
T&& ...args | |
); | |
/*! | |
ensures | |
- #layer_details() == layer_details_type(layer_det) | |
- #subnet() == subnet_type(args) | |
- #sample_expansion_factor() == 0 | |
!*/ | |
template <typename ...T> | |
add_layer( | |
T&& ...args | |
); | |
/*! | |
ensures | |
- This version of the constructor is only called if layer_details_type | |
can't be constructed from the first thing in args. In this case, the | |
args are simply passed on to the sub layers in their entirety. | |
- #layer_details() == layer_details_type() | |
- #subnet() == subnet_type(args) | |
- #sample_expansion_factor() == 0 | |
!*/ | |
template <typename ...T> | |
add_layer( | |
layer_details_type&& layer_det, | |
T&& ...args | |
); | |
/*! | |
ensures | |
- #layer_details() == layer_det | |
- #subnet() == subnet_type(args) | |
- #sample_expansion_factor() == 0 | |
!*/ | |
template <typename forward_iterator> | |
void to_tensor ( | |
forward_iterator ibegin, | |
forward_iterator iend, | |
resizable_tensor& data | |
) const; | |
/*! | |
requires | |
- [ibegin, iend) is an iterator range over input_type objects. | |
- std::distance(ibegin,iend) > 0 | |
ensures | |
- Converts the iterator range into a tensor and stores it into #data. | |
- #data.num_samples()%distance(ibegin,iend) == 0. | |
- #sample_expansion_factor() == #data.num_samples()/distance(ibegin,iend). | |
- #sample_expansion_factor() > 0 | |
- The data in the ith sample of #data corresponds to the input_type object | |
*(ibegin+i/#sample_expansion_factor()). | |
- Invokes data.async_copy_to_device() so that the data begins transferring | |
to the GPU device, if present. | |
- This function is implemented by calling the to_tensor() routine defined | |
at the input layer of this network. | |
!*/ | |
unsigned int sample_expansion_factor ( | |
) const; | |
/*! | |
ensures | |
- When to_tensor() is invoked on this network's input layer it converts N | |
input objects into M samples, all stored inside a resizable_tensor. It | |
is always the case that M is some integer multiple of N. | |
sample_expansion_factor() returns the value of this multiplier. To be | |
very specific, it is always true that M==I*N where I is some integer. | |
This integer I is what is returned by sample_expansion_factor(). | |
!*/ | |
const subnet_type& subnet( | |
) const; | |
/*! | |
ensures | |
- returns the immediate subnetwork of *this network. | |
!*/ | |
subnet_type& subnet( | |
); | |
/*! | |
ensures | |
- returns the immediate subnetwork of *this network. | |
!*/ | |
const layer_details_type& layer_details( | |
) const; | |
/*! | |
ensures | |
- returns the layer_details_type instance that defines the behavior of the | |
layer at the top of this network. I.e. returns the layer details that | |
defines the behavior of the layer nearest to the network output rather | |
than the input layer. | |
!*/ | |
layer_details_type& layer_details( | |
); | |
/*! | |
ensures | |
- returns the layer_details_type instance that defines the behavior of the | |
layer at the top of this network. I.e. returns the layer details that | |
defines the behavior of the layer nearest to the network output rather | |
than the input layer. | |
!*/ | |
template <typename forward_iterator> | |
const tensor& operator() ( | |
forward_iterator ibegin, | |
forward_iterator iend | |
); | |
/*! | |
requires | |
- [ibegin, iend) is an iterator range over input_type objects. | |
- std::distance(ibegin,iend) > 0 | |
ensures | |
- runs [ibegin,iend) through the network and returns the results. | |
In particular, this function performs: | |
to_tensor(ibegin,iend,temp_tensor); | |
return forward(temp_tensor); | |
- The return value from this function is also available in #get_output(). | |
i.e. this function returns #get_output(). | |
- have_same_dimensions(#get_gradient_input(), #get_output()) == true. | |
- All elements of #get_gradient_input() are set to 0. | |
i.e. calling this function clears out #get_gradient_input() and ensures | |
it has the same dimensions as the most recent output. | |
!*/ | |
const tensor& operator() ( | |
const input_type& x | |
); | |
/*! | |
ensures | |
- runs a single x through the network and returns the output. | |
I.e. returns (*this)(&x, &x+1); | |
!*/ | |
const tensor& forward( | |
const tensor& x | |
); | |
/*! | |
requires | |
- sample_expansion_factor() != 0 | |
(i.e. to_tensor() must have been called to set sample_expansion_factor() | |
to something non-zero.) | |
- x.num_samples()%sample_expansion_factor() == 0 | |
- x.num_samples() > 0 | |
ensures | |
- Runs x through the network and returns the results. In particular, this | |
function performs the equivalent of: | |
subnet().forward(x); | |
if (this is the first time forward() has been called) then | |
layer_details().setup(subnet()); | |
layer_details().forward(subnet(), get_output()); | |
- The return value from this function is also available in #get_output(). | |
i.e. this function returns #get_output(). | |
- have_same_dimensions(#get_gradient_input(), #get_output()) == true | |
- All elements of #get_gradient_input() are set to 0. | |
i.e. calling this function clears out #get_gradient_input() and ensures | |
it has the same dimensions as the most recent output. | |
!*/ | |
const tensor& get_output( | |
) const; | |
/*! | |
ensures | |
- returns the output for the last tensor that was run through the network. | |
If nothing has been run through the network yet then returns an empty | |
tensor. | |
!*/ | |
tensor& get_gradient_input( | |
); | |
/*! | |
ensures | |
- returns the error gradient for this network. That is, this is the error | |
gradient that this network will use to compute parameter gradients when | |
back_propagate_error() is called. Therefore, when performing back | |
propagation, layers that sit on top of this network layer write their | |
back-propagated error gradients into get_gradient_input(). Or to put it | |
another way, during back-propagation, layers take the contents of their | |
get_gradient_input() and back-propagate it through themselves and store | |
the result into their subnetwork's get_gradient_input(). | |
This means you should consider get_gradient_input() as an input to the | |
back_propagate_error() method. | |
!*/ | |
const tensor& get_final_data_gradient( | |
) const; | |
/*! | |
ensures | |
- if back_propagate_error() has been called to back-propagate a gradient | |
through this network then you can call get_final_data_gradient() to | |
obtain the last data gradient computed. That is, this function returns | |
the gradient of the network with respect to its inputs. | |
- Note that there is only one "final data gradient" for an entire network, | |
not one per layer, since there is only one input to the entire network. | |
!*/ | |
const tensor& get_parameter_gradient( | |
) const; | |
/*! | |
ensures | |
- if back_propagate_error() has been called then you can call | |
get_parameter_gradient() to find the gradient of this layer's parameters. | |
When we update the parameters by calling update_parameters(), it will use | |
the gradient in get_parameter_gradient() to perform the update. | |
Therefore, you should consider get_parameter_gradient() as an input to | |
update_parameters(). | |
!*/ | |
tensor& get_parameter_gradient ( | |
); | |
/*! | |
ensures | |
- returns a non-const reference to the tensor returned by the above | |
get_parameter_gradient() method. You could use this method to modify the | |
parameter gradient in some way before invoking update_parameters(). | |
!*/ | |
void back_propagate_error( | |
const tensor& x | |
); | |
/*! | |
requires | |
- forward(x) was called to forward propagate x though the network. | |
Moreover, this was the most recent call to forward() and x has not been | |
subsequently modified in any way. | |
- get_gradient_input() has been set equal to the gradient of this network's | |
output with respect to some loss function. | |
ensures | |
- Back propagates the error gradient, get_gradient_input(), through this | |
network and computes parameter and data gradients, via backpropagation. | |
Specifically, this function populates get_final_data_gradient() and also, | |
for each layer, the tensor returned by get_parameter_gradient(). | |
- All elements of #get_gradient_input() are set to 0. | |
- have_same_dimensions(#get_final_data_gradient(), x) == true. | |
- have_same_dimensions(#get_parameter_gradient(), layer_details().get_layer_params()) == true. | |
- #get_final_data_gradient() contains the gradient of the network with | |
respect to x. | |
!*/ | |
void back_propagate_error( | |
const tensor& x, | |
const tensor& gradient_input | |
); | |
/*! | |
requires | |
- forward(x) was called to forward propagate x though the network. | |
Moreover, this was the most recent call to forward() and x has not been | |
subsequently modified in any way. | |
- have_same_dimensions(gradient_input, get_output()) == true | |
ensures | |
- This function is identical to the version of back_propagate_error() | |
defined immediately above except that it back-propagates gradient_input | |
through the network instead of get_gradient_input(). Therefore, this | |
version of back_propagate_error() is equivalent to performing: | |
get_gradient_input() = gradient_input; | |
back_propagate_error(x); | |
Except that calling back_propagate_error(x,gradient_input) avoids the | |
copy and is therefore slightly more efficient. | |
- All elements of #get_gradient_input() are set to 0. | |
- have_same_dimensions(#get_final_data_gradient(), x) == true. | |
- have_same_dimensions(#get_parameter_gradient(), layer_details().get_layer_params()) == true. | |
- #get_final_data_gradient() contains the gradient of the network with | |
respect to x. | |
!*/ | |
template <typename solver_type> | |
void update_parameters( | |
sstack<solver_type> solvers, | |
double learning_rate | |
); | |
/*! | |
requires | |
- solver_type is an implementation of the EXAMPLE_SOLVER interface defined | |
in solvers_abstract.h | |
- back_propagate_error() has been called. | |
- The given solvers have only ever been used with this network. That is, | |
if you want to call update_parameters() on some other neural network | |
object then you must NOT reuse the same solvers object. | |
- solvers.size() >= num_computational_layers | |
- 0 < learning_rate <= 1 | |
ensures | |
- Updates all the parameters in the network. In particular, we pass each | |
layer's parameter gradient (i.e. the tensor returned by the layer's | |
get_parameter_gradient() member) through that layer's corresponding | |
solver object. This produces a parameter delta vector which we add to | |
the layer's parameters. | |
- The solvers use the given learning rate. | |
!*/ | |
template <typename solver_type> | |
void update_parameters(std::vector<solver_type>& solvers, double learning_rate) | |
{ update_parameters(make_sstack(solvers), learning_rate); } | |
/*! | |
Convenience method for calling update_parameters() | |
!*/ | |
void clean( | |
); | |
/*! | |
ensures | |
- Causes the network to forget about everything but its parameters. | |
That is, for each layer we will have: | |
- get_output().num_samples() == 0 | |
- get_gradient_input().num_samples() == 0 | |
However, running new input data though this network will still produce | |
the same output it would have produced regardless of any calls to | |
clean(). The purpose of clean() is to compact the network object prior | |
to saving it to disk so that it takes up less space and the IO is | |
quicker. | |
- This also calls the .clean() method on any layer details objects that | |
define a .clean() method. | |
!*/ | |
}; | |
template <typename T, typename U> | |
std::ostream& operator<<(std::ostream& out, const add_layer<T,U>& item); | |
/*! | |
prints the network architecture to the given output stream. | |
!*/ | |
template <typename T, typename U> | |
void serialize(const add_layer<T,U>& item, std::ostream& out); | |
template <typename T, typename U> | |
void deserialize(add_layer<T,U>& item, std::istream& in); | |
/*! | |
provides serialization support | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
// ---------------------------------------------------------------------------------------- | |
// ---------------------------------------------------------------------------------------- | |
class no_label_type; | |
template < | |
typename LOSS_DETAILS, | |
typename SUBNET | |
> | |
class add_loss_layer | |
{ | |
/*! | |
REQUIREMENTS ON LOSS_DETAILS | |
- Must be a type that implements the EXAMPLE_LOSS_LAYER_ interface defined | |
in loss_abstract.h | |
REQUIREMENTS ON SUBNET | |
- One of the following must be true: | |
- SUBNET is an add_layer object. | |
- SUBNET is an add_tag_layer object. | |
- SUBNET is an add_skip_layer object. | |
- SUBNET is a repeat object. | |
WHAT THIS OBJECT REPRESENTS | |
This object represents a deep neural network. In particular, it is a tool | |
for adding a loss layer on top of the neural network of type SUBNET, which | |
is specified as a template argument. The specific layer added is defined | |
by the LOSS_DETAILS details template argument. Importantly, a loss layer | |
is the last layer in a deep neural network. So once it is added you can't | |
add any other layers of any type. | |
!*/ | |
public: | |
typedef LOSS_DETAILS loss_details_type; | |
typedef SUBNET subnet_type; | |
typedef typename subnet_type::input_type input_type; | |
const static size_t num_computational_layers = subnet_type::num_computational_layers; | |
const static size_t num_layers = subnet_type::num_layers + 1; | |
// If LOSS_DETAILS is an unsupervised loss then training_label_type==no_label_type. | |
// Otherwise it is defined as follows: | |
typedef typename LOSS_DETAILS::training_label_type training_label_type; | |
// Similarly, if LOSS_DETAILS doesn't provide any output conversion then | |
// output_label_type==no_label_type. | |
typedef typename LOSS_DETAILS::output_label_type output_label_type; | |
add_loss_layer() = default; | |
/*! | |
ensures | |
- default constructs all the layers in this network. | |
!*/ | |
add_loss_layer(const add_loss_layer&) = default; | |
add_loss_layer(add_loss_layer&&) = default; | |
add_loss_layer& operator=(add_loss_layer&&) = default; | |
add_loss_layer& operator=(const add_loss_layer&) = default; | |
/*! | |
ensures | |
- this object is copyable and movable. | |
!*/ | |
template <typename T, typename U> | |
add_loss_layer( | |
const add_loss_layer<T,U>& item | |
); | |
/*! | |
ensures | |
- This constructor allows you to copy neural network objects from one to | |
another as long as their corresponding layers can be constructed from | |
each other. | |
- #loss_details() == loss_details_type(item.loss_details()) | |
- #subnet() == subnet_type(item.subnet()) | |
!*/ | |
template <typename ...T> | |
add_loss_layer( | |
const LOSS_DETAILS& layer_det, | |
T&& ...args | |
); | |
/*! | |
ensures | |
- #loss_details() == loss_details_type(layer_det) | |
- #subnet() == subnet_type(args) | |
!*/ | |
template <typename ...T> | |
add_loss_layer( | |
LOSS_DETAILS&& layer_det, | |
T&& ...args | |
); | |
/*! | |
ensures | |
- #loss_details() == loss_details_type(layer_det) | |
- #subnet() == subnet_type(args) | |
!*/ | |
template <typename ...T> | |
add_loss_layer( | |
T&& ...args | |
); | |
/*! | |
ensures | |
- This version of the constructor is only called if loss_details_type can't | |
be constructed from the first thing in args. In this case, the args are | |
simply passed on to the sub layers in their entirety. | |
- #loss_details() == loss_details_type() | |
- #subnet() == subnet_type(args) | |
!*/ | |
const subnet_type& subnet( | |
) const; | |
/*! | |
ensures | |
- returns the immediate subnetwork of *this network. | |
!*/ | |
subnet_type& subnet( | |
); | |
/*! | |
ensures | |
- returns the immediate subnetwork of *this network. | |
!*/ | |
const loss_details_type& loss_details( | |
) const; | |
/*! | |
ensures | |
- returns the loss_details_type instance that defines the behavior of the | |
loss layer used by this network. | |
!*/ | |
loss_details_type& loss_details( | |
); | |
/*! | |
ensures | |
- returns the loss_details_type instance that defines the behavior of the | |
loss layer used by this network. | |
!*/ | |
template <typename forward_iterator> | |
void to_tensor ( | |
forward_iterator ibegin, | |
forward_iterator iend, | |
resizable_tensor& data | |
) const; | |
/*! | |
requires | |
- [ibegin, iend) is an iterator range over input_type objects. | |
- std::distance(ibegin,iend) > 0 | |
ensures | |
- Converts the iterator range into a tensor and stores it into #data. | |
- #data.num_samples()%distance(ibegin,iend) == 0. | |
- #sample_expansion_factor() == #data.num_samples()/distance(ibegin,iend). | |
- #sample_expansion_factor() > 0 | |
- The data in the ith sample of #data corresponds to the input_type object | |
*(ibegin+i/sample_expansion_factor()). | |
- Invokes data.async_copy_to_device() so that the data begins transferring | |
to the GPU device, if present. | |
- This function is implemented by calling the to_tensor() routine defined | |
at the input layer of this network. | |
!*/ | |
unsigned int sample_expansion_factor ( | |
) const; | |
/*! | |
ensures | |
- When to_tensor() is invoked on this network's input layer it converts N | |
input objects into M samples, all stored inside a resizable_tensor. It | |
is always the case that M is some integer multiple of N. | |
sample_expansion_factor() returns the value of this multiplier. To be | |
very specific, it is always true that M==I*N where I is some integer. | |
This integer I is what is returned by sample_expansion_factor(). | |
!*/ | |
// ------------- | |
const tensor& forward(const tensor& x | |
); | |
/*! | |
requires | |
- sample_expansion_factor() != 0 | |
(i.e. to_tensor() must have been called to set sample_expansion_factor() | |
to something non-zero.) | |
- x.num_samples()%sample_expansion_factor() == 0 | |
- x.num_samples() > 0 | |
ensures | |
- Runs x through the network and returns the results as a tensor. In particular, | |
this function just performs: | |
return subnet().forward(x); | |
So if you want to get the outputs as an output_label_type then call one of the | |
methods below instead, like operator(). | |
- The return value from this function is also available in #subnet().get_output(). | |
i.e. this function returns #subnet().get_output(). | |
- have_same_dimensions(#subnet().get_gradient_input(), #subnet().get_output()) == true | |
- All elements of #subnet().get_gradient_input() are set to 0. | |
i.e. calling this function clears out #subnet().get_gradient_input() and ensures | |
it has the same dimensions as the most recent output. | |
!*/ | |
template <typename output_iterator> | |
void operator() ( | |
const tensor& x, | |
output_iterator obegin | |
); | |
/*! | |
requires | |
- sample_expansion_factor() != 0 | |
(i.e. to_tensor() must have been called to set sample_expansion_factor() | |
to something non-zero.) | |
- x.num_samples()%sample_expansion_factor() == 0 | |
- x.num_samples() > 0 | |
- obegin == iterator pointing to the start of a range of | |
x.num_samples()/sample_expansion_factor() output_label_type elements. | |
ensures | |
- runs x through the network and writes the output to the range at obegin. | |
- loss_details().to_label() is used to write the network output into | |
obegin. | |
!*/ | |
template <typename forward_iterator, typename label_iterator> | |
void operator() ( | |
forward_iterator ibegin, | |
forward_iterator iend, | |
label_iterator obegin | |
); | |
/*! | |
requires | |
- [ibegin, iend) is an iterator range over input_type objects. | |
- std::distance(ibegin,iend) > 0 | |
- obegin == iterator pointing to the start of a range of | |
std::distance(ibegin,iend) output_label_type elements. | |
ensures | |
- runs [ibegin,iend) through the network and writes the output to the range | |
at obegin. | |
- loss_details().to_label() is used to write the network output into | |
obegin. | |
!*/ | |
// ------------- | |
const output_label_type& operator() ( | |
const input_type& x | |
); | |
/*! | |
ensures | |
- runs a single object, x, through the network and returns the output. | |
- loss_details().to_label() is used to convert the network output into a | |
output_label_type. | |
!*/ | |
template <typename iterable_type> | |
std::vector<output_label_type> operator() ( | |
const iterable_type& data, | |
size_t batch_size = 128 | |
); | |
/*! | |
requires | |
- batch_size > 0 | |
- data must have a .begin() and .end() that supply iterators over a | |
sequence of input_type elements. E.g. data could have a type of | |
std::vector<input_type> | |
ensures | |
- runs all the objects in data through the network and returns their | |
predicted labels. This means this function returns a vector V such that: | |
- V.size() == data.size() | |
- for all valid i: V[i] == the predicted label of data[i]. | |
- Elements of data are run through the network in batches of batch_size | |
items. Using a batch_size > 1 can be faster because it better exploits | |
the available hardware parallelism. | |
- loss_details().to_label() is used to convert the network output into a | |
output_label_type. | |
!*/ | |
template <typename ...T> | |
const output_label_type& process ( | |
const input_type& x, | |
T&& ...args | |
); | |
/*! | |
ensures | |
- This function is just like (*this)(x), i.e. it runs a single object, x, | |
through the network and returns the output. But we additionally pass the | |
given args to loss_details().to_label() as the 4th argument (or more, | |
depending on how many things are in args) when converting the network | |
output to an output_label_type. This is useful, for instance, with loss | |
layers like loss_mmod_ which has an optional adjust_threshold argument to | |
to_label() that adjusts the detection threshold. Therefore, for such | |
networks you could call them like: net.process(some_image, -0.5), and -0.5 | |
would be passed so the adjust_threshold argument of to_tensor(). | |
!*/ | |
template <typename iterable_type, typename ...T> | |
std::vector<output_label_type> process_batch ( | |
const iterable_type& data, | |
size_t batch_size, | |
T&& ...args | |
); | |
/*! | |
requires | |
- batch_size > 0 | |
- data must have a .begin() and .end() that supply iterators over a | |
sequence of input_type elements. E.g. data could have a type of | |
std::vector<input_type> | |
ensures | |
- This function is just like (*this)(data,batch_size), i.e. it runs a | |
bunch of objects through the network and returns the outputs. But we | |
additionally pass the given args to loss_details().to_label() as the 4th | |
argument (or more, depending on how many things are in args) when | |
converting the network output to output_label_types. This is useful, | |
for instance, with loss layers like loss_mmod_ which has an optional | |
adjust_threshold argument to to_label() that adjusts the detection | |
threshold. Therefore, for such networks you could call them like: | |
net.process_batch(std::vector<image_type>({some_image, another_image}), 128, -0.5), | |
and -0.5 would be passed so the adjust_threshold argument of to_tensor(). | |
!*/ | |
// ------------- | |
template <typename label_iterator> | |
double compute_loss ( | |
const tensor& x, | |
label_iterator lbegin | |
); | |
/*! | |
requires | |
- sample_expansion_factor() != 0 | |
(i.e. to_tensor() must have been called to set sample_expansion_factor() | |
to something non-zero.) | |
- x.num_samples()%sample_expansion_factor() == 0 | |
- x.num_samples() > 0 | |
- lbegin == iterator pointing to the start of a range of | |
x.num_samples()/sample_expansion_factor() training_label_type elements. | |
ensures | |
- runs x through the network, compares the output to the expected output | |
pointed to by lbegin, and returns the resulting loss. | |
- for all valid k: | |
- the expected label of the kth sample in x is *(lbegin+k/sample_expansion_factor()). | |
- This function does not update the network parameters. | |
- For sub-layers that are immediate inputs into the loss layer, we also populate the | |
sub-layer's get_gradient_input() tensor with the gradient of the loss with respect | |
to the sub-layer's output. | |
!*/ | |
template <typename forward_iterator, typename label_iterator> | |
double compute_loss ( | |
forward_iterator ibegin, | |
forward_iterator iend, | |
label_iterator lbegin | |
); | |
/*! | |
requires | |
- [ibegin, iend) is an iterator range over input_type objects. | |
- std::distance(ibegin,iend) > 0 | |
- lbegin == iterator pointing to the start of a range of | |
std::distance(ibegin,iend) training_label_type elements. | |
ensures | |
- runs [ibegin,iend) through the network, compares the output to the | |
expected output pointed to by lbegin, and returns the resulting loss. | |
- for all valid k: | |
- the expected label of *(ibegin+k) is *(lbegin+k). | |
- This function does not update the network parameters. | |
- For sub-layers that are immediate inputs into the loss layer, we also populate the | |
sub-layer's get_gradient_input() tensor with the gradient of the loss with respect | |
to the sub-layer's output. | |
!*/ | |
// ------------- | |
double compute_loss ( | |
const tensor& x | |
); | |
/*! | |
requires | |
- LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type. | |
- sample_expansion_factor() != 0 | |
(i.e. to_tensor() must have been called to set sample_expansion_factor() | |
to something non-zero.) | |
- x.num_samples()%sample_expansion_factor() == 0 | |
- x.num_samples() > 0 | |
ensures | |
- runs x through the network and returns the resulting loss. | |
- This function does not update the network parameters. | |
- For sub-layers that are immediate inputs into the loss layer, we also populate the | |
sub-layer's get_gradient_input() tensor with the gradient of the loss with respect | |
to the sub-layer's output. | |
!*/ | |
template <typename forward_iterator> | |
double compute_loss ( | |
forward_iterator ibegin, | |
forward_iterator iend, | |
); | |
/*! | |
requires | |
- LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type. | |
- [ibegin, iend) is an iterator range over input_type objects. | |
- std::distance(ibegin,iend) > 0 | |
ensures | |
- runs [ibegin,iend) through the network and returns the resulting loss. | |
- This function does not update the network parameters. | |
- For sub-layers that are immediate inputs into the loss layer, we also populate the | |
sub-layer's get_gradient_input() tensor with the gradient of the loss with respect | |
to the sub-layer's output. | |
!*/ | |
// ------------- | |
template <typename label_iterator> | |
double compute_parameter_gradients ( | |
const tensor& x, | |
label_iterator lbegin | |
); | |
/*! | |
requires | |
- sample_expansion_factor() != 0 | |
(i.e. to_tensor() must have been called to set sample_expansion_factor() | |
to something non-zero.) | |
- x.num_samples()%sample_expansion_factor() == 0 | |
- x.num_samples() > 0 | |
- lbegin == iterator pointing to the start of a range of | |
x.num_samples()/sample_expansion_factor() training_label_type elements. | |
ensures | |
- runs x through the network, compares the output to the expected output | |
pointed to by lbegin, and computes parameter and data gradients with | |
respect to the loss, via backpropagation. Specifically, this function | |
updates get_final_data_gradient() and also, for each layer, the tensor | |
returned by get_parameter_gradient(). | |
- for all valid k: | |
- the expected label of the kth sample in x is *(lbegin+k/sample_expansion_factor()). | |
- returns compute_loss(x,lbegin) | |
!*/ | |
template <typename forward_iterator, typename label_iterator> | |
double compute_parameter_gradients ( | |
forward_iterator ibegin, | |
forward_iterator iend, | |
label_iterator lbegin | |
); | |
/*! | |
requires | |
- [ibegin, iend) is an iterator range over input_type objects. | |
- std::distance(ibegin,iend) > 0 | |
- lbegin == iterator pointing to the start of a range of | |
std::distance(ibegin,iend) training_label_type elements. | |
ensures | |
- runs [ibegin,iend) through the network, compares the output to the | |
expected output pointed to by lbegin, and computes parameter and data | |
gradients with respect to the loss, via backpropagation. Specifically, | |
this function updates get_final_data_gradient() and also, for each layer, | |
the tensor returned by get_parameter_gradient(). | |
- for all valid k: | |
- the expected label of *(ibegin+k) is *(lbegin+k). | |
- returns compute_loss(ibegin,iend,lbegin) | |
!*/ | |
double compute_parameter_gradients ( | |
const tensor& x | |
); | |
/*! | |
requires | |
- LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type. | |
- sample_expansion_factor() != 0 | |
(i.e. to_tensor() must have been called to set sample_expansion_factor() | |
to something non-zero.) | |
- x.num_samples()%sample_expansion_factor() == 0 | |
- x.num_samples() > 0 | |
ensures | |
- runs x through the network and computes parameter and data gradients with | |
respect to the loss, via backpropagation. Specifically, this function | |
updates get_final_data_gradient() and also, for each layer, the tensor | |
returned by get_parameter_gradient(). | |
- returns compute_loss(x) | |
!*/ | |
template <typename forward_iterator> | |
double compute_parameter_gradients ( | |
forward_iterator ibegin, | |
forward_iterator iend | |
); | |
/*! | |
requires | |
- LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type. | |
- [ibegin, iend) is an iterator range over input_type objects. | |
- std::distance(ibegin,iend) > 0 | |
ensures | |
- runs [ibegin,iend) through the network and computes parameter and data | |
gradients with respect to the loss, via backpropagation. Specifically, | |
this function updates get_final_data_gradient() and also, for each layer, | |
the tensor returned by get_parameter_gradient(). | |
- returns compute_loss(ibegin,iend) | |
!*/ | |
template <typename solver_type> | |
void update_parameters ( | |
sstack<solver_type> solvers, | |
double learning_rate | |
); | |
/*! | |
requires | |
- solver_type is an implementation of the EXAMPLE_SOLVER interface defined | |
in solvers_abstract.h | |
- compute_parameter_gradients() has been called. | |
- The given solvers have only ever been used with this network. That | |
is, if you want to call update_parameters() on some other neural network | |
object then you must NOT reuse the same solvers object. | |
- solvers.size() >= num_computational_layers | |
- 0 < learning_rate <= 1 | |
ensures | |
- Updates all the parameters in the network. In particular, we pass each | |
layer's parameter gradient (i.e. the tensor returned by the layer's | |
get_parameter_gradient() member) through that layer's corresponding | |
solver object. This produces a parameter delta vector which we add to | |
the layer's parameters. | |
- The solvers use the given learning rate. | |
!*/ | |
template <typename solver_type> | |
void update_parameters(std::vector<solver_type>& solvers, double learning_rate | |
) { update_parameters(make_sstack(solvers), learning_rate); } | |
/*! | |
Convenience method for calling update_parameters() | |
!*/ | |
void back_propagate_error( | |
const tensor& x | |
); | |
/*! | |
requires | |
- forward(x) was called to forward propagate x though the network. | |
Moreover, this was the most recent call to forward() and x has not been | |
subsequently modified in any way. | |
- subnet().get_gradient_input() has been set equal to the gradient of this network's | |
output with respect to the loss function (generally this will be done by calling | |
compute_loss()). | |
ensures | |
- Back propagates the error gradient, subnet().get_gradient_input(), through this | |
network and computes parameter and data gradients, via backpropagation. | |
Specifically, this function populates get_final_data_gradient() and also, | |
for each layer, the tensor returned by get_parameter_gradient(). | |
- All elements of #subnet().get_gradient_input() are set to 0. | |
- have_same_dimensions(#get_final_data_gradient(), x) == true. | |
- #get_final_data_gradient() contains the gradient of the network with | |
respect to x. | |
!*/ | |
void back_propagate_error( | |
const tensor& x, | |
const tensor& gradient_input | |
); | |
/*! | |
requires | |
- forward(x) was called to forward propagate x though the network. | |
Moreover, this was the most recent call to forward() and x has not been | |
subsequently modified in any way. | |
- have_same_dimensions(gradient_input, subnet().get_output()) == true | |
ensures | |
- This function is identical to the version of back_propagate_error() | |
defined immediately above except that it back-propagates gradient_input | |
through the network instead of subnet().get_gradient_input(). Therefore, this | |
version of back_propagate_error() is equivalent to performing: | |
subnet().get_gradient_input() = gradient_input; | |
back_propagate_error(x); | |
Except that calling back_propagate_error(x,gradient_input) avoids the | |
copy and is therefore slightly more efficient. | |
- All elements of #subnet.get_gradient_input() are set to 0. | |
- have_same_dimensions(#get_final_data_gradient(), x) == true. | |
- #get_final_data_gradient() contains the gradient of the network with | |
respect to x. | |
!*/ | |
const tensor& get_final_data_gradient( | |
) const; | |
/*! | |
ensures | |
- if back_propagate_error() has been called to back-propagate a gradient | |
through this network then you can call get_final_data_gradient() to | |
obtain the last data gradient computed. That is, this function returns | |
the gradient of the network with respect to its inputs. | |
- Note that there is only one "final data gradient" for an entire network, | |
not one per layer, since there is only one input to the entire network. | |
!*/ | |
// ------------- | |
void clean ( | |
); | |
/*! | |
ensures | |
- Causes the network to forget about everything but its parameters. | |
- invokes subnet().clean() | |
!*/ | |
}; | |
template <typename T, typename U> | |
std::ostream& operator<<(std::ostream& out, const add_loss_layer<T,U>& item); | |
/*! | |
prints the network architecture to the given output stream. | |
!*/ | |
template <typename T, typename U> | |
void serialize(const add_loss_layer<T,U>& item, std::ostream& out); | |
template <typename T, typename U> | |
void deserialize(add_loss_layer<T,U>& item, std::istream& in); | |
/*! | |
provides serialization support | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
// ---------------------------------------------------------------------------------------- | |
// ---------------------------------------------------------------------------------------- | |
template <typename ...T> | |
decorator_repeat_group<T...> repeat_group ( | |
T&& ...args | |
); | |
/*! | |
ensures | |
- Decorates a group of variables. This is essentially like std::make_tuple() | |
except it's only purpose is to group variables together so they can be passed | |
to the repeat object's constructor. | |
!*/ | |
template < | |
size_t num, | |
template<typename> class REPEATED_LAYER, | |
typename SUBNET | |
> | |
class repeat | |
{ | |
/*! | |
REQUIREMENTS ON num | |
- num > 0 | |
REQUIREMENTS ON REPEATED_LAYER | |
- REPEATED_LAYER must be a template that stacks more layers onto a deep neural | |
network. For example, if net_type were a network without a loss layer, | |
then it should be legal to create a deeper network with a type of | |
REPEATED_LAYER<net_type>. | |
REQUIREMENTS ON SUBNET | |
- One of the following must be true: | |
- SUBNET is an add_layer object. | |
- SUBNET is an add_tag_layer object. | |
- SUBNET is an add_skip_layer object. | |
- SUBNET is a repeat object. | |
WHAT THIS OBJECT REPRESENTS | |
This object adds more layers to a deep neural network. In particular, it | |
adds REPEATED_LAYER on top of SUBNET num times. So for example, if num were 2 then | |
repeat<2,REPEATED_LAYER,SUBNET> would create a network equivalent to REPEATED_LAYER<REPEATED_LAYER<SUBNET>>. | |
Also, this object provides an interface identical to the one defined by the | |
add_layer object except that we add the num_repetitions() and | |
get_repeated_layer() methods. These additions are shown below along with | |
some additional explanatory comments. | |
!*/ | |
public: | |
typedef SUBNET subnet_type; | |
typedef typename SUBNET::input_type input_type; | |
const static size_t num_computational_layers = (REPEATED_LAYER<SUBNET>::num_computational_layers-SUBNET::num_computational_layers)*num + SUBNET::num_computational_layers; | |
const static size_t num_layers = (REPEATED_LAYER<SUBNET>::num_layers-SUBNET::num_layers)*num + SUBNET::num_layers; | |
typedef REPEATED_LAYER<an_unspecified_input_type> repeated_layer_type; | |
template <typename T, typename ...U> | |
repeat( | |
T arg1, | |
U ...args2 | |
); | |
/*! | |
ensures | |
- arg1 is used to initialize the num_repetitions() copies of REPEATED_LAYER inside | |
this object. That is, all the REPEATED_LAYER elements are initialized identically | |
by being given copies of arg1. | |
- The rest of the arguments to the constructor, i.e. args2, are passed to | |
SUBNET's constructor. | |
!*/ | |
template <typename ...T, typename ...U> | |
repeat( | |
decorator_repeat_group<T...>&& arg1, | |
U ...args2 | |
); | |
/*! | |
ensures | |
- arg1 is used to initialize the num_repetitions() copies of REPEATED_LAYER inside | |
this object. That is, all the REPEATED_LAYER elements are initialized identically | |
by being given copies of an undecorated arg1. | |
- The rest of the arguments to the constructor, i.e. args2, are passed to | |
SUBNET's constructor. | |
!*/ | |
size_t num_repetitions ( | |
) const; | |
/*! | |
ensures | |
- returns num (i.e. the number of times REPEATED_LAYER was stacked on top of SUBNET) | |
!*/ | |
const repeated_layer_type& get_repeated_layer ( | |
size_t i | |
) const; | |
/*! | |
requires | |
- i < num_repetitions() | |
ensures | |
- returns a reference to the i-th instance of REPEATED_LAYER. For example, | |
get_repeated_layer(0) returns the instance of REPEATED_LAYER that is on the top of | |
the network while get_repeated_layer(num_repetitions()-1) returns the | |
instance of REPEATED_LAYER that is stacked immediately on top of SUBNET. | |
!*/ | |
repeated_layer_type& get_repeated_layer ( | |
size_t i | |
); | |
/*! | |
requires | |
- i < num_repetitions() | |
ensures | |
- returns a reference to the i-th instance of REPEATED_LAYER. For example, | |
get_repeated_layer(0) returns the instance of REPEATED_LAYER that is on the top of | |
the network while get_repeated_layer(num_repetitions()-1) returns the | |
instance of REPEATED_LAYER that is stacked immediately on top of SUBNET. | |
!*/ | |
const subnet_type& subnet( | |
) const; | |
/*! | |
ensures | |
- returns the SUBNET base network that repeat sits on top of. If you want | |
to access the REPEATED_LAYER components then you must use get_repeated_layer(). | |
!*/ | |
subnet_type& subnet( | |
); | |
/*! | |
ensures | |
- returns the SUBNET base network that repeat sits on top of. If you want | |
to access the REPEATED_LAYER components then you must use get_repeated_layer(). | |
!*/ | |
}; | |
template < size_t num, template<typename> class T, typename U > | |
std::ostream& operator<<(std::ostream& out, const repeat<num,T,U>& item); | |
/*! | |
prints the network architecture to the given output stream. | |
!*/ | |
template < size_t num, template<typename> class T, typename U > | |
void serialize(const repeat<num,T,U>& item, std::ostream& out); | |
template < size_t num, template<typename> class T, typename U > | |
void deserialize(repeat<num,T,U>& item, std::istream& in); | |
/*! | |
provides serialization support | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
template < | |
unsigned long ID, | |
typename SUBNET | |
> | |
class add_tag_layer | |
{ | |
/*! | |
REQUIREMENTS ON SUBNET | |
- One of the following must be true: | |
- SUBNET implements the EXAMPLE_INPUT_LAYER interface defined in | |
input_abstract.h. | |
- SUBNET is an add_layer object. | |
- SUBNET is an add_tag_layer object. | |
- SUBNET is an add_skip_layer object. | |
- SUBNET is a repeat object. | |
WHAT THIS OBJECT REPRESENTS | |
This object adds a new layer to a deep neural network. However, this layer | |
simply performs the identity transform. This means it is a no-op and its | |
presence does not change the behavior of the network. It exists solely to | |
be used by add_skip_layer to reference a particular part of a network. | |
Also, this object provides an interface identical to the one defined by the | |
add_layer object. | |
!*/ | |
}; | |
template <unsigned long ID, typename U> | |
std::ostream& operator<<(std::ostream& out, const add_tag_layer<ID,U>& item); | |
/*! | |
prints the network architecture to the given output stream. | |
!*/ | |
template <unsigned long ID, typename U> | |
void serialize(const add_tag_layer<ID,U>& item, std::ostream& out); | |
template <unsigned long ID, typename U> | |
void deserialize(add_tag_layer<ID,U>& item, std::istream& in); | |
/*! | |
provides serialization support | |
!*/ | |
template <typename SUBNET> using tag1 = add_tag_layer< 1, SUBNET>; | |
template <typename SUBNET> using tag2 = add_tag_layer< 2, SUBNET>; | |
template <typename SUBNET> using tag3 = add_tag_layer< 3, SUBNET>; | |
template <typename SUBNET> using tag4 = add_tag_layer< 4, SUBNET>; | |
template <typename SUBNET> using tag5 = add_tag_layer< 5, SUBNET>; | |
template <typename SUBNET> using tag6 = add_tag_layer< 6, SUBNET>; | |
template <typename SUBNET> using tag7 = add_tag_layer< 7, SUBNET>; | |
template <typename SUBNET> using tag8 = add_tag_layer< 8, SUBNET>; | |
template <typename SUBNET> using tag9 = add_tag_layer< 9, SUBNET>; | |
template <typename SUBNET> using tag10 = add_tag_layer<10, SUBNET>; | |
template <template<typename SUBNET> class tag> | |
struct tag_id | |
{ | |
/*! | |
REQUIREMENTS ON tag | |
Tag should be an add_tag_layer template such as tag1, tag2, etc. | |
WHAT THIS OBJECT REPRESENTS | |
This is a tool for finding the numeric ID of a tag layer. For example, | |
tag_id<tag3>::id == 3. | |
!*/ | |
const static unsigned long id; | |
}; | |
// ---------------------------------------------------------------------------------------- | |
template < | |
template<typename> class TAG_TYPE, | |
typename SUBNET | |
> | |
class add_skip_layer | |
{ | |
/*! | |
REQUIREMENTS ON SUBNET | |
- One of the following must be true: | |
- SUBNET is an add_layer object. | |
- SUBNET is an add_tag_layer object. | |
- SUBNET is an add_skip_layer object. | |
- SUBNET is a repeat object. | |
WHAT THIS OBJECT REPRESENTS | |
This object adds a new layer to a deep neural network which draws its | |
inputs from layer<TAG_TYPE>(subnet()) and performs the identity transform. | |
Also, this object provides an interface identical to the one defined by the | |
add_layer object. | |
!*/ | |
}; | |
template <template<typename> class T, typename U> | |
std::ostream& operator<<(std::ostream& out, const add_skip_layer<T,U>& item); | |
/*! | |
prints the network architecture to the given output stream. | |
!*/ | |
template <template<typename> class T, typename U> | |
void serialize(const add_skip_layer<T,U>& item, std::ostream& out); | |
template <template<typename> class T, typename U> | |
void deserialize(add_skip_layer<T,U>& item, std::istream& in); | |
/*! | |
provides serialization support | |
!*/ | |
template <typename SUBNET> using skip1 = add_skip_layer< tag1, SUBNET>; | |
template <typename SUBNET> using skip2 = add_skip_layer< tag2, SUBNET>; | |
template <typename SUBNET> using skip3 = add_skip_layer< tag3, SUBNET>; | |
template <typename SUBNET> using skip4 = add_skip_layer< tag4, SUBNET>; | |
template <typename SUBNET> using skip5 = add_skip_layer< tag5, SUBNET>; | |
template <typename SUBNET> using skip6 = add_skip_layer< tag6, SUBNET>; | |
template <typename SUBNET> using skip7 = add_skip_layer< tag7, SUBNET>; | |
template <typename SUBNET> using skip8 = add_skip_layer< tag8, SUBNET>; | |
template <typename SUBNET> using skip9 = add_skip_layer< tag9, SUBNET>; | |
template <typename SUBNET> using skip10 = add_skip_layer<tag10, SUBNET>; | |
// ---------------------------------------------------------------------------------------- | |
template < | |
unsigned int i, | |
typename net_type | |
> | |
auto& layer ( | |
net_type& n | |
); | |
/*! | |
requires | |
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or | |
add_tag_layer. | |
- i < net_type::num_layers | |
ensures | |
- This function allows you to access any layer in a network by its layer index | |
i. Therefore, it will walk i steps down the network and return the layer | |
object there. Since networks can be big, the best way to find layer index | |
numbers is to print a network to the screen since the print out will include | |
indexes for each layer. | |
- In general, this function chains together i calls to n.subnet() and returns | |
the result. So for example: | |
- if (i == 0) | |
- returns n | |
- else if (i == 1) | |
- returns n.subnet() | |
- else if (i == 2) | |
- returns n.subnet().subnet() | |
- else if (i == 3) | |
- returns n.subnet().subnet().subnet() | |
- else | |
- etc. | |
Except that when it hits a repeat layer it recurses into the repeated layers | |
contained inside. That is, if the layer index indicates a layer in a repeat | |
object this function will make the appropriate call to get_repeated_layer() | |
and do the right thing. | |
!*/ | |
template < | |
template<typename> class Match, | |
typename net_type | |
> | |
auto& layer ( | |
net_type& n | |
); | |
/*! | |
requires | |
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or | |
add_tag_layer. | |
ensures | |
- returns the first layer in n that is of type Match. E.g. if net_type is | |
fc<relu<fc<input<sample_type>>>> then calling layer<relu>(n) would return | |
layer<1>(n), that is, a reference to the relu layer. | |
!*/ | |
template < | |
template<typename> class Match, | |
unsigned int i, | |
typename net_type | |
> | |
auto& layer ( | |
net_type& n | |
); | |
/*! | |
requires | |
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or | |
add_tag_layer. | |
ensures | |
- returns layer<i>(layer<Match>(n)) | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
template <typename net_type> | |
auto& input_layer ( | |
net_type& net | |
); | |
/*! | |
requires | |
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or | |
add_tag_layer. | |
ensures | |
- returns the input later of the given network object. Specifically, this | |
function is equivalent to calling: | |
layer<net_type::num_layers-1>(net); | |
That is, you get the input layer details object for the network. | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
template < | |
typename net_type, | |
typename visitor | |
> | |
void visit_layer_parameters( | |
net_type& net, | |
visitor v | |
); | |
/*! | |
requires | |
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or | |
add_tag_layer. | |
- v is a function object with a signature equivalent to: | |
v(size_t idx, tensor& t) | |
or: | |
v(tensor& t) | |
ensures | |
- Loops over all the computational layers (i.e. layers with parameters, as | |
opposed to loss, tag, or input layers) in net and passes their parameters to | |
v(). To be specific, this function essentially performs the following: | |
size_t computational_layer_idx = 0; | |
for (size_t i = 0; i < net_type::num_layers; ++i) | |
{ | |
if (layer<i>(net) is a computational layer) | |
{ | |
v(computational_layer_idx, layer<i>(net).layer_details().get_layer_params()); | |
++computational_layer_idx; | |
} | |
} | |
- When v() is called, the first argument is always < net_type::num_computational_layers. | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
template < | |
typename net_type, | |
typename visitor | |
> | |
void visit_layer_parameter_gradients( | |
net_type& net, | |
visitor v | |
); | |
/*! | |
requires | |
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or | |
add_tag_layer. | |
- v is a function object with a signature equivalent to: | |
v(size_t idx, tensor& t) | |
or: | |
v(tensor& t) | |
ensures | |
- Loops over all the computational layers (i.e. layers with parameters, as | |
opposed to loss, tag, or input layers) in net and passes their parameter | |
gradients to v(). To be specific, this function essentially performs the | |
following: | |
size_t computational_layer_idx = 0; | |
for (size_t i = 0; i < net_type::num_layers; ++i) | |
{ | |
if (layer<i>(net) is a computational layer) | |
{ | |
v(computational_layer_idx, layer<i>(net).get_parameter_gradient()); | |
++computational_layer_idx; | |
} | |
} | |
- When v() is called, the first argument is always < net_type::num_computational_layers. | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
template < | |
typename net_type, | |
typename visitor | |
> | |
void visit_layers( | |
net_type& net, | |
visitor v | |
); | |
/*! | |
requires | |
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or | |
add_tag_layer. | |
- v is a function object with a signature equivalent to: | |
v(size_t idx, any_net_type& t) | |
or: | |
v(any_net_type& t) | |
That is, it takes an optional size_t and then any of the network types such as | |
add_layer, add_loss_layer, etc. | |
ensures | |
- Loops over all the layers in net and calls v() on them. To be specific, this | |
function essentially performs the following: | |
for (size_t i = 0; i < net_type::num_layers; ++i) | |
v(i, layer<i>(net)); | |
!*/ | |
template < | |
typename net_type, | |
typename visitor | |
> | |
void visit_layers_backwards( | |
net_type& net, | |
visitor v | |
); | |
/*! | |
requires | |
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or | |
add_tag_layer. | |
- v is a function object with a signature equivalent to: | |
v(size_t idx, any_net_type& t) | |
or: | |
v(any_net_type& t) | |
That is, it takes an optional size_t and then any of the network types such as | |
add_layer, add_loss_layer, etc. | |
ensures | |
- Loops over all the layers in net and calls v() on them. The loop happens in | |
the reverse order of visit_layers(). To be specific, this function | |
essentially performs the following: | |
for (size_t i = net_type::num_layers; i != 0; --i) | |
v(i-1, layer<i-1>(net)); | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
template < | |
typename net_type, | |
typename visitor | |
> | |
void visit_computational_layers( | |
net_type& net, | |
visitor v | |
); | |
/*! | |
requires | |
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or | |
add_tag_layer. | |
- v is a function object with a signature equivalent to: | |
v(size_t idx, any_computational_layer& t) | |
or: | |
v(any_computational_layer& t) | |
That is, it takes an optional size_t and then any of the computational layers. E.g. | |
one of the layer types defined in dlib/dnn/layers_abstract.h like fc_ or conv_. | |
ensures | |
- Loops over all the computational layers in net and calls v() on them. To be specific, this | |
function essentially performs the following: | |
for (size_t i = 0; i < net_type::num_layers; ++i) | |
if (layer<i>(net) is an add_layer type, i.e. it adds a computational layer) | |
v(i, layer<i>(net).layer_details()); | |
!*/ | |
template < | |
size_t begin, | |
size_t end, | |
typename net_type, | |
typename visitor | |
> | |
void visit_computational_layers_range( | |
net_type& net, | |
visitor v | |
); | |
/*! | |
requires | |
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or | |
add_tag_layer. | |
- v is a function object with a signature equivalent to: | |
v(size_t idx, any_computational_layer& t) | |
or: | |
v(any_computational_layer& t) | |
That is, it takes an optional size_t and then any of the computational layers. E.g. | |
one of the layer types defined in dlib/dnn/layers_abstract.h like fc_ or conv_. | |
ensures | |
- Loops over all the computational layers in the range [begin,end) in net and calls v() | |
on them. To be specific, this function essentially performs the following: | |
for (size_t i = begin; i < end; ++i) | |
if (layer<i>(net) is an add_layer type, i.e. it adds a computational layer) | |
v(i, layer<i>(net).layer_details()); | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
template < | |
size_t begin, | |
size_t end, | |
typename net_type, | |
typename visitor | |
> | |
void visit_layers_range( | |
net_type& net, | |
visitor v | |
); | |
/*! | |
requires | |
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or | |
add_tag_layer. | |
- v is a function object with a signature equivalent to: | |
v(size_t idx, any_net_type& t) | |
or: | |
v(any_net_type& t) | |
That is, it takes an optional size_t and then any of the network types such as | |
add_layer, add_loss_layer, etc. | |
- begin <= end <= net_type::num_layers | |
ensures | |
- Loops over the layers in the range [begin,end) in net and calls v() on them. | |
To be specific, this function essentially performs the following: | |
for (size_t i = begin; i < end; ++i) | |
v(i, layer<i>(net)); | |
!*/ | |
template < | |
size_t begin, | |
size_t end, | |
typename net_type, | |
typename visitor | |
> | |
void visit_layers_backwards_range( | |
net_type& net, | |
visitor v | |
); | |
/*! | |
requires | |
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or | |
add_tag_layer. | |
- v is a function object with a signature equivalent to: | |
v(size_t idx, any_net_type& t) | |
or: | |
v(any_net_type& t) | |
That is, it takes an optional size_t and then any of the network types such as | |
add_layer, add_loss_layer, etc. | |
- begin <= end <= net_type::num_layers | |
ensures | |
- Loops over the layers in the range [begin,end) in net and calls v() on them. | |
The loop happens in the reverse order of visit_layers_range(). To be specific, | |
this function essentially performs the following: | |
for (size_t i = end; i != begin; --i) | |
v(i-1, layer<i-1>(net)); | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
template < | |
unsigned long tag_id, | |
typename net_type, | |
typename visitor | |
> | |
void visit_layers_until_tag( | |
net_type& net, | |
visitor v | |
); | |
/*! | |
requires | |
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or | |
add_tag_layer. | |
- v is a function object with a signature equivalent to: | |
v(any_net_type& t) | |
That is, it must take any of the network types such as add_layer, | |
add_loss_layer, etc. | |
ensures | |
- Loops over all the layers in net beginning with layer<0>(net) and going until | |
a tag layer with an ID of tag_id is encountered. To be specific, this | |
function essentially performs the following: | |
size_t i = 0; | |
while(layer<i>(net) isn't an add_tag_layer with ID == tag_id) { | |
v(layer<i>(net)); | |
++i; | |
} | |
v(layer<i>(net)); // also visits the tag layer itself at the very end. | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
struct layer_test_results | |
{ | |
std::string log; | |
bool was_good; | |
operator bool() const { return was_good; } | |
}; | |
inline std::ostream& operator<< (std::ostream& out, const layer_test_results& item) | |
{ | |
out << item.log; | |
return out; | |
} | |
template < | |
typename layer_details_type | |
> | |
layer_test_results test_layer ( | |
layer_details_type l | |
); | |
/*! | |
ensures | |
- Checks if l correctly implements the EXAMPLE_COMPUTATIONAL_LAYER_ interface | |
defined in layers_abstract.h. Importantly, it computes numerical approximations | |
to the gradients and compares them to the outputs of the layer. | |
- The results of the testing are returned. In particular, if the returned object | |
is RESULT then we will have: | |
- RESULT.was_good == false if and only if the layer failed the testing. | |
- RESULT.log == a string describing why the testing failed if was_good==false. | |
- Note that this function is only capable of checking layers that take | |
arbitrary subnetworks as input. So if you have designed a layer that expects | |
only a certain restricted type of subnetwork then you might get a compile or | |
runtime error when you call this function. | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
} | |