|
<html><head><title>dlib C++ Library - trainer_abstract.h</title></head><body bgcolor='white'><pre> |
|
<font color='#009900'>// Copyright (C) 2015 Davis E. King (davis@dlib.net) |
|
</font><font color='#009900'>// License: Boost Software License See LICENSE.txt for the full license. |
|
</font><font color='#0000FF'>#undef</font> DLIB_DNn_TRAINER_ABSTRACT_H_ |
|
<font color='#0000FF'>#ifdef</font> DLIB_DNn_TRAINER_ABSTRACT_H_ |
|
|
|
<font color='#0000FF'>#include</font> "<a style='text-decoration:none' href='core_abstract.h.html'>core_abstract.h</a>" |
|
<font color='#0000FF'>#include</font> "<a style='text-decoration:none' href='solvers_abstract.h.html'>solvers_abstract.h</a>" |
|
<font color='#0000FF'>#include</font> <font color='#5555FF'><</font>vector<font color='#5555FF'>></font> |
|
<font color='#0000FF'>#include</font> <font color='#5555FF'><</font>chrono<font color='#5555FF'>></font> |
|
|
|
|
|
<font color='#0000FF'>namespace</font> dlib |
|
<b>{</b> |
|
|
|
<font color='#009900'>// ---------------------------------------------------------------------------------------- |
|
</font> |
|
<font color='#0000FF'>enum</font> <font color='#0000FF'>class</font> <b><a name='force_flush_to_disk'></a>force_flush_to_disk</b> <b>{</b> |
|
no <font color='#5555FF'>=</font> <font color='#979000'>0</font>, |
|
yes <font color='#5555FF'>=</font> <font color='#979000'>1</font> |
|
<b>}</b>; |
|
|
|
<font color='#009900'>// ---------------------------------------------------------------------------------------- |
|
</font> |
|
<font color='#0000FF'>template</font> <font color='#5555FF'><</font> |
|
<font color='#0000FF'>typename</font> net_type, |
|
<font color='#0000FF'>typename</font> solver_type <font color='#5555FF'>=</font> sgd |
|
<font color='#5555FF'>></font> |
|
<font color='#0000FF'>class</font> <b><a name='dnn_trainer'></a>dnn_trainer</b> |
|
<b>{</b> |
|
<font color='#009900'>/*! |
|
REQUIREMENTS ON net_type |
|
- net_type is an add_loss_layer object. |
|
|
|
REQUIREMENTS ON solver_type |
|
- solver_type is an implementation of the EXAMPLE_SOLVER interface defined |
|
in solvers_abstract.h |
|
|
|
WHAT THIS OBJECT REPRESENTS |
|
This object is a tool for training a deep neural network. To use it you supply |
|
a neural network type and a solver, then you call train() with your |
|
training data and it will output a new network instance that has hopefully |
|
learned something useful from your training data. |
|
|
|
If you are compiling with CUDA then this object will use the GPU that is |
|
currently selected (i.e. the one indicated by cudaGetDevice()) when |
|
dnn_trainer is constructed. It will continue to use that device even if |
|
you later change it by a call to cudaSetDevice(). |
|
|
|
EXCEPTIONS |
|
If an exception is thrown by any part of the neural network during training |
|
then the exception will be propagated out of the trainer to the user. |
|
Moreover, the trainer instance will be unusable and should be destroyed. |
|
!*/</font> |
|
|
|
<font color='#0000FF'>public</font>: |
|
|
|
<font color='#0000FF'>typedef</font> <font color='#0000FF'>typename</font> net_type::training_label_type training_label_type; |
|
<font color='#0000FF'>typedef</font> <font color='#0000FF'>typename</font> net_type::input_type input_type; |
|
<font color='#0000FF'>const</font> <font color='#0000FF'>static</font> <font color='#0000FF'><u>size_t</u></font> num_computational_layers <font color='#5555FF'>=</font> net_type::num_computational_layers; |
|
|
|
<font color='#0000FF'>using</font> threads <font color='#5555FF'>=</font> std::vector<font color='#5555FF'><</font>std::shared_ptr<font color='#5555FF'><</font>thread_pool<font color='#5555FF'>></font><font color='#5555FF'>></font>; |
|
|
|
<b><a name='dnn_trainer'></a>dnn_trainer</b><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'>=</font> <font color='#0000FF'>delete</font>; |
|
<b><a name='dnn_trainer'></a>dnn_trainer</b><font face='Lucida Console'>(</font><font color='#0000FF'>const</font> dnn_trainer<font color='#5555FF'>&</font><font face='Lucida Console'>)</font> <font color='#5555FF'>=</font> <font color='#0000FF'>delete</font>; |
|
dnn_trainer<font color='#5555FF'>&</font> <b><a name='operator'></a>operator</b><font color='#5555FF'>=</font><font face='Lucida Console'>(</font><font color='#0000FF'>const</font> dnn_trainer<font color='#5555FF'>&</font><font face='Lucida Console'>)</font> <font color='#5555FF'>=</font> <font color='#0000FF'>delete</font>; |
|
|
|
<b><a name='dnn_trainer'></a>dnn_trainer</b><font face='Lucida Console'>(</font> |
|
net_type<font color='#5555FF'>&</font> net, |
|
<font color='#0000FF'>const</font> solver_type<font color='#5555FF'>&</font> solver <font color='#5555FF'>=</font> <font color='#BB00BB'>solver_type</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>, |
|
<font color='#0000FF'>const</font> std::vector<font color='#5555FF'><</font><font color='#0000FF'><u>int</u></font><font color='#5555FF'>></font><font color='#5555FF'>&</font> cuda_extra_devices <font color='#5555FF'>=</font> <b>{</b><b>}</b>, |
|
std::shared_ptr<font color='#5555FF'><</font>threads<font color='#5555FF'>></font> thread_pools <font color='#5555FF'>=</font> std::shared_ptr<font color='#5555FF'><</font>threads<font color='#5555FF'>></font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- for all valid i: |
|
- 0 <= cuda_extra_devices[i] < dlib::cuda::get_num_devices() |
|
ensures |
|
- &#get_net() == &net |
|
(i.e. The dnn_trainer holds a reference to net, it does not copy it. |
|
Therefore, you must ensure net has a lifetime at least as long as the |
|
dnn_trainer). |
|
- #get_solvers() == a set of solvers that are all initialized with the |
|
provided solver instance. |
|
- #get_max_num_epochs() == 10000 |
|
- #get_mini_batch_size() == 128 |
|
- #get_learning_rate() == 1e-2 |
|
- #get_min_learning_rate() == 1e-5 |
|
- #get_iterations_without_progress_threshold() == 2000 |
|
- #get_test_iterations_without_progress_threshold() == 500 |
|
- #get_learning_rate_shrink_factor() == 0.1 |
|
- #get_learning_rate_schedule().size() == 0 |
|
- #get_train_one_step_calls() == 0 |
|
- #get_test_one_step_calls() == 0 |
|
- #get_synchronization_file() == "" |
|
- if (cuda_extra_devices.size() > 0) then |
|
- This object will use multiple graphics cards to run the learning |
|
algorithms. In particular, it will always use whatever device is |
|
currently selected on the calling thread (the device indicated by |
|
cudaGetDevice()). In addition, you can ask to use additional |
|
devices, which you do by putting their device numbers into |
|
cuda_extra_devices. |
|
- if (thread_pools.get() != nullptr) then |
|
Any new threads spun up within the trainer will execute within the |
|
passed thread pools vector. This means that the same threads can |
|
be re-used across different dnn_trainer instances. Otherwise, the |
|
CUDA runtime may leak memory. This, however, is relevant only if |
|
your program is going to instantiate a large number of trainers, |
|
and generally stay up and running for a very long time. If not, |
|
then you need not worry about this. |
|
NB: Any particular thread pools vector should be passed to at most |
|
one trainer instance at a time. |
|
NB: The mentioned leak isn't happening because dlib is or isn't |
|
doing something. Instead, it is a limitation of the CUDA |
|
runtime that dlib has no control over. |
|
!*/</font> |
|
|
|
net_type<font color='#5555FF'>&</font> <b><a name='get_net'></a>get_net</b> <font face='Lucida Console'>(</font> |
|
force_flush_to_disk force_flush <font color='#5555FF'>=</font> force_flush_to_disk::yes |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- returns the neural network object used by this trainer. This is the |
|
network that is optimized when you call train() or train_one_step(). |
|
Recall that the dnn_trainer doesn't contain the net_type object but |
|
simply holds a reference to an external network which was provided to the |
|
dnn_trainer's constructor. |
|
- This function blocks until all threads inside the dnn_trainer have |
|
stopped touching the net. |
|
- If force_flush is yes, then this function will sync the trainer state to |
|
disk if the current state hasn't already been synced to disk since the |
|
last network modification. |
|
!*/</font> |
|
|
|
<font color='#0000FF'>const</font> std::vector<font color='#5555FF'><</font>solver_type<font color='#5555FF'>></font><font color='#5555FF'>&</font> <b><a name='get_solvers'></a>get_solvers</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- returns the solvers used to optimize each layer of the neural network |
|
get_net(). In particular, the first layer's solver is |
|
get_solvers()[0], the second layer's solver is |
|
get_solvers()[1], and so on. |
|
- This function blocks until all threads inside the dnn_trainer have |
|
stopped touching the net. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> <b><a name='get_mini_batch_size'></a>get_mini_batch_size</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- During training, we call the network's update() routine over and over |
|
with training data. The number of training samples we give to each call |
|
to update is the "mini-batch size", which is defined by |
|
get_mini_batch_size(). |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='set_mini_batch_size'></a>set_mini_batch_size</b> <font face='Lucida Console'>(</font> |
|
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> batch_size |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- batch_size > 0 |
|
ensures |
|
- #get_mini_batch_size() == batch_size |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> <b><a name='get_max_num_epochs'></a>get_max_num_epochs</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- train() will execute at most get_max_num_epochs() iterations over the |
|
training data before returning. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='set_max_num_epochs'></a>set_max_num_epochs</b> <font face='Lucida Console'>(</font> |
|
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> num |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- num > 0 |
|
ensures |
|
- #get_max_num_epochs() == num |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='set_learning_rate'></a>set_learning_rate</b> <font face='Lucida Console'>(</font> |
|
<font color='#0000FF'><u>double</u></font> lr |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- lr > 0 |
|
ensures |
|
- #get_learning_rate() == lr |
|
- #get_learning_rate_schedule().size() == 0 |
|
- This function blocks until all threads inside the dnn_trainer have |
|
stopped touching the net. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>double</u></font> <b><a name='get_learning_rate'></a>get_learning_rate</b><font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- During each training step, a solver tells us how to modify the parameters |
|
of each layer in the network. It does this by outputting a step vector |
|
that, when added to the parameters, will hopefully result in improved |
|
network performance. The learning rate is one of the inputs to the |
|
solver and influences the size of this step vector. This function |
|
returns the current learning rate, that is, the learning rate that will |
|
be used during the next training step. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='set_min_learning_rate'></a>set_min_learning_rate</b> <font face='Lucida Console'>(</font> |
|
<font color='#0000FF'><u>double</u></font> lr |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- lr > 0 |
|
ensures |
|
- #get_min_learning_rate() == lr |
|
- #get_learning_rate_schedule().size() == 0 |
|
- This function blocks until all threads inside the dnn_trainer have |
|
stopped touching the net. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>double</u></font> <b><a name='get_min_learning_rate'></a>get_min_learning_rate</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- During training via this->train(), this object will test if progress is |
|
still being made and if it isn't then it will reduce get_learning_rate() |
|
by setting it to get_learning_rate()*get_learning_rate_shrink_factor(). |
|
However, it will not reduce it below get_min_learning_rate(). Once this |
|
minimum learning rate is crossed the training will terminate. |
|
- get_min_learning_rate() doesn't apply if you are using train_one_step(). |
|
You can keep calling train_one_step() as many times as you want and the |
|
learning rate will drop infinitely close to 0 if you run long enough. |
|
!*/</font> |
|
|
|
<font color='#0000FF'>template</font> <font color='#5555FF'><</font><font color='#0000FF'>typename</font> EXP<font color='#5555FF'>></font> |
|
<font color='#0000FF'><u>void</u></font> <b><a name='set_learning_rate_schedule'></a>set_learning_rate_schedule</b> <font face='Lucida Console'>(</font> |
|
<font color='#0000FF'>const</font> matrix_exp<font color='#5555FF'><</font>EXP<font color='#5555FF'>></font><font color='#5555FF'>&</font> schedule |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- schedule.size() > 0 |
|
- min(schedule) > 0 |
|
ensures |
|
- #get_learning_rate_schedule() == reshape_to_column_vector(schedule) |
|
- #get_learning_rate() == schedule(0,0) |
|
- #get_min_learning_rate() == min(schedule) |
|
- #get_learning_rate_shrink_factor() == 1 |
|
!*/</font> |
|
|
|
<font color='#0000FF'>const</font> matrix<font color='#5555FF'><</font><font color='#0000FF'><u>double</u></font>,<font color='#979000'>0</font>,<font color='#979000'>1</font><font color='#5555FF'>></font><font color='#5555FF'>&</font> <b><a name='get_learning_rate_schedule'></a>get_learning_rate_schedule</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- if (this function returns a non-empty matrix) then |
|
- This trainer will use an explicit learning rate schedule defined by |
|
the learning rate values in get_learning_rate_schedule(). For |
|
example, if get_learning_rate_schedule() returned {0.1, 0.09, 0.08, |
|
0.07, 0.06} then the first training mini-batch would use a learning |
|
rate of 0.1, then the next training mini-batch uses 0.09, and then |
|
0.08, and so on until the end of the schedule is reached. |
|
|
|
If you continue to run training after the end of the schedule has |
|
been reached then the learning rate will be fixed to 0.99 times the |
|
final value. So in our example, eventually the learning rate would |
|
be fixed to 0.99*0.06. This allows you to test if we have reached the |
|
end of the schedule by checking if get_learning_rate() >= 0.06 (this holds only until the schedule has been exhausted). |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> <b><a name='get_steps_without_progress'></a>get_steps_without_progress</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- if (get_learning_rate_shrink_factor() != 1) then |
|
- returns an estimate of how many mini-batches have executed without us |
|
observing a statistically significant decrease in the training error. |
|
- else |
|
- returns 0 |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='set_iterations_without_progress_threshold'></a>set_iterations_without_progress_threshold</b> <font face='Lucida Console'>(</font> |
|
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> thresh |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- #get_iterations_without_progress_threshold() == thresh |
|
- #get_learning_rate_schedule().size() == 0 |
|
- This function blocks until all threads inside the dnn_trainer have |
|
stopped touching the net. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> <b><a name='get_iterations_without_progress_threshold'></a>get_iterations_without_progress_threshold</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- This object monitors the progress of training and estimates if the |
|
training error is being reduced. It does this by looking at the previous |
|
get_iterations_without_progress_threshold() mini-batch results and |
|
applying the statistical test defined by the running_gradient object to |
|
see if the training error is getting smaller. If it isn't being reduced |
|
then get_learning_rate() is made smaller by a factor of get_learning_rate_shrink_factor(). |
|
|
|
Therefore, get_iterations_without_progress_threshold() should always be |
|
set to something sensibly large so that this test can be done with |
|
reasonably high confidence. Think of this test as saying "if the loss |
|
hasn't decreased for the previous get_iterations_without_progress_threshold() mini-batches |
|
then shrink the learning rate". |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='set_learning_rate_shrink_factor'></a>set_learning_rate_shrink_factor</b> <font face='Lucida Console'>(</font> |
|
<font color='#0000FF'><u>double</u></font> shrink |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- 0 < shrink && shrink <= 1 |
|
ensures |
|
- #get_learning_rate_shrink_factor() == shrink |
|
- #get_learning_rate_schedule().size() == 0 |
|
- This function blocks until all threads inside the dnn_trainer have |
|
stopped touching the net. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>double</u></font> <b><a name='get_learning_rate_shrink_factor'></a>get_learning_rate_shrink_factor</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- Whenever the training routine thinks it isn't making progress anymore it |
|
will reduce get_learning_rate() by multiplying it by get_learning_rate_shrink_factor(). |
|
- You can disable the automatic learning rate reduction by setting |
|
get_learning_rate_shrink_factor() to 1. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> <font color='#0000FF'><u>long</u></font> <b><a name='get_train_one_step_calls'></a>get_train_one_step_calls</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- returns the number of times train_one_step() has been called. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> <font color='#0000FF'><u>long</u></font> <b><a name='get_test_one_step_calls'></a>get_test_one_step_calls</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- returns the number of times test_one_step() has been called. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='be_verbose'></a>be_verbose</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- This object will print status messages to standard out so that a |
|
user can observe the progress of the algorithm. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='be_quiet'></a>be_quiet</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- This object will not print anything to standard out |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='set_synchronization_file'></a>set_synchronization_file</b> <font face='Lucida Console'>(</font> |
|
<font color='#0000FF'>const</font> std::string<font color='#5555FF'>&</font> filename, |
|
std::chrono::seconds time_between_syncs <font color='#5555FF'>=</font> std::chrono::<font color='#BB00BB'>minutes</font><font face='Lucida Console'>(</font><font color='#979000'>15</font><font face='Lucida Console'>)</font> |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- #get_synchronization_file() == filename |
|
- While training is running, either via train() or repeated calls to |
|
train_one_step(), this object will save its entire state, including the |
|
state of get_net(), to disk in the file named filename every |
|
time_between_syncs seconds. |
|
- If the filename file already exists then the state of this trainer will |
|
be loaded from that file by this call to set_synchronization_file(). |
|
This allows you to resume a training session which was previously |
|
interrupted. |
|
- It should be noted that when saving, the trainer will alternate between |
|
saving to a file called filename and another file called filename+"_". |
|
We do this because it's possible that your computer might crash (not |
|
because of dlib, just in general) before the data is safely saved to |
|
disk. This way, you will always have a backup file if the write to disk |
|
gets corrupted or is incomplete. Moreover, when loading, we will always |
|
load from the newest of the two possible files. |
|
!*/</font> |
|
|
|
<font color='#0000FF'>const</font> std::string<font color='#5555FF'>&</font> <b><a name='get_synchronization_file'></a>get_synchronization_file</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- Returns the name of the file the dnn_trainer will periodically save its |
|
state to. If the return value is "" then synchronization is disabled. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='train'></a>train</b> <font face='Lucida Console'>(</font> |
|
<font color='#0000FF'>const</font> std::vector<font color='#5555FF'><</font>input_type<font color='#5555FF'>></font><font color='#5555FF'>&</font> data, |
|
<font color='#0000FF'>const</font> std::vector<font color='#5555FF'><</font>training_label_type<font color='#5555FF'>></font><font color='#5555FF'>&</font> labels |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- data.size() == labels.size() |
|
- data.size() > 0 |
|
- net_type uses a supervised loss. |
|
i.e. net_type::training_label_type != no_label_type. |
|
ensures |
|
- Trains a supervised neural network based on the given training data. |
|
The goal of training is to find the network parameters that minimize |
|
get_net().compute_loss(data.begin(), data.end(), labels.begin()). |
|
- The optimizer will run until get_learning_rate() < get_min_learning_rate() |
|
or get_max_num_epochs() training epochs have been executed. |
|
- Each layer in the network will be optimized by its corresponding solver |
|
in get_solvers(). |
|
- Each call to train DOES NOT reinitialize the state of get_net() or |
|
get_solvers(). That is, the existing state of the solvers and network is |
|
the starting point for the optimization each time train() is called. In |
|
particular, if you use the set_synchronization_file() method you can |
|
resume an interrupted train() call by simply calling train() again and it |
|
will pick up from the last synchronization point. |
|
- You can obtain the average loss value during the final training epoch by |
|
calling get_average_loss(). |
|
- This function blocks until all threads inside the dnn_trainer have |
|
stopped touching the net. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='train'></a>train</b> <font face='Lucida Console'>(</font> |
|
<font color='#0000FF'>const</font> std::vector<font color='#5555FF'><</font>input_type<font color='#5555FF'>></font><font color='#5555FF'>&</font> data |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- data.size() > 0 |
|
- net_type uses an unsupervised loss. |
|
i.e. net_type::training_label_type == no_label_type. |
|
ensures |
|
- Trains an unsupervised neural network based on the given training data. |
|
The goal of training is to find the network parameters that minimize |
|
get_net().compute_loss(data.begin(), data.end()). |
|
- The optimizer will run until get_learning_rate() < get_min_learning_rate() |
|
or get_max_num_epochs() training epochs have been executed. |
|
- Each layer in the network will be optimized by its corresponding solver |
|
in get_solvers(). |
|
- Each call to train DOES NOT reinitialize the state of get_net() or |
|
get_solvers(). That is, the existing state of the solvers and network is |
|
the starting point for the optimization each time train() is called. In |
|
particular, if you use the set_synchronization_file() method you can |
|
resume an interrupted train() call by simply calling train() again and it |
|
will pick up from the last synchronization point. |
|
- You can obtain the average loss value during the final training epoch by |
|
calling get_average_loss(). |
|
- This function blocks until all threads inside the dnn_trainer have |
|
stopped touching the net. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='train_one_step'></a>train_one_step</b> <font face='Lucida Console'>(</font> |
|
<font color='#0000FF'>const</font> std::vector<font color='#5555FF'><</font>input_type<font color='#5555FF'>></font><font color='#5555FF'>&</font> data, |
|
<font color='#0000FF'>const</font> std::vector<font color='#5555FF'><</font>training_label_type<font color='#5555FF'>></font><font color='#5555FF'>&</font> labels |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- data.size() == labels.size() |
|
- data.size() > 0 |
|
- net_type uses a supervised loss. |
|
i.e. net_type::training_label_type != no_label_type. |
|
ensures |
|
- Performs one stochastic gradient update step based on the mini-batch of |
|
data and labels supplied to this function. In particular, calling |
|
train_one_step() in a loop is equivalent to calling the train() method |
|
defined above. However, train_one_step() allows you to stream data from |
|
disk into the training process while train() requires you to first load |
|
all the training data into RAM. Otherwise, these training methods are |
|
equivalent. |
|
- You can observe the current average loss value by calling get_average_loss(). |
|
- The network training will happen in another thread. Therefore, after |
|
calling this function you should call get_net() before you touch the net |
|
object from the calling thread to ensure no other threads are still |
|
accessing the network. |
|
- #get_train_one_step_calls() == get_train_one_step_calls() + 1. |
|
!*/</font> |
|
|
|
<font color='#0000FF'>template</font> <font color='#5555FF'><</font> |
|
<font color='#0000FF'>typename</font> data_iterator, |
|
<font color='#0000FF'>typename</font> label_iterator |
|
<font color='#5555FF'>></font> |
|
<font color='#0000FF'><u>void</u></font> <b><a name='train_one_step'></a>train_one_step</b> <font face='Lucida Console'>(</font> |
|
data_iterator dbegin, |
|
data_iterator dend, |
|
label_iterator lbegin |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- std::advance(lbegin, std::distance(dbegin, dend) - 1) is dereferenceable |
|
- std::distance(dbegin, dend) > 0 |
|
- net_type uses a supervised loss. |
|
i.e. net_type::training_label_type != no_label_type. |
|
ensures |
|
- Performs one stochastic gradient update step based on the mini-batch of |
|
data and labels supplied to this function. In particular, calling |
|
train_one_step() in a loop is equivalent to calling the train() method |
|
defined above. However, train_one_step() allows you to stream data from |
|
disk into the training process while train() requires you to first load |
|
all the training data into RAM. Otherwise, these training methods are |
|
equivalent. |
|
- You can observe the current average loss value by calling get_average_loss(). |
|
- The network training will happen in another thread. Therefore, after |
|
calling this function you should call get_net() before you touch the net |
|
object from the calling thread to ensure no other threads are still |
|
accessing the network. |
|
- #get_train_one_step_calls() == get_train_one_step_calls() + 1. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='train_one_step'></a>train_one_step</b> <font face='Lucida Console'>(</font> |
|
<font color='#0000FF'>const</font> std::vector<font color='#5555FF'><</font>input_type<font color='#5555FF'>></font><font color='#5555FF'>&</font> data |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- data.size() > 0 |
|
- net_type uses an unsupervised loss. |
|
i.e. net_type::training_label_type == no_label_type. |
|
ensures |
|
- Performs one stochastic gradient update step based on the mini-batch of |
|
data supplied to this function. In particular, calling train_one_step() |
|
in a loop is equivalent to calling the train() method defined above. |
|
However, train_one_step() allows you to stream data from disk into the |
|
training process while train() requires you to first load all the |
|
training data into RAM. Otherwise, these training methods are |
|
equivalent. |
|
- You can observe the current average loss value by calling get_average_loss(). |
|
- The network training will happen in another thread. Therefore, after |
|
calling this function you should call get_net() before you touch the net |
|
object from the calling thread to ensure no other threads are still |
|
accessing the network. |
|
- #get_train_one_step_calls() == get_train_one_step_calls() + 1. |
|
!*/</font> |
|
|
|
<font color='#0000FF'>template</font> <font color='#5555FF'><</font> |
|
<font color='#0000FF'>typename</font> data_iterator |
|
<font color='#5555FF'>></font> |
|
<font color='#0000FF'><u>void</u></font> <b><a name='train_one_step'></a>train_one_step</b> <font face='Lucida Console'>(</font> |
|
data_iterator dbegin, |
|
data_iterator dend |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- std::distance(dbegin, dend) > 0 |
|
- net_type uses an unsupervised loss. |
|
i.e. net_type::training_label_type == no_label_type. |
|
ensures |
|
- Performs one stochastic gradient update step based on the mini-batch of |
|
data supplied to this function. In particular, calling train_one_step() |
|
in a loop is equivalent to calling the train() method defined above. |
|
However, train_one_step() allows you to stream data from disk into the |
|
training process while train() requires you to first load all the |
|
training data into RAM. Otherwise, these training methods are |
|
equivalent. |
|
- You can observe the current average loss value by calling get_average_loss(). |
|
- The network training will happen in another thread. Therefore, after |
|
calling this function you should call get_net() before you touch the net |
|
object from the calling thread to ensure no other threads are still |
|
accessing the network. |
|
- #get_train_one_step_calls() == get_train_one_step_calls() + 1. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>double</u></font> <b><a name='get_average_loss'></a>get_average_loss</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- returns the average loss value observed during previous calls to |
|
train_one_step() or train(). That is, the average output of |
|
net_type::update() during the previous mini-batch updates. |
|
- Note that, if be_verbose() has been called, then this object will |
|
automatically call clear_average_loss() periodically when it logs the |
|
loss to the console. |
|
- This function blocks until all threads inside the dnn_trainer have |
|
stopped touching the net. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='clear_average_loss'></a>clear_average_loss</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- #get_average_loss() == 0 |
|
- get_average_loss() uses a dlib::running_stats object to keep a running |
|
average of the loss values seen during the previous mini-batch updates |
|
applied during training. Calling clear_average_loss() resets the |
|
running_stats object so it forgets about all previous loss values |
|
observed. |
|
- This function blocks until all threads inside the dnn_trainer have |
|
stopped touching the net. |
|
!*/</font> |
|
|
|
<font color='#009900'>// ---------------------- |
|
</font> |
|
<font color='#0000FF'><u>double</u></font> <b><a name='get_average_test_loss'></a>get_average_test_loss</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- returns the average loss value observed during previous calls to |
|
test_one_step(). |
|
- This function blocks until all threads inside the dnn_trainer have |
|
stopped touching the net. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='test_one_step'></a>test_one_step</b> <font face='Lucida Console'>(</font> |
|
<font color='#0000FF'>const</font> std::vector<font color='#5555FF'><</font>input_type<font color='#5555FF'>></font><font color='#5555FF'>&</font> data, |
|
<font color='#0000FF'>const</font> std::vector<font color='#5555FF'><</font>training_label_type<font color='#5555FF'>></font><font color='#5555FF'>&</font> labels |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- data.size() == labels.size() |
|
- data.size() > 0 |
|
- net_type uses a supervised loss. |
|
i.e. net_type::training_label_type != no_label_type. |
|
ensures |
|
- Runs the given data through the network and computes and records the loss. |
|
- This call does not modify network parameters. The point of |
|
test_one_step() is twofold: to allow you to observe the accuracy of the |
|
network on hold out data during training, and to allow the trainer to |
|
automatically adjust the learning rate when the test loss stops |
|
improving. It should be noted that you are not required to use |
|
test_one_step() at all, but if you want to do this kind of thing it is |
|
available. |
|
- You can observe the current average loss value by calling get_average_test_loss(). |
|
- The computation will happen in another thread. Therefore, after calling |
|
this function you should call get_net() before you touch the net object |
|
from the calling thread to ensure no other threads are still accessing |
|
the network. |
|
- #get_test_one_step_calls() == get_test_one_step_calls() + 1. |
|
!*/</font> |
|
|
|
<font color='#0000FF'>template</font> <font color='#5555FF'><</font> |
|
<font color='#0000FF'>typename</font> data_iterator, |
|
<font color='#0000FF'>typename</font> label_iterator |
|
<font color='#5555FF'>></font> |
|
<font color='#0000FF'><u>void</u></font> <b><a name='test_one_step'></a>test_one_step</b> <font face='Lucida Console'>(</font> |
|
data_iterator dbegin, |
|
data_iterator dend, |
|
label_iterator lbegin |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
            - std::advance(lbegin, std::distance(dbegin, dend) - 1) is dereferenceable |
|
- std::distance(dbegin, dend) > 0 |
|
- net_type uses a supervised loss. |
|
i.e. net_type::training_label_type != no_label_type. |
|
ensures |
|
- Runs the given data through the network and computes and records the loss. |
|
- This call does not modify network parameters. The point of |
|
              test_one_step() is twofold: to allow you to observe the accuracy of the |
|
              network on held-out data during training, and to allow the trainer to |
|
automatically adjust the learning rate when the test loss stops |
|
improving. It should be noted that you are not required to use |
|
test_one_step() at all, but if you want to do this kind of thing it is |
|
available. |
|
- You can observe the current average loss value by calling get_average_test_loss(). |
|
- The computation will happen in another thread. Therefore, after calling |
|
this function you should call get_net() before you touch the net object |
|
from the calling thread to ensure no other threads are still accessing |
|
the network. |
|
- #get_test_one_step_calls() == get_test_one_step_calls() + 1. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='test_one_step'></a>test_one_step</b> <font face='Lucida Console'>(</font> |
|
<font color='#0000FF'>const</font> std::vector<font color='#5555FF'><</font>input_type<font color='#5555FF'>></font><font color='#5555FF'>&</font> data |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- data.size() > 0 |
|
- net_type uses an unsupervised loss. |
|
i.e. net_type::training_label_type == no_label_type. |
|
ensures |
|
- Runs the given data through the network and computes and records the loss. |
|
- This call does not modify network parameters. The point of |
|
              test_one_step() is twofold: to allow you to observe the accuracy of the |
|
              network on held-out data during training, and to allow the trainer to |
|
automatically adjust the learning rate when the test loss stops |
|
improving. It should be noted that you are not required to use |
|
test_one_step() at all, but if you want to do this kind of thing it is |
|
available. |
|
- You can observe the current average loss value by calling get_average_test_loss(). |
|
- The computation will happen in another thread. Therefore, after calling |
|
this function you should call get_net() before you touch the net object |
|
from the calling thread to ensure no other threads are still accessing |
|
the network. |
|
- #get_test_one_step_calls() == get_test_one_step_calls() + 1. |
|
!*/</font> |
|
|
|
<font color='#0000FF'>template</font> <font color='#5555FF'><</font> |
|
<font color='#0000FF'>typename</font> data_iterator |
|
<font color='#5555FF'>></font> |
|
<font color='#0000FF'><u>void</u></font> <b><a name='test_one_step'></a>test_one_step</b> <font face='Lucida Console'>(</font> |
|
data_iterator dbegin, |
|
data_iterator dend |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- std::distance(dbegin, dend) > 0 |
|
- net_type uses an unsupervised loss. |
|
i.e. net_type::training_label_type == no_label_type. |
|
ensures |
|
- Runs the given data through the network and computes and records the loss. |
|
- This call does not modify network parameters. The point of |
|
              test_one_step() is twofold: to allow you to observe the accuracy of the |
|
              network on held-out data during training, and to allow the trainer to |
|
automatically adjust the learning rate when the test loss stops |
|
improving. It should be noted that you are not required to use |
|
test_one_step() at all, but if you want to do this kind of thing it is |
|
available. |
|
- You can observe the current average loss value by calling get_average_test_loss(). |
|
- The computation will happen in another thread. Therefore, after calling |
|
this function you should call get_net() before you touch the net object |
|
from the calling thread to ensure no other threads are still accessing |
|
the network. |
|
- #get_test_one_step_calls() == get_test_one_step_calls() + 1. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='set_test_iterations_without_progress_threshold'></a>set_test_iterations_without_progress_threshold</b> <font face='Lucida Console'>(</font> |
|
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> thresh |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- #get_test_iterations_without_progress_threshold() == thresh |
|
- #get_learning_rate_schedule().size() == 0 |
|
- This function blocks until all threads inside the dnn_trainer have |
|
stopped touching the net. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> <b><a name='get_test_iterations_without_progress_threshold'></a>get_test_iterations_without_progress_threshold</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- This object monitors the progress of training and estimates if the |
|
testing error is being reduced. It does this by looking at the previous |
|
get_test_iterations_without_progress_threshold() mini-batch results from |
|
test_one_step() and applying the statistical test defined by the |
|
running_gradient object to see if the testing error is getting smaller. |
|
If it isn't being reduced then get_learning_rate() is made smaller by a |
|
factor of get_learning_rate_shrink_factor(). |
|
|
|
Therefore, get_test_iterations_without_progress_threshold() should always be |
|
set to something sensibly large so that this test can be done with |
|
reasonably high confidence. Think of this test as saying "if the testing loss |
|
hasn't decreased for the previous get_test_iterations_without_progress_threshold() |
|
calls to test_one_step() then shrink the learning rate". |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> <b><a name='get_test_steps_without_progress'></a>get_test_steps_without_progress</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- if (get_learning_rate_shrink_factor() != 1) then |
|
- returns an estimate of how many mini-batches have executed without us |
|
observing a statistically significant decrease in the testing error |
|
(i.e. the error on the data given to the trainer via test_one_step() |
|
calls). |
|
- else |
|
- returns 0 |
|
!*/</font> |
|
|
|
<b>}</b>; |
|
|
|
<font color='#009900'>// ---------------------------------------------------------------------------------------- |
|
</font> |
|
<font color='#0000FF'>template</font> <font color='#5555FF'><</font> |
|
<font color='#0000FF'>typename</font> net_type, |
|
<font color='#0000FF'>typename</font> solver_type |
|
<font color='#5555FF'>></font> |
|
std::ostream<font color='#5555FF'>&</font> <b><a name='operator'></a>operator</b><font color='#5555FF'><</font><font color='#5555FF'><</font> <font face='Lucida Console'>(</font> |
|
std::ostream<font color='#5555FF'>&</font> out, |
|
dnn_trainer<font color='#5555FF'><</font>net_type,solver_type<font color='#5555FF'>></font><font color='#5555FF'>&</font> trainer |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- Prints a log of the current parameters of trainer to out. |
|
!*/</font> |
|
|
|
<font color='#009900'>// ---------------------------------------------------------------------------------------- |
|
</font> |
|
<b>}</b> |
|
|
|
<font color='#0000FF'>#endif</font> <font color='#009900'>// DLIB_DNn_TRAINER_ABSTRACT_H_ |
|
</font> |
|
|
|
|
|
</pre></body></html> |