|
<html><head><title>dlib C++ Library - dnn_instance_segmentation_ex.cpp</title></head><body bgcolor='white'><pre> |
|
<font color='#009900'>// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt |
|
</font><font color='#009900'>/* |
|
This example shows how to do instance segmentation on an image using a net pretrained |
|
on the PASCAL VOC2012 dataset. For an introduction to what instance segmentation is, |
|
see the accompanying header file dnn_instance_segmentation_ex.h. |
|
|
|
Instructions on how to run the example: |
|
1. Download the PASCAL VOC2012 data, and untar it somewhere. |
|
http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar |
|
2. Build the dnn_instance_segmentation_train_ex example program. |
|
3. Run: |
|
./dnn_instance_segmentation_train_ex /path/to/VOC2012 |
|
4. Wait while the network is being trained. |
|
5. Build the dnn_instance_segmentation_ex example program. |
|
6. Run: |
|
./dnn_instance_segmentation_ex /path/to/VOC2012-or-other-images |
|
|
|
An alternative to steps 2-4 above is to download a pre-trained network |
|
from here: http://dlib.net/files/instance_segmentation_voc2012net_v2.dnn |
|
|
|
It would be a good idea to become familiar with dlib's DNN tooling before reading this |
|
example. So you should read <a href="dnn_introduction_ex.cpp.html">dnn_introduction_ex.cpp</a> and <a href="dnn_introduction2_ex.cpp.html">dnn_introduction2_ex.cpp</a> |
|
before reading this example program. |
|
*/</font> |
|
|
|
<font color='#0000FF'>#include</font> "<a style='text-decoration:none' href='dnn_instance_segmentation_ex.h.html'>dnn_instance_segmentation_ex.h</a>" |
|
<font color='#0000FF'>#include</font> "<a style='text-decoration:none' href='pascal_voc_2012.h.html'>pascal_voc_2012.h</a>" |
|
|
|
<font color='#0000FF'>#include</font> <font color='#5555FF'><</font>iostream<font color='#5555FF'>></font> |
|
<font color='#0000FF'>#include</font> <font color='#5555FF'><</font>dlib<font color='#5555FF'>/</font>data_io.h<font color='#5555FF'>></font> |
|
<font color='#0000FF'>#include</font> <font color='#5555FF'><</font>dlib<font color='#5555FF'>/</font>gui_widgets.h<font color='#5555FF'>></font> |
|
|
|
<font color='#0000FF'>using</font> <font color='#0000FF'>namespace</font> std; |
|
<font color='#0000FF'>using</font> <font color='#0000FF'>namespace</font> dlib; |
|
|
|
<font color='#009900'>// ---------------------------------------------------------------------------------------- |
|
</font> |
|
<font color='#0000FF'><u>int</u></font> <b><a name='main'></a>main</b><font face='Lucida Console'>(</font><font color='#0000FF'><u>int</u></font> argc, <font color='#0000FF'><u>char</u></font><font color='#5555FF'>*</font><font color='#5555FF'>*</font> argv<font face='Lucida Console'>)</font> <font color='#0000FF'>try</font> |
|
<b>{</b> |
|
<font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>argc <font color='#5555FF'>!</font><font color='#5555FF'>=</font> <font color='#979000'>2</font><font face='Lucida Console'>)</font> |
|
<b>{</b> |
|
cout <font color='#5555FF'><</font><font color='#5555FF'><</font> "<font color='#CC0000'>You call this program like this: </font>" <font color='#5555FF'><</font><font color='#5555FF'><</font> endl; |
|
cout <font color='#5555FF'><</font><font color='#5555FF'><</font> "<font color='#CC0000'>./dnn_instance_segmentation_ex /path/to/images</font>" <font color='#5555FF'><</font><font color='#5555FF'><</font> endl; |
|
cout <font color='#5555FF'><</font><font color='#5555FF'><</font> endl; |
|
cout <font color='#5555FF'><</font><font color='#5555FF'><</font> "<font color='#CC0000'>You will also need a trained '</font>" <font color='#5555FF'><</font><font color='#5555FF'><</font> instance_segmentation_net_filename <font color='#5555FF'><</font><font color='#5555FF'><</font> "<font color='#CC0000'>' file.</font>" <font color='#5555FF'><</font><font color='#5555FF'><</font> endl; |
|
cout <font color='#5555FF'><</font><font color='#5555FF'><</font> "<font color='#CC0000'>You can either train it yourself (see example program</font>" <font color='#5555FF'><</font><font color='#5555FF'><</font> endl; |
|
cout <font color='#5555FF'><</font><font color='#5555FF'><</font> "<font color='#CC0000'>dnn_instance_segmentation_train_ex), or download a</font>" <font color='#5555FF'><</font><font color='#5555FF'><</font> endl; |
|
cout <font color='#5555FF'><</font><font color='#5555FF'><</font> "<font color='#CC0000'>copy from here: http://dlib.net/files/</font>" <font color='#5555FF'><</font><font color='#5555FF'><</font> instance_segmentation_net_filename <font color='#5555FF'><</font><font color='#5555FF'><</font> endl; |
|
<font color='#0000FF'>return</font> <font color='#979000'>1</font>; |
|
<b>}</b> |
|
|
|
<font color='#009900'>// Read the file containing the trained networks from the working directory. |
|
</font> det_anet_type det_net; |
|
std::map<font color='#5555FF'><</font>std::string, seg_bnet_type<font color='#5555FF'>></font> seg_nets_by_class; |
|
<font color='#BB00BB'>deserialize</font><font face='Lucida Console'>(</font>instance_segmentation_net_filename<font face='Lucida Console'>)</font> <font color='#5555FF'>></font><font color='#5555FF'>></font> det_net <font color='#5555FF'>></font><font color='#5555FF'>></font> seg_nets_by_class; |
|
|
|
<font color='#009900'>// Show inference results in a window. |
|
</font> image_window win; |
|
|
|
matrix<font color='#5555FF'><</font>rgb_pixel<font color='#5555FF'>></font> input_image; |
|
|
|
<font color='#009900'>// Find supported image files. |
|
</font> <font color='#0000FF'>const</font> std::vector<font color='#5555FF'><</font>file<font color='#5555FF'>></font> files <font color='#5555FF'>=</font> dlib::<font color='#BB00BB'>get_files_in_directory_tree</font><font face='Lucida Console'>(</font>argv[<font color='#979000'>1</font>], |
|
dlib::<font color='#BB00BB'>match_endings</font><font face='Lucida Console'>(</font>"<font color='#CC0000'>.jpeg .jpg .png</font>"<font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>; |
|
|
|
dlib::rand rnd; |
|
|
|
cout <font color='#5555FF'><</font><font color='#5555FF'><</font> "<font color='#CC0000'>Found </font>" <font color='#5555FF'><</font><font color='#5555FF'><</font> files.<font color='#BB00BB'>size</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'><</font><font color='#5555FF'><</font> "<font color='#CC0000'> images, processing...</font>" <font color='#5555FF'><</font><font color='#5555FF'><</font> endl; |
|
|
|
<font color='#0000FF'>for</font> <font face='Lucida Console'>(</font><font color='#0000FF'>const</font> file<font color='#5555FF'>&</font> file : files<font face='Lucida Console'>)</font> |
|
<b>{</b> |
|
<font color='#009900'>// Load the input image. |
|
</font> <font color='#BB00BB'>load_image</font><font face='Lucida Console'>(</font>input_image, file.<font color='#BB00BB'>full_name</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>; |
|
|
|
<font color='#009900'>// Find instances in the input image |
|
</font> <font color='#0000FF'>const</font> <font color='#0000FF'>auto</font> instances <font color='#5555FF'>=</font> <font color='#BB00BB'>det_net</font><font face='Lucida Console'>(</font>input_image<font face='Lucida Console'>)</font>; |
|
|
|
matrix<font color='#5555FF'><</font>rgb_pixel<font color='#5555FF'>></font> rgb_label_image; |
|
matrix<font color='#5555FF'><</font><font color='#0000FF'><u>float</u></font><font color='#5555FF'>></font> label_image_confidence; |
|
|
|
matrix<font color='#5555FF'><</font>rgb_pixel<font color='#5555FF'>></font> input_chip; |
|
|
|
rgb_label_image.<font color='#BB00BB'>set_size</font><font face='Lucida Console'>(</font>input_image.<font color='#BB00BB'>nr</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>, input_image.<font color='#BB00BB'>nc</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>; |
|
rgb_label_image <font color='#5555FF'>=</font> <font color='#BB00BB'>rgb_pixel</font><font face='Lucida Console'>(</font><font color='#979000'>0</font>, <font color='#979000'>0</font>, <font color='#979000'>0</font><font face='Lucida Console'>)</font>; |
|
|
|
label_image_confidence.<font color='#BB00BB'>set_size</font><font face='Lucida Console'>(</font>input_image.<font color='#BB00BB'>nr</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>, input_image.<font color='#BB00BB'>nc</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>; |
|
label_image_confidence <font color='#5555FF'>=</font> <font color='#979000'>0.0</font>; |
|
|
|
<font color='#0000FF'><u>bool</u></font> found_something <font color='#5555FF'>=</font> <font color='#979000'>false</font>; |
|
|
|
<font color='#0000FF'>for</font> <font face='Lucida Console'>(</font><font color='#0000FF'>const</font> <font color='#0000FF'>auto</font><font color='#5555FF'>&</font> instance : instances<font face='Lucida Console'>)</font> |
|
<b>{</b> |
|
<font color='#0000FF'>if</font> <font face='Lucida Console'>(</font><font color='#5555FF'>!</font>found_something<font face='Lucida Console'>)</font> |
|
<b>{</b> |
|
cout <font color='#5555FF'><</font><font color='#5555FF'><</font> "<font color='#CC0000'>Found </font>"; |
|
found_something <font color='#5555FF'>=</font> <font color='#979000'>true</font>; |
|
<b>}</b> |
|
<font color='#0000FF'>else</font> |
|
<b>{</b> |
|
cout <font color='#5555FF'><</font><font color='#5555FF'><</font> "<font color='#CC0000'>, </font>"; |
|
<b>}</b> |
|
cout <font color='#5555FF'><</font><font color='#5555FF'><</font> instance.label; |
|
|
|
<font color='#0000FF'>const</font> <font color='#0000FF'>auto</font> cropping_rect <font color='#5555FF'>=</font> <font color='#BB00BB'>get_cropping_rect</font><font face='Lucida Console'>(</font>instance.rect<font face='Lucida Console'>)</font>; |
|
<font color='#0000FF'>const</font> chip_details <font color='#BB00BB'>chip_details</font><font face='Lucida Console'>(</font>cropping_rect, <font color='#BB00BB'>chip_dims</font><font face='Lucida Console'>(</font>seg_dim, seg_dim<font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>; |
|
<font color='#BB00BB'>extract_image_chip</font><font face='Lucida Console'>(</font>input_image, chip_details, input_chip, <font color='#BB00BB'>interpolate_bilinear</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>; |
|
|
|
<font color='#0000FF'>const</font> <font color='#0000FF'>auto</font> i <font color='#5555FF'>=</font> seg_nets_by_class.<font color='#BB00BB'>find</font><font face='Lucida Console'>(</font>instance.label<font face='Lucida Console'>)</font>; |
|
<font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>i <font color='#5555FF'>=</font><font color='#5555FF'>=</font> seg_nets_by_class.<font color='#BB00BB'>end</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font> |
|
<b>{</b> |
|
<font color='#009900'>// per-class segmentation net not found, so we must be using the same net for all classes |
|
</font> <font color='#009900'>// (see bool separate_seg_net_for_each_class in <a href="dnn_instance_segmentation_train_ex.cpp.html">dnn_instance_segmentation_train_ex.cpp</a>) |
|
</font> <font color='#BB00BB'>DLIB_CASSERT</font><font face='Lucida Console'>(</font>seg_nets_by_class.<font color='#BB00BB'>size</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'>=</font><font color='#5555FF'>=</font> <font color='#979000'>1</font><font face='Lucida Console'>)</font>; |
|
<font color='#BB00BB'>DLIB_CASSERT</font><font face='Lucida Console'>(</font>seg_nets_by_class.<font color='#BB00BB'>begin</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font color='#5555FF'>-</font><font color='#5555FF'>></font>first <font color='#5555FF'>=</font><font color='#5555FF'>=</font> "<font color='#CC0000'></font>"<font face='Lucida Console'>)</font>; |
|
<b>}</b> |
|
|
|
<font color='#0000FF'>auto</font><font color='#5555FF'>&</font> seg_net <font color='#5555FF'>=</font> i <font color='#5555FF'>!</font><font color='#5555FF'>=</font> seg_nets_by_class.<font color='#BB00BB'>end</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> |
|
? i<font color='#5555FF'>-</font><font color='#5555FF'>></font>second <font color='#009900'>// use the segmentation net trained for this class |
|
</font> : seg_nets_by_class.<font color='#BB00BB'>begin</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font color='#5555FF'>-</font><font color='#5555FF'>></font>second; <font color='#009900'>// use the same segmentation net for all classes |
|
</font> |
|
<font color='#0000FF'>const</font> <font color='#0000FF'>auto</font> mask <font color='#5555FF'>=</font> <font color='#BB00BB'>seg_net</font><font face='Lucida Console'>(</font>input_chip<font face='Lucida Console'>)</font>; |
|
|
|
<font color='#0000FF'>const</font> rgb_pixel <font color='#BB00BB'>random_color</font><font face='Lucida Console'>(</font> |
|
rnd.<font color='#BB00BB'>get_random_8bit_number</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>, |
|
rnd.<font color='#BB00BB'>get_random_8bit_number</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>, |
|
rnd.<font color='#BB00BB'>get_random_8bit_number</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> |
|
<font face='Lucida Console'>)</font>; |
|
|
|
dlib::matrix<font color='#5555FF'><</font><font color='#0000FF'><u>float</u></font><font color='#5555FF'>></font> <font color='#BB00BB'>resized_mask</font><font face='Lucida Console'>(</font> |
|
<font color='#0000FF'>static_cast</font><font color='#5555FF'><</font><font color='#0000FF'><u>int</u></font><font color='#5555FF'>></font><font face='Lucida Console'>(</font>chip_details.rect.<font color='#BB00BB'>height</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>, |
|
<font color='#0000FF'>static_cast</font><font color='#5555FF'><</font><font color='#0000FF'><u>int</u></font><font color='#5555FF'>></font><font face='Lucida Console'>(</font>chip_details.rect.<font color='#BB00BB'>width</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font> |
|
<font face='Lucida Console'>)</font>; |
|
|
|
dlib::<font color='#BB00BB'>resize_image</font><font face='Lucida Console'>(</font>mask, resized_mask<font face='Lucida Console'>)</font>; |
|
|
|
<font color='#0000FF'>for</font> <font face='Lucida Console'>(</font><font color='#0000FF'><u>int</u></font> r <font color='#5555FF'>=</font> <font color='#979000'>0</font>; r <font color='#5555FF'><</font> resized_mask.<font color='#BB00BB'>nr</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>; <font color='#5555FF'>+</font><font color='#5555FF'>+</font>r<font face='Lucida Console'>)</font> |
|
<b>{</b> |
|
<font color='#0000FF'>for</font> <font face='Lucida Console'>(</font><font color='#0000FF'><u>int</u></font> c <font color='#5555FF'>=</font> <font color='#979000'>0</font>; c <font color='#5555FF'><</font> resized_mask.<font color='#BB00BB'>nc</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>; <font color='#5555FF'>+</font><font color='#5555FF'>+</font>c<font face='Lucida Console'>)</font> |
|
<b>{</b> |
|
<font color='#0000FF'>const</font> <font color='#0000FF'>auto</font> new_confidence <font color='#5555FF'>=</font> <font color='#BB00BB'>resized_mask</font><font face='Lucida Console'>(</font>r, c<font face='Lucida Console'>)</font>; |
|
<font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>new_confidence <font color='#5555FF'>></font> <font color='#979000'>0</font><font face='Lucida Console'>)</font> |
|
<b>{</b> |
|
<font color='#0000FF'>const</font> <font color='#0000FF'>auto</font> y <font color='#5555FF'>=</font> chip_details.rect.<font color='#BB00BB'>top</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'>+</font> r; |
|
<font color='#0000FF'>const</font> <font color='#0000FF'>auto</font> x <font color='#5555FF'>=</font> chip_details.rect.<font color='#BB00BB'>left</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'>+</font> c; |
|
<font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>y <font color='#5555FF'>></font><font color='#5555FF'>=</font> <font color='#979000'>0</font> <font color='#5555FF'>&</font><font color='#5555FF'>&</font> y <font color='#5555FF'><</font> rgb_label_image.<font color='#BB00BB'>nr</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'>&</font><font color='#5555FF'>&</font> x <font color='#5555FF'>></font><font color='#5555FF'>=</font> <font color='#979000'>0</font> <font color='#5555FF'>&</font><font color='#5555FF'>&</font> x <font color='#5555FF'><</font> rgb_label_image.<font color='#BB00BB'>nc</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font> |
|
<b>{</b> |
|
<font color='#0000FF'>auto</font><font color='#5555FF'>&</font> current_confidence <font color='#5555FF'>=</font> <font color='#BB00BB'>label_image_confidence</font><font face='Lucida Console'>(</font>y, x<font face='Lucida Console'>)</font>; |
|
<font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>new_confidence <font color='#5555FF'>></font> current_confidence<font face='Lucida Console'>)</font> |
|
<b>{</b> |
|
<font color='#0000FF'>auto</font> rgb_label <font color='#5555FF'>=</font> random_color; |
|
<font color='#0000FF'>const</font> <font color='#0000FF'>auto</font> baseline_confidence <font color='#5555FF'>=</font> <font color='#979000'>5</font>; |
|
<font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>new_confidence <font color='#5555FF'><</font> baseline_confidence<font face='Lucida Console'>)</font> |
|
<b>{</b> |
|
<font color='#009900'>// Scale label intensity if confidence isn't high |
|
</font> rgb_label.red <font color='#5555FF'>*</font><font color='#5555FF'>=</font> new_confidence <font color='#5555FF'>/</font> baseline_confidence; |
|
rgb_label.green <font color='#5555FF'>*</font><font color='#5555FF'>=</font> new_confidence <font color='#5555FF'>/</font> baseline_confidence; |
|
rgb_label.blue <font color='#5555FF'>*</font><font color='#5555FF'>=</font> new_confidence <font color='#5555FF'>/</font> baseline_confidence; |
|
<b>}</b> |
|
<font color='#BB00BB'>rgb_label_image</font><font face='Lucida Console'>(</font>y, x<font face='Lucida Console'>)</font> <font color='#5555FF'>=</font> rgb_label; |
|
current_confidence <font color='#5555FF'>=</font> new_confidence; |
|
<b>}</b> |
|
<b>}</b> |
|
<b>}</b> |
|
<b>}</b> |
|
<b>}</b> |
|
|
|
<font color='#0000FF'>const</font> Voc2012class<font color='#5555FF'>&</font> voc2012_class <font color='#5555FF'>=</font> <font color='#BB00BB'>find_voc2012_class</font><font face='Lucida Console'>(</font> |
|
[<font color='#5555FF'>&</font>instance]<font face='Lucida Console'>(</font><font color='#0000FF'>const</font> Voc2012class<font color='#5555FF'>&</font> candidate<font face='Lucida Console'>)</font> <b>{</b> |
|
<font color='#0000FF'>return</font> candidate.classlabel <font color='#5555FF'>=</font><font color='#5555FF'>=</font> instance.label; |
|
<b>}</b> |
|
<font face='Lucida Console'>)</font>; |
|
|
|
dlib::<font color='#BB00BB'>draw_rectangle</font><font face='Lucida Console'>(</font>rgb_label_image, instance.rect, voc2012_class.rgb_label, <font color='#979000'>1</font><font face='Lucida Console'>)</font>; |
|
<b>}</b> |
|
|
|
<font color='#009900'>// Show the input image on the left, and the predicted RGB labels on the right. |
|
</font> win.<font color='#BB00BB'>set_image</font><font face='Lucida Console'>(</font><font color='#BB00BB'>join_rows</font><font face='Lucida Console'>(</font>input_image, rgb_label_image<font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>; |
|
|
|
<font color='#0000FF'>if</font> <font face='Lucida Console'>(</font><font color='#5555FF'>!</font>instances.<font color='#BB00BB'>empty</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font> |
|
<b>{</b> |
|
cout <font color='#5555FF'><</font><font color='#5555FF'><</font> "<font color='#CC0000'> in </font>" <font color='#5555FF'><</font><font color='#5555FF'><</font> file.<font color='#BB00BB'>name</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'><</font><font color='#5555FF'><</font> "<font color='#CC0000'> - hit enter to process the next image</font>"; |
|
cin.<font color='#BB00BB'>get</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>; |
|
<b>}</b> |
|
<b>}</b> |
|
<b>}</b> |
|
<font color='#0000FF'>catch</font><font face='Lucida Console'>(</font>std::exception<font color='#5555FF'>&</font> e<font face='Lucida Console'>)</font> |
|
<b>{</b> |
|
cout <font color='#5555FF'><</font><font color='#5555FF'><</font> e.<font color='#BB00BB'>what</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'><</font><font color='#5555FF'><</font> endl; |
|
<b>}</b> |
|
|
|
|
|
</pre></body></html> |