// Copyright (C) 2012 Davis E. King (davis@dlib.net) | |
// License: Boost Software License See LICENSE.txt for the full license. | |
namespace dlib | |
{ | |
// ---------------------------------------------------------------------------------------- | |
unsigned long chinese_whispers ( | |
const std::vector<ordered_sample_pair>& edges, | |
std::vector<unsigned long>& labels, | |
const unsigned long num_iterations, | |
dlib::rand& rnd | |
); | |
/*! | |
requires | |
- is_ordered_by_index(edges) == true | |
ensures | |
- This function implements the graph clustering algorithm described in the | |
paper: Chinese Whispers - an Efficient Graph Clustering Algorithm and its | |
Application to Natural Language Processing Problems by Chris Biemann. | |
- Interprets edges as a directed graph. That is, it contains the edges on the | |
said graph and the ordered_sample_pair::distance() values define the edge | |
weights (larger values indicating a stronger edge connection between the | |
nodes). If an edge has a distance() value of infinity then it is considered | |
a "must link" edge. | |
- returns the number of clusters found. | |
- #labels.size() == max_index_plus_one(edges) | |
- for all valid i: | |
- #labels[i] == the cluster ID of the node with index i in the graph. | |
- 0 <= #labels[i] < the number of clusters found | |
(i.e. cluster IDs are assigned contiguously and start at 0) | |
- Duplicate edges are interpreted as if there had been just one edge with a | |
distance value equal to the sum of all the duplicate edge's distance values. | |
- The algorithm performs exactly num_iterations passes over the graph before | |
terminating. | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
unsigned long chinese_whispers ( | |
const std::vector<sample_pair>& edges, | |
std::vector<unsigned long>& labels, | |
const unsigned long num_iterations, | |
dlib::rand& rnd | |
); | |
/*! | |
ensures | |
- This function is identical to the above chinese_whispers() routine except | |
that it operates on a vector of sample_pair objects instead of | |
ordered_sample_pairs. Therefore, this is simply a convenience routine. In | |
particular, it is implemented by transforming the given edges into | |
ordered_sample_pairs and then calling the chinese_whispers() routine defined | |
above. | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
unsigned long chinese_whispers ( | |
const std::vector<ordered_sample_pair>& edges, | |
std::vector<unsigned long>& labels, | |
const unsigned long num_iterations = 100 | |
); | |
/*! | |
requires | |
- is_ordered_by_index(edges) == true | |
ensures | |
- performs: return chinese_whispers(edges, labels, num_iterations, rnd) | |
where rnd is a default initialized dlib::rand object. | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
unsigned long chinese_whispers ( | |
const std::vector<sample_pair>& edges, | |
std::vector<unsigned long>& labels, | |
const unsigned long num_iterations = 100 | |
); | |
/*! | |
ensures | |
- performs: return chinese_whispers(edges, labels, num_iterations, rnd) | |
where rnd is a default initialized dlib::rand object. | |
!*/ | |
// ---------------------------------------------------------------------------------------- | |
} | |