My Project
kmeans.hh
Go to the documentation of this file.
1 /* -*- mia-c++ -*-
2  *
3  * This file is part of MIA - a toolbox for medical image analysis
4  * Copyright (c) Leipzig, Madrid 1999-2017 Gert Wollny
5  *
6  * MIA is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with MIA; if not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 #ifndef __mia_core_kmeans_hh
22 #define __mia_core_kmeans_hh
23 #include <vector>
24 #include <numeric>
25 #include <cmath>
26 #include <stdexcept>
27 #include <iomanip>
28 #include <limits>
29 #include <mia/core/defines.hh>
30 #include <mia/core/errormacro.hh>
31 #include <mia/core/msgstream.hh>
32 #include <boost/concept/requires.hpp>
33 #include <boost/concept_check.hpp>
34 
36 
37 int EXPORT_CORE kmeans_get_closest_clustercenter(const std::vector<double>& classes, size_t l, double value);
38 
39 
40 template <typename InputIterator, typename OutputIterator>
41 bool kmeans_step(InputIterator ibegin, InputIterator iend, OutputIterator obegin,
42  std::vector<double>& classes, size_t l, int& biggest_class )
43 {
44  cvdebug() << "kmeans enter: ";
45 
46  for (size_t i = 0; i <= l; ++i )
47  cverb << std::setw(8) << classes[i] << " ";
48 
49  cverb << "\n";
50  biggest_class = -1;
51  const double convLimit = 0.005; // currently fixed
52  std::vector<double> sums(classes.size());
53  std::vector<size_t> count(classes.size());
54  bool conv = false;
55  int iter = 50;
56 
57  while ( iter-- && !conv) {
58  sort(classes.begin(), classes.end());
59  // assign closest cluster center
60  OutputIterator ob = obegin;
61 
62  for (InputIterator b = ibegin; b != iend; ++b, ++ob) {
63  *ob = kmeans_get_closest_clustercenter(classes, l, *b);
64  ++count[*ob];
65  sums[*ob] += *b;
66  };
67 
68  // recompute cluster centers
69  conv = true;
70 
71  size_t max_count = 0;
72 
73  for (size_t i = 0; i <= l; i++) {
74  if (count[i]) {
75  double a = sums[i] / count[i];
76 
77  if (a && fabs ((a - classes[i]) / a) > convLimit)
78  conv = false;
79 
80  classes[i] = a;
81 
82  if (max_count < count[i]) {
83  max_count = count[i];
84  biggest_class = i;
85  }
86  } else { // if a class is empty move it closer to neighbour
87  if (i == 0)
88  classes[i] = (classes[i] + classes[i + 1]) / 2.0;
89  else
90  classes[i] = (classes[i] + classes[i - 1]) / 2.0;
91 
92  conv = false;
93  }
94 
95  sums[i] = 0;
96  count[i] = 0;
97  };
98  };
99 
100  cvinfo() << "kmeans: " << l + 1 << " classes, " << 50 - iter << " iterations";
101 
102  for (size_t i = 0; i <= l; ++i )
103  cverb << std::setw(8) << classes[i] << " ";
104 
105  cverb << "\n";
106  return conv;
107 }
108 
109 
110 template <typename InputIterator, typename OutputIterator>
111 bool kmeans_step_with_fixed_centers(InputIterator ibegin, InputIterator iend, OutputIterator obegin,
112  std::vector<double>& classes, const std::vector<bool>& fixed_center,
113  size_t l, int& biggest_class )
114 {
115  cvdebug() << "kmeans enter: ";
116 
117  for (size_t i = 0; i <= l; ++i )
118  cverb << std::setw(8) << classes[i] << " ";
119 
120  cverb << "\n";
121  biggest_class = -1;
122  const double convLimit = 0.005; // currently fixed
123  std::vector<double> sums(classes.size());
124  std::vector<size_t> count(classes.size());
125  bool conv = false;
126  int iter = 50;
127 
128  while ( iter-- && !conv) {
129  sort(classes.begin(), classes.end());
130  // assign closest cluster center
131  OutputIterator ob = obegin;
132 
133  for (InputIterator b = ibegin; b != iend; ++b, ++ob) {
134  *ob = kmeans_get_closest_clustercenter(classes, l, *b);
135  ++count[*ob];
136  sums[*ob] += *b;
137  };
138 
139  // recompute cluster centers
140  conv = true;
141 
142  size_t max_count = 0;
143 
144  for (size_t i = 0; i <= l; i++) {
145  if (fixed_center[i])
146  continue;
147 
148  if (count[i]) {
149  double a = sums[i] / count[i];
150 
151  if (a && fabs ((a - classes[i]) / a) > convLimit)
152  conv = false;
153 
154  classes[i] = a;
155 
156  if (max_count < count[i]) {
157  max_count = count[i];
158  biggest_class = i;
159  }
160  } else { // if a class is empty move it closer to neighbour
161  if (i == 0)
162  classes[i] = (classes[i] + classes[i + 1]) / 2.0;
163  else
164  classes[i] = (classes[i] + classes[i - 1]) / 2.0;
165 
166  conv = false;
167  }
168 
169  sums[i] = 0;
170  count[i] = 0;
171  };
172  };
173 
174  cvinfo() << "kmeans: " << l + 1 << " classes, " << 50 - iter << " iterations";
175 
176  for (size_t i = 0; i <= l; ++i )
177  cverb << std::setw(8) << classes[i] << " ";
178 
179  cverb << "\n";
180  return conv;
181 }
182 
183 
200 template <typename InputIterator, typename OutputIterator>
201 BOOST_CONCEPT_REQUIRES( ((::boost::ForwardIterator<InputIterator>))
202  ((::boost::Mutable_ForwardIterator<OutputIterator>)),
203  (void)
204  )
205 kmeans(InputIterator ibegin, InputIterator iend, OutputIterator obegin,
206  std::vector<double>& classes)
207 {
208  if (classes.size() < 2)
209  throw create_exception<std::invalid_argument>("kmeans: requested ", classes.size(),
210  "class(es), required are at least two");
211 
212  const size_t nclusters = classes.size();
213  const double size = std::distance(ibegin, iend);
214 
215  if ( size < nclusters )
216  throw create_exception<std::invalid_argument>("kmeans: insufficient input: want ", nclusters,
217  " classes, but git only ", size, " input elements");
218 
219  double sum = std::accumulate(ibegin, iend, 0.0);
220  // simple initialization splitting at the mean
221  classes[0] = sum / (size - 1);
222  classes[1] = sum / (size + 1);
223  // first run calles directly
224  int biggest_class = 0;
225  // coverity is completely off here, the 1UL is actually a class index
226  // and has nothing to do with the size of the type pointed to by ibegin
227  //
228  // coverity[sizeof_mismatch]
229  kmeans_step(ibegin, iend, obegin, classes, 1, biggest_class);
230 
231  // further clustering always splits biggest class
232  for (size_t l = 2; l < nclusters; l++) {
233  const size_t pos = biggest_class > 0 ? biggest_class - 1 : biggest_class + 1;
234  classes[l] = 0.5 * (classes[biggest_class] + classes[pos]);
235  kmeans_step(ibegin, iend, obegin, classes, l, biggest_class);
236  };
237 
238  // some post iteration until centers no longer change
239  for (size_t l = 1; l < 3; l++) {
240  if (kmeans_step(ibegin, iend, obegin, classes, nclusters - 1, biggest_class))
241  break;
242  }
243 }
244 
246 
247 #endif
double fabs(const T3DVector< T > &t)
A way to get the norm of a T3DVector using faba syntax.
Definition: 3d/vector.hh:396
#define NS_MIA_BEGIN
conveniance define to start the mia namespace
Definition: defines.hh:33
#define EXPORT_CORE
Macro to manage Visual C++ style dllimport/dllexport.
Definition: defines.hh:101
#define NS_MIA_END
conveniance define to end the mia namespace
Definition: defines.hh:36
static F::result_type accumulate(F &f, const B &data)
Definition: core/filter.hh:371
vstream & cvinfo()
informal output that may be of interest to understand problems with a program and are of higher prior...
Definition: msgstream.hh:262
#define cverb
define a shortcut to the raw output stream
Definition: msgstream.hh:341
void kmeans(InputIterator ibegin, InputIterator iend, OutputIterator obegin, std::vector< double > &classes)
Definition: kmeans.hh:205
bool kmeans_step_with_fixed_centers(InputIterator ibegin, InputIterator iend, OutputIterator obegin, std::vector< double > &classes, const std::vector< bool > &fixed_center, size_t l, int &biggest_class)
Definition: kmeans.hh:111
bool kmeans_step(InputIterator ibegin, InputIterator iend, OutputIterator obegin, std::vector< double > &classes, size_t l, int &biggest_class)
Definition: kmeans.hh:41
int EXPORT_CORE kmeans_get_closest_clustercenter(const std::vector< double > &classes, size_t l, double value)
CDebugSink & cvdebug()
Definition: msgstream.hh:226