% Copyright (c) 2007 Antti Ukkonen % % Permission to use, copy, modify, and distribute this software for any % purpose with or without fee is hereby granted, provided that the above % copyright notice and this permission notice appear in all copies. % % THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES % WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF % MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR % ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES % WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN % ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF % OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. % % This code was provided as supplementary material for the course % Machine Learning: Basic Principles given at Helsinki University of % Technology. It probably shouldn't be used AS IS for any other % purpose than studying k-means and Matlab. % % Known bug(s): % 1) The algorithm will crash if some centroid is not assigned any % points during iteration. function [c, err] = simple_kmeans( X, k ); [n,dim] = size(X); [c, err_old] = init_random( X, k ); err_new = 0; round = 1; while err_old > err_new err_old = err_new; centroids = update_centroids( c, k, X ); [c, err_new] = update_clusters( centroids, X ); fprintf(1, 'simple_kmeans, round %d, error = %f\n', round, err_new); round = round + 1; end err = err_new; function [c, err] = init_random( X, k ); rp = randperm(size(X,1)); centroids = X(rp(1:k),:); [c, err] = update_clusters( centroids, X ); function [c, err] = init_maxdist( X, k ); d = squareform(pdist(X, 'euclidean')); rp = randperm(size(X,1)); ci(1) = rp(1); d(:,ci(1)) = 0; [junk, ci(2)] = max( d(ci(1),:) ); d(:,ci(2)) = 0; for i=3:k [junk, ci(i)] = max( sum( d(ci,:) ) ); d(:, ci(i)) = 0; end centroids = X(ci,:); [c, err] = update_clusters( centroids, X ); function [c, err] = update_clusters( centroids, X ); for i=1:size(X,1) for j=1:size(centroids,1) centroid = centroids(j,:); d( j ) = sum( (X(i,:) - centroid ).^2 ); end [err(i), c(i)] = min( d ); end err = sum(err); function centroids = update_centroids( c, k, X ); for i=1:k centroids(i,:) = mean(X(c==i,:)); end