K-means

From Knowledge Discovery

Jump to: navigation, search

This is (I think) similar to MATLAB's kmeans.m in the Statistics Toolbox.

%% Written by Savi -- please correct me if I'm wrong %%

function [clusterindices, centroids, loss_train] = kmeans3(x, k, startcentroids)
%KMEANS3  Lloyd-style k-means clustering with squared Euclidean distance.
%
%   [clusterindices, centroids, loss_train] = kmeans3(x, k, startcentroids)
%
%   Inputs:
%     x              - m-by-n data matrix, one observation per row.
%     k              - number of clusters.
%     startcentroids - k-by-n matrix of initial centroid positions.
%
%   Outputs:
%     clusterindices - m-by-1 vector; entry i is the index (1..k) of the
%                      centroid that point i was assigned to in the last
%                      assignment step.
%     centroids      - k-by-n matrix of centroids after the last update step.
%     loss_train     - sum over all points of the squared Euclidean distance
%                      to the final centroid of their assigned cluster.
%
%   The loop always runs at least three iterations and then stops as soon as
%   the total point-to-centroid distance no longer strictly decreases, i.e.
%   it descends to a local optimum of the k-means objective.

%% initialize

  m = size(x, 1);
  centroids = startcentroids;
  clusterindices = ones(m, 1);
  distance = zeros(m, 1);   % per-point squared distance to nearest centroid
  prevdist = Inf;           % total distance from the previous iteration
  count = 0;                % iteration counter (forces a minimum of 3 passes)

  while (prevdist > sum(distance) || count < 3)
    prevdist = sum(distance);

    %% assignment step: squared Euclidean distance to every centroid
    dist = zeros(m, k);
    for j = 1:k
      delta = x - repmat(centroids(j, :), m, 1);
      dist(:, j) = sum(delta .^ 2, 2);
    end
    % nearest centroid per point; ties go to the lowest centroid index
    [distance, clusterindices] = min(dist, [], 2);

    %% update step: move each centroid to the mean of its members
    for j = 1:k
      members = x(clusterindices == j, :);
      if ~isempty(members)
        % dim=1 is explicit so a single-member cluster is not averaged
        % across its columns into a scalar
        centroids(j, :) = mean(members, 1);
      end
      % an empty cluster keeps its previous centroid instead of becoming NaN
    end

    count = count + 1;
  end

  %% training error: total squared distance to the final centroids
  residual = x - centroids(clusterindices, :);
  loss_train = sum(residual(:) .^ 2);
Personal tools