I'm working with k-means in MATLAB. I am trying to create the plot/graph, but my data has three dimensional array. Here is my k-means code:
clc
clear all
close all
load cobat.txt; % read the file
k=input('Enter a number: '); % determine the number of cluster
isRand=0; % 0 -> sequeantial initialization
% 1 -> random initialization
[maxRow, maxCol]=size(cobat);
if maxRow<=k,
y=[m, 1:maxRow];
elseif k>7
h=msgbox('cant more than 7');
else
% initial value of centroid
if isRand,
p = randperm(size(cobat,1)); % random initialization
for i=1:k
c(i,:)=cobat(p(i),:);
end
else
for i=1:k
c(i,:)=cobat(i,:); % sequential initialization
end
end
temp=zeros(maxRow,1); % initialize as zero vector
u=0;
while 1,
d=DistMatrix3(cobat,c); % calculate the distance
[z,g]=min(d,[],2); % set the matrix g group
if g==temp, % if the iteration doesn't change anymore
break; % stop the iteration
else
temp=g; % copy the matrix to the temporary variable
end
for i=1:k
f=find(g==i);
if f % calculate the new centroid
c(i,:)=mean(cobat(find(g==i),:),1);
end
end
c
[B,index] = sortrows( c ); % sort the centroids
g = index(g); % arrange the labels based on centroids
end
y=[cobat,g]
hold off;
%This plot is actually placed in plot 3D code (last line), but I put it into here, because I think this is the plotting line
f = PlotClusters(cobat,g,y,Colors) %Here is the error
if Dimensions==2
for i=1:NumOfDataPoints %plot data points
plot(cobat(i,1),cobat(i,2),'.','Color',Colors(g(i),:))
hold on
end
for i=1:NumOfCenters %plot the centers
plot(y(i,1),y(i,2),'s','Color',Colors(i,:))
end
else
for i=1:NumOfDataPoints %plot data points
plot3(cobat(i,1),cobat(i,2),cobat(i,3),'.','Color',Colors(g(i),:))
hold on
end
for i=1:NumOfCenters %plot the centers
plot3(y(i,1),y(i,2),y(i,3),'s','Color',Colors(i,:))
end
end
end
And here is the plot 3D code:
%This function plots clustering data, for example the one provided by
%kmeans. To be able to plot, the number of dimensions has to be either 2 or
%3.
%Inputs:
% Data - an m-by-d matrix, where m is the number of data points to
% cluster and d is the number of dimensions. In my code, it is cobat
% IDX - an m-by-1 indices vector, where each element gives the
% cluster to which the corresponding data point in Data belongs. In my file, it is 'g'
% Centers y - an optional c-by-d matrix, where c is the number of
% clusters and d is the dimensions of the problem. The matrix
% gives the location of the cluster centers. If this is not
% given, the centers will be calculated. In my file, I think, it is 'y'
% Colors - an optional color scheme generated by hsv. If this is not
% given, a color scheme will be generated.
%
function f = PlotClusters(cobat,g,y,Colors)
%Checking inputs
switch nargin
case 1 %Not enough inputs
error('Clustering data is required to plot clusters. Usage: PlotClusters(Data,IDX,Centers,Colors)')
case 2 %Need to calculate cluster centers and color scheme
[NumOfDataPoints,Dimensions]=size(cobat);
if Dimensions~=2 && Dimensions~=3 %Check ability to plot
error('It is only possible to plot in 2 or 3 dimensions.')
end
if length(g)~=NumOfDataPoints %Check that each data point is assigned to a cluster
error('The number of data points in Data must be equal to the number of indices in IDX.')
end
NumOfClusters=max(g);
Centers=zeros(NumOfClusters,Dimensions);
NumOfCenters=NumOfClusters;
NumOfPointsInCluster=zeros(NumOfClusters,1);
for i=1:NumOfDataPoints
Centers(g(i),:)=y(g(i),:)+cobat(i,:);
NumOfPointsInCluster(g(i))=NumOfPointsInCluster(g(i))+1;
end
for i=1:NumOfClusters
y(i,:)=y(i,:)/NumOfPointsInCluster(i);
end
Colors=hsv(NumOfClusters);
case 3 %Need to calculate color scheme
[NumOfDataPoints,Dimensions]=size(cobat);
if Dimensions~=2 && Dimensions~=3 %Check ability to plot
error('It is only possible to plot in 2 or 3 dimensions.')
end
if length(g)~=NumOfDataPoints %Check that each data point is assigned to a cluster
error('The number of data points in Data must be equal to the number of indices in IDX.')
end
NumOfClusters=max(g);
[NumOfCenters,Dims]=size(y);
if Dims~=Dimensions
error('The number of dimensions in Data should be equal to the number of dimensions in Centers')
end
if NumOfCenters<NumOfClusters %Check that each cluster has a center
error('The number of cluster centers is smaller than the number of clusters.')
elseif NumOfCenters>NumOfClusters %Check that each cluster has a center
disp('There are more centers than clusters, all will be plotted')
end
Colors=hsv(NumOfCenters);
case 4 %All data is given just need to check consistency
[NumOfDataPoints,Dimensions]=size(cobat);
if Dimensions~=2 && Dimensions~=3 %Check ability to plot
error('It is only possible to plot in 2 or 3 dimensions.')
end
if length(g)~=NumOfDataPoints %Check that each data point is assigned to a cluster
error('The number of data points in Data must be equal to the number of indices in IDX.')
end
NumOfClusters=max(g);
[NumOfCenters,Dims]=size(y);
if Dims~=Dimensions
error('The number of dimensions in Data should be equal to the number of dimensions in Centers')
end
if NumOfCenters<NumOfClusters %Check that each cluster has a center
error('The number of cluster centers is smaller than the number of clusters.')
elseif NumOfCenters>NumOfClusters %Check that each cluster has a center
disp('There are more centers than clusters, all will be plotted')
end
[NumOfColors,RGB]=size(Colors);
if RGB~=3 || NumOfColors<NumOfCenters
error('Colors should have at least the same number of rows as number of clusters and 3 columns')
end
end
%Data is ready. Now plotting
end
Here is the error:
??? Undefined function or variable 'Colors'.
Error in ==> clustere at 69
f = PlotClusters(cobat,g,y,Colors)
Am I wrong call the function like that? What should I do? Your help will be appreciated a lot.
Your code is very messy, and unnecessarily long..
Here is smaller example that does the same thing. You'll need the Statistics toolbox to run it (for the
kmeans
function and Iris dataset):You could simply go for
scatter()
:As you can see from the image, you differentiate colors, size of the clusters. FOr more details check out the examples in the documentation.