DBSCAN 是一种基于密度的空间聚类算法。与 KMeans 算法不同,它不需要预先指定聚类的数量,而是根据数据本身的密度分布推断出聚类的数目,并且可以发现任意形状的聚类。
clear all;clc;
%% DBSCAN clustering script
% Loads a point set, labels every point as core / border / noise, groups
% density-connected points into numbered clusters and plots the result.
%
% Workspace outputs:
%   class(1,m)  cluster label per point: -1 = noise, k >= 1 = cluster k
%   types(1,m)  point type per point:     1 = core, 0 = border, -1 = noise
%
% External helpers (defined outside this file):
%   epsilon(data, MinPts)  - supplies the neighbourhood radius Eps
%   calDistance(data)      - supplies the distance table used as dis(i,:)

%% Load the data set
% data = load('testData.txt');
data = textread('F:\DATA\二维离群点检测.txt');

%% Parameters Eps and MinPts
MinPts = 5;
Eps = epsilon(data, MinPts);

m = size(data, 1);          % number of points

types  = zeros(1, m);       % 1 = core point, 0 = border point, -1 = noise point
% NOTE: the name "class" shadows the MATLAB built-in; it is kept so that the
% variables this script leaves in the workspace are unchanged. Preallocated
% so that 0 consistently means "not assigned to any cluster yet".
class  = zeros(1, m);
dealed = zeros(m, 1);       % 1 once a point has been processed, 0 otherwise

% NOTE(review): calDistance is assumed to return an m-by-m matrix of pairwise
% distances (row i = distances from point i to every point) - confirm.
dis = calDistance(data);
number = 1;                 % label that the next discovered cluster receives

%% Process every point
for i = 1:m
    % Skip points that were already handled (noise or absorbed by a cluster)
    if dealed(i) ~= 0
        continue;
    end

    ind = find(dis(i, :) <= Eps);   % all points within radius Eps of point i
    nNeighbors = length(ind);       % normally counts point i itself as well

    if nNeighbors == 1
        % Noise point: nothing but the point itself lies within Eps
        types(i) = -1;
        class(i) = -1;
        dealed(i) = 1;
    elseif nNeighbors < MinPts + 1
        % Not dense enough to be a core point. It is left unassigned and
        % unprocessed so that a cluster found later can still absorb it as
        % a border point; whatever stays unassigned becomes noise below.
        types(i) = 0;
        class(i) = 0;
    else
        % Core point (key step): start a new cluster and grow it over every
        % point that is density-reachable from this seed.
        class(i) = number;
        dealed(i) = 1;
        queue = i;                  % points of this cluster still to examine

        while ~isempty(queue)
            p = queue(1);
            queue(1) = [];
            ind_1 = find(dis(p, :) <= Eps);   % Eps-neighbourhood of p

            if length(ind_1) >= MinPts + 1
                % p is a core point, so its neighbourhood joins the cluster.
                % Only unprocessed neighbours are taken: points that already
                % belong to an earlier cluster keep their label.
                types(p) = 1;
                isNew = (dealed(ind_1) == 0);
                fresh = ind_1(isNew(:)');     % force a row mask for a row index list
                class(fresh) = number;
                dealed(fresh) = 1;
                queue = [queue fresh];
            else
                % p is a border point: it belongs to the cluster but must
                % not extend it. Expanding from border points would chain
                % separate clusters together through sparse bridge points.
                types(p) = 0;
            end
        end

        number = number + 1;
    end
end

% Finally, every point that never joined a cluster is noise
ind_2 = find(class == 0);
class(ind_2) = -1;
types(ind_2) = -1;

%% Plot the final clustering
% Colours: noise = red, cluster 1 = blue, 2 = green, 3 = cyan, others = black.
% Markers: '+' for core points, '.' for everything else.
hold on
labels = unique(class);
for k = 1:length(labels)
    c = labels(k);
    members = (class == c);

    if c == -1
        plot(data(members, 1), data(members, 2), '.r');
        continue;
    end

    switch c
        case 1
            color = 'b';
        case 2
            color = 'g';
        case 3
            color = 'c';
        otherwise
            color = 'k';
    end

    isCore  = members & (types == 1);
    isOther = members & (types ~= 1);
    if any(isCore)
        plot(data(isCore, 1), data(isCore, 2), ['+' color]);
    end
    if any(isOther)
        plot(data(isOther, 1), data(isOther, 2), ['.' color]);
    end
end
hold off