DBSCAN 是一种基于密度的空间聚类算法。与 KMeans 算法不同,它不需要事先指定聚类的数量,而是根据数据本身推断聚类的数目,并且能够发现任意形状的聚类。
% Start from an empty workspace and a clean command window.
clear all;
clc;

%% Load the data set
% data = load('testData.txt');
data = textread('F:\DATA\ 二维离群点检测.txt');

%% DBSCAN parameters: Eps and MinPts
MinPts = 5;
Eps = epsilon(data, MinPts);      % radius comes from the external helper epsilon

%% Working arrays
m = size(data, 1);                % number of samples (rows of data)
x = [(1:m).' data];               % column 1 holds each row's index, the rest is the data
[m, n] = size(x);                 % recompute the size; n now includes the index column
number = 1;                       % label that the next cluster will receive
dis = calDistance(x(:, 2:n));     % presumably the pairwise distance matrix - confirm in calDistance
dealed = zeros(m, 1);             % processed flag per point; 0 means not processed yet
types = zeros(1, m);              % point type: 1 = core, 0 = border, -1 = noise
%% Assign every point to a cluster or mark it as noise
% Reads from the workspace: m (point count), dis (distance matrix, row i holds
% the distances from point i), Eps, MinPts, number (next cluster label), and
% the zero-initialised arrays types and dealed.
% Writes: class (cluster label per point, -1 = noise), types (1 = core,
% 0 = border, -1 = noise), dealed, number.
%
% A point counts as a core point when its Eps-neighbourhood, which includes
% the point itself, holds at least MinPts+1 points.
%
% NOTE: the variable name `class` shadows the MATLAB builtin of the same name.
% It is kept because the plotting code that follows this section reads it.
class = zeros(1, m);   % preallocate; 0 means "not assigned to any cluster yet"
for i = 1:m
    % Skip points already claimed by a cluster or already flagged as noise
    if dealed(i) ~= 0
        continue;
    end
    ind = find(dis(i,:) <= Eps);   % all points within radius Eps of point i
    if numel(ind) >= MinPts+1
        % Core point: open a new cluster and grow it
        types(i) = 1;
        class(i) = number;
        dealed(i) = 1;
        % Only unclaimed neighbours join; points that already belong to an
        % earlier cluster keep their label instead of being overwritten.
        queue = ind(dealed(ind) == 0);
        class(queue) = number;
        dealed(queue) = 1;
        while ~isempty(queue)
            p = queue(1);
            queue(1) = [];
            ind_1 = find(dis(p,:) <= Eps);
            if numel(ind_1) >= MinPts+1
                % Only core points propagate the cluster. Letting border
                % points pull in their own neighbours would chain clusters
                % together through non-core points.
                types(p) = 1;
                fresh = ind_1(dealed(ind_1) == 0);
                class(fresh) = number;
                dealed(fresh) = 1;
                queue = [queue fresh];
            else
                % Border point: belongs to the cluster but does not expand it
                types(p) = 0;
            end
        end
        number = number + 1;
    elseif numel(ind) > 1
        % Has neighbours but is not core: provisionally unassigned. It stays
        % unprocessed so that a later cluster expansion can still absorb it.
        types(i) = 0;
        class(i) = 0;
    else
        % No neighbour other than itself: noise
        types(i) = -1;
        class(i) = -1;
        dealed(i) = 1;
    end
end
% Finally, every point that no cluster absorbed becomes noise
ind_2 = find(class == 0);
class(ind_2) = -1;
types(ind_2) = -1;
%% Draw the final clustering
% Noise points (class == -1) are always red dots. For every other point the
% colour is chosen by cluster label (1 = blue, 2 = green, 3 = cyan, anything
% else = black) and the marker by point type ('+' for core points, '.' otherwise).
hold on
for i = 1:m
    label = class(i);
    if label == -1
        style = '.r';
    else
        switch label
            case 1
                colour = 'b';
            case 2
                colour = 'g';
            case 3
                colour = 'c';
            otherwise
                colour = 'k';
        end
        if types(i) == 1
            marker = '+';
        else
            marker = '.';
        end
        style = [marker colour];
    end
    plot(data(i,1), data(i,2), style);
end
hold off