DBSCAN聚类结果绘图:按簇分别显示图例
效果:
代码:
# coding:utf8
import sys
reload(sys)
sys.setdefaultencoding("utf8")
import math
import numpy as np
import pylab as pl
import matplotlib.pyplot as plt
import pandas as pd
import string
import os
# Load the comma-separated point file; every non-blank line becomes one record.
with open('E:\\pywork\\onlyJW\\d.txt', 'r') as f:
    data = f.readlines()
# Drop the trailing newline and split each record into its fields.
# Blank lines (e.g. a trailing empty line) are skipped so they never reach eval().
data1 = [line.replace('\n', '').split(',') for line in data if line.strip()]
# Turn the textual fields into Python values and freeze each record as a tuple,
# which makes the points hashable for the set operations done while clustering.
# NOTE(security): eval() executes arbitrary expressions found in the file.
# Only use this with trusted input; for plain numeric data float() or
# ast.literal_eval() would be a safe replacement.
data2 = [tuple(eval(field) for field in fields) for fields in data1]
# Euclidean distance between two points; a and b are tuples.
def dist(a, b):
    """Return the Euclidean distance between points a and b.

    Only the first two components (index 0 and 1) of each point are used;
    any further components are ignored.

    math.hypot is used instead of sqrt(pow(dx, 2) + pow(dy, 2)) so that very
    large coordinate differences do not raise OverflowError while squaring.
    """
    return math.hypot(a[0] - b[0], a[1] - b[1])
# DBSCAN clustering model.
def DBSCAN(D, e, Minpts):
    """Group the points of D into density-based clusters.

    Args:
        D: iterable of hashable points accepted by dist() (e.g. (x, y) tuples).
        e: neighbourhood radius; i is a neighbour of d when dist(d, i) <= e,
            so every point is a neighbour of itself.
        Minpts: minimum neighbourhood size for a point to be a core object.

    Returns:
        A list of clusters, each a list of points. A cluster holds a randomly
        chosen core object plus every point reachable from it through the
        neighbourhoods of core objects. Points reachable from no core object
        appear in no cluster.
    """
    points = list(D)  # materialise once so D may be any (even one-shot) iterable

    def region(center):
        # e-neighbourhood of center: a linear scan over every point.
        return [p for p in points if dist(center, p) <= e]

    # Core objects are found once; the expansion below then tests membership
    # instead of recomputing the neighbourhood size for every queued point.
    core_points = set(p for p in points if len(region(p)) >= Minpts)

    pending_cores = set(core_points)  # core objects not yet assigned to a cluster
    unvisited = set(points)           # points not yet assigned to a cluster
    C = []

    while pending_cores:
        # Seed the next cluster with a randomly chosen remaining core object.
        candidates = list(pending_cores)
        o = candidates[np.random.randint(0, len(candidates))]
        # Remove the seed itself. (Subtracting set(o) would subtract the seed's
        # coordinates and leave the seed in the unvisited set.)
        unvisited.discard(o)
        cluster = [o]
        queue = [o]
        while queue:
            q = queue.pop()
            if q in core_points:
                # Only core objects spread the cluster to their neighbours.
                reachable = unvisited.intersection(region(q))
                unvisited -= reachable
                queue.extend(reachable)
                cluster.extend(reachable)
        pending_cores.difference_update(cluster)
        C.append(cluster)
    # NOTE(review): SOURCE lost its indentation; this debug print is assumed to
    # run once after clustering. The output format matches `print 1,len(C),C`.
    print("%s %s %s" % (1, len(C), C))
    return C
# Plot the clusters, one colour and one legend entry per cluster.
def draw(C):
    """Scatter-plot every cluster in C and show the figure.

    Args:
        C: list of clusters; each cluster is a sequence of points whose
            index 0 is the x coordinate and index 1 the y coordinate.

    Each cluster's x and y coordinate lists are printed (prefixed with 2 and 3)
    before it is plotted. Colours repeat after eight clusters; legend labels
    never repeat.
    """
    # 'orange' replaces the original 'w': white markers cannot be seen on the
    # default white plot background.
    colValue = ['r', 'y', 'g', 'b', 'c', 'k', 'm', 'orange']
    # NOTE(review): 'Forth' is probably a typo for 'Fourth'; kept unchanged
    # because it is user-visible legend text.
    labels = ['First', 'Second', 'Third', 'Forth', 'Fifth']
    for i, cluster in enumerate(C):
        coo_X = [point[0] for point in cluster]  # x coordinates
        coo_Y = [point[1] for point in cluster]  # y coordinates
        # NOTE(review): SOURCE lost its indentation; these debug prints are
        # assumed to run once per cluster. Output matches `print 2,coo_X`.
        print("%s %s" % (2, coo_X))
        print("%s %s" % (3, coo_Y))
        # Beyond the named labels, fall back to a numbered label so that two
        # clusters never share a legend entry.
        if i < len(labels):
            label = labels[i]
        else:
            label = 'Cluster %d' % (i + 1)
        pl.scatter(coo_X, coo_Y, marker='.',
                   color=colValue[i % len(colValue)], label=label)
    pl.title('wulili')
    pl.legend()
    pl.show()
# Cluster the loaded points with radius 0.01 and a minimum neighbourhood size
# of 5, echo the resulting clusters, then plot them.
C1 = DBSCAN(data2, e=0.01, Minpts=5)
print(C1)
draw(C1)