R 聚类分析

浏览: 1519

下面使用薛毅书中的习题 8.3 和 8.4 来熟悉 R 的聚类分析。

QQ图片20161011113710.png

# Exercise 8.3 ----
# Cluster 30 regions on three numeric indicators (DXBZ, CZBZ, WMBZ) with four
# hierarchical linkage methods and with k-means, cutting into 4 groups each.

# One value per region, in the same order as the row names assigned below.
DXBZ <- c(
  9.3, 4.67, 0.96, 1.38, 1.48, 2.6, 2.15, 2.14, 6.53, 1.47, 1.17, 0.88,
  1.23, 0.99, 0.98, 0.85, 1.57, 1.14, 1.34, 0.79, 1.24, 0.96, 0.78, 0.81,
  0.57, 1.67, 1.1, 1.49, 1.61, 1.85
)
CZBZ <- c(
  30.55, 29.38, 24.69, 29.24, 25.47, 32.32, 26.31, 28.46, 31.59, 26.43,
  23.74, 19.97, 16.87, 18.84, 25.18, 26.55, 23.16, 22.57, 23.04, 19.14, 22.53,
  21.65, 14.65, 13.85, 3.85, 24.36, 16.85, 17.76, 20.27, 20.66
)
WMBZ <- c(
  8.7, 8.92, 15.21, 11.3, 15.39, 8.81, 10.49, 10.87, 11.04, 17.23, 17.46,
  24.43, 15.63, 16.22, 16.87, 16.15, 15.79, 12.1, 10.45, 10.61, 13.97, 16.24,
  24.27, 25.44, 44.43, 17.62, 27.93, 27.7, 22.06, 12.75
)

# 30 x 3 numeric matrix; the row names become the dendrogram leaf labels.
data8.3 <- cbind(DXBZ, CZBZ, WMBZ)
rownames(data8.3) <- c(
  "北京", "天津", "河北", "山西", "内蒙古", "辽宁", "吉林", "黑龙江",
  "上海", "江苏", "浙江", "安徽", "福建", "江西", "山东", "河南", "湖北",
  "湖南", "广东", "广西", "海南", "四川", "贵州", "云南", "西藏", "陕西",
  "甘肃", "青海", "宁夏", "新疆"
)

# Pairwise Euclidean distances between regions.
d <- dist(data8.3, method = "euclidean")

hc1 <- hclust(d, method = "complete") # complete (farthest-neighbour) linkage
hc2 <- hclust(d, method = "average")  # average linkage
# Centroid linkage is meant to be used with squared Euclidean distances
# (see the hclust documentation), so square them here.
hc3 <- hclust(d^2, method = "centroid")
# "ward.D2" implements Ward's criterion on unsquared Euclidean distances;
# "ward.D" would only do so if it were given squared distances.
hc4 <- hclust(d, method = "ward.D2")

# One dendrogram per plot, each with the 4-cluster cut outlined in red.
par(mfrow = c(1, 1))
plot(hc1, hang = -1)
rect.hclust(hc1, k = 4, border = "red")
plot(hc2, hang = -1)
rect.hclust(hc2, k = 4, border = "red")
plot(hc3, hang = -1)
rect.hclust(hc3, k = 4, border = "red")
plot(hc4, hang = -1)
rect.hclust(hc4, k = 4, border = "red")

# Cluster membership of every region for each method, ordered by cluster id.
# print() is explicit so the output also appears when the file is source()d.
clgroup1 <- cutree(hc1, k = 4)
print(sort(clgroup1))
clgroup2 <- cutree(hc2, k = 4)
print(sort(clgroup2))
clgroup3 <- cutree(hc3, k = 4)
print(sort(clgroup3))
clgroup4 <- cutree(hc4, k = 4)
print(sort(clgroup4))

# Dynamic clustering (k-means) with 4 centres.
# k-means starts from random centres: fix the seed so the grouping is
# reproducible and use several restarts to avoid a poor local optimum.
set.seed(83)
hc5 <- kmeans(data8.3, centers = 4, nstart = 20)
print(sort(hc5$cluster))


# Exercise 8.4 ----
# Cluster the variables (columns) of the applicant data, using the
# correlation coefficient as the similarity measure, into 5 groups.

# NOTE(review): absolute Windows path from the author's machine; point it at
# your own copy of applicant.csv before running.
# Assumes every column is a numeric score (cor() requires numeric input);
# TODO confirm the CSV carries no ID column.
data8.4 <- read.csv("D:\\learn\\bigdata\\dataguru\\薛毅书源程序\\Chapter03\\applicant.csv")

# Use 1 - r directly as the dissimilarity between variables. as.dist() takes
# the matrix entries as the distances themselves; dist() would instead compute
# Euclidean distances between the rows of that matrix, which is not the
# correlation-based measure this exercise uses.
d <- as.dist(1 - cor(data8.4))

# 1 - r is proportional to the squared Euclidean distance between
# standardised variables, so centroid and "ward.D" linkage take it unchanged.
hc1 <- hclust(d, method = "complete") # complete (farthest-neighbour) linkage
hc2 <- hclust(d, method = "average")  # average linkage
hc3 <- hclust(d, method = "centroid") # centroid linkage
hc4 <- hclust(d, method = "ward.D")   # Ward's method

# (1) Draw the dendrograms, each with the 5-cluster cut outlined in green.
plot(hc1, hang = -1)
rect.hclust(hc1, k = 5, border = "green")
plot(hc2, hang = -1)
rect.hclust(hc2, k = 5, border = "green")
plot(hc3, hang = -1)
rect.hclust(hc3, k = 5, border = "green")
plot(hc4, hang = -1)
rect.hclust(hc4, k = 5, border = "green")

# (2) Report which cluster each variable falls into, ordered by cluster id.
# print() is explicit so the output also appears when the file is source()d.
clgroup1 <- cutree(hc1, k = 5)
print(sort(clgroup1))
clgroup2 <- cutree(hc2, k = 5)
print(sort(clgroup2))
clgroup3 <- cutree(hc3, k = 5)
print(sort(clgroup3))
clgroup4 <- cutree(hc4, k = 5)
print(sort(clgroup4))

推荐 0
本文由 策马行空 创作,采用 知识共享署名-相同方式共享 3.0 中国大陆许可协议 进行许可。
转载、引用前需联系作者,并署名作者且注明文章出处。
本站文章版权归原作者及原出处所有。内容为作者个人观点，并不代表本站赞同其观点和对其真实性负责。本站是一个个人学习交流的平台，并不用于任何商业目的，如果有任何问题，请及时联系我们，我们将根据著作权人的要求，立即更正或者删除有关内容。本站拥有对此声明的最终解释权。

0 个评论

要回复文章请先登录注册