使用R语言对spss第3章顾客售后满意度问卷进行描述性分析

发表: 2017-05-23 浏览: 2123

R语言

读入spss数据

#将SPSS数据放入工作目录设置中

library(foreign)

>satis<-read.spss("aftersale satisfactory.sav",use.value.labels = TRUE,to.data.frame = TRUE)

#说明：读入时如果设置use.value.labels = TRUE，会把SPSS中带值标签的变量按标签转换为因子；如果设置为FALSE，则变量保持原始编码值、不转换为因子，因而需要在R中重新设置为因子或者数值型，否则无法使用相关函数

>str(satis)

'data.frame':	490 obs. of  10 variables:

 $ Gender: Factor w/ 2 levels "男性","女性": 1 2 2 2 2 2 1 2 2 1 ...

 $ Age   : Factor w/ 2 levels "18～39岁","40～60岁": 1 2 1 1 1 1 2 2 1 1 ...

 $ Q1    : atomic  5 7 10 9 10 10 10 6 5 8 ...

  ..- attr(*, "value.labels")= Named num  10 0

  .. ..- attr(*, "names")= chr  "肯定会推荐" "肯定不会推荐"

 $ Q9    : Factor w/ 6 levels "每天都来","每周5-6次",..: 5 4 4 6 4 4 4 4 3 5 ...

 $ Q10_1 : Factor w/ 13 levels "步行","公交车",..: 1 1 1 1 1 1 1 1 1 1 ...

 $ Q10_2 : Factor w/ 13 levels "步行","公交车",..: 13 5 4 4 13 5 2 2 2 11 ...

 $ Q10_3 : Factor w/ 13 levels "步行","公交车",..: 10 NA NA NA 4 NA NA 4 5 5 ...

 $ S2n   : num  50 28.5 50 50 50 50 28.5 28.5 50 50 ...

 $ Q9n   : num  0.6 1.5 1.5 0.1 1.5 1.5 1.5 1.5 3.5 0.6 ...

 $ S2    : num  28.5 28.5 28.5 28.5 28.5 28.5 28.5 28.5 28.5 28.5 ...

 - attr(*, "variable.labels")= Named chr  "S3 记录顾客的性别 (单选)" "S2 请问您的年龄是 (单选)" "Q1 推荐倾向" "Q9 过去3个月的购物频次" ...

  ..- attr(*, "names")= chr  "Gender" "Age" "Q1" "Q9" ...

 - attr(*, "codepage")= int 936

#解释：

#1、导入SPSS数据一共有490条记录，10个变量，其中最后3个变量对本次分析无作用，可以删除。

#2、Q1表示推荐意愿，导入后str()显示为atomic（表示这是一个带有value.labels属性的原子向量，并没有被转换为因子），因而用as.numeric()将其转换为普通的numeric；如果不转换则无法使用fix()

数据格式和值转换

# Convert Q1 to a plain numeric vector (as.numeric() drops the value.labels
# attribute that str() reported for this column).
satis$Q1 <- as.numeric(satis$Q1)

# Drop columns 8 to 10, which are not needed for this analysis.
satis <- satis[, -c(8:10)]

# Open the data editor to inspect the data. In the editor the Q9 label
# "每周3-4次" shows a leading space, which is removed below to avoid trouble
# later. Q9 is a factor, and assigning a value that is not one of its levels
# fails, so it is converted to character first.
fix(satis)

satis$Q9 <- as.character(satis$Q9)

satis$Q9[satis$Q9 == " 每周3-4次"] <- "每周3-4次"

# Age bands are mapped to numeric ages later on, which involves assignment,
# so the factor is converted to character as well.
# Fix: the original read customer_satisfy$Age, but no object of that name is
# created in this script; the data frame is satis.
satis$Age <- as.character(satis$Age)

缺失值查看

library(VIM)

# Plot the missing-value pattern of the survey data frame.
# Fix: the original passed customer_satisfy, which is never created in this
# script; the data frame read and cleaned above is satis.
aggr(satis, prop = FALSE, numbers = TRUE)

41]ZD8S3{YSO@HC2%Q`IF3D.png

#解释：

#只有多项选择题Q10其中2个子选项有缺失值，Q10_1无缺失值，Q10_2和Q10_3都有缺失值的样本数有56个，只有Q10_3有缺失值的样本数有283个。

变量重编码

#将Q9和 Age进行重编码，并形成新数值型变量

# Recode the Q9 visit-frequency labels and the Age bands into two new numeric
# columns, Q9n and Agen. Each label is looked up in a named vector; labels
# without an entry (and missing values) come out as NA.
# Statements inside the braces are separated by newlines, not by commas.
satis <- within(satis, {
  Q9n <- unname(c(
    "每天都来" = 7,
    "每周5-6次" = 5.5,
    "每周3-4次" = 3.5,
    "每周1-2次" = 1.5,
    "每月2-3次" = 0.6,
    "每月1次或以下" = 0.1
  )[as.character(Q9)])
  Agen <- unname(c(
    "18～39岁" = 28.5,
    "40～60岁" = 50
  )[as.character(Age)])
})

分类变量描述性统计分析

#分类变量Gender、Age、Q1、Q9的频数及百分比统计,table()函数既可以对因子变量进行统计也可以对数值型变量统计

# Frequency and percentage summary for one categorical vector.
#
# Args:
#   x: a vector accepted by table() (factor, character or numeric).
# Returns:
#   A named numeric vector: the counts of every category plus their total
#   (names prefixed "freq."), followed by the percentages rounded to two
#   decimals plus their total (names prefixed "prop.").
# Open question from the author: how to show freq and prop of each variable
# on separate rows.
descri <- function(x) {
  counts <- table(x)
  freq_with_total <- round(addmargins(counts), 0)
  pct_with_total <- round(addmargins(prop.table(counts)) * 100, 2)
  c(freq = freq_with_total, prop = pct_with_total)
}

# Run descri() on each categorical column.
# Fix: the separator between "Q1" and "Q9" was a full-width comma, which R
# rejects as a syntax error; it is now an ASCII comma.
# apply() coerces the selected columns to a character matrix first, so Q1 is
# tabulated by its formatted text values (hence names such as "freq. 0").
# Open question from the author: how to show freq and prop of each variable
# on separate rows.
apply(satis[, c("Gender", "Age", "Q1", "Q9")], 2, descri)

$Gender

freq.男性 freq.女性  freq.Sum prop.男性 prop.女性  prop.Sum 

   198.00    292.00    490.00     40.41     59.59    100.00 



$Age

freq.18～39岁 freq.40～60岁      freq.Sum prop.18～39岁 prop.40～60岁 

       337.00        153.00        490.00         68.78         31.22 

     prop.Sum 

       100.00 



$Q1

 freq. 0  freq. 1  freq. 2  freq. 3  freq. 4  freq. 5  freq. 6  freq. 7 

    6.00     2.00     7.00     5.00     4.00    51.00    36.00    53.00 

 freq. 8  freq. 9  freq.10 freq.Sum  prop. 0  prop. 1  prop. 2  prop. 3 

  142.00    48.00   136.00   490.00     1.22     0.41     1.43     1.02 

 prop. 4  prop. 5  prop. 6  prop. 7  prop. 8  prop. 9  prop.10 prop.Sum 

    0.82    10.41     7.35    10.82    28.98     9.80    27.76   100.00 



$Q9

     freq.每天都来 freq.每月1次或以下     freq.每月2-3次 

             18.00              38.00             119.00 

    freq.每周1-2次     freq.每周3-4次     freq.每周5-6次 

            230.00              70.00              15.00 

          freq.Sum      prop.每天都来 prop.每月1次或以下 

            490.00               3.67               7.76 

    prop.每月2-3次     prop.每周1-2次     prop.每周3-4次 

             24.29              46.94              14.29 

    prop.每周5-6次           prop.Sum 

              3.06             100.00

数值型变量的描述统计

# Descriptive statistics for one numeric vector.
#
# Args:
#   x: a numeric vector.
#   na.rm: if TRUE, missing values are dropped before anything is computed,
#     so n counts only the non-missing values. The default FALSE keeps the
#     original behaviour, where any NA makes max, min, mean and sd NA.
# Returns:
#   A named numeric vector with elements n, max, min, mean and sd.
#
# The results are built directly instead of being stored in locals named
# after the base functions they call. An explicit return() is not required:
# a function returns the value of its last expression.
stats <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  c(n = length(x), max = max(x), min = min(x), mean = mean(x), sd = sd(x))
}

>t(round(apply(satis[,c("Q1","Agen","Q9n")],2,stats),2))#数值型变量可以用round对结果取小数位，然后转置。

       n max  min  mean   sd

Q1   490  10  0.0  7.79 2.10

Agen 490  50 28.5 35.21 9.97

Q9n  490   7  0.1  1.78 1.56

多重响应变量的频率统计(频数、百分比、个案数占比)

>library(reshape)

# Keep only the three columns of the multiple-response question Q10.
Q10 <- satis[c("Q10_1", "Q10_2", "Q10_3")]

# Stack the three columns into long format: "variable" holds the original
# column name and "value" holds the chosen answer.
Q10melt <- melt(Q10, measure.vars = names(Q10))

>head(Q10melt)

  variable value

1    Q10_1  步行

2    Q10_1  步行

3    Q10_1  步行

4    Q10_1  步行

5    Q10_1  步行

6    Q10_1  步行

>str(Q10melt)

'data.frame':	1470 obs. of  2 variables:

 $ variable: Factor w/ 3 levels "Q10_1","Q10_2",..: 1 1 1 1 1 1 1 1 1 1 ...

 $ value   : Factor w/ 13 levels "步行","公交车",..: 1 1 1 1 1 1 1 1 1 1 ...

# Frequency summary for a stacked multiple-response variable.
#
# Args:
#   x: the stacked answers of all response columns (one long vector).
#   n_items: number of response columns that were stacked into x. The number
#     of cases is taken as length(x) / n_items. Defaults to 3, the value that
#     was previously hard-coded.
# Returns:
#   A named numeric vector with three groups, each ending in a "Sum" entry:
#   "freq."     counts per answer,
#   "prop."     percentage of all given answers,
#   "caseprop." count as a percentage of the number of cases.
multi_descri <- function(x, n_items = 3) {
  counts <- table(x)
  freq <- round(addmargins(counts), 0)
  prop <- round(addmargins(prop.table(counts)) * 100, 2)
  n_cases <- length(x) / n_items
  caseprop <- round(freq / n_cases * 100, 2)
  c(freq = freq, prop = prop, caseprop = caseprop)
}

# Summarise both columns of the melted data. lapply() always returns a list,
# which the `multi$value` access below relies on; sapply() only returned a
# list here because the two results have different lengths.
# Answer to the author's question: passing a single column such as
# Q10melt$value to sapply()/lapply() applies the function to every element of
# that column separately. To summarise one column, call
# multi_descri(Q10melt$value) directly.
multi <- lapply(Q10melt[, c("variable", "value")], multi_descri)

# Turn the summary of the "value" column into a one-column data frame so it
# prints one statistic per row.
multi$value <- as.data.frame(multi$value)

>multi

$variable

    freq.Q10_1     freq.Q10_2     freq.Q10_3       freq.Sum 

        490.00         490.00         490.00        1470.00 

    prop.Q10_1     prop.Q10_2     prop.Q10_3       prop.Sum 

         33.33          33.33          33.33         100.00 

caseprop.Q10_1 caseprop.Q10_2 caseprop.Q10_3   caseprop.Sum 

        100.00         100.00         100.00         300.00 



$value

                            multi$value

freq.步行                        470.00

freq.公交车                      197.00

freq.自行车                       32.00

freq.自有摩托车                   37.00

freq.自己开车                    195.00

freq.出租车(打的)                 35.00

freq.商场的免费购物班车           16.00

freq.出租摩托车                    1.00

freq.人力三轮车                    0.00

freq.电瓶车/电动车                52.00

freq.地铁                          3.00

freq.轻轨                          0.00

freq.其他                         37.00

freq.Sum                        1075.00

prop.步行                         43.72

prop.公交车                       18.33

prop.自行车                        2.98

prop.自有摩托车                    3.44

prop.自己开车                     18.14

prop.出租车(打的)                  3.26

prop.商场的免费购物班车            1.49

prop.出租摩托车                    0.09

prop.人力三轮车                    0.00

prop.电瓶车/电动车                 4.84

prop.地铁                          0.28

prop.轻轨                          0.00

prop.其他                          3.44

prop.Sum                         100.00

caseprop.步行                     95.92

caseprop.公交车                   40.20

caseprop.自行车                    6.53

caseprop.自有摩托车                7.55

caseprop.自己开车                 39.80

caseprop.出租车(打的)              7.14

caseprop.商场的免费购物班车        3.27

caseprop.出租摩托车                0.20

caseprop.人力三轮车                0.00

caseprop.电瓶车/电动车            10.61

caseprop.地铁                      0.61

caseprop.轻轨                      0.00

caseprop.其他                      7.55

caseprop.Sum                     219.39

问题：

1、R语言中多选题的多重响应变量如何设置

2、利用R进行数据的描述统计得出的结果呈现形式不如SPSS清晰，见下图。怎么将R的数据结果呈现形式弄成下图样式？

WYJWQLXUU7WT$VEPPV1}3{X.png

0 个评论

要回复文章请先登录或注册