SQL面试题目

浏览: 1236
SQL

1.写出分析代码

image.png

答案:

select

case when a.order_num<=5 then '0-5'

when a.order_num>=6 and  a.order_num<=10 then '6-10'

when a.order_num>=11 and  a.order_num<=20 then '11-20'

when a.order_num>20  '20以上' end 单量

,count(客户id)

from

(select

客户id

,count(运单号) order_num

from table

where create_time between '2020-05-01' and '2020-05-31'

group by 客户id

) a

group by case when a.order_num<=5 then '0-5'

when a.order_num>=6 and  a.order_num<=10 then '6-10'

when a.order_num>=11 and  a.order_num<=20 then '11-20'

when a.order_num>20  '20以上' end


2.考察窗口函数的用法

image.png


select *

from

(select *

,row_number() over(partition by 渠道 order by 授信时间 desc) rn

from table   

) a

where rn=1


3.一张成绩表class有如下字段,班级ID,英语成绩,数学成绩,语文成绩

                                id    english   math  chinese classid
                                1       78         89         95         2
                                2       68         86         79         2  
                                3       59         98         78         1
                                4       68         96         86         3
                                5       58         87         68         3
                                6       97         87         68         3
                                7       97         87         76         1
                                8       67         98         97         2
                                9       78         76         96         1
                               10      99         99         98         3

求:每个班级英语成绩最高的前两名的分值差为多少?

第一步,首先分组取top2的数据;

select 

    english,

    classid 

from

   (select 

        english,

        classid,

        row_number() over (partition by classid order by english desc) rank

    from class) temp1

where rank < 3;

--结果输出

english classid

97      1

78      1

78      2

68      2

99      3

97      3

第二步,将每个班级分组数据的最大值减去最小值;

select 

    classid,

    max(english) - min(english) as eng_difference 

from

   (select 

       english,

       classid 

    from

        (select 

           english,

           classid,

           row_number() over (partition by classid order by english desc) rank

         from class) temp1

where rank < 3) temp2

group by classid;

--结果

classid eng_difference

1       19

2       10

3       2

如果求所有的分值差,如下:

select 

    classid,

    max(english) - min(english) as eng_difference,

    max(math) - min(math) as math_difference,

    max(chinese) - min(chinese) as china_difference

from

   (select 

       english,

       math,

       chinese,

       classid 

    from

        (select 

           english,

           math,

           chinese,

           classid,

           row_number() over (partition by classid order by english desc) rank

         from class) temp1

where rank < 3) temp2

group by classid;

 

--结果输出

classid eng_difference  math_difference china_difference

1       19      11      20

2       10      3       16

3       2       12      30

hive中一般取top n时,row_number(),rank,dense_rank()这三个函数就派上用场了,

先简单说下这三函数都是排名的,不过呢还有点细微的区别。

通过代码运行结果一看就明白了。

除Row_number外还有rank,dense_rank 
以下是语法: 
rank() over([partition by col1] order by col2) 
dense_rank() over([partition by col1] order by col2) 
row_number() over([partition by col1] order by col2)

功能差不多,但是有细微的差别 

row_number的排序不允许并列,即使两条记录的值相等也不会出现相等的排序值 
rank排序时出现相等的值时会有并列,即值相等的两条数据会有相同的序列值 
dense_rank排序的值允许并列,但会跳跃的排序,像这样:1,1,3,4,5,5,7.

select empid,

deptid,

salary,

rank()over( order by deptid desc ) rank,

dense_rank() over( order by deptid desc ) dense_rank,

row_number()over( order by deptid desc) row_number

from employee

rank() 排序相同时会重复,总数不会变  

dense_rank()排序相同时会重复,总数会减少

row_number() 会根据顺序计算

正好听到一个需求,求sal前50%的人,

基于row_number函数也很容易实现分页:

SELECT
    *
FROM
    (
        SELECT
            row_number() over(ORDER BY empid DESC) AS rnum ,
            employee.*
        FROM
            employee
    ) t
WHERE
    rnum >= 1
AND rnum <= 5;


image.png

select a.date,count(a.user_id)as '每天活跃用户数',count(b.user_id) as'活跃30天留存用户数' 

from (select User_id,date from Activate) a

left join (select User_id,date from Activate) b

on a.User_id=b.User_id

and date_add(a.date,interval 1 day)<=b.date 

and b.date<=date_add(a.date,interval 29 day)

group by 1



image.png

--------求结束一场游戏后,平均多久时间内会开始下场游戏?如果司机只玩过一次游戏,就不计算该

select a.drier_id,avg(timestampdiff(minute,a.end_time,b.start_time))as '平均游戏间隔'

from

(select drier_id,start_time,end_time

,rank() over (partition by drier_id order by start_time) as rank1

from game) a

left join 

(select drier_id,start_time,end_time

,rank() over (partition by drier_id order by start_time) as rank2

from game) b

on a.drier_id=b.drier_id

where a.rank1+1=b.rank2

group by 1


6.连续登录天数

题目:数据表table1中记录用户登录日志,包含两列,uid和login_time,需要找出连续登陆3天的用户

样例数据如下(提示:可用窗口函数)

uid,login_time

1,2020-01-01 00:00:00

1,2020-01-02 01:23:45

1,2020-01-01 00:00:00

2,2020-01-01 02:01:00

2,2020-01-01 00:00:00

2,2020-01-01 00:00:00

3,2020-01-01 04:05:00

3,2020-01-02 00:00:00

3,2020-01-03 16:26:00

4,2020-01-01 00:00:00

4,2020-01-02 23:04:34

4,2020-01-04 00:00:00

4,2020-01-05 00:00:00


答案:

--1、连续3天登录SQL

select

   uid as `用户数`,

   date_sub(t2.login_time,t2.rank) as flag_dt, 

   count(1) as num --联系登录数

from

(

  select

     t1.uid as uid,

     t1.login_time as login_time,

     row_number()over(partition by t1.uid order by t1.login_time) as rank

  from

  (

     select

        distinct

        uid,

        to_date(login_time) as login_time

     from table1

  )t1

)t2

group by uid, date_sub(t2.login_time,t2.rn)

having count(1) =3

--2、代码注释:

1、第一步:取每天用户登录去重distinct,保证只有一条记录每天;

2、第二步:排序row_number,每个用户登录uid、dt降序打标签;

3、第三步:取用户登录天数,count(1) num=3,取登录3次的;

4、第四步:date_sub(t2.login_time,t2.rank),用户登录时间减去排序,应该是同一天;

5、第五步:取数连续3天登录用户记录


select user_id,p_day_new,count(1)

from (

    select user_id,p_day,rn,format_datetime(date_add('day',-rn,from_iso8601_timestamp(p_day)),'yyyy-MM-dd') as p_day_new

    from (

        select user_id,p_day,row_number() over(partition by user_id order by p_day) as rn

        from (

            select p_day,user_id

            from ug.fact_activity_user_event_detail

            group by p_day,user_id

        ) a

    ) b

) c 

group by user_id,p_day_new

having count(1)>=3


8.查询连续登录天数


第一步:使用ROW_NUMBER() 窗口函数 按UID分组,按date1升序排

-- 第一段首先根据用户分组,登陆时间排序,结果按照登陆时间升序排列

    SELECT 

    UID,

    date1,

    row_number() OVER(PARTITION BY UID order by date1) as sort

    FROM user_login



第二步:判断是否连续:

(升序排序后  : 用date1-sort  如果连续则做差后值一样,然后再用UID,login_group分组算出每个UID下连续的天数)

SELECT 

UID,

date_sub(date1,sort) as login_group,

min(date1) as start_date1,

max(date1) as end_date1,

count(1) as continuous_days

FROM (

    -- 第一段首先根据用户分组,登陆时间排序,结果按照登陆时间升序排列

    SELECT 

    UID,

    date1,

    row_number() OVER(PARTITION BY UID order by date1) as sort

    FROM user_login

) a

GROUP BY UID,date_sub(date1,sort)


第三步:再以UID分组,去取max(continuous_days)

SELECT 

UID,

max(continuous_days) as maxday

FROM 

(

SELECT 

UID,

date_sub(date1,sort) as login_group,

min(date1) as start_date1,

max(date1) as end_date1,

count(1) as continuous_days

FROM (

    -- 第一段首先根据用户分组,登陆时间排序,结果按照登陆时间升序排列

    SELECT 

    UID,

    date1,

    row_number() OVER(PARTITION BY UID order by date1) as sort

    FROM user_login

) a

GROUP BY UID,date_sub(date1,sort)

)b

GROUP BY UID



表:create table user_mission_finish_table

(

     userid string comment '用户id',

     missionid string comment '任务id',

     finish_dt string comment '完成日期'

)

注意:只有用户mission完成了,才会有记录,如:2020-08-01日,用户没有做mission,表中无该用户2020-08-01日的记录

连续完课定义: 指相邻2天都完成了课程任务,如:2020-08-18日完成任务1,2020-08-19日完成任务3,则用户是一个有连续完课记录的用户

问题:

1.有连续学习行为的用户数

2.计算用户历史以来最大的连续学习天数,并按倒序输出,输出字段:用户id,最大连续学习天数数

2.

----1、有连续学习行为用户数

select保存草稿

   count(distinct t3.uid) as `有连续学习行为用户数`

from

(

 select

   uid,

   date_sub(t2.finish_dt,t2.rn) as flag_dt,

   count(1) as num --连续登录数

 from

 (

   select

     t1.uid as uid,

     t1.finish_dt as login_time,

     row_number()over(partition by t1.uid order by t1.finish_dt) as rank

   from

   (

     select

        distinct --用户当天完成数据去重

        userid as uid,

        to_date(finish_dt) as finish_dt --完成时间

     from user_mission_finish_table

   )t1

  )t2

  group by uid,date_sub(t2.login_time,t2.rn)

  having count(1) >=2 --连续两天及其以上学习记录

)t3;

----2、有连续学习行为用户,输出最大连续学习天数

select

   uid as `连续最大学习用户数`,

   max(t3.num) as `连续最大学习天数`

from

(

 select

   uid,

   date_sub(t2.finish_dt,t2.rn) as flag_dt,

   count(1) as num --连续登录数

 from

 (

   select

     t1.uid as uid,

     t1.finish_dt as login_time,

     row_number()over(partition by t1.uid order by t1.finish_dt) as rank

   from

   (

     select

        distinct --用户当天完成数据去重

        userid as uid,

        to_date(finish_dt) as finish_dt --完成时间

     from user_mission_finish_table

   ) t1

) t2

 group by uid, date_sub(t2.login_time,t2.rn)

 having count(1) >=2 --连续两天及其以上学习记录

)t3

group by t3.uid;

-----3.

 问题:user_id  month【order_time】 amt

1  20170101 100

3  20170101 20

4  20170101 30

1  20170102 200

2  20170102 240

3  20170102 30

4  20170102 2

1  20170101 180

2  20170101 250

3  20170101 30

4  20170101 260

......

[Hive SQL/MaxCompute SQL]统计amt连续3个月,环比增长>50%的user

select

   distinct

   t1.user_id,

   if((t3.amt-t2.amt)/t2.amt >0.5 and (t2.amt-t1.amt)/t1.amt >0.5,'连续3月环比超50%',null) as `用户标签`

from

(

  select

    month(order_time) as month,

    user_id,

    sum(amt) as amt

  from user_order_whole --假定该表为订单全量表

  where dt ='20171231

     and to_date(order_time) between '20170101' and '20171231'

  group by month(order_time),user_id

)t1

left join

(

  select

    month(order_time) as month,

    user_id,

    sum(amt) as amt

  from user_order_whole --假定该表为订单全量表

  where dt ='20171231

     and to_date(order_time) between '20170101' and '20171231'

  group by month(order_time),user_id

)t2

on t1.user_id=t2.user_id

(

  select

    month(order_time) as month,

    user_id,

    sum(amt) as amt

  from user_order_whole --假定该表为订单全量表

  where dt ='20171231

     and to_date(order_time) between '20170101' and '20171231'

  group by month(order_time),user_id

)t3

on t1.user_id=t3.user_id

where (t2.month-t1.month)=1 and (t3.month-t1.month)=2 

having (t3.amt-t2.amt)/t2.amt >0.5 and (t2.amt-t1.amt)/t1.amt >0.5

select

   distinct

   t1.user_id,

   if((t3.amt-t2.amt)/t2.amt >0.5 and (t2.amt-t1.amt)/t1.amt >0.5,'连续3月环比超50%',null) as `用户标签`

from

(

  select

    month(order_time) as month,

    user_id,

    sum(amt) as amt

  from user_order_whole --假定该表为订单全量表

  where dt ='20171231'

     and to_date(order_time) between '20170101' and '20171231'

  group by month(order_time),user_id

)t1

left join

(

  select

    month(order_time) as month,

    user_id,

    sum(amt) as amt

  from user_order_whole --假定该表为订单全量表

  where dt ='20171231'

     and to_date(order_time) between '20170101' and '20171231'

  group by month(order_time),user_id

)t2

on t1.user_id=t2.user_id

left join

(

  select

    month(order_time) as month,

    user_id,

    sum(amt) as amt

  from user_order_whole --假定该表为订单全量表

  where dt ='20171231'

     and to_date(order_time) between '20170101' and '20171231'

  group by month(order_time),user_id

)t3

on t1.user_id=t3.user_id

where (t2.month-t1.month)=1 and (t3.month-t1.month)=2 

having (t3.amt-t2.amt)/t2.amt >0.5 and (t2.amt-t1.amt)/t1.amt >0.5


6.连续几天内有服务行为的用户

#前7天里面有4天服务的用户

SELECT driver_id from s_order_base_info_a_holo 

where bi_cate_id=3 

and (

(unload_time>date_trunc('day', now()) - interval '7d' and unload_time<date_trunc('day',now())) 

or (service_time>date_trunc('day', now()) - interval '7d' and service_time<date_trunc('day',now()) and bi_state=8 ) ) 

group by driver_id having count(distinct to_char(unload_time,'yyyy-mm-dd'))=4

推荐 0
本文由 DDlucky 创作,采用 知识共享署名-相同方式共享 3.0 中国大陆许可协议 进行许可。
转载、引用前需联系作者,并署名作者且注明文章出处。
本站文章版权归原作者及原出处所有 。内容为作者个人观点, 并不代表本站赞同其观点和对其真实性负责。本站是一个个人学习交流的平台,并不用于任何商业目的,如果有任何问题,请及时联系我们,我们将根据著作权人的要求,立即更正或者删除有关内容。本站拥有对此声明的最终解释权。

0 个评论

要回复文章请先登录注册