SQL面试题目

发表: 2020-08-15 浏览: 1236

SQL

1.写出分析代码

答案：

select

case when a.order_num<=5 then '0-5'

when a.order_num>=6 and a.order_num<=10 then '6-10'

when a.order_num>=11 and a.order_num<=20 then '11-20'

when a.order_num>20 '20以上' end 单量

,count(客户id)

from

(select

客户id

,count(运单号) order_num

from table

where create_time between '2020-05-01' and '2020-05-31'

group by 客户id

) a

group by case when a.order_num<=5 then '0-5'

when a.order_num>=6 and a.order_num<=10 then '6-10'

when a.order_num>=11 and a.order_num<=20 then '11-20'

when a.order_num>20 '20以上' end

2.考察窗口函数的用法

select *

from

(select *

,row_number() over(partition by 渠道 order by 授信时间 desc) rn

from table

) a

where rn=1

3.一张成绩表class有如下字段，班级ID，英语成绩，数学成绩，语文成绩

id    english   math chinese classid
1       78 89 95 2
2       68 86 79 2
3       59 98 78 1
4       68 96 86 3
5       58 87 68 3
6       97 87 68 3
7       97 87 76 1
8       67 98 97 2
9       78 76 96 1
10      99 99 98 3

求：每个班级英语成绩最高的前两名的分值差为多少？

第一步，首先分组取top2的数据；

select

english，

classid

from

(select

english,

classid，

row_number() over (partition by classid order by english desc) rank

from class) temp1

where rank < 3;

--结果输出

english classid

97 1

78 1

78 2

68 2

99 3

97 3

第二步，将每个班级分组数据的最大值减去最小值；

select

classid,

max(english) - min(english) as eng_difference

from

(select

english，

classid

from

(select

english,

classid，

row_number() over (partition by classid order by english desc) rank

from class) temp1

where rank < 3) temp2

group by classid;

--结果

classid eng_difference

1 19

2 10

3 2

如果求所有的分值差，如下：

select

classid,

max(english) - min(english) as eng_difference,

max(math) - min(math) as math_difference,

max(chinese) - min(chinese) as china_difference

from

(select

english，

math,

chinese,

classid

from

(select

english,

math,

chinese,

classid，

row_number() over (partition by classid order by english desc) rank

from class) temp1

where rank < 3) temp2

group by classid;

--结果输出

classid eng_difference math_difference china_difference

1 19 11 20

2 10 3 16

3 2 12 30

hive中一般取top n时，row_number(),rank,dense_rank()这三个函数就派上用场了，

先简单说下这三函数都是排名的，不过呢还有点细微的区别。

通过代码运行结果一看就明白了。

除Row_number外还有rank,dense_rank
以下是语法：
rank() over([partition by col1] order by col2)
dense_rank() over([partition by col1] order by col2)
row_number() over([partition by col1] order by col2)

功能差不多，但是有细微的差别

row_number的排序不允许并列，即使两条记录的值相等也不会出现相等的排序值
rank排序时出现相等的值时会有并列，即值相等的两条数据会有相同的序列值
dense_rank排序的值允许并列，但会跳跃的排序，像这样：1,1,3,4,5,5,7.

select empid,

deptid,

salary,

rank()over( order by deptid desc ) rank,

dense_rank() over( order by deptid desc ) dense_rank,

row_number()over( order by deptid desc) row_number

from employee

rank() 排序相同时会重复，总数不会变

dense_rank()排序相同时会重复，总数会减少

row_number() 会根据顺序计算

正好听到一个需求，求sal前50%的人,

基于row_number函数也很容易实现分页：

SELECT
   *
FROM
   (
       SELECT
           row_number() over(ORDER BY empid DESC) AS rnum ,
           employee.*
       FROM
           employee
   ) t
WHERE
   rnum >= 1
AND rnum <= 5;

select a.date,count(a.user_id)as '每天活跃用户数',count(b.user_id) as'活跃30天留存用户数'

from (select User_id,date from Activate) a

left join (select User_id,date from Activate) b

on a.User_id=b.User_id

and date_add(a.date,interval 1 day)<=b.date

and b.date<=date_add(a.date,interval 29 day)

group by 1

--------求结束一场游戏后，平均多久时间内会开始下场游戏？如果司机只玩过一次游戏，就不计算该

select a.drier_id,avg(timestampdiff(minute,a.end_time,b.start_time))as '平均游戏间隔'

from

(select drier_id,start_time,end_time

,rank() over (partition by drier_id order by start_time) as rank1

from game) a

left join

(select drier_id,start_time,end_time

,rank() over (partition by drier_id order by start_time) as rank2

from game) b

on a.drier_id=b.drier_id

where a.rank1+1=b.rank2

group by 1

6.连续登录天数

题目：数据表table1中记录用户登录日志，包含两列，uid和login_time，需要找出连续登陆3天的用户

样例数据如下(提示：可用窗口函数)

uid,login_time

1,2020-01-01 00:00:00

1,2020-01-02 01:23:45

1,2020-01-01 00:00:00

2,2020-01-01 02:01:00

2,2020-01-01 00:00:00

3,2020-01-01 04:05:00

3,2020-01-02 00:00:00

3,2020-01-03 16:26:00

4,2020-01-01 00:00:00

4,2020-01-02 23:04:34

4,2020-01-04 00:00:00

4,2020-01-05 00:00:00

答案：

--1、连续3天登录SQL

select

uid as `用户数`,

date_sub(t2.login_time,t2.rank) as flag_dt,

count(1) as num --联系登录数

from

(

select

t1.uid as uid,

t1.login_time as login_time,

row_number()over(partition by t1.uid order by t1.login_time) as rank

from

(

select

distinct

uid,

to_date(login_time) as login_time

from table1

)t1

)t2

group by uid, date_sub(t2.login_time,t2.rn)

having count(1) =3

--2、代码注释：

1、第一步：取每天用户登录去重distinct，保证只有一条记录每天;

2、第二步：排序row_number,每个用户登录uid、dt降序打标签;

3、第三步：取用户登录天数,count(1) num=3,取登录3次的;

4、第四步：date_sub(t2.login_time,t2.rank),用户登录时间减去排序,应该是同一天;

5、第五步：取数连续3天登录用户记录

select user_id,p_day_new,count(1)

from (

select user_id,p_day,rn,format_datetime(date_add('day',-rn,from_iso8601_timestamp(p_day)),'yyyy-MM-dd') as p_day_new

from (

select user_id,p_day,row_number() over(partition by user_id order by p_day) as rn

from (

select p_day,user_id

from ug.fact_activity_user_event_detail

group by p_day,user_id

) a

) b

) c

group by user_id,p_day_new

having count(1)>=3

8.查询连续登录天数

第一步：使用ROW_NUMBER() 窗口函数按UID分组，按date1升序排

-- 第一段首先根据用户分组，登陆时间排序，结果按照登陆时间升序排列

SELECT

UID,

date1,

row_number() OVER(PARTITION BY UID order by date1) as sort

FROM user_login

第二步：判断是否连续：

(升序排序后：用date1-sort 如果连续则做差后值一样，然后再用UID，login_group分组算出每个UID下连续的天数）

SELECT

UID,

date_sub(date1,sort) as login_group,

min(date1) as start_date1,

max(date1) as end_date1,

count(1) as continuous_days

FROM (

-- 第一段首先根据用户分组，登陆时间排序，结果按照登陆时间升序排列

SELECT

UID,

date1,

row_number() OVER(PARTITION BY UID order by date1) as sort

FROM user_login

) a

GROUP BY UID,date_sub(date1,sort)

第三步：再以UID分组，去取max(continuous_days)

SELECT

UID,

max(continuous_days) as maxday

FROM

(

SELECT

UID,

date_sub(date1,sort) as login_group,

min(date1) as start_date1,

max(date1) as end_date1,

count(1) as continuous_days

FROM (

-- 第一段首先根据用户分组，登陆时间排序，结果按照登陆时间升序排列

SELECT

UID,

date1,

row_number() OVER(PARTITION BY UID order by date1) as sort

FROM user_login

) a

GROUP BY UID,date_sub(date1,sort)

)b

GROUP BY UID

表：create table user_mission_finish_table

(

userid string comment '用户id',

missionid string comment '任务id',

finish_dt string comment '完成日期'

)

注意：只有用户mission完成了，才会有记录，如:2020-08-01日，用户没有做mission，表中无该用户2020-08-01日的记录

连续完课定义：指相邻2天都完成了课程任务，如：2020-08-18日完成任务1，2020-08-19日完成任务3，则用户是一个有连续完课记录的用户

问题：

1.有连续学习行为的用户数

2.计算用户历史以来最大的连续学习天数，并按倒序输出，输出字段：用户id，最大连续学习天数数

2.

----1、有连续学习行为用户数

select保存草稿

count(distinct t3.uid) as `有连续学习行为用户数`

from

(

select

uid,

date_sub(t2.finish_dt,t2.rn) as flag_dt,

count(1) as num --连续登录数

from

(

select

t1.uid as uid,

t1.finish_dt as login_time,

row_number()over(partition by t1.uid order by t1.finish_dt) as rank

from

(

select

distinct --用户当天完成数据去重

userid as uid,

to_date(finish_dt) as finish_dt --完成时间

from user_mission_finish_table

)t1

)t2

group by uid,date_sub(t2.login_time,t2.rn)

having count(1) >=2 --连续两天及其以上学习记录

)t3;

----2、有连续学习行为用户，输出最大连续学习天数

select

uid as `连续最大学习用户数`,

max(t3.num) as `连续最大学习天数`

from

(

select

uid,

date_sub(t2.finish_dt,t2.rn) as flag_dt,

count(1) as num --连续登录数

from

(

select

t1.uid as uid,

t1.finish_dt as login_time,

row_number()over(partition by t1.uid order by t1.finish_dt) as rank

from

(

select

distinct --用户当天完成数据去重

userid as uid,

to_date(finish_dt) as finish_dt --完成时间

from user_mission_finish_table

) t1

) t2

group by uid, date_sub(t2.login_time,t2.rn)

having count(1) >=2 --连续两天及其以上学习记录

)t3

group by t3.uid;

-----3.

问题：user_id month【order_time】 amt

1 20170101 100

3 20170101 20

4 20170101 30

1 20170102 200

2 20170102 240

3 20170102 30

4 20170102 2

1 20170101 180

2 20170101 250

3 20170101 30

4 20170101 260

......

[Hive SQL/MaxCompute SQL]统计amt连续3个月，环比增长>50%的user

select

distinct

t1.user_id,

if((t3.amt-t2.amt)/t2.amt >0.5 and (t2.amt-t1.amt)/t1.amt >0.5,'连续3月环比超50%',null) as `用户标签`

from

(

select

month(order_time) as month,

user_id,

sum(amt) as amt

from user_order_whole --假定该表为订单全量表

where dt ='20171231

and to_date(order_time) between '20170101' and '20171231'

group by month(order_time),user_id

)t1

left join

(

select

month(order_time) as month,

user_id,

sum(amt) as amt

from user_order_whole --假定该表为订单全量表

where dt ='20171231

and to_date(order_time) between '20170101' and '20171231'

group by month(order_time),user_id

)t2

on t1.user_id=t2.user_id

(

select

month(order_time) as month,

user_id,

sum(amt) as amt

from user_order_whole --假定该表为订单全量表

where dt ='20171231

and to_date(order_time) between '20170101' and '20171231'

group by month(order_time),user_id

)t3

on t1.user_id=t3.user_id

where (t2.month-t1.month)=1 and (t3.month-t1.month)=2

having (t3.amt-t2.amt)/t2.amt >0.5 and (t2.amt-t1.amt)/t1.amt >0.5

select

distinct

t1.user_id,

if((t3.amt-t2.amt)/t2.amt >0.5 and (t2.amt-t1.amt)/t1.amt >0.5,'连续3月环比超50%',null) as `用户标签`

from

(

select

month(order_time) as month,

user_id,

sum(amt) as amt

from user_order_whole --假定该表为订单全量表

where dt ='20171231'

and to_date(order_time) between '20170101' and '20171231'

group by month(order_time),user_id

)t1

left join

(

select

month(order_time) as month,

user_id,

sum(amt) as amt

from user_order_whole --假定该表为订单全量表

where dt ='20171231'

and to_date(order_time) between '20170101' and '20171231'

group by month(order_time),user_id

)t2

on t1.user_id=t2.user_id

left join

(

select

month(order_time) as month,

user_id,

sum(amt) as amt

from user_order_whole --假定该表为订单全量表

where dt ='20171231'

and to_date(order_time) between '20170101' and '20171231'

group by month(order_time),user_id

)t3

on t1.user_id=t3.user_id

where (t2.month-t1.month)=1 and (t3.month-t1.month)=2

having (t3.amt-t2.amt)/t2.amt >0.5 and (t2.amt-t1.amt)/t1.amt >0.5

6.连续几天内有服务行为的用户

#前7天里面有4天服务的用户

SELECT driver_id from s_order_base_info_a_holo

where bi_cate_id=3

and (

(unload_time>date_trunc('day', now()) - interval '7d' and unload_time<date_trunc('day',now()))

or (service_time>date_trunc('day', now()) - interval '7d' and service_time<date_trunc('day',now()) and bi_state=8 ) )

group by driver_id having count(distinct to_char(unload_time,'yyyy-mm-dd'))=4

0 个评论

要回复文章请先登录或注册

SQL面试题目

hive中一般取top n时，row_number(),rank,dense_rank()这三个函数就派上用场了，

基于row_number函数也很容易实现分页：

SELECT *FROM ( SELECT row_number() over(ORDER BY empid DESC) AS rnum , employee.* FROM employee ) tWHERE rnum >= 1AND rnum <= 5;

0 个评论

SELECT
*
FROM
(
SELECT
row_number() over(ORDER BY empid DESC) AS rnum ,
employee.*
FROM
employee
) t
WHERE
rnum >= 1
AND rnum <= 5;