【SQL】取上一条不为空的数据

浏览: 2105

需求

获取同一月内上一条不为空的数据(按照序号排序),如下图

image.png

样例数据

with chenxu(adate,no_1,cnt) as
(
select 201801,1,1 from dual union all
select 201801,2,null from dual union all
select 201801,3,null from dual union all
select 201801,4,null from dual union all
select 201801,5,null from dual union all
select 201801,6,3 from dual union all
select 201801,7,null from dual union all
select 201801,8,null from dual union all
select 201801,9,2 from dual union all
select 201801,10,null from dual union all
select 201801,11,null from dual union all
select 201801,12,null from dual union all
select 201802,1,19 from dual union all
select 201802,2,null from dual union all
select 201802,3,null from dual union all
select 201802,4,20 from dual union all
select 201802,5,23 from dual

)
select * from chenxu

方案一

如果在Oracle 11gR2版本下可以直接使用lag over函数中的ignore nulls

with chenxu(adate,no_1,cnt) as
(
select 201801,1,1 from dual union all
select 201801,2,null from dual union all
select 201801,3,null from dual union all
select 201801,4,null from dual union all
select 201801,5,null from dual union all
select 201801,6,3 from dual union all
select 201801,7,null from dual union all
select 201801,8,null from dual union all
select 201801,9,2 from dual union all
select 201801,10,null from dual union all
select 201801,11,null from dual union all
select 201801,12,null from dual union all
select 201802,1,19 from dual union all
select 201802,2,null from dual union all
select 201802,3,null from dual union all
select 201802,4,20 from dual union all
select 201802,5,23 from dual

)
select t.*,nvl(lag(cnt ignore nulls) over(partition by adate order by no_1),cnt) lag_cnt
from chenxu t


但是像PG、GP、Hive等数据库,他们的分析函数里并没有集成进来ignore nulls的功能,则可以多套两层来解决

方案二

不等值关联,再分组筛选得出(会发散)

with chenxu(adate,no_1,cnt) as
(
select 201801,1,1 from dual union all
select 201801,2,null from dual union all
select 201801,3,null from dual union all
select 201801,4,null from dual union all
select 201801,5,null from dual union all
select 201801,6,3 from dual union all
select 201801,7,null from dual union all
select 201801,8,null from dual union all
select 201801,9,2 from dual union all
select 201801,10,null from dual union all
select 201801,11,null from dual union all
select 201801,12,null from dual union all
select 201802,1,19 from dual union all
select 201802,2,null from dual union all
select 201802,3,null from dual union all
select 201802,4,20 from dual union all
select 201802,5,23 from dual

)
,tmp as (
select t.*,case when t.cnt is null then t2.cnt else t.cnt end as lag_cnt, t2.no_1 t2_no

from chenxu t,
(select * from chenxu where cnt is not null) t2
where t.adate = t2.adate
and t.no_1 >= t2.no_1

)
select adate,no_1,cnt,nvl(lag(lag_cnt) over(partition by adate order by no_1) ,cnt)
from (
select adate,no_1,cnt,lag_cnt from (
select t.*,row_number() over(partition by adate,no_1,cnt order by t2_no desc) rn from tmp t
)
where rn = 1
);

方案三

标定范围,关联得出结果(不会发散)

with chenxu(adate,no_1,cnt) as
(
select 201801,1,1 from dual union all
select 201801,2,null from dual union all
select 201801,3,null from dual union all
select 201801,4,null from dual union all
select 201801,5,null from dual union all
select 201801,6,3 from dual union all
select 201801,7,null from dual union all
select 201801,8,null from dual union all
select 201801,9,2 from dual union all
select 201801,10,null from dual union all
select 201801,11,null from dual union all
select 201801,12,null from dual union all
select 201802,1,19 from dual union all
select 201802,2,null from dual union all
select 201802,3,null from dual union all
select 201802,4,20 from dual union all
select 201802,5,23 from dual 
)
,tmp as (
select a.adate,a.cnt ,no_1,
 lead(a.no_1) over(partition by adate order by no_1) ld_no,max_no,lead_no
        from (select t.*,max(no_1) over(partition by adate order by 1) as max_no,--获取当组最大,为当组最后一条空处理做准备
                     lead(no_1) over(partition by adate order by no_1) as lead_no --获取当组下一个
               from chenxu t) a 
       where a.cnt is not null --过滤空行

select t.*,a.cnt as lag_ignore_null
        from chenxu t 
        left join tmp a on a.adate = t.adate 
         and t.no_1 >= a.lead_no
         and t.no_1 <= nvl(a.ld_no,a.max_no)
order by t.adate,t.no_1
       ;

方案四

若是在Hive中,可使用last_value+true实现

根据测试发现last_value和first_value不一样,last_value的默认窗口范围为 range between unbounded preceding and current row (上无边界 到 当前行)

LAST_VALUE

  • This takes at most two parameters. The first parameter is the column for which you want the last value, the second (optional) parameter must be a boolean which is false by default. If set to true it skips null values.

https://cwiki.apache.org/confluence/display/Hive/LanguageManual+WindowingAndAnalytics

根据官网提示可得

with chenxu  as
(
select 201801 adate,1 no_1,1 cnt union all
select 201801,2,null union all
select 201801,3,null union all
select 201801,4,null union all
select 201801,5,null union all
select 201801,6,3 union all
select 201801,7,null union all
select 201801,8,null union all
select 201801,9,2 union all
select 201801,10,null union all
select 201801,11,null union all
select 201801,12,null union all
select 201802,1,19 union all
select 201802,2,null union all
select 201802,3,null union all
select 201802,4,20 union all
select 201802,5,23 
)
select *,
last_value(Cnt,true) over(partition by adate order by no_1) as lv
from chenxu

image.png

推荐 2
本文由 老头子 创作,采用 知识共享署名-相同方式共享 3.0 中国大陆许可协议 进行许可。
转载、引用前需联系作者,并署名作者且注明文章出处。
本站文章版权归原作者及原出处所有 。内容为作者个人观点, 并不代表本站赞同其观点和对其真实性负责。本站是一个个人学习交流的平台,并不用于任何商业目的,如果有任何问题,请及时联系我们,我们将根据著作权人的要求,立即更正或者删除有关内容。本站拥有对此声明的最终解释权。

1 个评论

这个思路真心可以,感觉用起来很顺

要回复文章请先登录注册