关于hive:HiveSQL高级进阶10大技巧

间接上干货，HiveSQL高级进阶技巧，重要性显而易见。把握这10个技巧，你的SQL程度将有一个质的晋升，达到一个较高的档次！
1.删除：
insert overwrite tmp
select * from tmp where id != ‘666’;
复制代码
2.更新：
间接上干货，HiveSQL高级进阶技巧，重要性显而易见。把握这10个技巧，你的SQL程度将有一个质的晋升，达到一个较高的档次！
insert overwrite tmp
select id,label,

   if(id = '1' and label = 'grade','25',value) as value

from tmp where id != ‘666’;
复制代码
3.行转列：
— Step03：最初将info的内容切分
select id,split(info,’:’)[0] as label,split(info,’:’)[1] as value
from
(
— Step01：先将数据拼接成“heit:180,weit:60,age:26”

select id,concat('heit',':',height,',','weit',':',weight,',','age',':',age) as value 
from tmp

) as tmp
— Step02：而后在借用explode函数将数据收缩至多行
lateral view explode(split(value,’,’)) mytable as info;
复制代码
4.列转行1：
select
tmp1.id as id,tmp1.value as height,tmp2.value as weight,tmp3.value as age
from
(select id,label,value from tmp2 where label = ‘heit’) as tmp1
join
on tmp1.id = tmp2.id
(select id,label,value from tmp2 where label = ‘weit’) as tmp2
join
on tmp1.id = tmp2.id
(select id,label,value from tmp2 where label = ‘age’) as tmp3
on tmp1.id = tmp3.id;
复制代码
5.列转行2：
select
id,tmpmap[‘height’] as height,tmpmap[‘weight’] as weight,tmpmap[‘age’] as age
from
(

select id,
       str_to_map(concat_ws(',',collect_set(concat(label,':',value))),',',':') as tmpmap  
from tmp2 group by id

) as tmp1;
复制代码
6.剖析函数1：
select id,label,value,

   lead(value,1,0)over(partition by id order by label) as lead,
   lag(value,1,999)over(partition by id order by label) as lag,
   first_value(value)over(partition by id order by label) as first_value,
   last_value(value)over(partition by id order by label) as last_value

from tmp;
复制代码
7.剖析函数2：

select id,label,value,

   row_number()over(partition by id order by value) as row_number,
   rank()over(partition by id order by value) as rank,
   dense_rank()over(partition by id order by value) as dense_rank

from tmp;
复制代码
8.多维分析1：
select col1,col2,col3,count(1),

   Grouping__ID

from tmp
group by col1,col2,col3
grouping sets(col1,col2,col3,(col1,col2),(col1,col3),(col2,col3),())
复制代码
9.多维分析2：
select col1,col2,col3,count(1),

   Grouping__ID

from tmp
group by col1,col2,col3
with cube;
复制代码
10.数据歪斜groupby：
select label,sum(cnt) as all from
(

select rd,label,sum(1) as cnt from 
(
    select id,round(rand(),2) as rd,value from tmp1
) as tmp
group by rd,label

) as tmp
group by label;
复制代码
11.数据歪斜join：
select label,sum(value) as all from
(

select rd,label,sum(value) as cnt from
(
    select tmp1.rd as rd,tmp1.label as label,tmp1.value*tmp2.value as value 
    from 
    (
        select id,round(rand(),1) as rd,label,value from tmp1
    ) as tmp1
    join
    (
        select id,rd,label,value from tmp2
        lateral view explode(split('0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9',',')) mytable as rd
    ) as tmp2
    on tmp1.rd = tmp2.rd and tmp1.label = tmp2.label
) as tmp1
group by rd,label

) as tmp1
group by label;

关键词：大数据培训

关于hive:HiveSQL高级进阶10大技巧

评论

发表回复取消回复

更多文章

DDN HPC 存储硬件架构设计深度分析

探秘IO500：从Lustre并行文件系统出发，开启HPC存储性能新征程

苹果iOS打包的ipa应用无法安装？一篇文章带你了解可能的原因及排查方法

图解Golang：从零开始实现简易版过期LRU缓存

关于hive:HiveSQL高级进阶10大技巧

评论

发表回复 取消回复

更多文章

DDN HPC 存储硬件架构设计深度分析

探秘IO500：从Lustre并行文件系统出发，开启HPC存储性能新征程

苹果iOS打包的ipa应用无法安装？一篇文章带你了解可能的原因及排查方法

图解Golang：从零开始实现简易版过期LRU缓存

发表回复取消回复