共计 2436 个字符,预计需要花费 7 分钟才能阅读完成。
间接上干货,HiveSQL 高级进阶技巧,重要性显而易见。把握这 10 个技巧,你的 SQL 程度将有一个质的晋升,达到一个较高的档次!
1. 删除:
insert overwrite tmp
select * from tmp where id != ‘666’;
复制代码
2. 更新:
间接上干货,HiveSQL 高级进阶技巧,重要性显而易见。把握这 10 个技巧,你的 SQL 程度将有一个质的晋升,达到一个较高的档次!
insert overwrite tmp
select id,label,
if(id = '1' and label = 'grade','25',value) as value
from tmp where id != ‘666’;
复制代码
3. 行转列:
— Step03:最初将 info 的内容切分
select id,split(info,’:’)[0] as label,split(info,’:’)[1] as value
from
(
— Step01:先将数据拼接成“heit:180,weit:60,age:26”
select id,concat('heit',':',height,',','weit',':',weight,',','age',':',age) as value | |
from tmp |
) as tmp
— Step02:而后在借用 explode 函数将数据收缩至多行
lateral view explode(split(value,’,’)) mytable as info;
复制代码
4. 列转行 1:
select
tmp1.id as id,tmp1.value as height,tmp2.value as weight,tmp3.value as age
from
(select id,label,value from tmp2 where label = ‘heit’) as tmp1
join
on tmp1.id = tmp2.id
(select id,label,value from tmp2 where label = ‘weit’) as tmp2
join
on tmp1.id = tmp2.id
(select id,label,value from tmp2 where label = ‘age’) as tmp3
on tmp1.id = tmp3.id;
复制代码
5. 列转行 2:
select
id,tmpmap[‘height’] as height,tmpmap[‘weight’] as weight,tmpmap[‘age’] as age
from
(
select id, | |
str_to_map(concat_ws(',',collect_set(concat(label,':',value))),',',':') as tmpmap | |
from tmp2 group by id |
) as tmp1;
复制代码
6. 剖析函数 1:
select id,label,value,
lead(value,1,0)over(partition by id order by label) as lead, | |
lag(value,1,999)over(partition by id order by label) as lag, | |
first_value(value)over(partition by id order by label) as first_value, | |
last_value(value)over(partition by id order by label) as last_value |
from tmp;
复制代码
7. 剖析函数 2:
select id,label,value,
row_number()over(partition by id order by value) as row_number, | |
rank()over(partition by id order by value) as rank, | |
dense_rank()over(partition by id order by value) as dense_rank |
from tmp;
复制代码
8. 多维分析 1:
select col1,col2,col3,count(1),
Grouping__ID
from tmp
group by col1,col2,col3
grouping sets(col1,col2,col3,(col1,col2),(col1,col3),(col2,col3),())
复制代码
9. 多维分析 2:
select col1,col2,col3,count(1),
Grouping__ID
from tmp
group by col1,col2,col3
with cube;
复制代码
10. 数据歪斜 groupby:
select label,sum(cnt) as all from
(
select rd,label,sum(1) as cnt from | |
(select id,round(rand(),2) as rd,value from tmp1 | |
) as tmp | |
group by rd,label |
) as tmp
group by label;
复制代码
11. 数据歪斜 join:
select label,sum(value) as all from
(
select rd,label,sum(value) as cnt from | |
( | |
select tmp1.rd as rd,tmp1.label as label,tmp1.value*tmp2.value as value | |
from | |
(select id,round(rand(),1) as rd,label,value from tmp1 | |
) as tmp1 | |
join | |
( | |
select id,rd,label,value from tmp2 | |
lateral view explode(split('0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9',',')) mytable as rd | |
) as tmp2 | |
on tmp1.rd = tmp2.rd and tmp1.label = tmp2.label | |
) as tmp1 | |
group by rd,label |
) as tmp1
group by label;
关键词:大数据培训