提问人:Hack-R 提问时间:3/15/2016 更新时间:7/4/2017 访问量:16657
Hive SQL:在 JOIN 中遇到的左别名和右别名
Hive SQL: Both left and right aliases encountered in JOIN
问:
我有这个有效的 T-SQL 查询:
select t1.*
,case when s1.period is not null then 'Y' else 'N' end as flag_cur
,case when s2.period is not null then 'Y' else 'N' end as flag_prev
,s1.cutoff_date as cutoff_date_cur ,s1.cutoff_dtkey as cutoff_dtkey_cur
,s2.cutoff_date as cutoff_date_prev ,s2.cutoff_dtkey as cutoff_dtkey_prev
into #tmp_leads2
from #tmp_leads t1
left join #param s1 on s1.period = '(a) Current' and s1.begin_date <= t1.CreatedDate and t1.CreatedDate < s1.end_date
left join #param s2 on s2.period = '(b) Previous' and s2.begin_date <= t1.CreatedDate and t1.CreatedDate < s2.end_date
我试图将其重写为Hive(v0.13):
create table tmp_leads2 as
select t1.*
,case when s1.period is not null then 'Y' else 'N' end as flag_cur
,case when s2.period is not null then 'Y' else 'N' end as flag_prev
,s1.cutoff_date as cutoff_date_cur ,s1.cutoff_dtkey as cutoff_dtkey_cur
,s2.cutoff_date as cutoff_date_prev ,s2.cutoff_dtkey as cutoff_dtkey_prev
from tmp_leads t1
left join param s1 on s1.period = '(a) Current' and s1.begin_date <= t1.CreatedDate and t1.CreatedDate < s1.end_date
left join param s2 on s2.period = '(b) Previous' and s2.begin_date <= t1.CreatedDate and t1.CreatedDate < s2.end_date ;
但是我收到错误:
Error occurred executing hive query: OK FAILED: SemanticException [Error 10017]: Line 8:53 Both left and right aliases encountered in JOIN 'CreatedDate'
我看到了它正在谈论的字段,但我不确定如何在保持查询结果相同的同时重写它。
答:
4赞
Gordon Linoff
3/15/2016
#1
问题来自s中的不平等条件。这带来了一个问题。以下内容可能足以满足您的目的:join
create table tmp_leads2 as
select t1.*,
(case when s1.period is not null then 'Y' else 'N' end) as flag_cur,
(case when s2.period is not null then 'Y' else 'N' end) as flag_prev,
s1.cutoff_date as cutoff_date_cur, s1.cutoff_dtkey as cutoff_dtkey_cur ,
s2.cutoff_date as cutoff_date_prev, s2.cutoff_dtkey as cutoff_dtkey_prev
from tmp_leads t1 left join
param s1
on s1.period = '(a) Current' left join
param s2
on s2.period = '(b) Previous'
where (s1.begin_date is null or s1.begin_date <= t1.CreatedDate and t1.CreatedDate < s1.end_date) or
(s2.begin_date is null or s2.begin_date <= t1.CreatedDate and t1.CreatedDate < s2.end_date);
这并不完全等同。它假设如果某个参数在表中,则该参数在所有日期都位于表中。这可能是一个合理的假设。如果没有,则需要更复杂的查询。
1赞
Sukhpreet
7/4/2017
#2
这是不会导致内部连接或别名问题并在 Hive 中为您提供预期结果的内容
create table tmp_leads2 as
select final.*
,case when s1period is not null then 'Y' else 'N' end as flag_cur
,case when s2period is not null then 'Y' else 'N' end as flag_prev
from
(select t1.*,
max(case when s1.begin_date <= t1.CreatedDate and t1.CreatedDate < s1.end_date then s1.peroid else null end) as s1period,
max(case when s1.begin_date <= t1.CreatedDate and t1.CreatedDate < s1.end_date then s1.cutoff_date else null end) as cutoff_date_cur,
max(case when s1.begin_date <= t1.CreatedDate and t1.CreatedDate < s1.end_date then s1.cutoff_dtkey else null end) as cutoff_dtkey_cur,
max(case when s2.begin_date <= t1.CreatedDate and t1.CreatedDate < s2.end_date then s2.peroid else null end) as s2period,
max(case when s2.begin_date <= t1.CreatedDate and t1.CreatedDate < s2.end_date then s2.cutoff_date else null end) as cutoff_date_prev,
max(case when s2.begin_date <= t1.CreatedDate and t1.CreatedDate < s2.end_date then s2.cutoff_dtkey else null end) as cutoff_dtkey_prev,
from tmp_leads t1
left join param s1 on s1.period = '(a) Current'
left join param s2 on s2.period = '(b) Previous'
group by t1.* /* type all column names required from t1*/
) final ;
评论