Group by to create a vertical coalesce

2019-08-10 22:36发布

问题:

I have this query.

select transaction, bbk, sbk, obk, ibk
from
(
  select 
    transaction, 
    case when "'BBK'" = 1 then country end bbk,
    case when "'SBK'" = 1 then country end sbk,
    case when "'OBK'" = 1 then country end obk,
    case when "'IBK'" = 1 then country end ibk  
  from (
    select 
    regexp_substr("col_a", '[^~]+', 1, 1) as transaction,
    regexp_substr("col_a", '[^~]+', 1, 2) as code,
    regexp_substr("col_a", '[^~]+', 1, 3) as country
    from Table1 t)
  pivot 
  ( 
    --case when count(code) = 1 then 
    count(code)
    for code in ('BBK','SBK','OBK','IBK')
  )  
)
group by transaction, bbk, sbk, obk, ibk
order by transaction

The results, as you can see in this fiddle

00004719    US  US  (null)  (null)
00004719    (null)  (null)  GB  (null)
00004719    (null)  (null)  (null)  DE

Show multiple lines per transaction. I would like to alter the query so that only 1 line per transaction occurs.

Essentially coalescing vertically the nulls in the other records to achieve:

00004719    US  US  GB  DE

How might this be done?

回答1:

That is exactly what pivot is for:

select 
  transaction, 
  "'BBK'",
  "'SBK'",
  "'OBK'",
  "'IBK'"
from (
  select 
  regexp_substr("col_a", '[^~]+', 1, 1) as transaction,
  regexp_substr("col_a", '[^~]+', 1, 2) as code,
  regexp_substr("col_a", '[^~]+', 1, 3) as country
  from Table1 t)
pivot 
( 
  MAX(country) for code in ('BBK','SBK','OBK','IBK')
);


回答2:

You can start with the results of your query and group over the transaction column again:

with x as (
select transaction, bbk, sbk, obk, ibk
from
(
      select 
        transaction, 
        case when "'BBK'" = 1 then country end bbk,
        case when "'SBK'" = 1 then country end sbk,
        case when "'OBK'" = 1 then country end obk,
        case when "'IBK'" = 1 then country end ibk  
      from (
        select 
        regexp_substr("col_a", '[^~]+', 1, 1) as transaction,
        regexp_substr("col_a", '[^~]+', 1, 2) as code,
        regexp_substr("col_a", '[^~]+', 1, 3) as country
        from Table1 t)
      pivot 
      ( 
        --case when count(code) = 1 then 
        count(code)
        for code in ('BBK','SBK','OBK','IBK')
      )  
  )
group by transaction, bbk, sbk, obk, ibk
) select
transaction, max(bbk), max(sbk), max(obk), max(ibk)
from x
group by transaction
order by transaction;

Example SQLFiddle

Edit:

or equivalently, just group by transaction in the original query:

select 
  transaction, 
  max(case when "'BBK'" = 1 then country end) bbk,
  max(case when "'SBK'" = 1 then country end) sbk,
  max(case when "'OBK'" = 1 then country end) obk,
  max(case when "'IBK'" = 1 then country end) ibk  
from (
  select 
  regexp_substr("col_a", '[^~]+', 1, 1) as transaction,
  regexp_substr("col_a", '[^~]+', 1, 2) as code,
  regexp_substr("col_a", '[^~]+', 1, 3) as country
  from Table1 t)
pivot ( 
  count(code)
  for code in ('BBK','SBK','OBK','IBK')
)  
group by 
  transaction
order by 
  transaction;

Example SQLFiddle