Alternative to Case when with subqquery to avoid e

2019-08-23 16:22发布

问题:

My table structure looks similiar to this

Customer_id Country item_type   Order_Size  Dates      Codes
A401           US   Fruit        Small       3/14/2016  11
A401           US   Fruit        Big         5/22/2016  12
A401           US   Vegetable   Small        7/12/2016  11
B509           US   Vegetable   Small        3/25/2015  92
B509           US   Vegetable   Big          3/15/2014  11
B509           US   Vegetable   Small        3/1/2014   34
A402           CA   Fruit       Small        3/14/2016  56
A402           CA   Fruit       Big          5/22/2016  76
A402           CA   Fruit       Small       7/12/2016   85
A403           CA   Vegetable   Small       7/12/2016   11
A403           CA   Vegetable   Small       3/25/2015   16
A403           CA   Vegetable   Big         3/15/2014   17
A403           CA   Vegetable   Small       3/1/2014    12

I am looking for each country how many repeated customers for each item_type are present only after they purchased Order_size =Big and only items purchased with order_size<>Big. To achieve this I wrote this code.

 SELECT Country,item_type,count(customer_id) from
   (select Country,customer_id, t.item_type, count(*)  as REPEATS
     from (select t.*,
         min(case when Order_Size = 'Big' then dates end) over (partition by customer_id, item_type) as min_big
  from data_test as t
 ) t
where dates > min_big
group by 1,2,3) D
group by 1,2

Result:

Country item_type   Count(Distinct(Customer_id))
CA  Vegetable   1
US  Vegetable   1
CA  Fruit   1

This works now but I wanted to add one more condition as to only when the codes are within certain table with condition so I wanted to add multiple conditions with one being subquery with the case when I modified my code.

SELECT Country,item_type,count(customer_id) from
   (select Country,customer_id, t.item_type, count(*)  as REPEATS
     from (select t.*,
         min(case when (Order_Size = 'Big' and Codes IN (SELECT CODES from table1 where type='TRUE' group by 1)) then dates end) over (partition by customer_id, item_type) as min_big
  from data_test as t
 ) t
where dates > min_big
group by 1,2,3) D
group by 1,2

This is throwing an error - Illegal expression in When clause within case statement. I also read that you cant use subquery within case and also use of IN. I have read many other questions related to this but I am still not clear as to how I can avoid using subquery in case. How do I change my code which does not throw error and also could be processed fast as my table is very large?

回答1:

You can change your code to connect to the table that drives the answer to "is this code supposed to be included" before the other rules. Be sure to do an outer join to a dataset that is distinct on the code field to prevent dupes.

This approach includes the result of your conditional before all the other rules and won't raise an error. Below I created a volatile table with a unique primary index on the code to do this but you could join to a derived table and get a similar result.

create volatile table vt_fruit_exp
( Customer_id char(4)
, Country char(2)
, item_type varchar(20)
, Order_Size char(5)
, Dates date
, Codes byteint)
primary index (Customer_id) on commit preserve rows;

insert into vt_fruit_exp values('A401','US','Fruit'    ,'Small' ,'2016-03-14', 11);
insert into vt_fruit_exp values('A401','US','Fruit'    ,'Big'   ,'2016-05-22', 12);
insert into vt_fruit_exp values('A401','US','Vegetable','Small' ,'2016-07-12', 11);
insert into vt_fruit_exp values('B509','US','Vegetable','Small' ,'2015-03-25', 92);
insert into vt_fruit_exp values('B509','US','Vegetable','Big'   ,'2014-03-15', 11);
insert into vt_fruit_exp values('B509','US','Vegetable','Small' ,'2014-03-01', 34);
insert into vt_fruit_exp values('A402','CA','Fruit'    ,'Small' ,'2016-03-14', 56);
insert into vt_fruit_exp values('A402','CA','Fruit'    ,'Big'   ,'2016-05-22', 76);
insert into vt_fruit_exp values('A402','CA','Fruit'    ,'Small' ,'2016-07-12', 85);
insert into vt_fruit_exp values('A403','CA','Vegetable','Small' ,'2016-07-12', 11);
insert into vt_fruit_exp values('A403','CA','Vegetable','Small' ,'2015-03-25', 16);
insert into vt_fruit_exp values('A403','CA','Vegetable','Big'   ,'2014-03-15', 17);
insert into vt_fruit_exp values('A403','CA','Vegetable','Small' ,'2014-03-01', 12);

create volatile table Table1
( Codes byteint,Code_In_flg byteint) unique primary index (Codes) 
on commit preserve rows
;
insert into Table1 values (11,1); 
insert into Table1 values (76,1);
insert into Table1 values (12,1);

-- Each country-> how many repeated customers for each item_type are present AFTER they purchased Order_size=Big.  Only items purchased with order_size<>Big
-- Country item_type   Count(Distinct(Customer_id))
-- CA  Vegetable   1
-- US  Vegetable   1
-- CA  Fruit       1

SELECT
  Country
, item_type
, count(customer_id) 
FROM (
  select Country,customer_id, t.item_type, count(*)  as REPEATS
  from (
    Select
      t.*
    , Min(Case When Order_Size = 'big' Then Dates End) Over (Partition By Customer_Id, Item_Type) As Min_Big
    From vt_fruit_exp As T
  ) t
where dates > min_big
group by 1,2,3) D
group by 1,2;

-- This works now but I wanted to add one more condition as to only when the codes are within certain table with condition so I wanted to add multiple conditions with one being subquery with the case when I modified my code.

-- use a join to the table that refers to whether the code is to be included or not instead of attempting a subquery withing ordered analytic
SELECT
  Country
, item_type
, count(customer_id)
FROM (
  select Country,customer_id, t.item_type, count(*)  as REPEATS
  from (
    Select
      t.*
    , Min(Case When Order_Size = 'big' And b.Code_In_flg=1 Then Dates End) Over (Partition By Customer_Id, Item_Type) As Min_Big
  from vt_fruit_exp T left outer join Table1 B on t.Codes=b.Codes 
 ) t
where dates > min_big
group by 1,2,3) D
group by 1,2