How to combine GROUP BY and ROW_NUMBER?

2019-03-11 17:17发布

问题:

I hope the following sample code is self-explanatory:

-- Sample data for the question: three priced @t1 rows, two @t2 rows,
-- and a link table that assigns each @t1 row to one @t2 row.
DECLARE @t1 TABLE (ID int, Price money, Name varchar(10));
DECLARE @t2 TABLE (ID int, Orders int, Name varchar(10));
DECLARE @relation TABLE (t1ID int, t2ID int);

-- Multi-row VALUES lists need SQL Server 2008 or later.
INSERT INTO @t1 (ID, Price, Name)
VALUES (1, 200, 'AAA'),
       (2, 150, 'BBB'),
       (3, 100, 'CCC');

INSERT INTO @t2 (ID, Orders, Name)
VALUES (1, 25, 'aaa'),
       (2, 35, 'bbb');

-- @t1 rows 1 and 2 belong to @t2 row 1; @t1 row 3 belongs to @t2 row 2.
INSERT INTO @relation (t1ID, t2ID)
VALUES (1, 1),
       (2, 1),
       (3, 2);

-- Total T1 price per T2 row, joined back to a single T1 row for its ID and name.
SELECT
    T2.ID   AS T2ID,
    T2.Name AS T2Name,
    T2.Orders,
    T1.ID   AS T1ID,
    T1.Name AS T1Name,
    T1Sum.Price
FROM @t2 AS T2
INNER JOIN (
    -- One row per t2ID: the summed price plus one t1ID to join back on.
    SELECT
        Rel.t2ID,
        MAX(Rel.t1ID) AS t1ID,
        -- MAX just returns the highest ID regardless of price; what I need is:
        -- ROW_NUMBER() OVER (PARTITION BY Rel.t2ID ORDER BY Price DESC) AS PriceList,
        SUM(Price) AS Price
    FROM @t1 AS T1
    INNER JOIN @relation AS Rel
        ON Rel.t1ID = T1.ID
    GROUP BY Rel.t2ID
) AS T1Sum
    ON T1Sum.t2ID = T2.ID
INNER JOIN @t1 AS T1
    ON T1Sum.t1ID = T1.ID

Result:

T2ID   T2Name   Orders  T1ID    T1Name  Price     
 1      aaa       25     2       BBB    350,00     
 2      bbb       35     3       CCC    100,00

What I need is shown in the commented-out line above: a way to get the ROW_NUMBER while still grouping in the first place. That is, I need the sum of all T1 prices grouped by T2.ID in the relation table and, in the outer query, the t1ID with the highest price.

In other words: how can I change MAX(Rel.t1ID)AS t1ID into something that returns the ID with the highest price?

So the desired result is (notice that the first T1ID changed from 2 to 1, since it has the higher price):

T2ID   T2Name   Orders  T1ID    T1Name  Price     
 1      aaa       25     1       AAA    350,00     
 2      bbb       35     3       CCC    100,00

Note: in case you're wondering why I don't multiply Orders by Price: they are not related (I should have left this column out since it's a bit ambiguous; please ignore it, I only added it to make everything less abstract). Actually, Orders must remain unchanged; that's the reason for the sub-query approach to join both, and the reason why I need to group by in the first place.

Conclusion: obviously the core of my question can be answered by the OVER clause, which can be applied to any aggregate function such as SUM (see Damien's answer); that was new to me. Thank you all for your working approaches.

回答1:

Wow, the other answers look complex - so I'm hoping I've not missed something obvious.

You can use OVER/PARTITION BY against aggregates, and they'll then do grouping/aggregating without a GROUP BY clause. So I just modified your query to:

-- Window functions replace the GROUP BY: every (t2ID, t1ID) pair keeps its own
-- row, carrying both the per-t2ID total and its price rank, so the top-priced
-- row can be picked afterwards with PriceList = 1.
SELECT
    T2.ID   AS T2ID,
    T2.Name AS T2Name,
    T2.Orders,
    T1.ID   AS T1ID,
    T1.Name AS T1Name,
    T1Sum.Price
FROM @t2 AS T2
INNER JOIN (
    SELECT
        Rel.t2ID,
        -- Plain t1ID instead of MAX(Rel.t1ID): without a GROUP BY each row keeps its own ID.
        Rel.t1ID,
        -- Rank inside each t2ID by price, highest first. t1ID breaks price ties so
        -- the row numbered 1 is the same on every execution (lowest ID wins a tie).
        ROW_NUMBER() OVER (PARTITION BY Rel.t2ID ORDER BY T1.Price DESC, Rel.t1ID) AS PriceList,
        -- SUM ... OVER puts the per-t2ID total on every row; no GROUP BY needed.
        SUM(T1.Price) OVER (PARTITION BY Rel.t2ID) AS Price
    FROM @t1 AS T1
    INNER JOIN @relation AS Rel
        ON Rel.t1ID = T1.ID
) AS T1Sum
    ON T1Sum.t2ID = T2.ID
INNER JOIN @t1 AS T1
    ON T1Sum.t1ID = T1.ID
-- Keep only the highest-priced T1 row of each T2 group.
WHERE T1Sum.PriceList = 1

Which gives the requested result set.



回答2:

-- C lists every related (t2ID, t1ID) pair with its price and its price rank
-- inside the t2ID group. The main query then sums all rows of a group (C1)
-- while taking the t1ID from the group's top-ranked row (C2).
;WITH C AS
(
    SELECT
        Rel.t2ID,
        Rel.t1ID,
        T1.Price,
        -- rn = 1 marks the highest-priced row per t2ID. t1ID breaks price ties so
        -- the same row is chosen on every execution (lowest ID wins a tie).
        ROW_NUMBER() OVER (PARTITION BY Rel.t2ID ORDER BY T1.Price DESC, Rel.t1ID) AS rn
    FROM @t1 AS T1
    INNER JOIN @relation AS Rel
        ON T1.ID = Rel.t1ID
)
SELECT
    T2.ID   AS T2ID,
    T2.Name AS T2Name,
    T2.Orders,
    T1.ID   AS T1ID,
    T1.Name AS T1Name,
    T1Sum.Price
FROM @t2 AS T2
INNER JOIN (
    SELECT
        C1.t2ID,
        SUM(C1.Price) AS Price,
        C2.t1ID
    FROM C AS C1
    -- C2 contributes exactly one row per t2ID (rn = 1), so the join does not
    -- multiply C1 rows and the SUM stays the plain group total.
    INNER JOIN C AS C2
        ON C1.t2ID = C2.t2ID
       AND C2.rn = 1
    GROUP BY C1.t2ID, C2.t1ID
) AS T1Sum
    ON T2.ID = T1Sum.t2ID
INNER JOIN @t1 AS T1
    ON T1.ID = T1Sum.t1ID


回答3:

Undoubtedly this can be simplified, but the results match your expectations.

The gist of this is to

  • Calculate the maximum price in a separate CTE for each t2ID
  • Calculate the total price in a separate CTE for each t2ID
  • Combine the results of both CTEs

SQL Statement

-- MaxPrice: for each t2ID, the t1ID of its highest-priced related @t1 row.
;WITH MaxPrice AS (
    SELECT
        maxt1.t2ID,
        maxt1.t1ID
    FROM (
        SELECT
            t2.ID AS t2ID,
            t1.ID AS t1ID,
            -- Rank by price inside each t2ID, highest first. t1.ID breaks price
            -- ties so rn = 1 names the same row on every execution.
            ROW_NUMBER() OVER (PARTITION BY t2.ID ORDER BY t1.Price DESC, t1.ID) AS rn
        FROM @t1 AS t1
        INNER JOIN @relation AS r
            ON r.t1ID = t1.ID
        INNER JOIN @t2 AS t2
            ON t2.ID = r.t2ID
    ) AS maxt1
    WHERE maxt1.rn = 1
),
-- SumPrice: for each t2ID, the total price of all related @t1 rows.
SumPrice AS (
    SELECT
        t2.ID         AS t2ID,
        SUM(t1.Price) AS Price
    FROM @t1 AS t1
    INNER JOIN @relation AS r
        ON r.t1ID = t1.ID
    INNER JOIN @t2 AS t2
        ON t2.ID = r.t2ID
    GROUP BY t2.ID
)
-- Combine both CTEs. The columns are aliased to the six headers of the desired
-- result, and the T1 id is returned once instead of twice.
SELECT
    t2.ID   AS T2ID,
    t2.Name AS T2Name,
    t2.Orders,
    t1.ID   AS T1ID,
    t1.Name AS T1Name,
    sp.Price
FROM @t2 AS t2
INNER JOIN MaxPrice AS mp
    ON mp.t2ID = t2.ID
INNER JOIN SumPrice AS sp
    ON sp.t2ID = t2.ID
INNER JOIN @t1 AS t1
    ON t1.ID = mp.t1ID


回答4:

The deduplication (to select the max T1) and the aggregation need to be done as distinct steps. I've used a CTE since I think this makes it clearer:

-- sumCTE: total price of all related @t1 rows per t2ID.
;WITH sumCTE AS
(
    SELECT
        Rel.t2ID,
        SUM(T1.Price) AS Price
    FROM @t1 AS T1
    INNER JOIN @relation AS Rel
        ON Rel.t1ID = T1.ID
    GROUP BY Rel.t2ID
),
-- maxCTE: every related (t2ID, t1ID) pair, ranked by price inside its t2ID.
maxCTE AS
(
    SELECT
        Rel.t2ID,
        Rel.t1ID,
        -- Highest price gets PriceList = 1. t1ID breaks price ties so the same
        -- row is chosen on every execution (lowest ID wins a tie).
        ROW_NUMBER() OVER (PARTITION BY Rel.t2ID ORDER BY T1.Price DESC, Rel.t1ID) AS PriceList
    FROM @t1 AS T1
    INNER JOIN @relation AS Rel
        ON Rel.t1ID = T1.ID
)
SELECT
    T2.ID   AS T2ID,
    T2.Name AS T2Name,
    T2.Orders,
    T1.ID   AS T1ID,
    T1.Name AS T1Name,
    sumT1.Price
FROM @t2 AS T2
INNER JOIN sumCTE AS sumT1
    ON sumT1.t2ID = T2.ID
INNER JOIN maxCTE AS maxT1
    ON maxT1.t2ID = T2.ID
INNER JOIN @t1 AS T1
    ON T1.ID = maxT1.t1ID
-- Keep only the top-priced T1 row of each T2 group.
WHERE maxT1.PriceList = 1