TSQL : the top records of a given partition (condi

2019-09-10 17:10发布

问题:

I need to get all the records in TABLE_A where at least the 2 last Status are vacant (relative to Inspection_Date) and the Room_ID does not exist in TABLE_B.

This is a simplified table I am using as an example:

TABLE_A:

  Room_Id   Status    Inspection_Date
  -------------------------------------
    1        vacant      5/15/2015
    2        occupied    5/21/2015
    2        vacant      1/19/2016
    1        occupied   12/16/2015
    4        vacant      3/25/2016
    3        vacant      8/27/2015
    1        vacant      4/17/2016
    3        vacant     12/12/2015
    3        vacant      3/22/2016
    4        occupied    2/2/2015
    4        vacant      3/24/2015

TABLE_B:

  Room_Id   Status    Inspection_Date
  ------------------------------------
    1        vacant       5/15/2015
    2        occupied     5/21/2015
    2        vacant       1/19/2016
    1        vacant      12/16/2015
    1        vacant       4/17/2016

My result should look like this:

   Room_Id  Status  Inspection_Date
   ---------------------------------
    3       vacant      8/27/2015
    3       vacant     12/12/2015
    3       vacant      3/22/2016
    4       occupied    2/2/2015
    4       vacant      3/24/2015
    4       vacant      3/25/2016

I have tried it this way, it works with the example but is not working with my data .. the logic is not complete:

 With cteA As
(
Select *, Row_Number() Over (Partition By Room_ID, Status Order By     Inspection_Date Desc) RowNum From Table_A 
)
Select * From Table_A Where Room_Id In
(
Select Room_Id 
    From cteA
    Where Room_Id Not In (Select Room_Id From Table_B) 
        And Status = 'vacant' And RowNum > 1 
)
    Order By Room_Id, Inspection_Date

Here is the schema:

CREATE TABLE TABLE_A (`Room_Id` int, 
                      `Status` varchar(55), 
                      `Inspection_Date` Date
                     );

INSERT INTO TABLE_A (Room_Id, Status, Inspection_Date)
VALUES  (1, 'vacant',      '5/15/2015'),
        (2, 'occupied',    '5/21/2015'),
        (2, 'vacant',      '1/19/2016'),
        (1, 'occupied',   '12/16/2015'),
        (4, 'vacant',      '3/25/2016'),
        (3, 'vacant',      '8/27/2015'),
        (1, 'vacant',      '4/17/2016'),
        (3, 'vacant',     '12/12/2015'),
        (3, 'vacant',      '3/22/2016'),
        (4, 'occupied',       '2/2/2015'),
        (4, 'vacant',      '3/24/2015');

CREATE TABLE TABLE_B (`Room_Id` int, 
                      `Status` varchar(55),         
                      `Inspection_Date` Date
                     );

INSERT INTO TABLE_B (Room_Id, Status, Inspection_Date)
VALUES
        (1, 'vacant',      '5/15/2015'),
        (2, 'occupied',    '5/21/2015'),
        (2, 'vacant',      '1/19/2016'),
        (1, 'vacant',      '12/16/2015'),
        (1, 'vacant',      '4/17/2016'),;

回答1:

PLAIN

  1. For each room in TABLE_A select the last date (as lastDate)

  2. for each room in TABLE_A select previous date (as prevLastDate)

  3. Get room_ids from lastDate which has the status 'vacant' (as lastDateVacant)

  4. Get room_ids from prevLastDate which has the status 'vacant' (as prevLastDateVacant)

  5. Filter TABLE_A to have only IDs which are there in lastDateVacant and prevLastDateVacant (inner)

  6. Filter TABLE_A to have only IDs which are not in TABLE_B (left outer + IS NULL)

As the result you have:

WITH lastDate AS (
    SELECT room_id AS room,MAX(inspection_date) AS date
    FROM "TABLE_A"
    GROUP BY room_id
), prevLastDate AS (
    SELECT room_id AS room,MAX(inspection_date) AS date
    FROM "TABLE_A" a
    INNER JOIN lastDate ON a.room_id = lastDate.room and a.inspection_date < lastDate.date
    GROUP BY room_id
), lastDateVacant AS (
    SELECT room_id AS room FROM "TABLE_A"
    WHERE (room_id,inspection_date) IN (
        SELECT room, date FROM lastDate
    ) AND status = 'vacant' 
), prevLastDateVacant AS (
    SELECT room_id AS room FROM "TABLE_A"
    WHERE (room_id,inspection_date) IN (
        SELECT room, date FROM prevLastDate
    ) AND status = 'vacant' 
)

SELECT a.* FROM "TABLE_A" a 
INNER JOIN lastDateVacant 
    ON a.room_id = lastDateVacant.room
INNER JOIN prevLastDateVacant 
    ON a.room_id = prevLastDateVacant.room
LEFT OUTER JOIN "TABLE_B" AS b 
    ON a.room_id = b.room_id    
WHERE b.room_id IS NULL 
ORDER BY a.room_id ASC, a.inspection_date DESC

Window Function

Not sure if the syntax for TSQL is the same, but here is the shorter variant:

  1. Ranking with partion by room and or order by date

  2. Check for IDs with rank 1 and 2 having 'vacant' status, grouping by ID and having them occured more than once

WITH room AS ( select room from ( select room_id as room,status,inspection_date as date, RANK() OVER (PARTITION BY room_id ORDER BY inspection_date DESC) AS RANK from "TABLE_A" ) where (rank in ( 1,2) and status = 'vacant') group by room having count() > 1 ) SELECT a. FROM "TABLE_A" a INNER JOIN room ON a.room_id = room.room LEFT OUTER JOIN "TABLE_B" AS b ON a.room_id = b.room_id
WHERE b.room_id IS NULL ORDER BY a.room_id ASC, a.inspection_date DESC



回答2:

Your conditions translate almost directly into a query. You can use window functions for the vacant count and not exists for the relationship to table_b:

select a.*
from (select a.*,
             sum(case when status = 'vacant' then 1 else 0 end) over (partition by room_id) as num_vacant
      from table_a a
      where not exists (select 1
                        from table_b b
                        where b.room_id = a.room_id
                       )
    ) a
where num_vacant >= 2;

EDIT:

If you want the last two to be vacant, you can do find that last record that is non-vacant and then count the ones bigger than that:

select a.*
from (select a.*,
             sum(case when a2.max_nonvacant > a.inspection_date then 0 else 1) over (partition by room_id) as num_vacant_last
      from table_a a outer apply
           (select max(inspection_date) as max_nonvacant
            from table_a a2
            where a2.room_id = a.room_id and a2.status <> 'vacant'
           ) a2
      where not exists (select 1
                        from table_b b
                        where b.room_id = a.room_id
                       )
    ) a
where num_vacant_last >= 2;


回答3:

This worked for me and I have checked again and again.

with Rooms as (
select
    Room_Id, Status,
    row_number() over (partition by Room_Id order by Inspection_Date desc) as rn
from TABLE_A
), Candidates as (
select Room_Id from Rooms group by Room_Id
having sum(case when rn in (1, 2) and Status = 'vacant' then 1 else null end) = 2
)
select * from TABLE_A
where Room_Id in (select Room_Id from Candidates except select Room_Id from TABLE_B)
order by Room_Id, Inspection_Date desc


回答4:

I did this test: extracts all the room_id that considering the last two Status (equal status) in relation to the inspection_date (descending order):

select * from TABLE_A WHERE [Room_Id] IN
(
    SELECT [Room_Id] FROM 
    (SELECT ROW_NUMBER() OVER(PARTITION BY [Room_Id] ORDER BY [Inspection_Date] DESC ) AS id,   
      [Room_Id],[Status],[Inspection_Date]
      FROM TABLE_A 
      ) AA
    WHERE AA.ID <=2
--selecting the last two Inspection_Date
 and [Status] = 'vacant'  
            GROUP BY [Room_Id],[Status] HAVING COUNT(*) >1          
) 
AND 
[Room_Id] NOT IN (SELECT Room_Id FROM TABLE_B)
order by Room_Id, Inspection_Date desc