Got mired in a selective DELETE statement on a sin

2019-07-11 18:52发布

问题:

Say, I have a table like this (SQL Server 2008):

CREATE TABLE tbl (ID INT, dtIn DATETIME2, dtOut DATETIME2, Type INT)

INSERT tbl VALUES
(1, '05:00', '6:00', 1),  -- will be removed
(2, '05:00', '7:00', 1),  -- will be removed
(3, '05:01', '8:00', 1),
(4, '05:00', '8:00', 1),
(5, '05:00', '6:00', 2),  -- will be removed
(6, '05:00', '7:00', 2),
(7, '05:00', '7:00', 3),
(8, '04:00', '7:00', 3)

I need to remove all records of the same 'type' (if 2 or more are found) with the same 'dtIn' for their 'type', except the one with the largest 'dtOut'. In other words, the table above should result in this:

(3, '05:01', '8:00', 1),   -- no matching 'dtIn' for 'type' = 1
(4, '05:00', '8:00', 1),   -- largest 'dtOut' for 'type' = 1
(6, '05:00', '7:00', 2),   -- largest 'dtOut' for 'type' = 2
(7, '05:00', '7:00', 3),   -- no matching 'dtIn' for 'type' = 3
(8, '04:00', '7:00', 3)    -- no matching 'dtIn' for 'type' = 4

How do you even select several rows with the equal type from the same table. You can't do select * from tbl where type=type.... Anyway, I'd appreciate some help with this...

回答1:

Here's one way to select the rows you want to delete:

SELECT *
FROM tbl T1
WHERE EXISTS
(
    SELECT *
    FROM tbl T2
    WHERE T1.Type = T2.Type
    AND T1.dtIn = T2.dtIn
    AND (
            T1.dtOut < T2.dtOut
            OR (T1.dtOut = T2.dtOut AND T1.id < T2.id)
        )
)

This query can also be easily changed to actually delete the rows. Just change SELECT * to DELETE T1. But please do test that it does what you want before actually running the delete statement.

See it working online: sqlfiddle


Update

Here's an approach using ROW_NUMBER:

;WITH T1 AS (
    SELECT id, ROW_NUMBER() OVER (PARTITION BY Type, dtIn
                                  ORDER BY dtOut DESC, ID DESC) AS rn
    FROM tbl
)
SELECT * FROM tbl
WHERE id IN
(
    SELECT id
    FROM T1
    WHERE rn > 1
)

See it working online: sqlfiddle



回答2:

not very elegant (+ the conditions are the same as Mark's answer) but it's an alternative.

Fiddle is here

;with my_cte(ID)
as
(
  SELECT T1.ID
  FROM 
      tbl T1
      JOIN tbl T2 on
        T1.Type = T2.Type
        AND T1.dtIn = T2.dtIn
        AND T1.dtOut < T2.dtOut
  )
select *
from tbl t
where not exists 
              (
              select 1 
              from my_cte c 
              where t.ID = c.ID
              ) 


回答3:

Queries for Duplicate Rows

I just saw that you can have duplicate rows. So:

DELETE X
FROM
   (SELECT Row_Number() OVER (PARTITION BY Type, dtIn ORDER BY dtOut DESC) Item, *
   FROM #tbl) X
WHERE Item > 1

Note that this version above doesn't need a join so should perform better than all other queries on the page.

or

DELETE T
FROM #tbl T
   CROSS APPLY (
      SELECT TOP 1 *
      FROM #tbl T2
      WHERE
         T.dtIn = T2.dtIn
         AND T.Type = T2.Type
      ORDER BY T2.dtOut DESC, T2.ID DESC
   ) X
WHERE T.ID < X.ID

or

DELETE T
FROM #tbl T
   INNER JOIN #tbl T2
      ON T.dtIn = T2.dtIn
      AND T.Type = T2.Type
      AND (
         T.dtOut < T2.dtOut
         OR (T.tdOut = T2.dtOut AND T.ID > T2.ID)
      )

Queries for no Duplicate Rows

If you couldn't have duplicate rows (except for ID) then it would be simpler:

DELETE T
FROM #tbl T
WHERE EXISTS (
   SELECT * FROM #tbl T2
   WHERE
      T.dtIn = T2.dtIn
      AND T.Type = T2.Type
      AND T.dtOut < T2.dtOut
)

or even

DELETE T
FROM #tbl T
   INNER JOIN #tbl T2
      ON T.dtIn = T2.dtIn
      AND T.Type = T2.Type
      AND T.dtOut < T2.dtOut


回答4:

Mine is a little different from @Mark's, but I believe it should get you the same results (only because I assume that id is unique):

DELETE a
FROM dbo.tbl a
WHERE id <> (
    SELECT MAX(id)
    from dbo.tbl b
    WHERE a.type = b.type
      AND a.dtIn = b.dtIn
)


回答5:

DECLARE @Table TABLE     
(  
ID INT,  
newno int  
)   

insert @table  
SELECT T1.ID,   
ROW_NUMBER() OVER (PARTITION BY Type, dtIn
                              ORDER BY dtOut DESC, ID DESC) AS newno  
 FROM tbl T1      

 select *  
from tbl t
where t.ID IN
             (
              select ID from @Table where newno > 1
              )