SQL: Merge Date Ranges

2019-01-15 02:06发布

I have a table that describes the work slices of a business working calendar (times are in 24-hour format):

PK  | STARTDATE          | ENDDATE
__________________________________________
1   | 2012/07/21 02:00   | 2012/07/21 04:00
2   | 2012/07/21 03:00   | 2012/07/21 10:00
3   | 2012/07/21 06:00   | 2012/07/21 17:00
4   | 2012/07/21 18:00   | 2012/07/21 19:00

Now I would like to merge the overlapping date ranges (within a given start and end date) like this:

PK  | STARTDATE          | ENDDATE
__________________________________________
1   | 2012/07/21 02:00   | 2012/07/21 17:00
2   | 2012/07/21 18:00   | 2012/07/21 19:00

Is there a way to do this with the SQL97 standard? If so, what about other operations? For example, if I want to do an inverted merge, the result should be:

PK  | STARTDATE          | ENDDATE
__________________________________________
1   | 2012/07/21 00:00   | 2012/07/21 02:00
2   | 2012/07/21 19:00   | 2012/07/22 00:00

4条回答
Melony?
2楼-- · 2019-01-15 02:30

This is my solution.

-- Start from a clean scratch table: drop any copy left over in this session.
IF OBJECT_ID('tempdb..#tblDates') IS NOT NULL
BEGIN
    DROP TABLE #tblDates;
END

-- AutoId is an identity surrogate used later to tell otherwise identical rows apart.
CREATE TABLE #tblDates
(
    AutoId    INT IDENTITY,
    StartDate DATE,
    EndDate   DATE
);

-- First cluster of overlapping ranges (November).
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-11-02', '2014-11-08');
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-11-07', '2014-11-10');
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-11-06', '2014-11-12');

-- A range that shares its StartDate with the first row but ends later.
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-11-02', '2014-11-15');

-- Second cluster of overlapping ranges (December).
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-12-10', '2014-12-13');
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-12-12', '2014-12-15');
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-12-14', '2014-12-16');


-- Optional: collapse exact duplicates, keeping the row with the highest AutoId
-- for each (StartDate, EndDate) pair.
;WITH Ranked AS
(
    SELECT  AutoId,
            ROW_NUMBER() OVER (PARTITION BY StartDate, EndDate ORDER BY AutoId DESC) AS KeepOrder
    FROM    #tblDates
)
DELETE FROM Ranked
WHERE  KeepOrder > 1;

-- Optional: among rows sharing a StartDate, keep only the one with the latest EndDate.
DELETE  shorter
FROM    #tblDates AS shorter
WHERE   EXISTS
        (
            SELECT  1
            FROM    #tblDates AS longer
            WHERE   longer.StartDate = shorter.StartDate
                    AND longer.EndDate > shorter.EndDate
        );

-- Optional: among rows sharing an EndDate, keep only the one with the earliest StartDate.
DELETE  shorter
FROM    #tblDates AS shorter
WHERE   EXISTS
        (
            SELECT  1
            FROM    #tblDates AS longer
            WHERE   longer.EndDate = shorter.EndDate
                    AND longer.StartDate < shorter.StartDate
        );

-- Optional: drop every range that lies entirely inside another row's range.
-- Two rows with identical dates would each match the other and both be deleted,
-- so this step depends on exact duplicates having been removed first.
DELETE  contained
FROM    #tblDates AS contained
WHERE   EXISTS
        (
            SELECT  1
            FROM    #tblDates AS container
            WHERE   container.AutoId <> contained.AutoId
                    AND contained.StartDate >= container.StartDate
                    AND contained.StartDate <= container.EndDate
                    AND contained.EndDate >= container.StartDate
                    AND contained.EndDate <= container.EndDate
        );


-- Merge overlapping ranges by growing each chain one overlapping row at a time.
;WITH Ranges (StartDate, EndDate)
AS
(
    -- Anchor: rows whose StartDate is not covered by any row that starts earlier.
    SELECT  head.StartDate, head.EndDate
    FROM    #tblDates AS head
    WHERE   NOT EXISTS
            (
                SELECT  1
                FROM    #tblDates AS earlier
                WHERE   earlier.AutoId <> head.AutoId
                        AND earlier.StartDate < head.StartDate
                        AND earlier.EndDate >= head.StartDate
            )
    UNION ALL
    -- Recursive step: extend a chain with any row that covers its current EndDate
    -- and ends strictly later, so every step pushes EndDate forward.
    SELECT  grown.StartDate, nxt.EndDate
    FROM    Ranges AS grown
            INNER JOIN #tblDates AS nxt
                ON  nxt.StartDate <= grown.EndDate
                AND nxt.EndDate > grown.EndDate
)

-- Each chain start keeps the furthest EndDate it reached.
SELECT   StartDate, MAX(EndDate) AS EndDate
FROM     Ranges
GROUP BY StartDate
查看更多
祖国的老花朵
3楼-- · 2019-01-15 02:44

Based on ErikE's response:

-- Recreate the sample table from scratch.
IF OBJECT_ID('dbo.Periods') IS NOT NULL
    DROP TABLE Periods
GO

CREATE TABLE Periods
(
    StartDate DATE NOT NULL,
    EndDate   DATE NOT NULL
)
GO

-- Three clusters: a nested pair, an overlapping chain, and a single-day period.
INSERT INTO Periods (StartDate, EndDate)
VALUES
    ('1980-01-01', '1980-01-10'),
    ('1980-01-03', '1980-01-07'),

    ('2000-01-01', '2000-01-10'),
    ('2000-01-05', '2000-01-30'),
    ('2000-01-12', '2000-01-20'),

    ('2021-01-01', '2021-01-01')
GO

-- Merge overlapping periods with window functions only (no self-join, no recursion).
-- Periods that overlap or touch (previous coverage >= StartDate) end up in one group.
; with Numbered AS (
   -- Number the periods chronologically and record how far the periods before each
   -- row already reach. This must be the MAX over ALL preceding rows: comparing with
   -- only the immediately preceding row (LAG) would split a group whenever a short
   -- period nested in an earlier, longer one comes directly before the current row.
   -- EndDate is a tie-break so both windows see rows in the same deterministic order.
   SELECT
     rownum = row_number() OVER (ORDER BY StartDate, EndDate),
     PrevEndDate = Coalesce(
        Max(EndDate) OVER (ORDER BY StartDate, EndDate
                           ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING),
        Convert(datetime2, '0001-01-01')),   -- first row: nothing precedes it
     p.*
   FROM Periods p
), Dates AS (
   -- Keep StartDate only on rows that open a new group; other rows get NULL.
   SELECT
        StartDate = CASE WHEN PrevEndDate < StartDate THEN StartDate ELSE NULL END,
        EndDate,
        rownum
   FROM   Numbered
), startGrouping AS (
   -- Carry the most recent group start forward onto every row of the group.
   -- Group starts increase with rownum, so a running MAX is the latest one.
   SELECT
      StartDate = max(StartDate) OVER (ORDER BY rownum ROWS UNBOUNDED PRECEDING),
      EndDate,
      rownum
   FROM Dates
),
 groups AS (
   -- Rank the rows of each group so the one with the latest EndDate comes first.
   SELECT
      StartDate,
      EndDate,
      rownum,
      ingroupRownum = row_number() OVER (PARTITION BY StartDate ORDER BY EndDate desc)
   FROM startGrouping e1
)
SELECT StartDate, EndDate
from groups
WHERE  ingroupRownum = 1
ORDER BY StartDate
查看更多
趁早两清
4楼-- · 2019-01-15 02:46

This should do the trick in most SQL-92 supporting DBMSes. No advanced SQL Syntax here.

The performance may not be so good because it has to join the same table 4 times. If using DBMS-specific syntax is an option you'll probably be able to get much better performance.

-- Merge overlapping ranges in dbo.Dates using only correlated subqueries and
-- NOT EXISTS (no window functions, no recursion).
--   * A row opens a merged range when no other row starts earlier and is still
--     running at this row's StartDate.
--   * The merged range closes at the smallest EndDate, at or after the opening
--     row's EndDate, that no other row runs across.
-- Both tests compare against the other row's full extent, so ranges nested inside
-- a longer range and ranges sharing a StartDate are merged correctly.
-- Ranges that merely touch (EndDate = next StartDate) stay separate because the
-- comparisons are strict.
-- DISTINCT removes the repeated result when several rows share the opening StartDate.
SELECT DISTINCT
  D.StartDate,
  (
    SELECT Min(E.EndDate)
    FROM dbo.Dates E
    WHERE
      E.EndDate >= D.EndDate
      AND NOT EXISTS (
        -- another row straddles E.EndDate, so the merged range continues past it
        SELECT *
        FROM dbo.Dates E2
        WHERE
          E2.StartDate < E.EndDate
          AND E2.EndDate > E.EndDate
      )
  ) EndDate
FROM
  dbo.Dates D
WHERE
  NOT EXISTS (
    -- another row straddles D.StartDate, so D does not open a merged range
    SELECT *
    FROM dbo.Dates D2
    WHERE
      D2.StartDate < D.StartDate
      AND D2.EndDate > D.StartDate
  );

See a Sql Fiddle for this same query working in several different RDBMSes:

Update

Here's a new query that still doesn't do recursion, and only scans the table once. It does have two sorts, which are the most expensive part of the query (88% of the cost in this sample with just a few rows). However, do not underestimate the benefit of doing fewer reads, and not having to join... sometimes queries like this can kick major butt.

-- Merge overlapping ranges without recursion or self-joins.
-- Ranges that overlap or touch are merged (the comparisons below are strict "<").
WITH Data AS (
   -- Inline sample slices, converted from time strings to datetime values.
   SELECT
      StartDate = Convert(datetime, StartDate),
      EndDate = Convert(datetime, EndDate)
   FROM (VALUES
      ('02:00', '04:00'), ('03:00', '10:00'), (' 09:00', '12:00'), (' 11:00', '17:00'), (' 18:00', '19:00')
   ) D (StartDate, EndDate)
), LeadLag AS (
   -- For each row in chronological order:
   --   PrevMaxEndDate    = furthest EndDate reached by all earlier rows
   --   RunningMaxEndDate = furthest EndDate reached up to and including this row
   --   NextStartDate     = StartDate of the following row
   -- Running maxima are used instead of the neighbouring row's own EndDate so that
   -- a short range nested inside an earlier, longer one cannot split an island.
   -- EndDate is a tie-break so every window sees the same deterministic order.
   SELECT
      RowNum = Row_Number() OVER (ORDER BY StartDate, EndDate),
      PrevMaxEndDate = Coalesce(
         Max(EndDate) OVER (ORDER BY StartDate, EndDate
                            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING),
         Convert(datetime2, '00010101')),
      RunningMaxEndDate = Max(EndDate) OVER (ORDER BY StartDate, EndDate ROWS UNBOUNDED PRECEDING),
      NextStartDate = Coalesce(Lead(StartDate) OVER (ORDER BY StartDate, EndDate), Convert(datetime2, '99991231')),
      StartDate
   FROM Data
), Dates AS (
   -- Keep only rows that open an island (StartDate set), close one (EndDate set), or both.
   SELECT
      L.RowNum,
      X.StartDate,
      X.EndDate
   FROM
      LeadLag L
      CROSS APPLY (
         SELECT
            StartDate = CASE WHEN L.PrevMaxEndDate < L.StartDate THEN L.StartDate ELSE NULL END,
            EndDate = CASE WHEN L.RunningMaxEndDate < L.NextStartDate THEN L.RunningMaxEndDate ELSE NULL END
      ) X
   WHERE
      X.StartDate IS NOT NULL
      OR X.EndDate IS NOT NULL
), Final AS (
   -- Pair each opening row with the nearest closing EndDate at or after it.
   -- The window is ordered by RowNum, not EndDate: opening rows carry a NULL EndDate,
   -- and ordering by EndDate would sort them all first, so every one of them would
   -- pick up the smallest closing EndDate overall. Closing values grow with RowNum,
   -- so MIN over the following rows is the nearest one.
   SELECT
      StartDate,
      EndDate = Min(EndDate) OVER (ORDER BY RowNum ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
   FROM Dates
)
SELECT StartDate, EndDate
FROM Final
WHERE StartDate IS NOT NULL
ORDER BY StartDate
;
查看更多
Explosion°爆炸
5楼-- · 2019-01-15 02:47

Here's an example using SQL Server syntax. First it determines the "heads", or rows that have no previous overlapping rows. To determine the last "child" of a "head", it looks for the last row that is smaller than the next "head". Here's the SQL:

-- Merge overlapping ranges in YourTable.
-- "heads" are rows whose StartDate does not fall strictly inside any row that
-- starts earlier; each head opens one merged range. The merged EndDate is the
-- latest EndDate among rows starting at or after the head and before the next head.
; with  heads as
        (
        -- Only StartDate is selected explicitly: "select *" would also bring in
        -- every column of YourTable, and a table that already has a PK column would
        -- then give the CTE two columns named PK, which is not allowed.
        select  row_number() over (order by head.StartDate) as PK
        ,       head.StartDate
        from    YourTable head
        where   not exists 
                (
                select  *
                from    YourTable prev
                where   prev.StartDate < head.StartDate
                        and head.StartDate < prev.EndDate
                )
        )
select  row_number() over (order by h.StartDate) as PK
,       h.StartDate
,       max(yt.EndDate) as EndDate
from    heads h
left join
        heads nh                -- the next head, if any, bounds this merged range
on      nh.PK = h.PK + 1
left join
        YourTable yt            -- every row that starts inside this merged range
on      h.StartDate <= yt.StartDate
        and (yt.StartDate < nh.StartDate or nh.StartDate is null)
group by
        h.StartDate

Live example at SQL Fiddle.

查看更多
登录 后发表回答