python union of multiple ranges

2019-01-06 22:09发布

问题:

I have these ranges:

7,10
11,13
11,15
14,20
23,39

I need to perform a union of the overlapping ranges to give ranges that are not overlapping, so in the example:

7,20
23,39

I've done this in Ruby where I have pushed the start and end of the range in array and sorted them and then perform union of the overlapping ranges. Any quick way of doing this in Python?

Thanks

回答1:

Let's say, (7, 10) and (11, 13) result into (7, 13):

a = [(7, 10), (11, 13), (11, 15), (14, 20), (23, 39)]
b = []
for begin,end in sorted(a):
    if b and b[-1][1] >= begin - 1:
        b[-1] = (b[-1][0], end)
    else:
        b.append((begin, end))

b is now

[(7, 20), (23, 39)]

EDIT:

As @CentAu correctly notices, [(2,4), (1,6)] would return (1,4) instead of (1,6). Here is the new version with correct handling of this case:

a = [(7, 10), (11, 13), (11, 15), (14, 20), (23, 39)]
b = []
for begin,end in sorted(a):
    if b and b[-1][1] >= begin - 1:
        b[-1][1] = max(b[-1][1], end)
    else:
        b.append([begin, end])


回答2:

Old question. But I wanted to add this answer for future references. sympy can be used to achieve union of intervals:

from sympy import Interval, Union
def union(data):
    """ Union of a list of intervals e.g. [(1,2),(3,4)] """
    intervals = [Interval(begin, end) for (begin, end) in data]
    u = Union(*intervals)
    return [list(u.args[:2])] if isinstance(u, Interval) \
       else list(u.args)

If output of Union is more than two intervals is a Union object while when there is a single interval, the output is an Interval object. That's the reason for the if statement in the return line.

examples:

In [26]: union([(10, 12), (14, 16), (15, 22)])
Out[26]: [[10, 12], [14, 22]]

In [27]: union([(10, 12), (9, 16)])
Out[27]: [[9, 16]]


回答3:

I tried with particular cases of presence of (45, 46) and (45, 45)
and also test cases that are unlikely to happen in your application: presence of (11,6), presence of (-1, -5), presence of (-9, 5), presence of (-3, 10).
Anyway the results are right for all these cases, it's a point.

The algorithm:

def yi(li):
    gen = (x for a,b in li for x in xrange(a,b+1))
    start = p = gen.next()
    for x in gen:
        if x>p+2:
            yield (start,p)
            start = p = x
        else:
            p = x
    yield (start,x)

If aff in the following code is set to True, the steps of the execution are displayed.

def yi(li):
    aff = 0
    gen = (x for a,b in li for x in xrange(a,b+1))
    start = p = gen.next()
    for x in gen:
        if aff:
            print ('start %s     p %d  p+2 %d     '
                   'x==%s' % (start,p,p+2,x))
        if x>p+2:
            if aff:
                print 'yield range(%d,%d)' % (start,p+1)
            yield (start,p)
            start = p = x
        else:
            p = x
    if aff:
        print 'yield range(%d,%d)' % (start,x+1)
    yield (start,x)



for li in ([(7,10),(23,39),(11,13),(11,15),(14,20),(45,46)],
           [(7,10),(23,39),(11,13),(11,15),(14,20),(45,46),(45,45)],
           [(7,10),(23,39),(11,13),(11,15),(14,20),(45,45)],

           [(7,10),(23,39),(11,13),(11,6),(14,20),(45,46)], 
           #1 presence of (11, 6)
           [(7,10),(23,39),(11,13),(-1,-5),(14,20),(45,45)], 
           #2  presence of (-1,-5)
           [(7,10),(23,39),(11,13),(-9,-5),(14,20),(45,45)], 
           #3  presence of (-9, -5)
           [(7,10),(23,39),(11,13),(-3,10),(14,20),(45,45)]): 
           #4  presence of (-3, 10)

    li.sort()
    print 'sorted li    %s'%li
    print '\n'.join('  (%d,%d)   %r' % (a,b,range(a,b)) 
                     for a,b in li)
    print 'list(yi(li)) %s\n' % list(yi(li))

result

sorted li    [(7, 10), (11, 13), (11, 15), (14, 20),
              (23, 39), (45, 46)]
  (7,10)   [7, 8, 9]
  (11,13)   [11, 12]
  (11,15)   [11, 12, 13, 14]
  (14,20)   [14, 15, 16, 17, 18, 19]
  (23,39)   [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 
             35, 36, 37, 38]
  (45,46)   [45]
list(yi(li)) [(7, 20), (23, 39), (45, 46)]

sorted li    [(7, 10), (11, 13), (11, 15), (14, 20), 
              (23, 39), (45, 45), (45, 46)]
  (7,10)   [7, 8, 9]
  (11,13)   [11, 12]
  (11,15)   [11, 12, 13, 14]
  (14,20)   [14, 15, 16, 17, 18, 19]
  (23,39)   [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
             35, 36, 37, 38]
  (45,45)   []
  (45,46)   [45]
list(yi(li)) [(7, 20), (23, 39), (45, 46)]

sorted li    [(7, 10), (11, 13), (11, 15), (14, 20), 
              (23, 39), (45, 45)]
  (7,10)   [7, 8, 9]
  (11,13)   [11, 12]
  (11,15)   [11, 12, 13, 14]
  (14,20)   [14, 15, 16, 17, 18, 19]
  (23,39)   [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
             35, 36, 37, 38]
  (45,45)   []
list(yi(li)) [(7, 20), (23, 39), (45, 45)]

sorted li    [(7, 10), (11, 6), (11, 13), (14, 20), 
              (23, 39), (45, 46)]
  (7,10)   [7, 8, 9]
  (11,6)   []
  (11,13)   [11, 12]
  (14,20)   [14, 15, 16, 17, 18, 19]
  (23,39)   [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 
             35, 36, 37, 38]
  (45,46)   [45]
list(yi(li)) [(7, 20), (23, 39), (45, 46)]

sorted li    [(-1, -5), (7, 10), (11, 13), (14, 20), 
              (23, 39), (45, 45)]
  (-1,-5)   []
  (7,10)   [7, 8, 9]
  (11,13)   [11, 12]
  (14,20)   [14, 15, 16, 17, 18, 19]
  (23,39)   [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
             35, 36, 37, 38]
  (45,45)   []
list(yi(li)) [(7, 20), (23, 39), (45, 45)]

sorted li    [(-9, -5), (7, 10), (11, 13), (14, 20), 
              (23, 39), (45, 45)]
  (-9,-5)   [-9, -8, -7, -6]
  (7,10)   [7, 8, 9]
  (11,13)   [11, 12]
  (14,20)   [14, 15, 16, 17, 18, 19]
  (23,39)   [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
             35, 36, 37, 38]
  (45,45)   []
list(yi(li)) [(-9, -5), (7, 20), (23, 39), (45, 45)]

sorted li    [(-3, 10), (7, 10), (11, 13), (14, 20), 
              (23, 39), (45, 45)]
  (-3,10)   [-3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  (7,10)   [7, 8, 9]
  (11,13)   [11, 12]
  (14,20)   [14, 15, 16, 17, 18, 19]
  (23,39)   [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 
             35, 36, 37, 38]
  (45,45)   []
list(yi(li)) [(-3, 20), (23, 39), (45, 45)]


回答4:

The following function works well with:

a = [(7, 10), (11, 13), (11, 15), (14, 20), (23, 39)]

and

a = [(2,4),(1,6)]

def range_overlap_adjust(list_ranges):
    overlap_corrected   =   []
    for start, stop in sorted(list_ranges):
        if  overlap_corrected and start-1 <= overlap_corrected [-1][1] and stop >= overlap_corrected [-1][1]:
            overlap_corrected [-1] = min(overlap_corrected [-1][0], start), stop
        elif overlap_corrected and start <= overlap_corrected [-1][1] and stop <= overlap_corrected [-1][1]:
            break
        else:
            overlap_corrected.append((start,stop))
    return overlap_corrected

test

list_ranges = [(7, 10), (11, 13), (11, 15), (14, 20), (23, 39)]   


print range_overlap_adjust(list_ranges)

gives:

[(7, 20), (23, 39)]