What's a good, generic algorithm for collapsin

2019-01-16 14:56发布

问题:

I have a method that gets a number of objects of this class

class Range<T>
{
    public T Start;
    public T End;
}

In my case T is DateTime, but lets use int for simplicity. I would like a method that collapses those ranges into ones that cover the same "area" but that do not overlap.

So if I had the following ranges

  • 1 to 5
  • 3 to 9
  • 11 to 15
  • 12 to 14
  • 13 to 20

The method should give me

  • 1 to 9
  • 11 to 20

Guess it would be called a union? I imagine the method signature could look something like this:

public static IEnumerable<Range<T>> Collapse<T>(
    this IEnumerable<Range<T>>, 
    IComparable<T> comparer)
{
    ...
}

I have looked at some other questions here that are kind of similar, but I haven't found an implementation of this yet. This answer and some other answers to the same question describes algorithms, but I am not quite sure if I understand the algorithms. Not especially good at implementing algorithms either, so I was hoping someone here could help me out.

回答1:

This seems to works and is easy to understand.

    public static IEnumerable<Range<T>> Collapse<T>(this IEnumerable<Range<T>> me, IComparer<T> comparer)
    {
        List<Range<T>> orderdList = me.OrderBy(r => r.Start).ToList();
        List<Range<T>> newList = new List<Range<T>>();

        T max = orderdList[0].End;
        T min = orderdList[0].Start;

        foreach (var item in orderdList.Skip(1))
        {
            if (comparer.Compare(item.End, max) > 0 && comparer.Compare(item.Start, max) > 0)
            {
                newList.Add(new Range<T> { Start = min, End = max });
                min = item.Start;
            }
            max = comparer.Compare(max, item.End) > 0 ? max : item.End;
        }
        newList.Add(new Range<T>{Start=min,End=max});

        return newList;
    }

Here is the variation which I mentioned in the comments. It's basically the same thing, but with some checking and yielding of the results instead of collecting in a list before returning.

    public static IEnumerable<Range<T>> Collapse<T>(this IEnumerable<Range<T>> ranges, IComparer<T> comparer)
    {
        if(ranges == null || !ranges.Any())
            yield break;

        if (comparer == null)
            comparer = Comparer<T>.Default;

        var orderdList = ranges.OrderBy(r => r.Start);
        var firstRange = orderdList.First();

        T min = firstRange.Start;
        T max = firstRange.End;

        foreach (var current in orderdList.Skip(1))
        {
            if (comparer.Compare(current.End, max) > 0 && comparer.Compare(current.Start, max) > 0)
            {
                yield return Create(min, max);
                min = current.Start;
            }
            max = comparer.Compare(max, current.End) > 0 ? max : current.End;
        }
        yield return Create(min, max);
    }


回答2:

A Python solution for the non-verbosephile:

ranges = [
  (11, 15),
  (3, 9),
  (12, 14),
  (13, 20),
  (1, 5)]

result = []
cur = None
for start, stop in sorted(ranges): # sorts by start
  if cur is None:
    cur = (start, stop)
    continue
  cStart, cStop = cur
  if start <= cStop:
    cur = (cStart, max(stop, cStop))
  else:
    result.append(cur)
    cur = (start, stop)
result.append(cur)

print result


回答3:

static void Main(string[] args) {
    List<Range<int>> ranges = new List<Range<int>>() 
    {               
        new Range<int>(3,9),
        new Range<int>(1,5),
        new Range<int>(11,15),
        new Range<int>(12,14),
        new Range<int>(13,20),
    };

    var orderedRanges = ranges.OrderBy(r => r.Start);
    var lastRange = new Range<int>(orderedRanges.First().Start, orderedRanges.First().End);

    List<Range<int>> newranges = new List<Range<int>>();            
    newranges.Add(lastRange);

    foreach (var range in orderedRanges.Skip(1)) {
        if (range.Start >= lastRange.Start && range.Start <= lastRange.End && range.End > lastRange.End) {
            lastRange.End = range.End;
        }
        else if (range.Start > lastRange.End) {
            lastRange = new Range<int>(range.Start, range.End);
            newranges.Add(lastRange);
        }
    }

    foreach (var r in newranges) {
        Console.WriteLine("{0}, {1}", r.Start, r.End);
    }
}

Something like this. Didn't verify that it works with all inputs.



回答4:

The idea of collapsing a list just screamed out "reduce" to me. It didn't end up quite as elegant as I had hoped though.

def collapse(output,next_range):
    last_start,last_end = output[-1]
    next_start, next_end = next_range
    if (next_start <= last_end):
        output[-1] = (last_start, max(next_end, last_end))
    else:
        output.append(next_range)
    return output

ranges = [
  (11, 15),
  (3, 9),
  (12, 14),
  (13, 20),
  (1, 5)]

ranges.sort()
result = [ranges.pop(0)]
reduce(collapse, ranges,result)

print result

thanks to yairchu for typing in the data so I could cut and paste it :)



回答5:

This could probably be optimized...

using System.Collections.Generic;
using System.Linq;
using System;
static class Range
{
    public static Range<T> Create<T>(T start, T end)
    {
        return new Range<T>(start, end);
    }
    public static IEnumerable<Range<T>> Normalize<T>(
        this IEnumerable<Range<T>> ranges)
    {
        return Normalize<T>(ranges, null);
    }
    public static IEnumerable<Range<T>> Normalize<T>(
        this IEnumerable<Range<T>> ranges, IComparer<T> comparer)
    {
        var list = ranges.ToList();
        if (comparer == null) comparer = Comparer<T>.Default;
        for (int i = list.Count - 1; i >= 0; i--)
        {
            var item = list[i];

            for (int j = 0; j < i; j++)
            {
                Range<T>? newValue = TryMerge<T>(comparer, item, list[j]);

                // did we find a useful transformation?
                if (newValue != null)
                {
                    list[j] = newValue.GetValueOrDefault();
                    list.RemoveAt(i);
                    break;
                }
            }
        }
        list.Sort((x, y) =>
        {
            int t = comparer.Compare(x.Start, y.Start);
            if (t == 0) t = comparer.Compare(x.End, y.End);
            return t;
        });
        return list.AsEnumerable();
    }

    private static Range<T>? TryMerge<T>(IComparer<T> comparer, Range<T> item, Range<T> other)
    {
        if (comparer.Compare(other.End, item.Start) == 0)
        { // adjacent ranges
            return new Range<T>(other.Start, item.End);
        }
        if (comparer.Compare(item.End, other.Start) == 0)
        { // adjacent ranges
            return new Range<T>(item.Start, other.End);
        }
        if (comparer.Compare(item.Start, other.Start) <= 0
            && comparer.Compare(item.End, other.End) >= 0)
        { // item fully swalls other
            return item;
        }
        if (comparer.Compare(other.Start, item.Start) <= 0
            && comparer.Compare(other.End, item.End) >= 0)
        { // other fully swallows item
            return other;
        }
        if (comparer.Compare(item.Start, other.Start) <= 0
            && comparer.Compare(item.End, other.Start) >= 0
            && comparer.Compare(item.End, other.End) <= 0)
        { // partial overlap
            return new Range<T>(item.Start, other.End);
        }
        if (comparer.Compare(other.Start, item.Start) <= 0
             && comparer.Compare(other.End, item.Start) >= 0
            && comparer.Compare(other.End, item.End) <= 0)
        { // partial overlap
            return new Range<T>(other.Start, item.End);
        }
        return null;
    }
}
public struct Range<T>
{
    private readonly T start, end;
    public T Start { get { return start; } }
    public T End { get { return end; } }
    public Range(T start, T end)
    {
        this.start = start;
        this.end = end;
    }
    public override string ToString()
    {
        return start + " to " + end;
    }
}

static class Program
{
    static void Main()
    {
        var data = new[] 
        {
            Range.Create(1,5), Range.Create(3,9),
            Range.Create(11,15), Range.Create(12,14),
            Range.Create(13,20)
        };
        var result = data.Normalize();
        foreach (var item in result)
        {
            Console.WriteLine(item);
        }
    }
}


回答6:

A ruby version. Sort the ranges before merge seems to be a good idea.

def merge a , b
    return b if a.nil?
    if b.begin <= a.end
        (a.begin..b.end)
    el
        [a , b ]     #no overlap
    end
end

ranges = [(1..5),(11..15),(3..9),(12..14),(13..20)]
sorted_ranges = ranges.sort_by {|r| r.begin}   #sorted by the start of the range

merged_ranges = sorted_ranges.inject([]) do |m , r|
       last = m.pop
       m << merge(last , r)   
       m.flatten
end

puts merged_ranges


回答7:

Tossing another hat into the ring. Very much the same implementation as Gary W's (from which I got the sorted list approach), but done as a test case and with some helpful functions added to the Range class.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;

import edu.emory.mathcs.backport.java.util.Collections;

import junit.framework.TestCase;

public class Range2Test extends TestCase {
    public void testCollapse() throws Exception {
        Set<Range<Integer>> set = new HashSet<Range<Integer>>();
        set.add(new Range<Integer>(1, 5));
        set.add(new Range<Integer>(3, 9));
        set.add(new Range<Integer>(11, 15));
        set.add(new Range<Integer>(12, 14));
        set.add(new Range<Integer>(13, 20));
        Set<Range<Integer>> expected = new HashSet<Range<Integer>>();
        expected.add(new Range<Integer>(1, 9));
        expected.add(new Range<Integer>(11, 20));
        assertEquals(expected, collapse(set));
    }

    private static <T extends Comparable<T>> Set<Range<T>> collapse(Set<Range<T>> ranges) {
        if (ranges == null)
            return null;
        if (ranges.size() < 2)
            return new HashSet<Range<T>>(ranges);
        ArrayList<Range<T>> list = new ArrayList<Range<T>>(ranges);
        Collections.sort(list);
        Set<Range<T>> result = new HashSet<Range<T>>();
        Range<T> r = list.get(0);
        for (Range<T> range : list) 
            if (r.overlaps(range)) {
                r = r.union(range);
            } else {
                result.add(r);
                r = range;
            }
        result.add(r);
        return result;
    }

    private static class Range<T extends Comparable<T>> implements Comparable<Range<T>> {
        public Range(T start, T end) {
            if (start == null || end == null)
                throw new NullPointerException("Range requires start and end.");
            this.start = start;
            this.end = end;
        }
        public T    start;
        public T    end;

        private boolean contains(T t) {
            return start.compareTo(t) <= 0 && t.compareTo(end) <= 0;
        }

        public boolean overlaps(Range<T> that) {
            return this.contains(that.start) || that.contains(this.start);
        }

        public Range<T> union(Range<T> that) {
            T start = this.start.compareTo(that.start) < 0 ? this.start : that.start;
            T end = this.end.compareTo(that.end) > 0 ? this.end : that.end;
            return new Range<T>(start, end);
        }

        public String toString() {
            return String.format("%s - %s", start, end);
        }

        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + end.hashCode();
            result = prime * result + start.hashCode();
            return result;
        }

        @SuppressWarnings("unchecked")
        public boolean equals(Object obj) {
        if (this == obj)                    return true;
        if (obj == null)                    return false;
        if (getClass() != obj.getClass())   return false;
        Range<T> that = (Range<T>) obj;
        return end.equals(that.end) && start.equals(that.start);
        }

        public int compareTo(Range<T> that) {
            int result = this.start.compareTo(that.start);
            if (result != 0)
                return result;
            return this.end.compareTo(that.end);
        }
    }
}


回答8:

Here is a simple looping impelmentation, but at least is clear.

  • It works for DateTime as well as Int, in my simple tests
  • Most of the complexity is in the Overlap/Combine methods on the range
  • The algorithm is actually easily understandable, no floating vars
  • Adds some ability to the Range class which is probably useful in general

-- this line intentionally meaningless, to fix markdown problem --

public static class CollapseRange
{
    public static IEnumerable<Range<T>> Collapse<T>(this IEnumerable<Range<T>> me)
        where T:struct
    {
        var result = new List<Range<T>>();
        var sorted = me.OrderBy(x => x.Start).ToList();
        do {
            var first = sorted.FirstOrDefault();
            sorted.Remove(first);
            while (sorted.Any(x => x.Overlap(first))) {
                var other = sorted.FirstOrDefault(x => x.Overlap(first));
                first = first.Combine(other);
                sorted.Remove(other);
            }
            result.Add(first);
        } while (sorted.Count > 0);
        return result;
    }
}

[DebuggerDisplay("Range {Start} - {End}")]
public class Range<T> where T : struct
{
    public T Start { set; get; }
    public T End { set; get; }
    public bool Overlap(Range<T> other)
    {
        return (Within(other.Start) || Within(other.End) || other.Within(this.Start) || other.Within(this.End));
    }
    public bool Within(T point)
    {
        var Comp = Comparer<T>.Default;
        var st = Comp.Compare(point, this.Start);
        var ed = Comp.Compare(this.End, point);
        return (st >= 0 && ed >= 0);
    }
    /// <summary>Combines to ranges, updating the current range</summary>
    public void Merge(Range<T> other)
    {
        var Comp = Comparer<T>.Default;
        if (Comp.Compare(this.Start, other.Start) > 0) this.Start = other.Start;
        if (Comp.Compare(other.End, this.End) > 0) this.End = other.End;
    }
    /// <summary>Combines to ranges, returning a new range in their place</summary>
    public Range<T> Combine(Range<T> other)
    {
        var Comp = Comparer<T>.Default;
        var newRange = new Range<T>() { Start = this.Start, End = this.End };
        newRange.Start = (Comp.Compare(this.Start, other.Start) > 0) ? other.Start : this.Start;
        newRange.End = (Comp.Compare(other.End, this.End) > 0) ? other.End : this.End;
        return newRange;
    }
}


回答9:

Algorithm in Go based on the Python answer:

package main

import "sort"
import "fmt"

type TupleList [][]int

// Methods required by sort.Interface.
func (s TupleList) Len() int {
    return len(s)
}
func (s TupleList) Less(i, j int) bool {
    return s[i][1] < s[j][1]
}
func (s TupleList) Swap(i, j int) {
    s[i], s[j] = s[j], s[i]
}

func main() {

    ranges :=
        TupleList{
            {11, 15},
            {3, 9},
            {12, 14},
            {13, 20},
            {1, 5}}

    fmt.Print(ranges)
    sort.Sort(ranges)
    fmt.Print("\n")
    fmt.Print(ranges)
    fmt.Print("\n")
    result := TupleList{}

    var cur []int
    for _, t := range ranges {
        if cur == nil {
            cur = t
            continue
        }
        cStart, cStop := cur[0], cur[1]
        if t[0] <= cStop {
            cur = []int{cStart, max(t[1], cStop)}
        } else {
            result = append(result, cur)
            cur = t
        }
    }
    result = append(result, cur)
    fmt.Print(result)
}

func max(v1, v2 int) int {
    if v1 <= v2 {
        return v2
    }
    return v1
}


回答10:

This is a slight variation. I didn't need to collapse an unordered list, I wanted to maintain a sorted list instead. This is more efficient in my case. I am posting it here in case it is useful to anyone else reading this thread. Obviously can be made generic very easily.

        private static List<Tuple<int, int>> Insert(List<Tuple<int, int>> ranges, int startIndex, int endIndex)
        {
            if (ranges == null || ranges.Count == 0)
                return new List<Tuple<int, int>> { new Tuple<int, int>(startIndex, endIndex) };

            var newIndex = ranges.Count;
            for (var i = 0; i < ranges.Count; i++)
            {
                if (ranges[i].Item1 > startIndex)
                {
                    newIndex = i;
                    break;
                }
            }

            var min = ranges[0].Item1;
            var max = ranges[0].Item2;

            var newRanges = new List<Tuple<int, int>>();
            for (var i = 0; i <= ranges.Count; i++)
            {
                int rangeStart;
                int rangeEnd;
                if (i == newIndex)
                {
                    rangeStart = startIndex;
                    rangeEnd = endIndex;
                }
                else
                {
                    var range = ranges[i > newIndex ? i - 1 : i];
                    rangeStart = range.Item1;
                    rangeEnd = range.Item2;
                }

                if (rangeStart > max && rangeEnd > max)
                {
                    newRanges.Add(new Tuple<int, int>(min, max));
                    min = rangeStart;
                }
                max = rangeEnd > max ? rangeEnd : max;
            }
            newRanges.Add(new Tuple<int, int>(min, max));

            return newRanges;
        }