How to efficiently shuffle edges in a graph

2019-02-16 00:23发布

问题:

I am writing a code to shuffle the edges of a graph according to the Configuration Model. In essence, two edges [(v1,v2) and (v3,v4)] are randomly chosen and swapped [yielding (v1,v3) and (v2,v4)] if

  • no self-edge is created [v1 is not v3, and v2 is not v4];
  • no multi-edge is created [the edges (v1,v3) and (v2,v4) did not already existed].

I wrote the following code to achieve this

// Instantiates an empty undirected graph.
typedef boost::adjacency_list< boost::setS,
                               boost::vecS,
                               boost::undirectedS > graph_t;
graph_t graph(9);

// Adds edges to the graph.
boost::add_edge(0, 1, graph);  boost::add_edge(0, 3, graph);
boost::add_edge(0, 5, graph);  boost::add_edge(0, 7, graph);
boost::add_edge(1, 2, graph);  boost::add_edge(2, 3, graph);
boost::add_edge(2, 4, graph);  boost::add_edge(4, 8, graph);
boost::add_edge(5, 7, graph);  boost::add_edge(5, 8, graph);
boost::add_edge(6, 7, graph);  boost::add_edge(7, 8, graph);

// Number of edges.
unsigned int nb_edges = boost::num_edges(graph);

// Defines a function that give a random edge.
std::random_device rd;
std::mt19937 engine(rd());
std::uniform_int_distribution<int> get_rand_edge(0, nb_edges - 1);

// Descriptors and iterators.
graph_t::vertex_descriptor v1, v2, v3, v4;
graph_t::edge_iterator e1_it, e2_it, e_end;

// Shuffles the edges, with the condition of not creating multiple edges or self-loops.
unsigned int nb_edge_swaps(0);
while(nb_edge_swaps < 10 * nb_edges)
{
  // Gets the first edge.
  std::tie(e1_it, e_end) = boost::edges(graph);
  std::advance(e1_it, get_rand_edge(engine));
  v1 = boost::source(*e1_it, graph);
  v2 = boost::target(*e1_it, graph);

  // Gets the second edge.
  std::tie(e2_it, e_end) = boost::edges(graph);
  std::advance(e2_it, get_rand_edge(engine));
  v3 = boost::source(*e2_it, graph);
  v4 = boost::target(*e2_it, graph);

  // Avoids self-loops.
  if((v1 != v3) && (v2 != v4))
  {
    // Avoids multiple edge.
    if(boost::edge(v1, v3, graph).second == false)
    {
      // Avoids multiple edge.
      if(boost::edge(v2, v4, graph).second == false)
      {
        // Destroys the old edges.
        boost::remove_edge(*e1_it, graph);
        boost::remove_edge(boost::edge(v3, v4, graph).first, graph);
        // Creates the new edges.
        boost::add_edge(v1, v3, graph);
        boost::add_edge(v2, v4, graph);
        // Counts the number of changes.
        ++nb_edge_swaps;
      }
    }
  }
}

which seems to work quite well, albeit slowly. I was wondering if there were another clever way to achieve the same task more efficiently. I would like the solution to use the Boost Graph Library, but any ideas are welcomed. Thanks!

回答1:

Without much guidance, I went and created some comparative benchmarks. Timings wih 90 vertices and 120 edges:

Full sample details (click for interactive charts):

Turns out my intuition about adjacency matrix being faster came out quite the opposite:

I assume it can be fixed by creating a specialized approach to selecting a random edge¹. I'll leave that as an exercise for the reader now.

Benchmark Code

Using https://github.com/rmartinho/nonius

#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/adjacency_matrix.hpp>
#include <boost/graph/edge_list.hpp>
#include <boost/graph/random.hpp>
#include <boost/graph/graphviz.hpp>
#include <boost/container/flat_set.hpp>
#include <nonius/benchmark.h++>

namespace edge_list_detail {
    struct edge { 
        using first_type = size_t;
        using second_type = size_t;
        first_type  s;
        second_type t;

        edge(first_type s, second_type t) : s(std::min(s,t)), t(std::max(s,t)) { assert(s!=t); }
        bool operator<(edge const& other) const { return std::tie(s,t) < std::tie(other.s, other.t); }
    };

    using node_based_set = std::set<edge>;
    using flat_set       = boost::container::flat_set<edge>;

    void reserve(node_based_set const&, size_t) {}
    void reserve(flat_set& c, size_t n) { c.reserve(n); }

    void erase_two(node_based_set& from, node_based_set::iterator e1, node_based_set::iterator e2) {
        from.erase(e1);
        from.erase(e2);
    }

    void erase_two(flat_set& from, flat_set::iterator e1, flat_set::iterator e2) {
        if (e2<e1) std::swap(e1, e2);
        from.erase(e2); // invalidates higher iterators
        from.erase(e1);
    }
}

typedef boost::adjacency_list   < boost::setS, boost::vecS, boost::undirectedS > adj_list_t;
typedef boost::adjacency_matrix < boost::undirectedS                           > adj_mat_t;

static std::mt19937 engine(std::random_device{}());

static auto const sample_adj_list = [] {
    using namespace boost;
    adj_list_t graph(90);
    generate_random_graph(graph, 90, 120, engine);
    {
        std::ofstream ofs("/tmp/raw.dot");
        write_graphviz(ofs, graph);
    }

    return graph;
}();

static auto const sample_adj_mat = [] {
    using namespace boost;
    adj_mat_t graph(num_vertices(sample_adj_list));
    for (auto e : make_iterator_range(edges(sample_adj_list))) {
        add_edge(source(e, sample_adj_list), target(e, sample_adj_list), graph);
    }
    return graph;
}();

template <typename graph_t> auto nth_edge(graph_t& graph, size_t n) {
    return std::next(boost::edges(graph).first, n);
}
auto nth_edge(edge_list_detail::node_based_set& lst, size_t n) {
    return std::next(lst.begin(), n);
}
auto nth_edge(edge_list_detail::flat_set& lst, size_t n) {
    return std::next(lst.begin(), n);
}

template <typename graph_t> void OP_algo(nonius::chronometer& cm, graph_t graph) {
    // Number of edges.
    cm.measure([&] {
        unsigned int nb_edges = boost::num_edges(graph);

        // Defines a function that give a random edge.
        std::uniform_int_distribution<int> get_rand_edge(0, nb_edges - 1);

        // Descriptors and iterators.
        typename graph_t::vertex_descriptor v1, v2, v3, v4;
        typename graph_t::edge_iterator e1_it, e2_it, e_end;

        // Shuffles the edges, with the condition of not creating multiple edges or self-loops.
        unsigned int nb_edge_swaps(0);
        while(nb_edge_swaps < 10 * nb_edges)
        {
            {
                e1_it = nth_edge(graph, get_rand_edge(engine));
                v1 = boost::source(*e1_it, graph);
                v2 = boost::target(*e1_it, graph);

                e2_it = nth_edge(graph, get_rand_edge(engine));
                v3 = boost::source(*e2_it, graph);
                v4 = boost::target(*e2_it, graph);
            }

            // Avoids self-loops.
            if((v1 != v3) && (v2 != v4))
            {
                // Avoids multiple edge.
                if(boost::edge(v1, v3, graph).second == false)
                {
                    // Avoids multiple edge.
                    if(boost::edge(v2, v4, graph).second == false)
                    {
                        // Destroys the old edges.
                        boost::remove_edge(*e1_it, graph);
                        boost::remove_edge(boost::edge(v3, v4, graph).first, graph);
                        // Creates the new edges.
                        boost::add_edge(v1, v3, graph);
                        boost::add_edge(v2, v4, graph);
                        // Counts the number of changes.
                        ++nb_edge_swaps;
                    }
                }
            }
        }
        return;
        {
            std::ofstream ofs("/tmp/shuffled.dot");
            boost::write_graphviz(ofs, graph);
        }
    });

}

template <typename list_t> void edge_list_algo(nonius::chronometer& cm, list_t& lst) {
    cm.measure([&] {
        unsigned int nb_edges = lst.size();

        // Defines a function that give a random edge.
        std::uniform_int_distribution<int> get_rand_edge(0, nb_edges - 1);

        // Shuffles the edges, with the condition of not creating multiple edges or self-loops.
        unsigned int nb_edge_swaps(0);
        while(nb_edge_swaps < 10 * nb_edges)
        {
            auto e1 = nth_edge(lst, get_rand_edge(engine));
            auto v1 = e1->s;
            auto v2 = e1->t;

            auto e2 = nth_edge(lst, get_rand_edge(engine));
            auto v3 = e2->s;
            auto v4 = e2->t;

            // Avoids self-loops.
            // Avoids multiple edge.
            if ((v1 == v3) || (v2 == v4) || lst.count({v1,v3}) || lst.count({v2,v4}))
                continue;

            // swap edges
            edge_list_detail::erase_two(lst, e1, e2);
            lst.emplace(v1, v3);
            lst.emplace(v2, v4);

            // Counts the number of changes.
            ++nb_edge_swaps;
        }
        return;
    });

}

template <typename edge_list>
void edge_list_config(nonius::chronometer& cm) {
        using namespace boost;
        edge_list lst;
        {
            edge_list_detail::reserve(lst, num_edges(sample_adj_list));
            for (auto e : make_iterator_range(edges(sample_adj_list))) {
                lst.emplace(source(e, sample_adj_list), target(e, sample_adj_list));
            }
        }
        edge_list_algo(cm, lst); 

        typedef boost::edge_list<typename edge_list::iterator> graph_t;
        graph_t graph(lst.begin(), lst.end());
        {
            std::ofstream ofs("/tmp/edge_list.dot");
            //boost::write_graphviz(ofs, graph);
        }
}

NONIUS_BENCHMARK("original_adj_list",   [](nonius::chronometer cm) { OP_algo(cm,        sample_adj_list);        });
NONIUS_BENCHMARK("original_adj_matrix", [](nonius::chronometer cm) { OP_algo(cm,        sample_adj_mat);         });
NONIUS_BENCHMARK("node_based_edge_list",[](nonius::chronometer cm) { edge_list_config<edge_list_detail::node_based_set>(cm); });
NONIUS_BENCHMARK("flat_edge_list",      [](nonius::chronometer cm) { edge_list_config<edge_list_detail::flat_set>(cm); });

#define NONIUS_RUNNER
#include <nonius/main.h++>

To create the graphs:

./test -r html -o stats.html

¹ (nth_edge below is generic and not efficient for adjacency_matrix).