Python - Set the first element of a generator - Ap

2019-08-22 16:06发布

What I want to achieve is to generate combinations in different processes to get them faster.

In this post there'a a method to get the kth combination of a set of n numbers in groups of p numbers.

Once I have the kth combination, I can manage to generate the rest of combinations in different processes (dividing the total number of combinations between processes so I know the combination each process has to begin with).

The way I've done this has been with the same function that gets the kth combination, inside a for loop between the first and the last combination the process has to generate.

This works, but it's quite slow, as it's running inside a for loop (so slow that it's much faster to generate all those combinations in just one process with itertools.combinations() instead of the previous method running in 8 different processes).

After that, I wonder if there's a way to set the first element a generator has to begin with, so I could get the beneficts of both methods (get the first combination each process has to begin with the kth combination method, and the speed of itertools.combinations()).

EDIT: code added

import time
import operator as op
from math import factorial
from itertools import combinations
from multiprocessing import Pool


def nCr(n, r):

    # https://stackoverflow.com/a/4941932/1167783

    r = min(r, n-r)
    if r == 0: 
        return 1
    numer = reduce(op.mul, xrange(n, n-r, -1))
    denom = reduce(op.mul, xrange(1, r+1))
    return numer // denom


def kthCombination(k, l, r):

    # https://stackoverflow.com/a/1776884/1167783

    if r == 0:
        return []
    elif len(l) == r:
        return l
    else:
        i = nCr(len(l)-1, r-1)
        if k < i:
            return l[0:1] + kthCombination(k, l[1:], r-1)
        else:
            return kthCombination(k-i, l[1:], r)


def create_dividing_list(num_total, num_div):

    div = num_total / num_div
    remaining = num_total - div * num_div
    dividing_list = []

    for i in xrange(0, remaining*(div+1), div+1):
        dividing_list.append([i, i+div+1])

    for i in xrange(remaining*(div+1), num_total, div):
        dividing_list.append([i, i+div])

    return dividing_list


def comb_thread(comb_initial, comb_final, numbers_list, p):

    for comb in xrange(comb_initial, comb_final):


        # Do something, for example, store those combinations
        # For timing i'm going to do something simple
        x = kthCombination(comb, numbers_list, p)


def comb_main(n, p):

    cpus = 8

    numbers_list = [i for i in range(n)]

    total_comb_number = factorial(n)/(factorial(p)*factorial(n-p))

    if cpus < len(numbers_list):
        # List style >>  [['comb_initial_1', 'comb_final_1'], ['comb_initial_2', 'comb_final_2'], ... ]
        dividing_list = create_dividing_list(total_comb_number, cpus)
    else:
        # List style >> [['comb_initial_1', 'comb_final_1'], ['comb_initial_2', 'comb_final_2'], ... ]
        dividing_list = create_dividing_list(total_comb_number, len(numbers_list))


    if cpus >= len(numbers_list):
        pool = Pool(processes=len(numbers_list))
        for i in range(len(dividing_list)):
            pool.apply_async(comb_thread, [dividing_list[i][0], dividing_list[i][1], numbers_list, p])
    else:
        pool = Pool(processes=cpus)
        for i in range(cpus):
            pool.apply_async(comb_thread, [dividing_list[i][0], dividing_list[i][1], numbers_list, p])

    pool.close()
    pool.join()

    print '\n'


def iter(n, p):

    for i in combinations([i for i in range(n)], p):

    # Do something, for example, store those combinations
    # For timing i'm going to do something simple
        x = i



if __name__ == "__main__":  

    n = 20
    p = 10

    print '%s combinations' % (factorial(n)/(factorial(p)*factorial(n-p)))

    t0_mult = time.time()

    comb_main(n, p)

    t1_mult = time.time()
    total_mult = t1_mult - t0_mult


    t0_iter = time.time()

    iter(n, p)

    t1_iter = time.time()
    total_iter = t1_iter - t0_iter

    print 'Multiprocessing: %s' %total_mult
    print 'Itertools: %s' %total_iter

0条回答
登录 后发表回答