What I want to achieve is to generate combinations in different processes to get them faster.
In this post there'a a method to get the kth combination of a set of n numbers in groups of p numbers.
Once I have the kth combination, I can manage to generate the rest of combinations in different processes (dividing the total number of combinations between processes so I know the combination each process has to begin with).
The way I've done this has been with the same function that gets the kth combination, inside a for loop between the first and the last combination the process has to generate.
This works, but it's quite slow, as it's running inside a for loop (so slow that it's much faster to generate all those combinations in just one process with itertools.combinations() instead of the previous method running in 8 different processes).
After that, I wonder if there's a way to set the first element a generator has to begin with, so I could get the beneficts of both methods (get the first combination each process has to begin with the kth combination method, and the speed of itertools.combinations()).
EDIT: code added
import time
import operator as op
from math import factorial
from itertools import combinations
from multiprocessing import Pool
def nCr(n, r):
# https://stackoverflow.com/a/4941932/1167783
r = min(r, n-r)
if r == 0:
return 1
numer = reduce(op.mul, xrange(n, n-r, -1))
denom = reduce(op.mul, xrange(1, r+1))
return numer // denom
def kthCombination(k, l, r):
# https://stackoverflow.com/a/1776884/1167783
if r == 0:
return []
elif len(l) == r:
return l
else:
i = nCr(len(l)-1, r-1)
if k < i:
return l[0:1] + kthCombination(k, l[1:], r-1)
else:
return kthCombination(k-i, l[1:], r)
def create_dividing_list(num_total, num_div):
div = num_total / num_div
remaining = num_total - div * num_div
dividing_list = []
for i in xrange(0, remaining*(div+1), div+1):
dividing_list.append([i, i+div+1])
for i in xrange(remaining*(div+1), num_total, div):
dividing_list.append([i, i+div])
return dividing_list
def comb_thread(comb_initial, comb_final, numbers_list, p):
for comb in xrange(comb_initial, comb_final):
# Do something, for example, store those combinations
# For timing i'm going to do something simple
x = kthCombination(comb, numbers_list, p)
def comb_main(n, p):
cpus = 8
numbers_list = [i for i in range(n)]
total_comb_number = factorial(n)/(factorial(p)*factorial(n-p))
if cpus < len(numbers_list):
# List style >> [['comb_initial_1', 'comb_final_1'], ['comb_initial_2', 'comb_final_2'], ... ]
dividing_list = create_dividing_list(total_comb_number, cpus)
else:
# List style >> [['comb_initial_1', 'comb_final_1'], ['comb_initial_2', 'comb_final_2'], ... ]
dividing_list = create_dividing_list(total_comb_number, len(numbers_list))
if cpus >= len(numbers_list):
pool = Pool(processes=len(numbers_list))
for i in range(len(dividing_list)):
pool.apply_async(comb_thread, [dividing_list[i][0], dividing_list[i][1], numbers_list, p])
else:
pool = Pool(processes=cpus)
for i in range(cpus):
pool.apply_async(comb_thread, [dividing_list[i][0], dividing_list[i][1], numbers_list, p])
pool.close()
pool.join()
print '\n'
def iter(n, p):
for i in combinations([i for i in range(n)], p):
# Do something, for example, store those combinations
# For timing i'm going to do something simple
x = i
if __name__ == "__main__":
n = 20
p = 10
print '%s combinations' % (factorial(n)/(factorial(p)*factorial(n-p)))
t0_mult = time.time()
comb_main(n, p)
t1_mult = time.time()
total_mult = t1_mult - t0_mult
t0_iter = time.time()
iter(n, p)
t1_iter = time.time()
total_iter = t1_iter - t0_iter
print 'Multiprocessing: %s' %total_mult
print 'Itertools: %s' %total_iter