Concurrent Prime Generator

2020-02-14 03:48发布

问题:

I'm going through the problems on projecteuler.net to learn how to program in Erlang, and I am having the hardest time creating a prime generator that can create all of the primes below 2 million, in less than a minute. Using the sequential style, I have already written three types of generators, including the Sieve of Eratosthenes, and none of them perform well enough.

I figured a concurrent Sieve would work great, but I'm getting bad_arity messages, and I'm not sure why. Any suggestions on why I have the problem, or how to code it properly?

Here's my code, the commented out sections are where I tried to make things concurrent:

-module(primeserver).
-compile(export_all).

start() ->
    register(primes, spawn(fun() -> loop() end)).

is_prime(N) -> rpc({is_prime,N}).

rpc(Request) ->
    primes ! {self(), Request},
    receive
        {primes, Response} ->
            Response
    end.

loop() ->
    receive
        {From, {is_prime, N}} ->
            if
                N  From ! {primes, false};
                N =:= 2 -> From ! {primes, true};
                N rem 2 =:= 0 -> From ! {primes, false};
                true ->
                    Values = is_not_prime(N),
                    Val = not(lists:member(true, Values)),
                    From ! {primes, Val}
            end,
            loop()
    end.

for(N,N,_,F) -> [F(N)];
for(I,N,S,F) when I + S  [F(I)|for(I+S, N, S, F)];
for(I,N,S,F) when I + S =:= N -> [F(I)|for(I+S, N, S, F)];
for(I,N,S,F) when I + S > N -> [F(I)].

get_list(I, Limit) ->
    if
        I 
            [I*A || A 
            []
    end.

is_not_prime(N) ->
    for(3, N, 2, 
        fun(I) -> 
            List = get_list(I,trunc(N/I)),
            lists:member(N,lists:flatten(List))
        end
        ).

    %%L = for(1,N, fun() -> spawn(fun(I) -> wait(I,N) end) end),
    %%SeedList = [A || A  
    %%      lists:foreach(fun(X) ->
    %%              Pid ! {in_list, X} 
    %%                end, SeedList)
    %%        end, L).

%%wait(I,N) ->
%%  List = [I*A || A  lists:member(X,List)
%%  end.

回答1:

The 'badarity' error means that you're trying to call a 'fun' with the wrong number of arguments. In this case...

%%L = for(1,N, fun() -> spawn(fun(I) -> wait(I,N) end) end),

The for/3 function expects a fun of arity 1, and the spawn/1 function expects a fun of arity 0. Try this instead:

L = for(1, N, fun(I) -> spawn(fun() -> wait(I, N) end) end),

The fun passed to spawn inherits needed parts of its environment (namely I), so there's no need to pass it explicitly.

While calculating primes is always good fun, please keep in mind that this is not the kind of problem Erlang was designed to solve. Erlang was designed for massive actor-style concurrency. It will most likely perform rather badly on all examples of data-parallel computation. In many cases, a sequential solution in, say, ML will be so fast that any number of cores will not suffice for Erlang to catch up, and e.g. F# and the .NET Task Parallel Library would certainly be a much better vehicle for these kinds of operations.



回答2:

I wrote an Eratosthenesque concurrent prime sieve using the Go and channels.

Here is the code: http://github.com/aht/gosieve

I blogged about it here: http://blog.onideas.ws/eratosthenes.go

The program can sieve out the first million primes (all primes upto 15,485,863) in about 10 seconds. The sieve is concurrent, but the algorithm is mainly synchronous: there are far too many synchronization points required between goroutines ("actors" -- if you like) and thus they can not roam freely in parallel.



回答3:

Another alternative to consider is to use probabalistic prime generation. There is an example of this in Joe's book (the "prime server") which uses Miller-Rabin I think...



回答4:

You can find four different Erlang implementations for finding prime numbers (two of which are based on the Sieve of Eratosthenes) here. This link also contains graphs comparing the performance of the 4 solutions.



回答5:

The Sieve of Eratosthenes is fairly easy to implement but -- as you have discovered -- not the most efficient. Have you tried the Sieve of Atkin?

Sieve of Atkin @ Wikipedia



回答6:

Primes parallel algorithm : http://www.cs.cmu.edu/~scandal/cacm/node8.html



回答7:

Two quick single-process erlang prime generators; sprimes generates all primes under 2m in ~2.7 seconds, fprimes ~3 seconds on my computer (Macbook with a 2.4 GHz Core 2 Duo). Both are based on the Sieve of Eratosthenes, but since Erlang works best with lists, rather than arrays, both keep a list of non-eliminated primes, checking for divisibility by the current head and keeping an accumulator of verified primes. Both also implement a prime wheel to do initial reduction of the list.

-module(primes).
-export([sprimes/1, wheel/3, fprimes/1, filter/2]).    

sieve([H|T], M) when H=< M -> [H|sieve([X || X<- T, X rem H /= 0], M)];
sieve(L, _) -> L.
sprimes(N) -> [2,3,5,7|sieve(wheel(11, [2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,8,6,4,6,2,4,6,2,6,6,4,2,4,6,2,6,4,2,4,2,10,2,10], N), math:sqrt(N))].

wheel([X|Xs], _Js, M) when X > M ->
    lists:reverse(Xs);
wheel([X|Xs], [J|Js], M) ->
    wheel([X+J,X|Xs], lazy:next(Js), M);
wheel(S, Js, M) ->
    wheel([S], lazy:lazy(Js), M).

fprimes(N) ->
    fprimes(wheel(11, [2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,8,6,4,6,2,4,6,2,6,6,4,2,4,6,2,6,4,2,4,2,10,2,10], N), [7,5,3,2], N).
fprimes([H|T], A, Max) when H*H =< Max ->
    fprimes(filter(H, T), [H|A], Max);
fprimes(L, A, _Max) -> lists:append(lists:reverse(A), L).

filter(N, L) ->
    filter(N, N*N, L, []).
filter(N, N2, [X|Xs], A) when X < N2 ->
    filter(N, N2, Xs, [X|A]);
filter(N, _N2, L, A) ->
    filter(N, L, A).
filter(N, [X|Xs], A) when X rem N /= 0 ->
    filter(N, Xs, [X|A]);
filter(N, [_X|Xs], A) ->
    filter(N, Xs, A);
filter(_N, [], A) ->
    lists:reverse(A).

lazy:lazy/1 and lazy:next/1 refer to a simple implementation of pseudo-lazy infinite lists:

lazy(L) ->
    repeat(L).

repeat(L) -> L++[fun() -> L end].

next([F]) -> F()++[F];
next(L) -> L.

Prime generation by sieves is not a great place for concurrency (but it could use parallelism in checking for divisibility, although the operation is not sufficiently complex to justify the additional overhead of all parallel filters I have written thus far).

`



回答8:

Project Euler problems (I'd say most of the first 50 if not more) are mostly about brute force with a splash of ingenuity in choosing your bounds.

Remember to test any if N is prime (by brute force), you only need to see if its divisible by any prime up to floor(sqrt(N)) + 1, not N/2.

Good luck



回答9:

I love Project Euler.

On the subject of prime generators, I am a big fan of the Sieve of Eratosthenes.

For the purposes of the numbers under 2,000,000 you might try a simple isPrime check implementation. I don't know how you'd do it in erlang, but the logic is simple.

For Each NUMBER in LIST_OF_PRIMES
     If TEST_VALUE % NUMBER == 0
          Then FALSE
END
TRUE

if isPrime == TRUE add TEST_VALUE to your LIST_OF_PRIMES

iterate starting at 14 or so with a preset list of your beginning primes. 

c# ran a list like this for 2,000,000 in well under the 1 minute mark

Edit: On a side note, the sieve of Eratosthenes can be implemented easily and runs quickly, but gets unwieldy when you start getting into huge lists. The simplest implementation, using a boolean array and int values runs extremely quickly. The trouble is that you begin running into limits for the size of your value as well as the length of your array. -- Switching to a string or bitarray implementation helps, but you still have the challenge of iterating through your list at large values.



回答10:

here is a vb version

    'Sieve of Eratosthenes 
'http://en.wikipedia.org/wiki/Sieve_of_Eratosthenes 
'1. Create a contiguous list of numbers from two to some highest number n. 
'2. Strike out from the list all multiples of two (4, 6, 8 etc.). 
'3. The list's next number that has not been struck out is a prime number. 
'4. Strike out from the list all multiples of the number you identified in the previous step. 
'5. Repeat steps 3 and 4 until you reach a number that is greater than the square root of n (the highest number in the list). 
'6. All the remaining numbers in the list are prime. 
Private Function Sieve_of_Eratosthenes(ByVal MaxNum As Integer) As List(Of Integer)
    'tested to MaxNum = 10,000,000 - on 1.8Ghz Laptop it took 1.4 seconds
    Dim thePrimes As New List(Of Integer)
    Dim toNum As Integer = MaxNum, stpw As New Stopwatch
    If toNum > 1 Then 'the first prime is 2
        stpw.Start()
        thePrimes.Capacity = toNum 'size the list
        Dim idx As Integer
        Dim stopAT As Integer = CInt(Math.Sqrt(toNum) + 1)
        '1. Create a contiguous list of numbers from two to some highest number n.
        '2. Strike out from the list all multiples of 2, 3, 5. 
        For idx = 0 To toNum
            If idx > 5 Then
                If idx Mod 2 <> 0 _
                AndAlso idx Mod 3 <> 0 _
                AndAlso idx Mod 5 <> 0 Then thePrimes.Add(idx) Else thePrimes.Add(-1)
            Else
                thePrimes.Add(idx)
            End If
        Next
        'mark 0,1 and 4 as non-prime
        thePrimes(0) = -1
        thePrimes(1) = -1
        thePrimes(4) = -1
        Dim aPrime, startAT As Integer
        idx = 7 'starting at 7 check for primes and multiples 
        Do
            '3. The list's next number that has not been struck out is a prime number. 
            '4. Strike out from the list all multiples of the number you identified in the previous step. 
            '5. Repeat steps 3 and 4 until you reach a number that is greater than the square root of n (the highest number in the list). 
            If thePrimes(idx) <> -1 Then ' if equal to -1 the number is not a prime
                'not equal to -1 the number is a prime
                aPrime = thePrimes(idx)
                'get rid of multiples 
                startAT = aPrime * aPrime
                For mltpl As Integer = startAT To thePrimes.Count - 1 Step aPrime
                    If thePrimes(mltpl) <> -1 Then thePrimes(mltpl) = -1
                Next
            End If
            idx += 2 'increment index 
        Loop While idx < stopAT
        '6. All the remaining numbers in the list are prime. 
        thePrimes = thePrimes.FindAll(Function(i As Integer) i <> -1)
        stpw.Stop()
        Debug.WriteLine(stpw.ElapsedMilliseconds)
    End If
    Return thePrimes
End Function