Erlang; list comprehension without duplicates

2019-08-07 14:10发布

I am doing somthing horrible but I don't know how to make it better.

I am forming all pairwise sums of the elements of a List called SomeList, but I don't want to see duplicates ( I guess I want "all possible pairwise sums" ):

sets:to_list(sets:from_list([A+B || A <- SomeList, B <- SomeList]))

SomeList does NOT contain duplicates.

This works, but is horribly inefficient, because the original list before the set conversion is GIGANTIC.

Is there a better way to do this?

标签: erlang
4条回答
Summer. ? 凉城
2楼-- · 2019-08-07 14:16

An effective way is to use foldl instead of lists comprehension, because in this case you nedd a state on each step

sets:to_list(
    lists:foldl(fun(A, S1) -> 
        lists:foldl(fun(B, S2) -> 
            sets:add_element(A+B, S2) 
        end, S1, SomeListA) 
    end, sets:new(), SomeListB)).
查看更多
▲ chillily
3楼-- · 2019-08-07 14:25

This solution keeps it relatively fast and makes use of as much pre-written library code as possible.

Note that I use lists:zip/2 here rather than numeric +, only to illustrate that this approach works for any kind of non-repeating permutation of a unique list. You may only care about arithmetic, but if you want more, this can do it.

-export([permute_unique/1]).
permute_unique([]) ->
    [];
permute_unique([A|Ab]) ->
    lists:zip(lists:duplicate(length(Ab)+1, A), [A|Ab])
    ++
    permute_unique(Ab).

%to sum integers, replace the lists:zip... line with
%   [B+C || {B,C} <- lists:zip(lists:duplicate(length(Ab)+1, A), [A|Ab])]
%to perform normal arithmetic and yield a numeric value for each element

I am not sure what you consider gigantic - you will end up with N*(N+1)/2 total elements in the permuted list for a unique list of N original elements, so this gets big really fast.

I did some basic performance testing of this, using an Intel (Haswell) Core i7 @ 4GHz with 32GB of memory, running Erlang/OTP 17 64-bit.

5001 elements in the list took between 2 and 5 seconds according to timer:tc/1.

10001 elements in the list took between 15 and 17 seconds, and required about 9GB of memory. This generates a list of 50,015,001 elements.

15001 elements in the list took between 21 and 25 seconds, and required about 19GB of memory.

20001 elements in the list took 49 seconds in one run, and peaked at about 30GB of memory, with about 200 million elements in the result. That is the limit of what I can test.

查看更多
家丑人穷心不美
4楼-- · 2019-08-07 14:26

You could simply use lists:usort/1

lists:usort([X+Y || X <- L, Y <- L]).

if the chance to have duplicates is very high, then you can generate the sum using 2 loops and store the sum in an ets set (or using map, I didn't check the performance of both).

7> Inloop = fun Inloop(_,[],_) -> ok; Inloop(Store,[H|T],X) -> ets:insert(Store,{X+H}), Inloop(Store,T,X) end.
#Fun<erl_eval.42.54118792>
8> Outloop = fun Outloop(Store,[],_) -> ok; Outloop(Store,[H|T],List) -> Inloop(Store,List,H), Outloop(Store,T,List) end.
#Fun<erl_eval.42.54118792>
9> Makesum = fun(L) -> S = ets:new(temp,[set]), Outloop(S,L,L), R =ets:foldl(fun({X},Acc) -> [X|Acc] end,[],S), ets:delete(S), R end.
#Fun<erl_eval.6.54118792>
10> Makesum(lists:seq(1,10)).
[15,13,8,11,20,14,16,12,7,3,10,9,19,18,4,17,6,2,5]
11> lists:sort(Makesum(lists:seq(1,10))).
[2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]
12> 
查看更多
可以哭但决不认输i
5楼-- · 2019-08-07 14:32

This module will allow you to compare times of execution when using list comprehension, sets or ets. You can of course add additional functions to this comparison:

-module(pairwise).

-export([start/2]).

start(Type, X) ->
    L = lists:seq(1, X),
    timer:tc(fun do/2, [Type, L]).

do(compr, L) ->
    sets:to_list(sets:from_list([A+B || A <- L, B <- L]));

do(set, L) ->
    F = fun(Sum, Set) -> sets:add_element(Sum, Set) end,
    R = fun(Set) -> sets:to_list(Set) end,
    do(L, L, sets:new(), {F, R});

do(ets, L) ->
    F = fun(Sum, Tab) -> ets:insert(Tab, {Sum}), Tab end,
    R = fun(Tab) ->
                Fun = fun({X}, Acc) -> [X|Acc] end,
                Res = ets:foldl(Fun, [], Tab),
                ets:delete(Tab),
                Res
        end,
    do(L, L, ets:new(?MODULE, []), {F, R}).

do([A|AT], [B|BT], S, {F, _} = Funs) -> do([A|AT], BT, F(A+B, S), Funs);
do([_AT], [], S, {_, R}) -> R(S);
do([_A|AT], [], S, Funs) -> do(AT, AT, S, Funs).

Results:

36> {_, Res1} = pairwise:start(compr, 20).
{282,
 [16,32,3,19,35,6,22,38,9,25,12,28,15,31,2,18,34,5,21,37,8,
  24,40,11,27,14,30|...]}
37> {_, Res2} = pairwise:start(set, 20).  
{155,
 [16,32,3,19,35,6,22,38,9,25,12,28,15,31,2,18,34,5,21,37,8,
  24,40,11,27,14,30|...]}
38> {_, Res3} = pairwise:start(ets, 20).  
{96,
 [15,25,13,8,21,24,40,11,26,20,14,28,23,16,12,39,34,36,7,32,
  35,3,33,10,9,19,18|...]}
39> R1=lists:usort(Res1), R2=lists:usort(Res2), R3=lists:usort(Res3).
[2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,
 24,25,26,27,28,29,30|...]
40> R1 = R2 = R3.
[2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,
 24,25,26,27,28,29,30|...]

The last line is to compare that all functions return the same result but sorted differently.

First number in each resulted tuple is the time of execution as returned from timer:tc(fun do/2, [Type, L]).. In this example it's 282 for list comprehension, 155 for sets and 96 for ets.

查看更多
登录 后发表回答