How to Search for an item in a List in Erlang?

2019-02-12 08:01发布

问题:

I am writing a cache gen-server for the company Use. I am wondering how to search an item from the list as I want the cost of the search for comparing various data structures in erlang like dict, orddict, List, tuples, tree, queue etc to use for cache program.

Example:

List = [{"A1",["ankit","sush", "Hover", "x4", "a3","nilesh","mike","erlang" | ...]}|...].

Now, I want to search for the Key A1 and search for 'mike' in the list. What is the best way to search the above List.

Please provide some examples. Atleast Pseudo for it.

回答1:

Just to simplify on the example at https://stackoverflow.com/a/15587565/56250:

listFind(Element, List) ->
  lists:member(Element, List).

lists:member works with empty lists. Quick look at the source (https://github.com/erlang/otp/blob/07b8f441ca711f9812fad9e9115bab3c3aa92f79/erts/emulator/beam/erl_bif_lists.c#L184) suggests it executes lazily.



回答2:

Thanks for the Answers. I have written a Erlang code to find the timings for Inserting & Fetching Date into various data Structures like LIST, DICT, QUEUE, SET and PARALLEL MAP of the data structures. Only list's insert & fetch is completed but rest has parallel map insert. I would like to share the code & results of the code. Following is the module test.erl

-module(test).
-author('ankit@hover.in').
-export ([ 
       %%list
       create_list/1, fetch_list/2, test_list/1, parmap_create_list/1 , ins_list/3, parallel_list/1,
       para_fetch_list/2, pfetch_list/1,

       %%dict
       create_dict/1, fetch_dict/2, test_dict/1, parmap_create_dict/1 , ins_dict/3, parallel_dict/1,

       %queue
       create_q/1, fetch_q/2, test_q/1, parmap_create_q/1, ins_q/3, parallel_q/1,

       %set
       create_set/1, fetch_set/2, test_set/1, parmap_create_set/1 , ins_set/3, parallel_set/1,

       create/1, multi_create/0,
       pcreate/1, parallel_multi_create/0

       %test/1 , multi_test/0 
      ]).

%%creation tests

%% For List
create_list(N) ->
    lists:foldl ( fun(X,Prev) -> Prev ++ [X] end, [] , lists:seq(1,N)).

test_list(N) ->
    {R1,List} = timer:tc(?MODULE, create_list, [N]),
    %%Set = lists:foldl(fun(X,Prev) -> sets:add_element(X,Prev) end,SET, lists:seq(1,N)),
    {R2,_} = timer:tc(?MODULE, fetch_list,[ N, List ]),
    {list,inserT,R1,fetchT,R2}.

%% For Dict
create_dict(N) ->
    lists:foldl( fun(X,Prev)-> dict:append([],X,Prev) end, dict:new(), lists:seq(1,N)).

test_dict(N) ->
    {R1,Dict} = timer:tc(?MODULE, create_dict, [N]),
    %%Set = lists:foldl(fun(X,Prev) -> sets:add_element(X,Prev) end,SET, lists:seq(1,N)),
    {R2,_} = timer:tc(?MODULE, fetch_dict,[ N, Dict ]),
    {dict,inserT,R1,fetchT,R2}.

%% For Queue
create_q(N) ->
    lists:foldl(fun(X,Prev) -> queue:in(X,Prev) end, queue:new() , lists:seq(1,N)).

test_q(N)->
    {R1,Q}  = timer:tc(?MODULE, create_q,[N]),
    %%Q = lists:foldl(fun(X,Prev) -> queue:in(X,Prev) end,Q0, lists:seq(1,N)),
    {R2,_} = timer:tc(?MODULE, fetch_q, [ N,Q ] ),
    {queue,inserT,R1,fetchT,R2}.

%% For Set
create_set(N) ->
    SET = sets:new(),
    lists:foldl(fun(X,Prev) -> sets:add_element(X,Prev) end,SET, lists:seq(1,N)).

test_set(N) ->   
    {R1,Set} = timer:tc(?MODULE, create_set, [N]),
    %%Set = lists:foldl(fun(X,Prev) -> sets:add_element(X,Prev) end,SET, lists:seq(1,N)),
    {R2,_} = timer:tc(?MODULE, fetch_set,[ N, Set ]),
    {set,inserT,R1,fetchT,R2}.

create(N)->
    [   timer:tc(?MODULE, X , [N])  || X <- [ test_list, test_dict, test_q,test_set ] ].

xmulti_create()->
    [ ?MODULE:create(X) || X<- [10,100,1000,10000,100000] ].

multi_create() ->
    InputRange = [10,100,1000,10000,100000],
    lists:map(fun(X) ->
              ?MODULE:create(X)
          end,InputRange).

%%fetching midpoint tests
fetch_q(N,Q)->
    fetch_q(N,Q,100).

fetch_q(N,Q,Find) ->
    F = fun(I) -> queue:out_r(I) end,
    R = lists:foldl( fun(_X,{Bool,PrevQ} ) ->
                 {{value, Ele},QQ} = F(PrevQ),
                 Ret1 = case Ele of 
                       Temp when Temp =:= Find -> 
                       true;
                       _ ->
                       Bool
                   end,
                 Ret2 = QQ, 

                 {Ret1,Ret2}

             end,{false,Q}, 
             lists:seq(1,N)),
    R.

fetch_set(N,Set) ->
    fetch_set(N,Set,100).

fetch_set(_N,Set,Find) ->
    Return = sets:is_element(Find,Set),
    Return.

fetch_list(N,List) ->
    fetch_list(N,List,500).

fetch_list(_N,List,Find) ->
    Ret = lists:foldl(fun(X,Prev) when X =:= Find -> true; 
             (X, Prev) -> Prev 
              end,false,List),
    Ret.

fetch_dict(N,Dict) ->
    {ok,List} = dict:find([],Dict),
    fetch_dict(N,List,500).

fetch_dict(_N,List,Find) ->
    Ret = lists:foldl(fun(X,Prev) when X =:= Find -> true; 
             (X, Prev) -> Prev
              end,false,List),
    Ret.

%% parallel operation

%% Parallel Map for Queue
parallel_q(N) ->
    {R1,Set} = timer:tc(?MODULE, parmap_create_q, [N]),
    {parallel_q,pcreate,R1}.

parmap_create_q(N) ->

    PID = spawn(fun() ->
            ?MODULE:ins_q( queue:new(),0,N) end),
    [PID ! {self(),X} || X <- lists:seq(1,N) ],
    receive
    {ok, Q} ->
        %%io:format("~n Still insertin, Q till now ~p",[Q]),
        ok;
    {done, Q}  ->
        io:format("~n *******DONE*********~n ~p",[Q]);
    _E ->
        io:format("unexpected ~p",[_E])
    end.

ins_q(Q,Ctr, Max) ->    
    receive
    {From, N} ->
        %%io:format("~n insert ~p, ctr ~p, Q ~p ~n",[N,Ctr,Q]),
        NewQ = queue:in( N, Q) ,
        case Ctr
        of Temp when Temp < Max ->
            From ! {ok, NewQ};
        _-> 
            From ! {done, NewQ}
        end,
        ?MODULE:ins_q(NewQ,Ctr+1, Max);
    _E ->
        io:format("unexpected ~p",[_E]),
        ?MODULE:ins_q(Q, Ctr, Max)
    end.

%% Parallel Map for set
parallel_set(N) ->
    {R1,Set} = timer:tc(?MODULE, parmap_create_set, [N]),
    {parallel_set,pcreate,R1}.

parmap_create_set(N) ->

    PID = spawn(fun() ->
            ?MODULE:ins_set( sets:new(),0,N) end),
    [PID ! {self(),X} || X <- lists:seq(1,N) ],
    receive
    {ok, Sets} ->
        %%io:format("~n Still insertin, Sets till now ~p",[Sets]),
        ok;
    {done, Sets}  ->
        io:format("~n *******DONE*********~n ~p",[Sets]);
    _E ->
        io:format("unexpected ~p",[_E])
    end.

ins_set(Sets,Ctr, Max) ->    
    receive
    {From, N} ->
        %%io:format("~n insert ~p, ctr ~p, Sets ~p ~n",[N,Ctr,Sets]),
        NewSets = sets:add_element(N,Sets),

        case Ctr
        of Temp when Temp < Max ->
            From ! {ok, NewSets};
        _-> 
            From ! {done, NewSets}
        end,
        ?MODULE:ins_set(NewSets,Ctr+1, Max);
    _E ->
        io:format("unexpected ~p",[_E]),
        ?MODULE:ins_set(Sets, Ctr, Max)
    end.

%% Parallel Map for dict
parallel_dict(N) ->
    {R1,Set} = timer:tc(?MODULE, parmap_create_dict, [N]),
    {parallel_dict,pcreate,R1}.

parmap_create_dict(N) ->

    PID = spawn(fun() ->
            ?MODULE:ins_dict( dict:new(),0,N) end),
    [PID ! {self(),X} || X <- lists:seq(1,N) ],
    receive
    {ok, Dict} ->
        %%io:format("~n Still insertin, Sets till now ~p",[Dict]),
        ok;
    {done, Dict}  ->
        io:format("~n *******DONE*********~n ~p",[Dict]);
    _E ->
        io:format("unexpected ~p",[_E])
    end.

ins_dict(Dict,Ctr, Max) ->    
    receive
    {From, N} ->
        %%io:format("~n insert ~p, ctr ~p, Dict ~p ~n",[N,Ctr,Dict]),
        NewDict = dict:append([],N,Dict),

        case Ctr
        of Temp when Temp < Max ->
            From ! {ok, NewDict};
        _-> 
            From ! {done, NewDict}
        end,
        ?MODULE:ins_dict(NewDict,Ctr+1, Max);
    _E ->
        io:format("unexpected ~p",[_E]),
        ?MODULE:ins_dict(Dict, Ctr, Max)
    end.

%% Parallel Map for list
parallel_list(N) ->
    {R1,List} = timer:tc(?MODULE, parmap_create_list, [N]),
    {R2,_} = timer:tc(?MODULE, para_fetch_list, [N,List]),
    {parallel_list,pCreate,R1,pFetch,R2}.

parmap_create_list(N) ->

    PID = spawn(fun() ->
            ?MODULE:ins_list( [],0,N) end),
    [PID ! {self(),X} || X <- lists:seq(1,N) ],
    receive
    {ok, List} ->
        %%io:format("~n Still insertin, List till now ~p",[List]),
        ok;
    {done, List}  ->
        io:format("~n *******DONE*********~n ~p",[List]);
    _E ->
        io:format("unexpected ~p",[_E])
    end.

ins_list(List,Ctr, Max) ->    
    receive
    {From, N} ->
        %%io:format("~n insert ~p, ctr ~p, Sets ~p ~n",[N,Ctr,Dict]),
        NewList = List ++ [N] ,

        case Ctr
        of Temp when Temp < Max ->
            From ! {ok, NewList};
        _-> 
            From ! {done, NewList}
        end,
        ?MODULE:ins_list(NewList,Ctr+1, Max);
    _E ->
        io:format("unexpected ~p",[_E]),
        ?MODULE:ins_list(List, Ctr, Max)
    end.

para_fetch_list(List, FindN) ->
    Pid = spawn(fun() ->
            ?MODULE:pfetch_list(FindN) end),
    Self = self(),
    Now1 = now(),
    lists:map ( fun(X) ->
            Pid ! {Self, X},
            receive
                {true,Find} ->
                io:format("~n Found ~p ",[Find]),
                true;
                {false,Find}  ->
                %io:format("~n NOT FOUND ~p ",[Find]),
                false;
                _E ->
                io:format("para main unexpected ~p",[_E])
            after 4000 ->
                io:format("timerout ",[])

            end
        end, List ) ,
    Now2 = now(),
    timer:now_diff(Now2,Now1).

pfetch_list(Find) ->    
    receive
    {From, CurrN} ->

        _Ret = case CurrN of 
              Temp when Temp =:= Find ->
              From ! {true,Find},
              true;
              _ -> 
              From ! {false,Find},
              false
          end,
        %%io:format("~n insert ~p, ctr ~p, Sets ~p ~n",[N,Ctr,Dict]),
        ?MODULE:pfetch_list(Find);
    _E ->
        io:format("pfetch unexpected ~p",[_E]),
        ?MODULE:pfetch_list(Find)
    end.    

pcreate(N)->
    [   timer:tc(?MODULE, X , [N])  || X <- [ parallel_list ] ].

parallel_multi_create() ->
    InputRange = [10,100,1000,10000],
    lists:map(fun(X) ->
              ?MODULE:pcreate(X)
          end,InputRange).

Following are the results: For parallel Map DS

(ankit@127.0.0.1)2> test:pcreate(1000).
[{5399,{parallel_list,pCreate,5352,pFetch,30}},
 {9886,{parallel_dict,pcreate,9875}},
 {87603,{parallel_q,pcreate,87593}},
 {42271,{parallel_set,pcreate,42223}}]

For All DS

    (ankit@127.0.0.1)3> test:multi_create().
 %% For 10   
[[{38,{list,inserT,14,fetchT,8}},
      {91,{dict,inserT,67,fetchT,11}},
      {47,{queue,inserT,13,fetchT,21}},
      {81,{set,inserT,61,fetchT,7}}],
%%for 100
     [{234,{list,inserT,191,fetchT,30}},
      {690,{dict,inserT,642,fetchT,35}},
      {218,{queue,inserT,60,fetchT,144}},
      {559,{set,inserT,540,fetchT,6}}],
%% for 1000
     [{13746,{list,inserT,13452,fetchT,262}},
      {96137,{dict,inserT,96009,fetchT,116}},
      {967,{queue,inserT,284,fetchT,674}},
      {5562,{set,inserT,5547,fetchT,5}}],
%% for 10000  
   [{438301,{list,inserT,437256,fetchT,1027}},
      {361450,{dict,inserT,360412,fetchT,1020}},
      {7937,{queue,inserT,2292,fetchT,5636}},
      {31293,{set,inserT,31279,fetchT,5}}],
% for 100000
     [{43750210,{list,inserT,43739956,fetchT,10238}},
      {51517971,{dict,inserT,51507134,fetchT,10819}},
      {92503,{queue,inserT,29676,fetchT,62811}},
      {682118,{set,inserT,682100,fetchT,5}}]]

Hope this information helps you to determine which is the best DS for using for different purpose. I'll update it when I am finished with whole code.



回答3:

If you want to search a list use the functions in the list module which is part of the extensive documentation that comes with Erlang.

If you want to know the best data structures to use - that's a slightly different question which will require a bit more information.



回答4:

Use keyfind function from lists module. For example:

30> List = [{"A1",["ankit","sush", "Hover", "x4", "a3","nilesh","mike","erlang"]}].
[{"A1",
  ["ankit","sush","Hover","x4","a3","nilesh","mike",
   "erlang"]}]
31> lists:keyfind("A1", 1, List).
{"A1",
 ["ankit","sush","Hover","x4","a3","nilesh","mike","erlang"]}


回答5:

If you want this kind of "List" you can easily hand craft your own search:

search_key(Key, [{Key,_}=V|_]) -> {ok, V};
search_key(Key, [_|T]) -> search_key(Key, T);
search_key(_, []) -> not_found.

search_val(Val, [{_,L}=X|T]) ->
  case _search_val(Val, L) of
    ok -> {ok, X};
    not_found -> search_val(Val, T)
  end;
search_val(_, []) -> not_found.

_search_val(Val, [Val|_])-> ok;
_search_val(Val, [_|T])-> _search_val(Val, T);
_search_val(_,[])-> not_found.

But I'm not sure what exactly you want. For example if you want search for key "A1" and than for value "mike" in the list, it will be different solution. And if you want know how store this sort in best structure it is just another one question. You should provide more information.



回答6:

If your list is a simple one Term based item list, then the simplest solution is:

listFind ( Element, [] ) ->
    false;

listFind ( Element, [ Item | ListTail ] ) ->
    case ( Item == Element ) of
        true    ->  true;
        false   ->  listFind(Element, ListTail)
    end.

The above will work assuming that how you check the equality of your items in the list is with the == operator. You can modify that operator for accomodating other types of comparison/equality as you need.



回答7:

You can also use any from http://erlang.org/doc/man/lists.html#any-2

lists:any(fun(X) -> X == <<"yourthing">> end, YourList)

will return true if found or false otherwise.