可以将文章内容翻译成中文,广告屏蔽插件可能会导致该功能失效(如失效,请关闭广告屏蔽插件后再试):
问题:
I have an array, and I want to make a hash so I can quickly ask "is X in the array?".
In perl, there is an easy (and fast) way to do this:
my @array = qw( 1 2 3 );
my %hash;
@hash{@array} = undef;
This generates a hash that looks like:
{
1 => undef,
2 => undef,
3 => undef,
}
The best I've come up with in Ruby is:
array = [1, 2, 3]
hash = Hash[array.map {|x| [x, nil]}]
which gives:
{1=>nil, 2=>nil, 3=>nil}
Is there a better Ruby way?
EDIT 1
No, Array.include? is not a good idea. Its slow. It does a query in O(n) instead of O(1). My example array had three elements for brevity; assume the actual one has a million elements. Let's do a little benchmarking:
#!/usr/bin/ruby -w
require 'benchmark'
array = (1..1_000_000).to_a
hash = Hash[array.map {|x| [x, nil]}]
Benchmark.bm(15) do |x|
x.report("Array.include?") { 1000.times { array.include?(500_000) } }
x.report("Hash.include?") { 1000.times { hash.include?(500_000) } }
end
Produces:
user system total real
Array.include? 46.190000 0.160000 46.350000 ( 46.593477)
Hash.include? 0.000000 0.000000 0.000000 ( 0.000523)
回答1:
If all you need the hash for is membership, consider using a Set
:
Set
Set implements a collection of unordered values with no
duplicates. This is a hybrid of Array's intuitive inter-operation
facilities and Hash's fast lookup.
Set is easy to use with Enumerable objects (implementing
each
). Most of the initializer methods and binary operators accept
generic Enumerable objects besides sets and arrays. An
Enumerable object can be converted to Set using the
to_set
method.
Set uses Hash as storage, so you must note the following points:
- Equality of elements is determined according to
Object#eql?
and Object#hash
.
- Set assumes that the identity of each element does not change while it is stored. Modifying an element of a set will render the set to an
unreliable state.
- When a string is to be stored, a frozen copy of the string is stored instead unless the original string is already frozen.
Comparison
The comparison operators <
, >
, <=
and >=
are implemented as
shorthand for the {proper_,}{subset?,superset?} methods. However, the
<=>
operator is intentionally left out because not every pair of
sets is comparable. ({x,y} vs. {x,z} for example)
Example
require 'set'
s1 = Set.new [1, 2] # -> #<Set: {1, 2}>
s2 = [1, 2].to_set # -> #<Set: {1, 2}>
s1 == s2 # -> true
s1.add("foo") # -> #<Set: {1, 2, "foo"}>
s1.merge([2, 6]) # -> #<Set: {1, 2, "foo", 6}>
s1.subset? s2 # -> false
s2.subset? s1 # -> true
[...]
Public Class Methods
new(enum = nil)
Creates a new set containing the elements of the given enumerable
object.
If a block is given, the elements of enum are preprocessed by the
given block.
回答2:
try this one:
a=[1,2,3]
Hash[a.zip]
回答3:
You can do this very handy trick:
Hash[*[1, 2, 3, 4].map {|k| [k, nil]}.flatten]
=> {1=>nil, 2=>nil, 3=>nil, 4=>nil}
回答4:
If you want to quickly ask "is X in the array?" you should use Array#include?
.
Edit (in response to addition in OP):
If you want speedy look up times, use a Set. Having a Hash that points to all nil
s is silly. Conversion is an easy process too with Array#to_set
.
require 'benchmark'
require 'set'
array = (1..1_000_000).to_a
set = array.to_set
Benchmark.bm(15) do |x|
x.report("Array.include?") { 1000.times { array.include?(500_000) } }
x.report("Set.include?") { 1000.times { set.include?(500_000) } }
end
Results on my machine:
user system total real
Array.include? 36.200000 0.140000 36.340000 ( 36.740605)
Set.include? 0.000000 0.000000 0.000000 ( 0.000515)
You should consider just using a set to begin with, instead of an array so that a conversion is never necessary.
回答5:
I'm fairly certain that there isn't a one-shot clever way to construct this hash. My inclination would be to just be explicit and state what I'm doing:
hash = {}
array.each{|x| hash[x] = nil}
It doesn't look particularly elegant, but it's clear, and does the job.
FWIW, your original suggestion (under Ruby 1.8.6 at least) doesn't seem to work. I get an "ArgumentError: odd number of arguments for Hash" error. Hash.[] expects a literal, even-lengthed list of values:
Hash[a, 1, b, 2] # => {a => 1, b => 2}
so I tried changing your code to:
hash = Hash[*array.map {|x| [x, nil]}.flatten]
but the performance is dire:
#!/usr/bin/ruby -w
require 'benchmark'
array = (1..100_000).to_a
Benchmark.bm(15) do |x|
x.report("assignment loop") {hash = {}; array.each{|e| hash[e] = nil}}
x.report("hash constructor") {hash = Hash[*array.map {|e| [e, nil]}.flatten]}
end
gives
user system total real
assignment loop 0.440000 0.200000 0.640000 ( 0.657287)
hash constructor 4.440000 0.250000 4.690000 ( 4.758663)
Unless I'm missing something here, a simple assignment loop seems the clearest and most efficient way to construct this hash.
回答6:
Rampion beat me to it. Set might be the answer.
You can do:
require 'set'
set = array.to_set
set.include?(x)
回答7:
Your way of creating the hash looks good. I had a muck around in irb and this is another way
>> [1,2,3,4].inject(Hash.new) { |h,i| {i => nil}.merge(h) }
=> {1=>nil, 2=>nil, 3=>nil, 4=>nil}
回答8:
I think chrismear's point on using assignment over creation is great. To make the whole thing a little more Ruby-esque, though, I might suggest assigning something other than nil
to each element:
hash = {}
array.each { |x| hash[x] = 1 } # or true or something else "truthy"
...
if hash[376] # instead of if hash.has_key?(376)
...
end
The problem with assigning to nil
is that you have to use has_key?
instead of []
, since []
give you nil
(your marker value) if the Hash
doesn't have the specified key. You could get around this by using a different default value, but why go through the extra work?
# much less elegant than above:
hash = Hash.new(42)
array.each { |x| hash[x] = nil }
...
unless hash[376]
...
end
回答9:
Maybe I am misunderstanding the goal here; If you wanted to know if X was in the array, why not do array.include?("X") ?
回答10:
Doing some benchmarking on the suggestions so far gives that chrismear and Gaius's assignment-based hash creation is slightly faster than my map method (and assigning nil is slightly faster than assigning true). mtyaka and rampion's Set suggestion is about 35% slower to create.
As far as lookups, hash.include?(x)
is a very tiny amount faster than hash[x]
; both are twice as a fast as set.include?(x)
.
user system total real
chrismear 6.050000 0.850000 6.900000 ( 6.959355)
derobert 6.010000 1.060000 7.070000 ( 7.113237)
Gaius 6.210000 0.810000 7.020000 ( 7.049815)
mtyaka 8.750000 1.190000 9.940000 ( 9.967548)
rampion 8.700000 1.210000 9.910000 ( 9.962281)
user system total real
times 10.880000 0.000000 10.880000 ( 10.921315)
set 93.030000 17.490000 110.520000 (110.817044)
hash-i 45.820000 8.040000 53.860000 ( 53.981141)
hash-e 47.070000 8.280000 55.350000 ( 55.487760)
Benchmarking code is:
#!/usr/bin/ruby -w
require 'benchmark'
require 'set'
array = (1..5_000_000).to_a
Benchmark.bmbm(10) do |bm|
bm.report('chrismear') { hash = {}; array.each{|x| hash[x] = nil} }
bm.report('derobert') { hash = Hash[array.map {|x| [x, nil]}] }
bm.report('Gaius') { hash = {}; array.each{|x| hash[x] = true} }
bm.report('mtyaka') { set = array.to_set }
bm.report('rampion') { set = Set.new(array) }
end
hash = Hash[array.map {|x| [x, true]}]
set = array.to_set
array = nil
GC.start
GC.disable
Benchmark.bmbm(10) do |bm|
bm.report('times') { 100_000_000.times { } }
bm.report('set') { 100_000_000.times { set.include?(500_000) } }
bm.report('hash-i') { 100_000_000.times { hash.include?(500_000) } }
bm.report('hash-e') { 100_000_000.times { hash[500_000] } }
end
GC.enable
回答11:
If you're not bothered what the hash values are
irb(main):031:0> a=(1..1_000_000).to_a ; a.length
=> 1000000
irb(main):032:0> h=Hash[a.zip a] ; h.keys.length
=> 1000000
Takes a second or so on my desktop.
回答12:
If you're looking for an equivalent of this Perl code:
grep {$_ eq $element} @array
You can just use the simple Ruby code:
array.include?(element)
回答13:
Here's a neat way to cache lookups with a Hash:
a = (1..1000000).to_a
h = Hash.new{|hash,key| hash[key] = true if a.include? key}
Pretty much what it does is create a default constructor for new hash values, then stores "true" in the cache if it's in the array (nil otherwise). This allows lazy loading into the cache, just in case you don't use every element.
回答14:
This preserves 0's if your hash was [0,0,0,1,0]
hash = {}
arr.each_with_index{|el, idx| hash.merge!({(idx + 1 )=> el }) }
Returns :
# {1=>0, 2=>0, 3=>0, 4=>1, 5=>0}