What is causing all these errors including the Arg

2019-09-21 09:32发布

I'm trying to write a fairly straightforward implementation of a hash set of strings

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace StringSet
{

    class StringSet
    {

        private List<List<string>> _Buckets;
        private int _numStrings;

        public StringSet ( ) 
        {
            this._Buckets = new List<List<string>>();
            this._numStrings = 0;
        }

        public StringSet ( string[] S )
        {
            // better way to do this?
            this._Buckets = new List<List<string>>();
            foreach ( string s in S ) this._Buckets.Add(new List<string>());
            foreach ( string s in S ) { this.Insert(s); ++_numStrings; }
        }

        private int _GetBucketNumber ( string s, List<List<string>> Buckets )
        {
            //       s: string whose index to look up
            // Buckets: source buckets

            // disallow empty or NULL strings
            if ( String.IsNullOrEmpty(s) ) { throw new ArgumentException("Cannot add empty or NULL string to set"); }
            if ( Buckets.Count == 0 ) { throw new ArgumentException("Tried to call _GetBucketNumber on empty bucket list"); }

            // XOR characters together and mod by length of buckets
            char c = s[0];
            for ( int i = 1; i < s.Length; ++i ) { c ^= s[i]; }
            return (int)c % Buckets.Count;
        }

        private void _RehashIfNecessary ( )
        {
            // if the number of strings in the set exceeds the number of buckets, 
            // increase the number of buckets to either double its current size 
            // or the largest number of buckets possible, whichever is smaller
            if ( this._numStrings > this._Buckets.Count )
            {
                List<List<string>> NewBuckets = new List<List<string>>(Math.Min(this._Buckets.Count * 2, Int32.MaxValue));
                foreach ( List<string> Bucket in this._Buckets )
                {
                    foreach ( string s in Bucket )
                    {
                        NewBuckets[this._GetBucketNumber(s, NewBuckets)].Add(s);
                    }
                }
                this._Buckets = NewBuckets;
            }
        }

        public void Insert ( string s )
        {
            // disallow empty or NULL strings
            if ( String.IsNullOrEmpty(s) ) { throw new ArgumentException("Cannot add empty or NULL string to set"); }

            // Get bucket that string belongs in
            List<string> Bucket = this._Buckets[this._GetBucketNumber(s,this._Buckets)];
            // Add if not already there
            if ( Bucket.IndexOf(s) == -1 ) { Bucket.Add(s); }
            ++_numStrings; _RehashIfNecessary();
        }

        public bool Contains ( string s )
        {
            // returns true or false depending on whether s is a 
            // string currently in the set

            return (this._Buckets[this._GetBucketNumber(s,this._Buckets)].IndexOf(s) != -1);
        }

        public void Print ( )
        {
            for ( int i = 0; i < this._Buckets.Count; ++i )
            {
                Console.WriteLine("Bucket {0}: {1}", i, this._Buckets[i].ToArray().ToString());
            }
        }

    }

    class Program
    {
        static void Main ( string[] args )
        {
            string[] strs = new string[] { "apple", "potato", "car", "cat", "dog", "sheep", "Trump" };
            try
            {
                StringSet TestSet = new StringSet(strs);
                TestSet.Print();
            }
            catch ( Exception E )
            {
                Console.WriteLine("Exception occured: {0}", E.Message);
            }
        }
    }
}

and am getting the massive list of errors

'StringSet.vshost.exe' (CLR v4.0.30319: StringSet.vshost.exe): Loaded 'C:\Windows\Microsoft.Net\assembly\GAC_32\mscorlib\v4.0_4.0.0.0__b77a5c561934e089\mscorlib.dll'. Skipped loading symbols. Module is optimized and the debugger option 'Just My Code' is enabled.
'StringSet.vshost.exe' (CLR v4.0.30319: StringSet.vshost.exe): Loaded 'C:\Windows\assembly\GAC_MSIL\Microsoft.VisualStudio.HostingProcess.Utilities\12.0.0.0__b03f5f7f11d50a3a\Microsoft.VisualStudio.HostingProcess.Utilities.dll'. Skipped loading symbols. Module is optimized and the debugger option 'Just My Code' is enabled.
'StringSet.vshost.exe' (CLR v4.0.30319: StringSet.vshost.exe): Loaded 'C:\Windows\Microsoft.Net\assembly\GAC_MSIL\System.Windows.Forms\v4.0_4.0.0.0__b77a5c561934e089\System.Windows.Forms.dll'. Skipped loading symbols. Module is optimized and the debugger option 'Just My Code' is enabled.
'StringSet.vshost.exe' (CLR v4.0.30319: StringSet.vshost.exe): Loaded 'C:\Windows\Microsoft.Net\assembly\GAC_MSIL\System\v4.0_4.0.0.0__b77a5c561934e089\System.dll'. Skipped loading symbols. Module is optimized and the debugger option 'Just My Code' is enabled.
'StringSet.vshost.exe' (CLR v4.0.30319: StringSet.vshost.exe): Loaded 'C:\Windows\Microsoft.Net\assembly\GAC_MSIL\System.Drawing\v4.0_4.0.0.0__b03f5f7f11d50a3a\System.Drawing.dll'. Skipped loading symbols. Module is optimized and the debugger option 'Just My Code' is enabled.
'StringSet.vshost.exe' (CLR v4.0.30319: StringSet.vshost.exe): Loaded 'C:\Windows\assembly\GAC_MSIL\Microsoft.VisualStudio.HostingProcess.Utilities.Sync\12.0.0.0__b03f5f7f11d50a3a\Microsoft.VisualStudio.HostingProcess.Utilities.Sync.dll'. Skipped loading symbols. Module is optimized and the debugger option 'Just My Code' is enabled.
'StringSet.vshost.exe' (CLR v4.0.30319: StringSet.vshost.exe): Loaded 'C:\Windows\assembly\GAC_MSIL\Microsoft.VisualStudio.Debugger.Runtime\12.0.0.0__b03f5f7f11d50a3a\Microsoft.VisualStudio.Debugger.Runtime.dll'. Skipped loading symbols. Module is optimized and the debugger option 'Just My Code' is enabled.
'StringSet.vshost.exe' (CLR v4.0.30319: StringSet.vshost.exe): Loaded 'C:\Users\Me\Documents\Visual Studio 2013\Projects\StringSet\StringSet\bin\Debug\StringSet.vshost.exe'. Skipped loading symbols. Module is optimized and the debugger option 'Just My Code' is enabled.
'StringSet.vshost.exe' (CLR v4.0.30319: StringSet.vshost.exe): Loaded 'C:\Windows\Microsoft.Net\assembly\GAC_MSIL\System.Core\v4.0_4.0.0.0__b77a5c561934e089\System.Core.dll'. Skipped loading symbols. Module is optimized and the debugger option 'Just My Code' is enabled.
'StringSet.vshost.exe' (CLR v4.0.30319: StringSet.vshost.exe): Loaded 'C:\Windows\Microsoft.Net\assembly\GAC_MSIL\System.Xml.Linq\v4.0_4.0.0.0__b77a5c561934e089\System.Xml.Linq.dll'. Skipped loading symbols. Module is optimized and the debugger option 'Just My Code' is enabled.
'StringSet.vshost.exe' (CLR v4.0.30319: StringSet.vshost.exe): Loaded 'C:\Windows\Microsoft.Net\assembly\GAC_MSIL\System.Data.DataSetExtensions\v4.0_4.0.0.0__b77a5c561934e089\System.Data.DataSetExtensions.dll'. Skipped loading symbols. Module is optimized and the debugger option 'Just My Code' is enabled.
'StringSet.vshost.exe' (CLR v4.0.30319: StringSet.vshost.exe): Loaded 'C:\Windows\Microsoft.Net\assembly\GAC_MSIL\Microsoft.CSharp\v4.0_4.0.0.0__b03f5f7f11d50a3a\Microsoft.CSharp.dll'. Skipped loading symbols. Module is optimized and the debugger option 'Just My Code' is enabled.
'StringSet.vshost.exe' (CLR v4.0.30319: StringSet.vshost.exe): Loaded 'C:\Windows\Microsoft.Net\assembly\GAC_32\System.Data\v4.0_4.0.0.0__b77a5c561934e089\System.Data.dll'. Skipped loading symbols. Module is optimized and the debugger option 'Just My Code' is enabled.
'StringSet.vshost.exe' (CLR v4.0.30319: StringSet.vshost.exe): Loaded 'C:\Windows\Microsoft.Net\assembly\GAC_MSIL\System.Xml\v4.0_4.0.0.0__b77a5c561934e089\System.Xml.dll'. Skipped loading symbols. Module is optimized and the debugger option 'Just My Code' is enabled.
The thread 0x1164 has exited with code 259 (0x103).
The thread 0x1594 has exited with code 259 (0x103).
The thread 0x1588 has exited with code 0 (0x0).
The thread 0x1744 has exited with code 0 (0x0).
The thread 0xc88 has exited with code 259 (0x103).
'StringSet.vshost.exe' (CLR v4.0.30319: StringSet.vshost.exe): Loaded 'C:\Users\Me\Documents\Visual Studio 2013\Projects\StringSet\StringSet\bin\Debug\StringSet.exe'. Symbols loaded.
A first chance exception of type 'System.ArgumentException' occurred in StringSet.exe
The thread 0x780 has exited with code 259 (0x103).
The thread 0x1170 has exited with code 259 (0x103).
The program '[4252] StringSet.vshost.exe' has exited with code 0 (0x0).

which is not tracing to a line of code and is not being caught by my try-catch, so I have no idea what's going wrong here. Any help?

Also, I'm wondering doubling the size of the set when it's full a la my line of code

List<List<string>> NewBuckets = new List<List<string>>(Math.Min(this._Buckets.Count * 2, Int32.MaxValue));

is the best way to do it or if there's a "smarter" way of keeping a hash set sufficiently large. Because if, say, the set is orginally 10,000 elements and is only going to grow to 10,001, then expanding it to 20,000 is too big (9,999 wasted buckets), but there's no way of predicting how large it needs to be. Right? Increasing the size by 10% or 100% will, on average, be as effient in terms of both time and space.

Also, how can I prevent overflow in

this._Buckets.Count * 2

??? I want to change that so that it's Int32.MaxValue if it overflows.

4条回答
地球回转人心会变
2楼-- · 2019-09-21 10:15

Try this and write back its modified at _RehashIfNecessary and Print

class StringSet
{

    private List<List<string>> _Buckets;
    private int _numStrings;

    public StringSet()
    {
        this._Buckets = new List<List<string>>();
        this._numStrings = 0;
    }

    public StringSet(string[] S)
    {
        // better way to do this?
        this._Buckets = new List<List<string>>();
        foreach (string s in S) this._Buckets.Add(new List<string>());

        foreach (string s in S) { this.Insert(s); ++_numStrings; }
    }

    private int _GetBucketNumber(string s, List<List<string>> Buckets)
    {
        //       s: string whose index to look up
        // Buckets: source buckets

        // disallow empty or NULL strings
        if (String.IsNullOrEmpty(s)) { throw new ArgumentException("Cannot add empty or NULL string to set"); }
        if (Buckets.Count == 0) { throw new ArgumentException("Tried to call _GetBucketNumber on empty bucket list"); }

        // XOR characters together and mod by length of buckets
        char c = s[0];
        for (int i = 1; i < s.Length; ++i)
        {
            c ^= s[i];
        }
        return (int)c % Buckets.Count;
    }

    private void _RehashIfNecessary()
    {
        // if the number of strings in the set exceeds the number of buckets, 
        // increase the number of buckets to either double its current size 
        // or the largest number of buckets possible, whichever is smaller
        if (this._numStrings > this._Buckets.Count)
        {
            List<List<string>> NewBuckets = new List<List<string>>(_numStrings++);
            foreach (List<string> Bucket in this._Buckets)
            {
                NewBuckets.Add(new List<string>(Bucket));
            }
            this._Buckets = NewBuckets;
        }
    }

    public void Insert(string s)
    {
        // disallow empty or NULL strings
        if (String.IsNullOrEmpty(s)) { throw new ArgumentException("Cannot add empty or NULL string to set"); }

        // Get bucket that string belongs in
        List<string> Bucket = this._Buckets[this._GetBucketNumber(s, this._Buckets)];
        // Add if not already there
        if (Bucket.IndexOf(s) == -1)
        {
            Bucket.Add(s);
        }
        ++_numStrings;
        _RehashIfNecessary();
    }

    public bool Contains(string s)
    {
        // returns true or false depending on whether s is a 
        // string currently in the set

        return (this._Buckets[this._GetBucketNumber(s, this._Buckets)].IndexOf(s) != -1);
    }

    public void Print()
    {
        int j;
        //checking the bucket
        for (int i = 0; i < this._Buckets.Count; i++)
        {
            if (_Buckets[i].Count == 0)
                Console.Write($"Bucket {i}: Empty!");
            else
            {
                Console.Write("Bucket {0}: ", i);
                //Checking items within the bucket
                for (j = 0; j < _Buckets[i].Count; j++)
                {
                    Console.Write($"[{this._Buckets[i].ToArray().GetValue(j).ToString()}] ");
                }
            }
            Console.WriteLine();
        }
    }

}

class Program
{
    static void Main(string[] args)
    {
        string[] strs = new string[] { "apple", "potato", "car", "cat", "dog", "sheep", "Trump" };

        StringSet TestSet = new StringSet(strs);
        TestSet.Print();

        Console.Read();
    }
}

write back..

查看更多
淡お忘
3楼-- · 2019-09-21 10:16

NewBuckets initialization was missing

private void _RehashIfNecessary ( )
{
    // if the number of strings in the set exceeds the number of buckets, 
    // increase the number of buckets to either double its current size 
    // or the largest number of buckets possible, whichever is smaller
    if ( this._numStrings > this._Buckets.Count )
    {
        List<List<string>> NewBuckets = new List<List<string>>(Math.Min(this._Buckets.Count * 2, Int32.MaxValue));

        // this is missing.
        foreach ( var s in this._Buckets )
        {
            NewBuckets.Add(new List<string>());
        }

        foreach ( List<string> Bucket in this._Buckets )
        {
            foreach ( string s in Bucket )
            {                       
                NewBuckets[this._GetBucketNumber(s, NewBuckets)].Add(s);
            }
        }
        this._Buckets = NewBuckets;
    }
}

Check your Working Code

查看更多
何必那么认真
4楼-- · 2019-09-21 10:19

Main question

First of all, you don't have a "massive list of errors". If you read closely, you'll find just one:

A first chance exception of type 'System.ArgumentException' occurred in StringSet.exe

Second, your try-catch does in fact catch that exception. The reason you don't see the console output is (probably) that Visual Studio closes the console immediately when you're running your application in debug mode. But if you put a breakpoint in the catch block, the debugger would hit it.

In fact, the ArgumentException that is thrown, is one of your own:

Tried to call _GetBucketNumber on empty bucket list

The stack trace shows that this happens inside a call to _RehashIfNecessary. This makes sence, because when you do NewBuckets[this._GetBucketNumber(s, NewBuckets)].Add(s); inside that method, NewBuckets is indeed still empty.

The List<T> constructor that takes a capacity argument initializes a new instance of the List class that is empty and has the specified initial capacity. Therefore NewBuckets still needs to be filled with the right amount of List<string>'s.

Side questions

I'm wondering doubling the size of the set when it's full a la my line of code is the best way to do it or if there's a "smarter" way

There may or may not be a smarter way. As you've said yourself, you simply don't know how big the collection will become. This is true most of the time, but not always.

It turns out that doubling the size of some form of storage is good enough for many standard containers in many languages. This is true because expanding storage might just allocate a whole new block of memory and copy all of the old data to it.

On modern pc's, laptops or servers, memory is essentially free (how many items are you going to put in your hash table, really?) while the action of allocating new memory and copying data isn't. So unless you're working with special constraints, doubling the size of your StringSet when expanding is fine.

Also, how can I prevent overflow in this._Buckets.Count * 2? I want to change that so that it's Int32.MaxValue if it overflows.

There are several ways. You could convert all of your calculations to Int64, while keeping the result Int32. But something that might be a little less invasive could be something like this:

class StringSet
{
    private const int HalfOfInt32Max = Int32.MaxValue / 2;
    /* snip */
    private Int32 NewMax(int oldMax)
    {
        if (oldMax < HalfOfInt32Max)
        {
            return oldMax * 2;
        }
        return Int32.MaxValue;
    }
}

and then use NewMax(this._Buckets.Count);.

查看更多
仙女界的扛把子
5楼-- · 2019-09-21 10:31

The error occures in your _RehashIfNecessary() Method. You create a new list of Buckets and call the Method _GetBucketNumber with the new bucket-list which has a count of 0. in the GetBucketNumber-Method you throw an exception when Buckets.Count equals 0.

The overflow-question on this._Buckets.Count * 2:

//Ternary-Expression
List<List<string>> NewBuckets = new List<List<string>>(Int32.MaxValue / 2 < this._Buckets.Count ? Int32.MaxValue : this._Buckets.Count * 2);

//Classic if-else
List<List<string>> NewBuckets = null;
if (Int32.MaxValue / 2 < this._Buckets.Count)
    NewBuckets = new List<List<string>>(Int32.MaxValue);
else
    NewBuckets = new List<List<string>>(this._Buckets.Count * 2);
查看更多
登录 后发表回答