c# Registry to XML Invalid character issue

2019-07-14 03:02发布

问题:

I have a problem when trying to create an XML file from the registry. On my laptop (Windows 7, 64-bit) it works fine and the XML file is generated, but on another computer (Windows XP, 32-bit) an exception is thrown: System.ArgumentException: '.', hexadecimal value 0x00, is an invalid character. I have read a few useful things about it, but I don't know how to solve it in this case. Here is the code:

        // Dump the registry branch into an XElement tree, round-trip it through
        // XmlDocument, and save it next to the executable. Any failure is written
        // to the console and to the log file.
        try
        {
            const string registryPath = "SOFTWARE\\IPS";
            XElement root = new XElement("Root", new XAttribute("Registry", registryPath));

            // Fills 'root' with one element per sub key and per value.
            ReadRegistry(registryPath, root);

            // Serializing the tree is the point where invalid XML characters are reported.
            XmlDocument document = new XmlDocument();
            document.LoadXml(root.ToString());
            document.Save(AppDomain.CurrentDomain.BaseDirectory + "\\_RegistryList.xml");
        }
        catch (System.Exception ex)
        {
            string details = ex.ToString();
            Console.WriteLine(details);
            LogToFile(details);
        }

    /// <summary>
    /// Recursively walks <paramref name="keyPath"/> under HKEY_LOCAL_MACHINE and appends to
    /// <paramref name="xRegRoot"/> one "RegKeyName" element per sub key and one
    /// "RegKeyValue" element per value. All elements are added flat, directly under
    /// <paramref name="xRegRoot"/>.
    /// </summary>
    /// <param name="keyPath">Registry path relative to HKEY_LOCAL_MACHINE.</param>
    /// <param name="xRegRoot">Element that receives the generated child elements.</param>
    /// <remarks>
    /// Registry strings can contain characters that XML 1.0 forbids (for example an embedded
    /// 0x00). XAttribute accepts such a value when it is constructed and only fails later,
    /// when the tree is serialized, so every string is cleaned before it is stored.
    /// Exceptions are logged and swallowed so that one unreadable key does not abort the dump.
    /// </remarks>
    private static void ReadRegistry(string keyPath, XElement xRegRoot)
    {
        try
        {
            // Registry.LocalMachine is a shared root and must not be disposed;
            // the key opened from it is ours and is released by the using block.
            using (RegistryKey regKey = Registry.LocalMachine.OpenSubKey(keyPath))
            {
                if (regKey == null)
                {
                    // OpenSubKey returns null when the key does not exist.
                    string notFound = "Key not found | " + keyPath;
                    Console.WriteLine(notFound);
                    LogToFile(notFound);
                    return;
                }

                foreach (string subKey in regKey.GetSubKeyNames())
                {
                    string fullPath = keyPath + "\\" + subKey;
                    Console.WriteLine("\r\nKey Name  | " + fullPath);
                    LogToFile("Key Name  | " + fullPath);

                    XElement xregkey = new XElement(
                        "RegKeyName",
                        new XAttribute("FullName", RemoveInvalidXmlChars(fullPath)),
                        new XAttribute("Name", RemoveInvalidXmlChars(subKey)));
                    xRegRoot.Add(xregkey);
                    ReadRegistry(fullPath, xRegRoot);
                }

                foreach (string val in regKey.GetValueNames())
                {
                    string keyName = val;
                    string keyType = regKey.GetValueKind(val).ToString();

                    // Guard against a null result instead of dereferencing it blindly.
                    object rawValue = regKey.GetValue(val);
                    string keyValue = rawValue == null ? string.Empty : rawValue.ToString();

                    Console.WriteLine("Key Value | " + keyType + " | " + keyName + " | " + keyValue);
                    LogToFile("Key " + keyType + " | " + keyName + " | " + keyValue);

                    XElement xregvalue = new XElement(
                        "RegKeyValue",
                        new XAttribute("keyType", keyType),
                        new XAttribute("keyName", RemoveInvalidXmlChars(keyName)),
                        new XAttribute("keyValue", RemoveInvalidXmlChars(keyValue)));
                    xRegRoot.Add(xregvalue);
                }
            }
        }
        catch (System.Exception ex)
        {
            Console.WriteLine(ex.ToString());
            LogToFile(ex.ToString());
        }
    }

    /// <summary>
    /// Returns <paramref name="text"/> without the characters XML 1.0 does not allow
    /// (control characters other than tab/LF/CR, lone surrogates, 0xFFFE and 0xFFFF).
    /// </summary>
    private static string RemoveInvalidXmlChars(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        System.Text.StringBuilder cleaned = new System.Text.StringBuilder(text.Length);

        for (int i = 0; i < text.Length; i++)
        {
            char c = text[i];

            if (char.IsSurrogatePair(text, i))
            {
                // A well-formed pair encodes a code point in 0x10000-0x10FFFF, which is legal.
                cleaned.Append(c).Append(text[i + 1]);
                i++;
            }
            else if (c == 0x9 || c == 0xA || c == 0xD ||
                     (c >= 0x20 && c <= 0xD7FF) ||
                     (c >= 0xE000 && c <= 0xFFFD))
            {
                cleaned.Append(c);
            }
        }

        return cleaned.ToString();
    }

Thanks in advance.

回答1:

Here are a couple of small improvements so that the code a) compiles, and b) handles surrogate pairs:

    /// <summary>
    /// Remove illegal XML characters from a string.
    /// </summary>
    /// <param name="s">The text to clean; may be null or empty.</param>
    /// <returns>
    /// <paramref name="s"/> itself when it is null or empty; otherwise a copy that keeps only
    /// the code points accepted by IsLegalXmlChar. Lone surrogates are dropped.
    /// </returns>
    public static string SanitizeString(string s)
    {
        if (string.IsNullOrEmpty(s))
        {
            return s;
        }

        StringBuilder buffer = new StringBuilder(s.Length);

        for (int i = 0; i < s.Length; i++)
        {
            char current = s[i];

            if (Char.IsSurrogatePair(s, i))
            {
                // Two UTF-16 units form one supplementary code point: test it as a whole.
                char low = s[i + 1];
                if (IsLegalXmlChar(Char.ConvertToUtf32(current, low)))
                {
                    buffer.Append(current).Append(low);
                }

                // Skip the low surrogate that was just consumed.
                i++;
                continue;
            }

            if (Char.IsSurrogate(current))
            {
                // Lone surrogate: not a code point at all. Checked explicitly instead of
                // letting Char.ConvertToUtf32 throw and catching the exception.
                continue;
            }

            if (IsLegalXmlChar(current))
            {
                buffer.Append(current);
            }
        }

        return buffer.ToString();
    }

    /// <summary>
    /// Whether a given character is allowed by XML 1.0.
    /// </summary>
    /// <param name="codePoint">The Unicode code point to test.</param>
    /// <returns>
    /// <c>true</c> for tab, line feed, carriage return and the ranges 0x20-0xD7FF,
    /// 0xE000-0xFFFD and 0x10000-0x10FFFF; <c>false</c> for everything else.
    /// </returns>
    private static bool IsLegalXmlChar(int codePoint)
    {
        return (codePoint == 0x9 ||
            codePoint == 0xA ||
            codePoint == 0xD ||
            (codePoint >= 0x20 && codePoint <= 0xD7FF) ||
            (codePoint >= 0xE000 && codePoint <= 0xFFFD) ||
            // Upper bound kept explicit so the check is correct for any int,
            // not only for values produced by Char.ConvertToUtf32.
            (codePoint >= 0x10000 && codePoint <= 0x10FFFF)
        );
    }


回答2:

I did some experiments:

  • new XElement("foo\x00bar") throws on construction.
  • new XAttribute("foo\x00bar", "baz") throws on construction.
  • new XText("foo\x00bar") throws only when calling .ToString().

new XAttribute("foo", "bar\x00baz") is equivalent to new XAttribute("foo", new XText("bar\x00baz")), so it won't throw on construction.

I did not manage to make any of the registry methods return a string containing null characters, but you should be able to find out for yourself which call returns one.



回答3:

You can read more about it here: http://seattlesoftware.wordpress.com/2008/09/11/hexadecimal-value-0-is-an-invalid-character/

And more about it here: XElement & UTF-8 Issue

A list of valid XML characters is here: http://en.wikipedia.org/wiki/Valid_characters_in_XML

But essentially you can fix it by removing illegal chars before serialising

/// <summary>
/// Remove illegal XML characters from a string.
/// </summary>
/// <param name="xml">The text to clean; may be null or empty.</param>
/// <returns>
/// <paramref name="xml"/> itself when it is null or empty; otherwise a copy that keeps only
/// the code points accepted by IsLegalXmlChar.
/// </returns>
public string SanitizeXmlString(string xml)
{
    if (string.IsNullOrEmpty(xml))
    {
        return xml;
    }

    StringBuilder buffer = new StringBuilder(xml.Length);

    for (int i = 0; i < xml.Length; i++)
    {
        char c = xml[i];

        if (char.IsSurrogatePair(xml, i))
        {
            // A surrogate pair is one code point; testing each half separately
            // would reject every supplementary character.
            char low = xml[i + 1];
            if (IsLegalXmlChar(char.ConvertToUtf32(c, low)))
            {
                buffer.Append(c).Append(low);
            }

            // Skip the low surrogate that was just consumed.
            i++;
        }
        else if (IsLegalXmlChar(c))
        {
            // Lone surrogates fall in 0xD800-0xDFFF, which IsLegalXmlChar rejects.
            buffer.Append(c);
        }
    }

    return buffer.ToString();
}

/// <summary>
/// Tells whether a code point is permitted in an XML 1.0 document.
/// </summary>
/// <param name="character">The Unicode code point to test.</param>
/// <returns>
/// <c>true</c> for tab, line feed, carriage return and the ranges 0x20-0xD7FF,
/// 0xE000-0xFFFD and 0x10000-0x10FFFF; <c>false</c> otherwise.
/// </returns>
public bool IsLegalXmlChar(int character)
{
    // Below 0x20 only the three whitespace controls are allowed.
    if (character < 0x20)
    {
        return character == 0x9 || character == 0xA || character == 0xD;
    }

    // 0x20-0xD7FF: legal.
    if (character <= 0xD7FF)
    {
        return true;
    }

    // 0xD800-0xDFFF: the surrogate block, never legal on its own.
    if (character < 0xE000)
    {
        return false;
    }

    // 0xE000-0xFFFD: legal.
    if (character <= 0xFFFD)
    {
        return true;
    }

    // 0xFFFE and 0xFFFF are excluded; the supplementary planes are legal.
    return character >= 0x10000 && character <= 0x10FFFF;
}