XMLDiff fails to recognize differences correctly?

2019-05-30 16:47发布

问题:

What am I missing here? Is there an option that makes XMLDiff take element names into account and look for the best match, so that the following changes are recognized correctly?

A helper class for comparing two XML documents:

/// <summary>
/// Thin wrapper around the XmlDiff / XmlDiffView types that compares two XML
/// strings and exposes the result either as a diffgram document or as HTML.
/// </summary>
public class XMLDiffer
{
    /// <summary>
    /// Runs XmlDiff over the two XML strings and returns the diffgram it writes.
    /// </summary>
    /// <param name="originalXML">XML text used as the baseline of the comparison.</param>
    /// <param name="changedXML">XML text that is compared against the baseline.</param>
    /// <returns>The diffgram produced by the comparison, parsed into an <see cref="XDocument"/>.</returns>
    public XDocument Diff(string originalXML, string changedXML)
    {
        // Library background: http://msdn2.microsoft.com/en-us/library/aa302294.aspx
        const XmlDiffOptions comparisonOptions =
            XmlDiffOptions.IgnoreChildOrder | XmlDiffOptions.IgnoreComments | XmlDiffOptions.IgnoreWhitespace;

        XmlDiff engine = new XmlDiff(comparisonOptions) { Algorithm = XmlDiffAlgorithm.Precise };
        StringBuilder diffgramText = new StringBuilder();

        // The resources are stacked in the order they are created, so they are
        // disposed innermost-first; the writer is flushed into diffgramText
        // before the text is parsed below.
        using (StringReader originalText = new StringReader(originalXML))
        using (StringReader changedText = new StringReader(changedXML))
        using (XmlReader originalReader = XmlReader.Create(originalText))
        using (XmlReader changedReader = XmlReader.Create(changedText))
        using (StringWriter diffgramSink = new StringWriter(diffgramText))
        using (XmlWriter diffgramWriter = XmlWriter.Create(diffgramSink))
        {
            // The boolean result of Compare is not needed; only the diffgram is used.
            engine.Compare(originalReader, changedReader, diffgramWriter);
        }

        return XDocument.Parse(diffgramText.ToString());
    }

    /// <summary>
    /// Builds the HTML that XmlDiffView generates for the differences between
    /// the two XML strings.
    /// </summary>
    /// <param name="originalXML">XML text used as the baseline of the comparison.</param>
    /// <param name="changedXML">XML text that is compared against the baseline.</param>
    /// <returns>The HTML text written by <c>XmlDiffView.GetHtml</c>.</returns>
    public string GetChangeHtml(string originalXML, string changedXML)
    {
        XmlDiffView htmlView = new XmlDiffView();
        XDocument diffgramDocument = Diff(originalXML, changedXML);

        using (StringReader originalText = new StringReader(originalXML))
        using (StringReader diffgramSource = new StringReader(diffgramDocument.ToString()))
        using (XmlReader originalReader = XmlReader.Create(originalText))
        using (XmlReader diffgramReader = XmlReader.Create(diffgramSource))
        using (StringWriter htmlSink = new StringWriter())
        {
            // The view is loaded with the original document plus the diffgram
            // and then writes its HTML into the string writer.
            htmlView.Load(originalReader, diffgramReader);
            htmlView.GetHtml(htmlSink);
            return htmlSink.ToString();
        }
    }

}

With the following test:

/// <summary>
/// Feeds two differing XML documents to <c>XMLDiffer.GetChangeHtml</c> and
/// checks that some HTML is returned.
/// </summary>
[TestMethod]
public void XMLDiff_AreNotSame_GetChangeHtmlAll()
{
    // Arrange
    const string originalXml = "<root><child>some text</child><child>more text</child><child1>REMOVED</child1></root>";
    // The generic child nodes keep their order here, although in general it could change.
    const string changedXml = "<root><child>some text CHANGE</child><child>more text</child><child>ADDITION</child></root>";
    var xmlDiffer = new XMLDiffer();

    // Act
    string changeHtml = xmlDiffer.GetChangeHtml(originalXml, changedXml);

    // Assert
    Assert.IsNotNull(changeHtml);
}

This produces the following output (html and table elements added): https://pste.eu/p/Fm7Z.html

More info about library: http://msdn2.microsoft.com/en-us/library/aa302294.aspx

Nuget link for references: https://www.nuget.org/packages/XMLDiffPatch/

回答1:

I ended up implementing the following classes to get the changes:

/// <summary>
/// Equality comparer that treats two XML elements as equal when both their
/// name and their value match.
/// </summary>
/// <remarks>
/// Nodes that are not elements (including null) have no name/value pair to
/// compare. They are all considered equal to one another and never equal to an
/// element, so that <see cref="Equals(XNode, XNode)"/> is symmetric and agrees
/// with <see cref="GetHashCode(XNode)"/>, as hash-based operations such as
/// <c>Except</c> require.
/// </remarks>
public class XMLComparer : IEqualityComparer<XNode>
{
    /// <summary>
    /// Determines whether two nodes are equal by element name and value.
    /// </summary>
    /// <param name="e1">First node to compare; may be null or a non-element node.</param>
    /// <param name="e2">Second node to compare; may be null or a non-element node.</param>
    /// <returns>
    /// True when both nodes are elements with the same name and value, or when
    /// neither node is an element; otherwise false.
    /// </returns>
    public bool Equals(XNode e1, XNode e2)
    {
        var el1 = e1 as XElement;
        var el2 = e2 as XElement;

        if (el1 == null || el2 == null)
        {
            // A non-element can only equal another non-element. Answering the
            // same way regardless of argument order keeps the relation
            // symmetric and consistent with the hash code of 0 used below.
            return el1 == null && el2 == null;
        }

        return Tuple.Create(el1.Name, el1.Value).Equals(Tuple.Create(el2.Name, el2.Value));
    }

    /// <summary>
    /// Returns a hash code built from the element's name and value.
    /// </summary>
    /// <param name="n">Node to hash; may be null or a non-element node.</param>
    /// <returns>0 for anything that is not an element; otherwise the name/value hash.</returns>
    public int GetHashCode(XNode n)
    {
        var el = n as XElement;
        if (el == null)
        {
            return 0;
        }

        return Tuple.Create(el.Name, el.Value).GetHashCode();
    }

}

/// <summary>
/// One entry in the list of differences reported by
/// <c>XMLDifferenceComparer.GetDifferences</c>.
/// </summary>
public class XMLDifference
{
    /// <summary>
    /// The element this difference refers to.
    /// </summary>
    public XElement Node { get; set; }

    /// <summary>
    /// Set to true by <c>XMLDifferenceComparer</c> for elements found only in the
    /// changed document, and to false for elements found only in the original.
    /// </summary>
    public bool IsNew { get; set; }
}

/// <summary>
/// Lists the elements that differ between two XML documents, using
/// <c>XMLComparer</c> to decide whether two elements match.
/// </summary>
public class XMLDifferenceComparer
{
    /// <summary>
    /// Parses both XML strings and collects the descendant elements of each
    /// root that have no match among the other root's descendants.
    /// </summary>
    /// <param name="original">XML text of the original version.</param>
    /// <param name="changed">XML text of the changed version.</param>
    /// <returns>
    /// Entries for elements found only in the changed document (IsNew = true),
    /// followed by entries for elements found only in the original (IsNew = false).
    /// </returns>
    public List<XMLDifference> GetDifferences(string original, string changed)
    {
        XDocument before = XDocument.Parse(original);
        XDocument after = XDocument.Parse(changed);
        var matcher = new XMLComparer();

        var result = new List<XMLDifference>();

        // Elements present in the new version that have no match in the old one.
        var onlyInChanged = after.Root.Descendants().Except(before.Root.Descendants(), matcher);
        result.AddRange(ToDifferences(onlyInChanged, true));

        // Elements of the old version that have no match in the new one.
        var onlyInOriginal = before.Root.Descendants().Except(after.Root.Descendants(), matcher);
        result.AddRange(ToDifferences(onlyInOriginal, false));

        return result;
    }

    /// <summary>
    /// Wraps each node in an <c>XMLDifference</c> carrying the given flag.
    /// </summary>
    private static List<XMLDifference> ToDifferences(IEnumerable<XNode> nodes, bool isNew)
    {
        // "as" leaves Node null for anything that is not an element.
        return nodes
            .Select(node => new XMLDifference { IsNew = isNew, Node = node as XElement })
            .ToList();
    }
}

This recognizes that something changed, but it is not element-specific: because the elements have no unique identifiers, it cannot tell exactly which element was changed or how.

The main idea for this solution came from here: https://gist.github.com/krcourville/6933451