My question is: how do I un-escape XML that has already been escaped?
I tried the code provided by Tomalak in response to How to unescape XML characters with help of XSLT?, but I can't get that to do what I want.
I have a SOAP message in XML. The body contains a few elements, one of which is a string. This string
contains escaped XML. This is often done in RPC-style SOAP messages because they don't allow complex
types. To get around this, they embed escaped XML inside a string element; see sXmlParameters in the input below.
Example Input:
<soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope" xmlns:pan="http://wsdl.somebody.com/Stuff/">
<soap:Header />
<soap:Body>
<pan:SomeCommand>
<first>eefbb52a0fee443cbda838caffbc2654</first>
<second>f26eb2f5dabc457ca045e64585f7b185</second>
<sXmlParameters>&lt;PARAMETERS&gt;&lt;TIMEOUTDATETIME&gt;2011-03-15
2:09:48.997&lt;/TIMEOUTDATETIME&gt;&lt;/PARAMETERS&gt;</sXmlParameters>
</pan:SomeCommand>
</soap:Body>
</soap:Envelope>
I also see this data escaped with <![CDATA[>]]>, and I need to un-escape that as well.
Converted Output:
<soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope" xmlns:pan="http://wsdl.somebody.com/Stuff/">
<soap:Header />
<soap:Body>
<pan:SomeCommand>
<first>eefbb52a0fee443cbda838caffbc2654</first>
<second>f26eb2f5dabc457ca045e64585f7b185</second>
<sXmlParameters>
<PARAMETERS>
<TIMEOUTDATETIME>2011-03-15 2:09:48.997</TIMEOUTDATETIME>
</PARAMETERS>
</sXmlParameters>
</pan:SomeCommand>
</soap:Body>
</soap:Envelope>
This will already take care of half of your problem – not the CDATA part:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- Identity rule: shallow-copies the current attribute or node and then
     processes its attributes and children the same way. -->
<xsl:template match="node()|@*">
  <xsl:copy>
    <xsl:apply-templates select="node()|@*"/>
  </xsl:copy>
</xsl:template>
<!-- sXmlParameters: shallow-copies the element and replaces its content with
     the nodes that the "unescape" template builds from its string value. -->
<xsl:template match="//sXmlParameters">
  <xsl:variable name="payload" select="string(.)"/>
  <xsl:copy>
    <xsl:call-template name="unescape">
      <xsl:with-param name="escaped" select="$payload"/>
    </xsl:call-template>
  </xsl:copy>
</xsl:template>
<!--
  unescape: turns a string that holds XML markup into result-tree nodes.

  Parameter:
    escaped  the markup as a plain string (for example the string value of an
             element whose content was entity-escaped in the source document).

  Output: any text in front of the first tag, then one element for the first
  tag found (with its attributes and recursively un-escaped content), then the
  recursively un-escaped remainder. A string without any tag is emitted as
  text via "unescapetext".

  Known limits of this string-based approach: the content of an element is cut
  at the LAST occurrence of its closing tag, so repeated siblings with the same
  name are not separated correctly; comments, processing instructions and
  CDATA sections are not recognised.
-->
<xsl:template name="unescape">
  <xsl:param name="escaped"/>
  <xsl:choose>
    <xsl:when test="contains($escaped,'&lt;')">
      <xsl:variable name="beforeelem" select="substring-before($escaped,'&lt;')"/>
      <!-- Everything that follows the opening angle bracket of the first tag. -->
      <xsl:variable name="aftertagopen" select="substring-after($escaped,'&lt;')"/>
      <!-- Three candidate names, cut at the first space, the first tag end and
           the first self-closing marker respectively. -->
      <xsl:variable name="elemname1" select="substring-before($aftertagopen,' ')"/>
      <xsl:variable name="elemname2" select="substring-before($aftertagopen,'&gt;')"/>
      <xsl:variable name="elemname3" select="substring-before($aftertagopen,'/&gt;')"/>
      <!-- True when a space occurs inside the start tag, i.e. before both the
           tag end and the self-closing marker. -->
      <xsl:variable name="hasattributes" select="string-length($elemname1) &gt; 0 and ((string-length($elemname2)=0 or string-length($elemname1) &lt; string-length($elemname2)) and (string-length($elemname3)=0 or string-length($elemname1) &lt; string-length($elemname3)))"/>
      <!-- True when the self-closing marker comes before any other tag end. -->
      <xsl:variable name="elemclosed" select="string-length($elemname3) &gt; 0 and (string-length($elemname2)=0 or string-length($elemname3) &lt; string-length($elemname2))"/>
      <xsl:variable name="elemname">
        <xsl:choose>
          <xsl:when test="$hasattributes">
            <xsl:value-of select="$elemname1"/>
          </xsl:when>
          <xsl:when test="not($elemclosed)">
            <xsl:value-of select="$elemname2"/>
          </xsl:when>
          <xsl:otherwise>
            <xsl:value-of select="$elemname3"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:variable>
      <xsl:variable name="elemclosetag" select="concat('&lt;/',$elemname,'&gt;')"/>
      <!-- Markup between the start tag and the last matching closing tag. -->
      <xsl:variable name="innercontent">
        <xsl:if test="not($elemclosed)">
          <xsl:call-template name="skipper-before">
            <xsl:with-param name="source" select="substring-after($aftertagopen,'&gt;')"/>
            <xsl:with-param name="delimiter" select="$elemclosetag"/>
          </xsl:call-template>
        </xsl:if>
      </xsl:variable>
      <!-- Markup that follows this element. -->
      <xsl:variable name="afterelem">
        <xsl:choose>
          <xsl:when test="not($elemclosed)">
            <xsl:call-template name="skipper-after">
              <xsl:with-param name="source" select="substring-after($aftertagopen,'&gt;')"/>
              <xsl:with-param name="delimiter" select="$elemclosetag"/>
            </xsl:call-template>
          </xsl:when>
          <xsl:otherwise>
            <xsl:value-of select="substring-after($aftertagopen,'/&gt;')"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:variable>
      <!-- Text in front of the tag is part of the content too; emit it so that
           it is kept just like text that follows the last element. -->
      <xsl:if test="$beforeelem != ''">
        <xsl:call-template name="unescapetext">
          <xsl:with-param name="escapedtext" select="$beforeelem"/>
        </xsl:call-template>
      </xsl:if>
      <xsl:element name="{$elemname}">
        <xsl:if test="$hasattributes">
          <xsl:call-template name="unescapeattributes">
            <xsl:with-param name="escapedattributes">
              <xsl:choose>
                <xsl:when test="not($elemclosed)">
                  <xsl:value-of select="normalize-space(substring-after($elemname2,' '))"/>
                </xsl:when>
                <xsl:otherwise>
                  <xsl:value-of select="normalize-space(substring-after($elemname3,' '))"/>
                </xsl:otherwise>
              </xsl:choose>
            </xsl:with-param>
          </xsl:call-template>
        </xsl:if>
        <xsl:call-template name="unescape">
          <xsl:with-param name="escaped" select="$innercontent"/>
        </xsl:call-template>
      </xsl:element>
      <xsl:call-template name="unescape">
        <xsl:with-param name="escaped" select="$afterelem"/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <xsl:call-template name="unescapetext">
        <xsl:with-param name="escapedtext" select="$escaped"/>
      </xsl:call-template>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!--
  unescapeattributes: creates one attribute per name="value" (or name='value')
  pair found in the given string, on the element currently being built.

  Parameter:
    escapedattributes  the attribute part of a start tag as a string.

  A string without "=" yields nothing, so a start tag that merely contains
  whitespace (such as a self-closing tag written with a space before the
  slash) no longer leads to an attribute with an empty name. Spaces around
  "=" are tolerated. Attribute values go through "unescapetext".
-->
<xsl:template name="unescapeattributes">
  <xsl:param name="escapedattributes"/>
  <xsl:if test="contains($escapedattributes,'=')">
    <xsl:variable name="attrname" select="normalize-space(substring-before($escapedattributes,'='))"/>
    <xsl:variable name="aftereq" select="substring-after($escapedattributes,'=')"/>
    <!-- The quote character is the first non-space character after "=". -->
    <xsl:variable name="attrquote" select="substring(normalize-space($aftereq),1,1)"/>
    <xsl:variable name="afteropenquote" select="substring-after($aftereq,$attrquote)"/>
    <xsl:variable name="attrvalue" select="substring-before($afteropenquote,$attrquote)"/>
    <xsl:variable name="afterattr" select="substring-after($afteropenquote,$attrquote)"/>
    <xsl:attribute name="{$attrname}">
      <xsl:call-template name="unescapetext">
        <xsl:with-param name="escapedtext" select="$attrvalue"/>
      </xsl:call-template>
    </xsl:attribute>
    <!-- The guard at the top ends the recursion once no "=" is left. -->
    <xsl:call-template name="unescapeattributes">
      <xsl:with-param name="escapedattributes" select="normalize-space($afterattr)"/>
    </xsl:call-template>
  </xsl:if>
</xsl:template>
<!--
  unescapetext: emits the given string with the five predefined entity
  references that are still present as literal text replaced by the
  characters they stand for.

  Parameter:
    escapedtext  text that may still contain entity references as characters.

  The ampersand reference is replaced last (outermost call) so that freshly
  produced ampersands are not interpreted a second time.
-->
<xsl:template name="unescapetext">
  <xsl:param name="escapedtext"/>
  <xsl:call-template name="string-replace-all">
    <xsl:with-param name="text">
      <xsl:call-template name="string-replace-all">
        <xsl:with-param name="text">
          <xsl:call-template name="string-replace-all">
            <xsl:with-param name="text">
              <xsl:call-template name="string-replace-all">
                <xsl:with-param name="text">
                  <xsl:call-template name="string-replace-all">
                    <xsl:with-param name="text" select="$escapedtext"/>
                    <xsl:with-param name="replace">&amp;gt;</xsl:with-param>
                    <xsl:with-param name="by">&gt;</xsl:with-param>
                  </xsl:call-template>
                </xsl:with-param>
                <xsl:with-param name="replace">&amp;lt;</xsl:with-param>
                <xsl:with-param name="by">&lt;</xsl:with-param>
              </xsl:call-template>
            </xsl:with-param>
            <xsl:with-param name="replace">&amp;quot;</xsl:with-param>
            <xsl:with-param name="by">&quot;</xsl:with-param>
          </xsl:call-template>
        </xsl:with-param>
        <xsl:with-param name="replace">&amp;apos;</xsl:with-param>
        <xsl:with-param name="by">&apos;</xsl:with-param>
      </xsl:call-template>
    </xsl:with-param>
    <xsl:with-param name="replace">&amp;amp;</xsl:with-param>
    <xsl:with-param name="by">&amp;</xsl:with-param>
  </xsl:call-template>
</xsl:template>
<!--
  string-replace-all: emits $text with every occurrence of $replace
  substituted by $by.

  Parameters:
    text     the string to work on
    replace  the substring to look for
    by       the replacement

  An empty $replace leaves $text unchanged; without that guard the recursion
  would never end, because every string contains the empty string.
-->
<xsl:template name="string-replace-all">
  <xsl:param name="text"/>
  <xsl:param name="replace"/>
  <xsl:param name="by"/>
  <xsl:choose>
    <xsl:when test="string($replace) != '' and contains($text, $replace)">
      <xsl:value-of select="substring-before($text,$replace)"/>
      <xsl:value-of select="$by"/>
      <xsl:call-template name="string-replace-all">
        <xsl:with-param name="text" select="substring-after($text,$replace)"/>
        <xsl:with-param name="replace" select="$replace"/>
        <xsl:with-param name="by" select="$by"/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$text"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!--
  skipper-after: emits the part of $source that follows the last occurrence
  of $delimiter, or all of $source when the delimiter does not occur.
-->
<xsl:template name="skipper-after">
  <xsl:param name="source"/>
  <xsl:param name="delimiter"/>
  <xsl:choose>
    <!-- No delimiter left: what remains is the wanted tail. -->
    <xsl:when test="not(contains($source,$delimiter))">
      <xsl:value-of select="$source"/>
    </xsl:when>
    <!-- Otherwise drop everything up to the first delimiter and look again. -->
    <xsl:otherwise>
      <xsl:variable name="tail" select="substring-after($source,$delimiter)"/>
      <xsl:call-template name="skipper-after">
        <xsl:with-param name="source" select="$tail"/>
        <xsl:with-param name="delimiter" select="$delimiter"/>
      </xsl:call-template>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!--
  skipper-before: emits the part of $source that precedes the last occurrence
  of $delimiter, or $result (empty by default) when the delimiter does not
  occur.

  Parameters:
    source     the string to search
    delimiter  the separator to look for
    result     accumulator holding what has been collected so far
    started    internal flag, true once at least one segment was collected;
               callers normally omit it

  The accumulated text is re-joined with the delimiter on every step after
  the first. The flag (rather than only a non-empty accumulator) keeps the
  delimiter even when the first segment is empty.
-->
<xsl:template name="skipper-before">
  <xsl:param name="source"/>
  <xsl:param name="delimiter"/>
  <xsl:param name="result"/>
  <xsl:param name="started" select="false()"/>
  <xsl:choose>
    <xsl:when test="contains($source,$delimiter)">
      <xsl:call-template name="skipper-before">
        <xsl:with-param name="source" select="substring-after($source,$delimiter)"/>
        <xsl:with-param name="delimiter" select="$delimiter"/>
        <xsl:with-param name="result">
          <!-- $result is the parameter; a bare "result" would be read as a
               child element name and never be true. -->
          <xsl:if test="$started or string($result) != ''">
            <xsl:value-of select="concat($result,$delimiter)"/>
          </xsl:if>
          <xsl:value-of select="substring-before($source,$delimiter)"/>
        </xsl:with-param>
        <xsl:with-param name="started" select="true()"/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$result"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
</xsl:stylesheet>
I found that I can use saxon to do this in a much simpler way using the following:
<!-- Re-creates sXmlParameters with its escaped string content parsed into
     real nodes by the Saxon extension function saxon:parse().
     The rule matches sXmlParameters itself: its string value is the escaped
     document, whereas the string value of the surrounding command element
     also includes the sibling values and is not parseable XML.
     NOTE(review): the "saxon" prefix must be declared on the enclosing
     stylesheet (presumably bound to http://saxon.sf.net/), and an identity
     rule is needed for the parsed nodes to be copied; confirm both. -->
<xsl:template match="sXmlParameters">
  <sXmlParameters>
    <xsl:apply-templates select="saxon:parse(.)"/>
  </sXmlParameters>
</xsl:template>
There is also saxon:serialize(), which can be used to escape the XML.
Thanks to all for your input.
Wrote a SAX parser for xml-escaped strings in pure xsl 1.0+EXSLT
<xsl:stylesheet
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:pxml="https://github.com/ilyakharlamov/pure-xsl/parseStringAsXML"
    version="1.0">
  <xsl:import href="https://raw.githubusercontent.com/ilyakharlamov/pure-xsl/master/parseStringAsXML.xsl"/>
  <!-- Passes an XML document as an escaped string to the imported
       pxml:parseStringAsXML template. The markup is entity-escaped here so
       that the parameter is a string containing angle brackets rather than
       a fragment of literal result elements. -->
  <xsl:template match="/">
    <xsl:call-template name="pxml:parseStringAsXML">
      <xsl:with-param name="string">&lt;PARAMETERS&gt;&lt;TIMEOUTDATETIME&gt;2011-03-152:09:48.997&lt;/TIMEOUTDATETIME&gt;&lt;/PARAMETERS&gt;</xsl:with-param>
    </xsl:call-template>
  </xsl:template>
</xsl:stylesheet>
Output:
<PARAMETERS>
<TIMEOUTDATETIME>2011-03-152:09:48.997</TIMEOUTDATETIME>
</PARAMETERS>