I need some assistance converting an xml document to a CSV file using an xslt stylesheet. I am trying to use the following xsl and I can't seem to get it right. I want my comma delimited file to include column headings, followed by the data. My biggest issues are removing the final comma after the last item and inserting a carriage return so each group of data appears on a separate line. I have been using XML Notepad.
<xsl:template match="/">
<xsl:element name="table">
<xsl:apply-templates select="/*/*[1]" mode="header" />
<xsl:apply-templates select="/*/*" mode="row" />
</xsl:element>
</xsl:template>
<xsl:template match="*" mode="header">
<xsl:element name="tr">
<xsl:apply-templates select="./*" mode="column" />
</xsl:element>
</xsl:template>
<xsl:template match="*" mode="row">
<xsl:element name="tr">
<xsl:apply-templates select="./*" mode="node" />
</xsl:element>
</xsl:template>
<xsl:template match="*" mode="column">
<xsl:element name="th">
<xsl:value-of select="translate(name(.),'qwertyuiopasdfghjklzxcvbnm_','QWERTYUIOPASDFGHJKLZXCVBNM ')" />
</xsl:element>,
</xsl:template>
<xsl:template match="*" mode="node">
<xsl:element name="td">
<xsl:value-of select="." />
</xsl:element>,
</xsl:template>
I use this to simple XSLT to convert XML to CSV; it assumes all child nodes of the root node are to be rows in the CSV, taking the element names of the first child of the root to be field names.
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text"/>
<xsl:template match="/">
<xsl:for-each select="*/*[1]/*">
<xsl:value-of select="name()" />
<xsl:if test="not(position() = last())">,</xsl:if>
</xsl:for-each>
<xsl:text> </xsl:text>
<xsl:apply-templates select="*/*" mode="row"/>
</xsl:template>
<xsl:template match="*" mode="row">
<xsl:apply-templates select="*" mode="data" />
<xsl:text> </xsl:text>
</xsl:template>
<xsl:template match="*" mode="data">
<xsl:choose>
<xsl:when test="contains(text(),',')">
<xsl:text>"</xsl:text>
<xsl:call-template name="doublequotes">
<xsl:with-param name="text" select="text()" />
</xsl:call-template>
<xsl:text>"</xsl:text>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="." />
</xsl:otherwise>
</xsl:choose>
<xsl:if test="position() != last()">,</xsl:if>
</xsl:template>
<xsl:template name="doublequotes">
<xsl:param name="text" />
<xsl:choose>
<xsl:when test="contains($text,'"')">
<xsl:value-of select="concat(substring-before($text,'"'),'""')" />
<xsl:call-template name="doublequotes">
<xsl:with-param name="text" select="substring-after($text,'"')" />
</xsl:call-template>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="$text" />
</xsl:otherwise>
</xsl:choose>
</xsl:template>
</xsl:stylesheet>
So this XML:
<csv>
<row>
<field1>foo</field1>
<field2>ba"r</field2>
</row>
<row>
<field1>foo,2</field1>
<field2>bar,"2</field2>
</row>
</csv>
Converts to:
field1,field2
foo,ba"r
"foo,2","bar,""2"
Not sure if this helps though, it depends how your XML is laid out.
Edit: Here's a more thorough transform:
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:key name="field" match="/*/*/*" use="name()" />
<xsl:output method="text"/>
<xsl:template match="/">
<xsl:for-each select="*/*/*[generate-id() = generate-id(key('field',name())[1])]">
<xsl:value-of select="name()" />
<xsl:if test="position() != last()">,</xsl:if>
</xsl:for-each>
<xsl:text> </xsl:text>
<xsl:apply-templates select="*/*" mode="row"/>
</xsl:template>
<xsl:template match="*" mode="row">
<xsl:variable name="row" select="*" />
<xsl:for-each select="/*/*/*[generate-id() = generate-id(key('field',name())[1])]">
<xsl:variable name="name" select="name()" />
<xsl:apply-templates select="$row[name()=$name]" mode="data" />
<xsl:if test="position() != last()">,</xsl:if>
</xsl:for-each>
<xsl:text> </xsl:text>
</xsl:template>
<xsl:template match="*" mode="data">
<xsl:choose>
<xsl:when test="contains(text(),',')">
<xsl:text>"</xsl:text>
<xsl:call-template name="doublequotes">
<xsl:with-param name="text" select="text()" />
</xsl:call-template>
<xsl:text>"</xsl:text>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="." />
</xsl:otherwise>
</xsl:choose>
<xsl:if test="position() != last()">,</xsl:if>
</xsl:template>
<xsl:template name="doublequotes">
<xsl:param name="text" />
<xsl:choose>
<xsl:when test="contains($text,'"')">
<xsl:value-of select="concat(substring-before($text,'"'),'""')" />
<xsl:call-template name="doublequotes">
<xsl:with-param name="text" select="substring-after($text,'"')" />
</xsl:call-template>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="$text" />
</xsl:otherwise>
</xsl:choose>
</xsl:template>
</xsl:stylesheet>
This one will create a column in your CSV for all tag names that exist in all 'rows', and populate the appropriate column in each row.
I was using the "more thorough transform" of the xslt from @flynn1179 's answer.. but found that it was taking an extraordinary amount of time when the number of rows increased. (this is using oracle dbms_xmlgen xslt transform )
I found that because I know what my 2 levels of data are called (ROWSET and ROW in this case) i was able to optimize the xslt and gain a significant performance increase.
My modified version (specifically aimed at oracle's dbms_xmlgen package) is below. it also includes bits from other csv transforms I found around the internet.
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text" encoding="utf-8"/>
<xsl:strip-space elements="*"/>
<!-- some variables for unprintable charcaters -->
<xsl:variable name="CRLF">
<xsl:text> </xsl:text>
</xsl:variable>
<xsl:variable name="CR">
<xsl:text> </xsl:text>
</xsl:variable>
<xsl:variable name="LF">
<xsl:text> </xsl:text>
</xsl:variable>
<xsl:variable name="apos">'</xsl:variable>
<xsl:template match="/ROWSET">
<xsl:for-each select="ROW[1]/*">
<xsl:value-of select="local-name()" />
<xsl:if test="position() != last()">,</xsl:if>
</xsl:for-each>
<xsl:value-of select="$LF"/>
<xsl:apply-templates />
</xsl:template>
<xsl:template match="ROW">
<xsl:apply-templates />
<xsl:value-of select="$LF"/>
</xsl:template>
<xsl:template match="ROW/*">
<xsl:choose>
<xsl:when test="contains( text(), ',' ) or
contains( text(), $apos ) or
contains( text(), $CRLF ) or
contains( text(), $CR ) or
contains( text(), $LF )">
<!-- Field contains a comma, apostrophe and/or a linefeed, so quote -->
<xsl:text>"</xsl:text>
<xsl:call-template name="doublequotes">
<xsl:with-param name="text" select="text()" />
</xsl:call-template>
<xsl:text>"</xsl:text>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="." />
</xsl:otherwise>
</xsl:choose>
<xsl:if test="position() != last()">,</xsl:if>
</xsl:template>
<xsl:template name="doublequotes">
<xsl:param name="text" />
<xsl:choose>
<xsl:when test="contains($text,'"')">
<!-- recursive call -->
<xsl:value-of select="concat(substring-before($text,'"'),'""')" />
<xsl:call-template name="doublequotes">
<xsl:with-param name="text" select="substring-after($text,'"')" />
</xsl:call-template>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="$text" />
</xsl:otherwise>
</xsl:choose>
</xsl:template>
</xsl:stylesheet>