I am trying to convert an XML file to a CSV file dynamically using Java code. I am able to get the data converted to CSV, but the problem is that my data contains double quotes (") and commas (,).
Here is my sample XML:
<record>
<column name="ID">537316</column>
<column name="TYPE">MANUAL</column>
<column name="SECONDID">546</column>
<column name="INFO">"THIS","IS",FOR,"TEST"</column>
<column name="KEY">345</column>
</record>
Here is the Java code:
import java.io.File;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.w3c.dom.Document;
/**
 * Converts an XML document to CSV by running it through an XSLT stylesheet.
 *
 * <p>Usage: {@code java xmltocsv [stylesheet [xmlInput [csvOutput]]]}. Every argument is
 * optional; an omitted argument falls back to the corresponding default path below.
 */
class xmltocsv {
    /** Stylesheet used when no first argument is given. */
    private static final String DEFAULT_STYLESHEET = "C:/testxsl.xsl";
    /** XML input used when no second argument is given. */
    private static final String DEFAULT_XML_SOURCE = "C:/test.xml";
    /** CSV output used when no third argument is given. */
    private static final String DEFAULT_CSV_OUTPUT = "c:/output.csv";

    /**
     * Parses the XML input into a DOM, applies the stylesheet and writes the result file.
     *
     * @param args optional {@code stylesheet}, {@code xmlInput} and {@code csvOutput} paths
     * @throws Exception if parsing, stylesheet compilation or the transformation fails
     */
    public static void main(String args[]) throws Exception {
        File stylesheet = new File(argOrDefault(args, 0, DEFAULT_STYLESHEET));
        File xmlSource = new File(argOrDefault(args, 1, DEFAULT_XML_SOURCE));
        File csvOutput = new File(argOrDefault(args, 2, DEFAULT_CSV_OUTPUT));

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // XSLT requires namespace support; a DOM built by a non-namespace-aware parser
        // may make the transformation fail, so turn it on before parsing.
        factory.setNamespaceAware(true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document document = builder.parse(xmlSource);

        StreamSource stylesource = new StreamSource(stylesheet);
        Transformer transformer = TransformerFactory.newInstance()
                .newTransformer(stylesource);

        Source source = new DOMSource(document);
        Result outputTarget = new StreamResult(csvOutput);
        transformer.transform(source, outputTarget);
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        return (args != null && index < args.length) ? args[index] : fallback;
    }
}
Here is my XSL file:
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- Plain-text (CSV) output; whitespace-only text nodes of the input are dropped. -->
<xsl:output method="text" encoding="iso-8859-1"/>
<!-- Declared but not referenced anywhere in this stylesheet. -->
<xsl:param name="fieldNames" select="'yes'" />
<xsl:strip-space elements="*" />

<!-- Root element: write the header row from the @name attributes of the first
     record's children, then process every record. -->
<xsl:template match="/*">
  <xsl:for-each select="*[1]/*">
    <xsl:text>"</xsl:text>
    <xsl:value-of select="@name"/>
    <xsl:text>"</xsl:text>
    <xsl:choose>
      <xsl:when test="position() != last()"><xsl:text>,</xsl:text></xsl:when>
      <xsl:otherwise><xsl:text>&#10;</xsl:text></xsl:otherwise>
    </xsl:choose>
  </xsl:for-each>
  <xsl:apply-templates/>
</xsl:template>

<!-- One record: write each child value wrapped in double quotes, separated by
     commas and terminated by a line break. Embedded double quotes are written
     as-is (they are NOT doubled here). -->
<xsl:template match="/*/child::*">
  <xsl:for-each select="child::*">
    <xsl:text>"</xsl:text>
    <!-- The attribute name must be lower-case "select": XSLT attribute names are case-sensitive. -->
    <xsl:value-of select="normalize-space(.)"/>
    <xsl:text>"</xsl:text>
    <xsl:choose>
      <xsl:when test="position() != last()"><xsl:text>,</xsl:text></xsl:when>
      <xsl:otherwise><xsl:text>&#10;</xsl:text></xsl:otherwise>
    </xsl:choose>
  </xsl:for-each>
</xsl:template>
</xsl:stylesheet>
The sample output should be:
ID,TYPE,SECONDID,INFO,KEY
"537316","MANUAL","546","THIS"",""IS"",FOR,""TEST""","345"
But the output I am getting is:
ID,TYPE,SECONDID,INFO,KEY\n
"537316","MANUAL","546",""THIS","IS",FOR,"TEST"","345"
The XML I am using comes from a database and contains the special character ("), which causes an unexpected result in my output CSV (as seen when I open it in MS Excel).
I need to check the data for quotes and, if there are any, add extra quotes to get the desired output.
Could someone please help me with the condition I can use in my XSL to check the string for quote characters (") in the data?
The following stylesheet:
XSLT 1.0
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" >
<xsl:output method="text" encoding="utf-8"/>

<!-- Writes a fixed header line followed by the columns of the root <record>,
     each wrapped in double quotes with embedded double quotes doubled. -->
<xsl:template match="/">
  <!-- The header must end with a line feed so the data starts on its own line. -->
  <xsl:text>ID,TYPE,SECONDID,INFO,KEY&#10;</xsl:text>
  <xsl:for-each select="record/column">
    <xsl:text>"</xsl:text>
    <xsl:call-template name="substitute">
      <xsl:with-param name="text" select="."/>
    </xsl:call-template>
    <xsl:text>"</xsl:text>
    <xsl:if test="position()!=last()">
      <xsl:text>,</xsl:text>
    </xsl:if>
  </xsl:for-each>
</xsl:template>

<!-- Recursive string replace (XSLT 1.0 has no replace() function): outputs $text
     with every occurrence of $searchString replaced by $replaceString.
     By default a double quote is replaced by two double quotes. -->
<xsl:template name="substitute">
  <xsl:param name="text"/>
  <xsl:param name="searchString">"</xsl:param>
  <xsl:param name="replaceString">""</xsl:param>
  <xsl:choose>
    <xsl:when test="contains($text,$searchString)">
      <!-- Emit everything up to the first match, then the replacement, then recurse on the rest. -->
      <xsl:value-of select="substring-before($text,$searchString)"/>
      <xsl:value-of select="$replaceString"/>
      <xsl:call-template name="substitute">
        <xsl:with-param name="text" select="substring-after($text,$searchString)"/>
        <xsl:with-param name="searchString" select="$searchString"/>
        <xsl:with-param name="replaceString" select="$replaceString"/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$text"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
</xsl:stylesheet>
when applied to your example input, will produce the following output:
ID,TYPE,SECONDID,INFO,KEY
"537316","MANUAL","546","""THIS"",""IS"",FOR,""TEST""","345"
which I believe is a correct representation of your input data in CSV format.
/**
 * Prints the {@code <record>} elements of an XML file to standard output as CSV.
 *
 * <p>The first line holds the {@code name} attribute of every child element of the first
 * record. Each following line holds one record, with every value wrapped in double quotes
 * and embedded double quotes doubled.
 */
class XmlToCsv {
    /** Input file read when no command-line argument is given. */
    private static final String DEFAULT_XML_SOURCE = "test.xml";

    /**
     * Prints the {@code name} attribute of each child element of {@code record},
     * comma-separated, followed by a line break.
     */
    private static void emitHeaders( Node record ){
        NodeList fields = record.getChildNodes();
        String del = "";
        for( int iField = 0; iField < fields.getLength(); iField++ ){
            Node node = fields.item( iField );
            // Skip whitespace text nodes, comments, etc. between the column elements.
            if( ! ( node instanceof Element ) ) continue;
            System.out.print( del );
            System.out.print( ((Element)node).getAttribute("name") );
            del = ",";
        }
        System.out.println();
    }

    /**
     * Prints the text content of each child element of {@code record} as a quoted,
     * comma-separated CSV line.
     */
    private static void emitData( Node record ){
        NodeList fields = record.getChildNodes();
        String del = "";
        for( int iField = 0; iField < fields.getLength(); iField++ ){
            Node node = fields.item( iField );
            if( ! ( node instanceof Element ) ) continue;
            System.out.print( del );
            String cont = node.getTextContent();
            // Literal replacement (no regex needed): a quote inside a quoted field is doubled.
            cont = cont.replace( "\"", "\"\"" );
            System.out.print( '"' + cont + '"' );
            del = ",";
        }
        System.out.println();
    }

    /**
     * Parses the XML file and prints it as CSV.
     *
     * @param args optional: {@code args[0]} is the XML file to read (default {@code test.xml})
     * @throws Exception if the file cannot be read or parsed
     */
    public static void main(String args[]) throws Exception {
        String path = ( args != null && args.length > 0 ) ? args[0] : DEFAULT_XML_SOURCE;
        File xmlSource = new File( path );
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document document = builder.parse(xmlSource);
        Element table = document.getDocumentElement();
        NodeList records = table.getElementsByTagName("record");
        if( records.getLength() == 0 ){
            // getElementsByTagName only returns descendants, so a document whose root
            // is itself the <record> yields no matches; treat the root as the only record.
            if( "record".equals( table.getTagName() ) ){
                emitHeaders( table );
                emitData( table );
            }
            // Nothing to print otherwise (previously this path threw a NullPointerException).
            return;
        }
        emitHeaders( records.item( 0 ) );
        for( int iRec = 0; iRec < records.getLength(); iRec++ ){
            emitData( records.item( iRec ) );
        }
    }
}
It would be even simpler using JAXB.
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- Plain-text (CSV) output; whitespace-only text nodes of the input are dropped. -->
<xsl:output method="text" encoding="iso-8859-1"/>
<!-- Declared but not referenced anywhere in this stylesheet. -->
<xsl:param name="fieldNames" select="'yes'" />
<xsl:strip-space elements="*" />

<!-- Root element: write the header row from the @name attributes of the first
     record's children, then process every record. -->
<xsl:template match="/*">
  <xsl:for-each select="*[1]/*">
    <xsl:text>"</xsl:text>
    <xsl:value-of select="@name"/>
    <xsl:text>"</xsl:text>
    <xsl:choose>
      <xsl:when test="position() != last()"><xsl:text>,</xsl:text></xsl:when>
      <xsl:otherwise><xsl:text>&#10;</xsl:text></xsl:otherwise>
    </xsl:choose>
  </xsl:for-each>
  <xsl:apply-templates/>
</xsl:template>

<!-- One record: every column, including the last one, is whitespace-normalized,
     has its embedded double quotes doubled and is wrapped in double quotes.
     Columns are separated by commas; the row ends with a line feed. -->
<xsl:template match="/*/child::*">
  <xsl:for-each select="child::*">
    <xsl:text>"</xsl:text>
    <xsl:call-template name="substitute">
      <xsl:with-param name="text" select="normalize-space(.)"/>
    </xsl:call-template>
    <xsl:text>"</xsl:text>
    <xsl:choose>
      <xsl:when test="position() != last()"><xsl:text>,</xsl:text></xsl:when>
      <xsl:otherwise><xsl:text>&#10;</xsl:text></xsl:otherwise>
    </xsl:choose>
  </xsl:for-each>
</xsl:template>

<!-- Recursive string replace (XSLT 1.0 has no replace() function): outputs $text
     with every occurrence of $searchString replaced by $replaceString.
     By default a double quote is replaced by two double quotes. -->
<xsl:template name="substitute">
  <xsl:param name="text"/>
  <xsl:param name="searchString">"</xsl:param>
  <xsl:param name="replaceString">""</xsl:param>
  <xsl:choose>
    <xsl:when test="contains($text,$searchString)">
      <!-- Emit everything up to the first match, then the replacement, then recurse on the rest. -->
      <xsl:value-of select="substring-before($text,$searchString)"/>
      <xsl:value-of select="$replaceString"/>
      <xsl:call-template name="substitute">
        <xsl:with-param name="text" select="substring-after($text,$searchString)"/>
        <xsl:with-param name="searchString" select="$searchString"/>
        <xsl:with-param name="replaceString" select="$replaceString"/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$text"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
</xsl:stylesheet>