Before I get jumped on by people saying that an XML parser shouldn’t care whether elements are empty or self-closed, there is a reason why I can’t allow self-closed XML elements: I’m actually working with SGML, not XML, and the SGML DTD I’m working with is very strict and doesn’t allow them.
What I have is several thousand SGML files which I’ve needed to run XSLT on. I’ve therefore had to convert the SGML to XML temporarily in order to apply the XSLT. I’ve then written a method that converts them back to SGML (essentially just replacing the XML declaration with the SGML declaration and writing back any other entity declarations such as graphic entities).
My problem is that after this conversion back to SGML, when I open the files in my SGML editor, the files don’t parse as the empty elements have been self-closed.
Does anybody know how I can stop this from happening when using XmlDocument, please?
The methods that convert the SGML to XML and back again are shown below:
// Converts the SGML file to XML. I think it is during this conversion that the
// empty elements get flagged so that they are later written out self-closed.
/// <summary>
/// Reads SGML text from <paramref name="reader"/> through an
/// <c>Sgml.SgmlReader</c> and loads the result into a new <see cref="XmlDocument"/>.
/// </summary>
/// <param name="reader">Source of the SGML text. It is not closed by this method.</param>
/// <returns>The loaded document, created with whitespace preservation switched on.</returns>
private XmlDocument convertToXML(TextReader reader)
{
    // DocType is deliberately left unset (the "HTML" setting stays disabled).
    // CaseFolding.ToLower presumably lower-cases names - confirm against SgmlReader docs.
    var sgmlSource = new Sgml.SgmlReader
    {
        WhitespaceHandling = WhitespaceHandling.All,
        CaseFolding = Sgml.CaseFolding.ToLower,
        InputStream = reader
    };

    // A null resolver means the document is given no XmlResolver of its own.
    var xml = new XmlDocument
    {
        PreserveWhitespace = true,
        XmlResolver = null
    };

    xml.Load(sgmlSource);
    return xml;
}
// method to apply the XSLT stylesheet to the XML document
/// <summary>
/// Runs the stylesheet returned by <c>getRequiredStylesheet()</c> over every
/// ".sgm" file in <c>FilesToConvert</c>: each file is converted to XML, saved as
/// "temp.xml" in <c>outputFolder</c>, transformed, reloaded and handed to
/// <c>convertToSGM</c>. A failure on one file is reported in a message box and
/// the loop carries on with the next file.
/// </summary>
/// <param name="applicFilter">Filter name; only used in the completion message.</param>
private void filterApplic(string applicFilter)
{
    string stylesheet = getRequiredStylesheet(); // do this just once
    if (string.IsNullOrEmpty(stylesheet))
    {
        // Also covers null, which would otherwise fail once per file inside the loop.
        MessageBox.Show("The stylesheet was retured empty. Cannot perform Applicability filter.");
        return;
    }

    // Compiled lazily on the first ".sgm" file and then reused for the rest,
    // instead of recompiling the same stylesheet for every file.
    XslCompiledTransform myXslTrans = null;

    foreach (string file in FilesToConvert)
    {
        // These fields are updated for every file (not only ".sgm" ones);
        // convertToSGM reads fileNameNoExt to name its output.
        fileName = Path.GetFileName(file); //gets just the file name from the path
        fileNameNoExt = Path.GetFileNameWithoutExtension(file);

        // NOTE(review): the comparison is case-sensitive, so ".SGM" files are skipped - confirm intended.
        if (Path.GetExtension(file) != ".sgm")
        {
            continue;
        }

        try
        {
            publicIdentifier = getDoctype(file); // gets the sgml declaration
            entitiesList = getEntitites(file); // gets the list of entities

            // Dispose the reader as soon as the document is loaded so the
            // source file handle is not leaked for each processed file.
            using (TextReader tr = new StreamReader(file))
            {
                myDoc = convertToXML(tr);
            }

            string tempXmlPath = Path.Combine(outputFolder, "temp.xml");
            string transformedPath = Path.Combine(outputFolder, fileNameNoExt + ".xml");
            myDoc.Save(tempXmlPath);

            if (myXslTrans == null)
            {
                // Only publish the instance once Load has succeeded, so a failed
                // load is retried (and reported) for the next file as before.
                var loadedTransform = new XslCompiledTransform();
                loadedTransform.Load(stylesheet);
                myXslTrans = loadedTransform;
            }

            myXslTrans.Transform(tempXmlPath, transformedPath);

            XmlDocument convertedDoc = new XmlDocument();
            convertedDoc.Load(transformedPath);
            convertToSGM(convertedDoc);
            filesTransformed++;
        }
        catch (Exception e)
        {
            MessageBox.Show(e.ToString());
        }
    }

    MessageBox.Show("Complete! " + filesTransformed.ToString() + " files filtered for " + applicFilter);
}
//convert files back to SGML
/// <summary>
/// Serialises <paramref name="myDoc"/> to text, swaps the XML declaration for a
/// "&lt;!DOCTYPE DMODULE ...&gt;" line built from <c>publicIdentifier</c>, inserts the
/// entries of <c>entitiesList</c> as an internal subset, and writes the result to
/// "&lt;fileNameNoExt&gt;.sgm" in <c>outputFolder</c>.
/// </summary>
/// <param name="myDoc">
/// Document to write. Empty elements have their <c>IsEmpty</c> flag cleared so they
/// are written as start/end tag pairs; no nodes are added or removed.
/// </param>
private void convertToSGM(XmlDocument myDoc)
{
    // XmlElement.WriteTo emits the short form "<x />" for every element whose
    // IsEmpty flag is set. Clearing the flag makes the writer emit "<x></x>"
    // instead, so no self-closed tags reach the SGML output.
    // NOTE(review): elements the DTD declares EMPTY with an omitted end tag may
    // need the end tag stripped as well - confirm against the DTD.
    foreach (XmlNode node in myDoc.SelectNodes("//*"))
    {
        XmlElement element = node as XmlElement;
        if (element != null && element.IsEmpty)
        {
            element.IsEmpty = false;
        }
    }

    using (var stringWriter = new StringWriter())
    using (var xmlTextWriter = XmlWriter.Create(stringWriter, settings))
    {
        myDoc.WriteTo(xmlTextWriter);
        xmlTextWriter.Flush();
        string xmltext = stringWriter.GetStringBuilder().ToString();

        // The declaration's encoding depends on the writer, so both the
        // utf-16 and utf-8 spellings are replaced.
        string doctype = "<!DOCTYPE DMODULE " + publicIdentifier + ">";
        xmltext = xmltext.Replace("<?xml version=\"1.0\" encoding=\"utf-16\"?>", doctype);
        xmltext = xmltext.Replace("<?xml version=\"1.0\" encoding=\"utf-8\"?>", doctype);

        if (entitiesList.Count != 0)
        {
            // Each entity declaration goes on its own line inside "[ ... ]".
            var entityListAsOne = new System.Text.StringBuilder();
            foreach (string entity in entitiesList)
            {
                entityListAsOne.Append("\r\n").Append(entity);
            }

            // Relies on the public identifier ending in //EN" to locate the
            // end of the DOCTYPE line.
            xmltext = xmltext.Replace("//EN\">", "//EN\" [" + entityListAsOne + "]>");
        }

        File.WriteAllText(Path.Combine(outputFolder, fileNameNoExt + ".sgm"), xmltext);
    }
}