I'm working on a project where I need to parse a lot of HTML files. I need to get every <p>
element from within one <div class="story-body">.
So far I have the code below, and it does what I want, but I was wondering how to do this using an XPath expression. I tried this:
textBody.SelectNodes ("What to put here? I tried //p but it gives every p in document not inside the one div")
But without success. Any ideas?
/// <summary>
/// Extracts the story headline and body text from the loaded HTML document,
/// appends them as text nodes to <c>titleElement</c> and <c>storyElement</c>,
/// and saves the XML document to "test.xml".
/// </summary>
/// <remarks>
/// The headline is taken from the first <c>h1</c> whose class is exactly
/// "story-header"; the body from the first <c>div</c> whose class is exactly
/// "story-body". Either one may be absent: a missing headline is skipped and a
/// missing body yields an empty story instead of a NullReferenceException.
/// Only direct children of the body div are considered.
/// </remarks>
public void Parse(){
    HtmlNode title = doc.DocumentNode.SelectSingleNode ("//h1[(@class='story-header')]");
    HtmlNode textBody = doc.DocumentNode.SelectSingleNode ("//div[(@class='story-body')]");

    if (title != null) {
        XmlText textT = xmlDoc.CreateTextNode(title.InnerText);
        titleElement.AppendChild(textT);
        Console.WriteLine(title.InnerText);
    }

    // Accumulate in a StringBuilder rather than repeated string concatenation.
    var story = new System.Text.StringBuilder();

    // SelectSingleNode returns null when nothing matches, so guard before iterating.
    if (textBody != null) {
        foreach (HtmlNode node in textBody.ChildNodes) {
            bool isParagraph = node.Name == "p";
            // An absent class attribute falls back to the empty string, which never equals "cross-head".
            bool isCrossHead = node.Name == "span"
                && node.GetAttributeValue("class", string.Empty) == "cross-head";

            if (isParagraph || isCrossHead) {
                // Blank line between paragraphs / cross-heads.
                story.Append(node.InnerText).Append("\n\n");
                Console.WriteLine(node.InnerText);
            }
        }
    }

    XmlText textS = xmlDoc.CreateTextNode (story.ToString());
    storyElement.AppendChild (textS);

    try
    {
        xmlDoc.Save("test.xml");
    }
    catch (Exception e)
    {
        // Best-effort save: report the failure and carry on.
        Console.WriteLine(e.Message);
    }
}