I have some HTML:
<h2>Results</h2>
<div class="box">
<table class="tFormat">
<th>Head</th>
<tr>1</tr>
</table>
</div>
<h2>Grades</h2>
<div class="box">
<table class="tFormat">
<th>Head</th>
<tr>1</tr>
</table>
</div>
I was wondering how I would get the table under the "Results" heading.
I've tried:
// Attempt from the question: locate the "Results" heading and print table text.
var nodes = doc.DocumentNode.SelectNodes("//h2");
foreach (var o in nodes)
{
    // Skip every heading other than "Results".
    if (!o.InnerText.Equals("Results"))
    {
        continue;
    }

    // NOTE: an XPath that begins with "//" is evaluated from the document root,
    // not from the node it is called on, so this matches every <table> in the page.
    var tables = o.SelectNodes("//table");
    foreach (var c in tables)
    {
        Console.WriteLine(c.InnerText);
    }
}
It works, but it also gets the table under the "Grades" h2.
Note that the div is not hierarchically inside the header, so it doesn't make sense to look for it there.
This can work for you - it finds the next element after the title:
if (o.InnerText.Equals("Results"))
{
    // Walk forward through the heading's siblings and stop at the first node
    // whose type is Element; any other node type in between is skipped.
    var sibling = o.NextSibling;
    while (sibling != null)
    {
        if (sibling.NodeType == HtmlNodeType.Element)
        {
            break;
        }

        sibling = sibling.NextSibling;
    }

    // nextDiv is the first element sibling after the heading, or null if none exists.
    var nextDiv = sibling;
}
You can also write a more specific XPath expression to find just that div:
// For an <h2> whose text is exactly "Results", selects the first <div> that
// follows it as a sibling (the div is next to the heading, not inside it).
doc.DocumentNode.SelectNodes("//h2[text()='Results']/following-sibling::div[1]");
// Print the text of every table that belongs to the "Results" section only.
//
// The <div> holding the table is a sibling of the <h2>, not a child, so the
// XPath first finds the heading and then steps to the first <div> after it.
var resultsDiv = doc.DocumentNode.SelectSingleNode(
    "//h2[text()='Results']/following-sibling::div[1]");

// SelectSingleNode yields null when the heading or its following <div> is missing.
if (resultsDiv != null)
{
    // ".//table" is relative to resultsDiv. A leading "//" would restart the
    // search at the document root and also return the table under "Grades".
    var tables = resultsDiv.SelectNodes(".//table");

    // SelectNodes returns null (not an empty collection) when nothing matches.
    if (tables != null)
    {
        foreach (var table in tables)
        {
            Console.WriteLine(table.InnerText);
        }
    }
}