J'essaie d'analyser une table qui ressemble à ceci:
<table><tbody>
<tr><th a href=""></th><th></th></tr>
<tr><td class="v"></td><td class="d"></td><td class="h"></td><td class="a"> </td><td class="o"></td><td class="o"></td><td class="o"></td><td class="p"><table class="p" title="ttt"></table></td></tr>
<tr><td class="v"></td><td class="d"></td><td class="h"></td><td class="a"> </td><td class="o"></td><td class="o"></td><td class="o"></td><td class="p"><table class="p" title="eee"></table></td></tr>
<tr><td class="v"></td><td class="d"></td><td class="h"></td><td class="a"> </td><td class="o"></td><td class="o"></td><td class="o"></td><td class="p"><table class="p" title="rtr"></table></td></tr>
<tr><th a href=""></th><th></th></tr>
<tr><td class="v"></td><td class="d"></td><td class="h"></td><td class="a"> </td><td class="o"></td><td class="o"></td><td class="o"></td><td class="p"><table class="p" title="ouu"></table></td></tr>
<tr><td class="v"></td><td class="d"></td><td class="h"></td><td class="a"> </td><td class="o"></td><td class="o"></td><td class="o"></td><td class="p"><table class="p" title="teee"></table></td></tr>
</tbody></table>
Et j'utilise ce code dans ASP.net pour obtenir les cellules de chaque ligne que je veux:
var getHtmlWeb = new HtmlWeb();
var document = getHtmlWeb.Load(txtbox.Text);
//get tables
foreach (HtmlNode table in document.DocumentNode.SelectNodes("//table"))
{
//get each table row
foreach (HtmlNode row in table.SelectNodes("tr"))
{
Outputlabel.Text += "row: <br />";
//get table head tags that have a link, get the Inner text
if((row.SelectSingleNode("//th//a").InnerText) != null)
{
Outputlabel.Text += row.SelectSingleNode("//th//a").InnerText + "<br />";
}
// get the cells with the classes I want
string d = row.SelectSingleNode("//td[@class='d']").InnerText;
Outputlabel.Text += row.SelectSingleNode("//td[@class='d']").InnerText + " ";
string h = row.SelectSingleNode("//td[@class='h']").InnerText;
Outputlabel.Text += row.SelectSingleNode("//td[@class='h']").InnerText + " ";
string a = row.SelectSingleNode("//td[@class='a']").InnerText;
Outputlabel.Text += row.SelectSingleNode("//td[@class='a']").InnerText + " ";
string op = "";
//there are 3 classes in each row to have the class="o"
if (row.SelectNodes("//td[@class='o']") != null)
{
foreach (HtmlNode o in row.SelectNodes("//td[@class='o']"))
{
op += o.InnerText;
}
Outputlabel.Text += op + " ";
}
var pr = row.SelectSingleNode("//td//table[@class='p']");
string pr = probability.Attributes["title"].Value;
Outputlabel.Text += pr + "<br />";
}
}
Je ne reçois que la première ligne de la première table et cela se répète plusieurs fois ... et je ne reçois pas la classe "o" ni le titre de la table avec la classe "p" dans la balise td avec la classe "p"
Cela semble fonctionner de cette façon pour le fichier HTML en ligne:
HtmlWeb getHtmlWeb = new HtmlWeb();
HtmlDocument doc = getHtmlWeb.Load(txtbox.Text);
string d = "//td[@class='d']";
string h = "//td[@class='h']";
string a = "//td[@class='a']";
string p = "//table[@class='p']";
HtmlNodeCollection ds = doc.DocumentNode.SelectNodes(d);
HtmlNodeCollection hs = doc.DocumentNode.SelectNodes(h);
HtmlNodeCollection as = doc.DocumentNode.SelectNodes(a);
HtmlNodeCollection ps = doc.DocumentNode.SelectNodes(p);
foreach (HtmlNode n in ds)
{
Outputlabel.Text += n.InnerHtml + "<br />";
}
foreach (HtmlNode h in hs)
{
Outputlabel.Text += h.InnerHtml + "<br />";
}
foreach (HtmlNode a in as)
{
Outputlabel.Text += a.Attributes["href"].Value + "<br />";
}
foreach (HtmlNode p in ps)
{
Outputlabel.Text += p.Attributes["title"].Value + "<br />";
}