I am trying to scrape pages using HtmlAgilityPack.dll, but when certain URLs are passed to the function below I get the following error, and I cannot catch it in a try-catch block. Can anyone help me out?
Error: An unhandled exception of type 'System.StackOverflowException' occurred in HtmlAgilityPack.dll
/// <summary>
/// Downloads the page at <paramref name="url"/> with an HTTP GET request and
/// parses the response body into an <c>HtmlAgilityPack.HtmlDocument</c>.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <remarks>
/// Exceptions raised while requesting or parsing propagate to the caller with
/// their original stack trace. Note that a <see cref="StackOverflowException"/>
/// cannot be caught by a try/catch block on modern .NET runtimes; the process is
/// terminated instead, so wrapping this method in a handler does not help there.
/// </remarks>
public void HtmlLoad(string url)
{
    // Create the request.
    HttpWebRequest myHttpWebRequest = (HttpWebRequest)HttpWebRequest.Create(url);
    myHttpWebRequest.Method = "GET";
    myHttpWebRequest.ContentType = "text/html; encoding='utf-8'";

    // Get the response. The using blocks release the connection and the stream
    // even when parsing throws, which the manual Close() call did not guarantee.
    using (HttpWebResponse myHttpWebResponse = (HttpWebResponse)myHttpWebRequest.GetResponse())
    using (Stream data = myHttpWebResponse.GetResponseStream())
    {
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.Load(data);
    }
}
You can try this cleaner, asynchronous version instead:
// One client shared by every call: creating a new HttpClient per request can
// exhaust the available sockets under load.
// Optional settings belong here, configured once (for example in a static constructor):
//   s_sharedHttpClient.DefaultRequestHeaders.UserAgent.ParseAdd(UserAgent);
//   s_sharedHttpClient.Timeout = TimeOut;
private static readonly HttpClient s_sharedHttpClient = new HttpClient();

/// <summary>
/// Asynchronously downloads the page at <paramref name="url"/> as a string and
/// parses it into an <c>HtmlAgilityPack.HtmlDocument</c>.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <returns>
/// A task that always yields <c>0</c>; any exception raised while downloading or
/// parsing is written to the console instead of being propagated.
/// </returns>
public static async Task<int> HtmlLoadAsync(string url/*, bool addUserAgent = false*/)
{
    try
    {
        var urlContents = await s_sharedHttpClient.GetStringAsync(url);

        var document = new HtmlAgilityPack.HtmlDocument();
        document.LoadHtml(urlContents);
        // process document now
    }
    catch (Exception ex)
    {
        // Best effort: report the failure and fall through to the normal return.
        Console.WriteLine(ex.Message);
    }

    return 0;
}
Then call it like this:
/// <summary>
/// Starts a page load through <c>HtmlLoadAsync</c> and awaits its completion.
/// The integer result is not used.
/// </summary>
private async void Process()
{
    // Explicitly discard the returned value; only completion matters here.
    _ = await HtmlLoadAsync("http://....");
}