htmlAgilityPack Load()方法什麼都不做

c# html-agility-pack winforms

我正在使用htmlAgilityPack來獲取網站的內容:

/// <summary>
/// Loads the page at <paramref name="websiteUrl"/> with HtmlAgilityPack (excerpt; the rest of the body is omitted here).
/// </summary>
/// <param name="websiteUrl">Address of the page to load.</param>
private String getImageUrl(String websiteUrl)
{
    // Fetch and parse the page. This is the call that is reported to never reach the next line.
    HtmlAgilityPack.HtmlDocument docHtml = new HtmlWeb().Load(websiteUrl);
    // ...
}

程式始終沒有執行到第二行的斷點,而第一行也沒有拋出任何異常。該URL確實存在,並且可以正常訪問。

docHtml也不是null,該行似乎沒有執行,沒有殺死我的線程。

什麼可以使這種情況發生/如何獲得有關正在發生的事情的更多信息?

編輯:該函數是從一個類中調用的,而這個類由我的主窗體(main form)實例化。這個調用類會運行一個線程。對於該類的第一個實例,一切正常,但對第二個實例就不起作用了。

class Image
{
    BackgroundWorker downloadWorker = new BackgroundWorker();

    private String fileName;
    private String directory;
    private String url;

    RichTextBox rtxtStatus;


    /// <summary>
    /// Creates an image download job and wires up its background worker.
    /// </summary>
    /// <param name="_fileName">Name stored in the <c>fileName</c> field.</param>
    /// <param name="_directory">Directory stored in the <c>directory</c> field.</param>
    /// <param name="_url">Address stored in the <c>url</c> field.</param>
    public Image(String _fileName, String _directory, String _url)
    {
        // Remember the job parameters.
        this.fileName = _fileName;
        this.directory = _directory;
        this.url = _url;

        // Worker capabilities.
        this.downloadWorker.WorkerReportsProgress = true;
        this.downloadWorker.WorkerSupportsCancellation = true;

        // Method-group conversions create the same delegates as the explicit
        // "new ...EventHandler(...)" form.
        this.downloadWorker.DoWork += worker_doWork;
        this.downloadWorker.ProgressChanged += worker_progressChanged;
        this.downloadWorker.RunWorkerCompleted += worker_runWorkerCompleted;
    }

    /// <summary>
    /// DoWork handler of the background worker: performs the download.
    /// </summary>
    /// <param name="sender">Unused.</param>
    /// <param name="e">Unused.</param>
    private void worker_doWork(object sender, DoWorkEventArgs e)
    {
        // All of the work lives in download(); this handler only forwards to it.
        this.download();
    }

    /// <summary>
    /// Checks that the page at <c>url</c> answers with HTTP 200, extracts the image address
    /// from it via <c>getImageUrl</c> and saves the image as "&lt;fileName&gt;.png" inside
    /// <c>directory</c>.
    /// </summary>
    /// <remarks>
    /// Called from <c>worker_doWork</c>. If the target directory is missing, or the page does
    /// not answer with HTTP 200, nothing is downloaded.
    /// </remarks>
    /// <exception cref="WebException">
    /// The probe request failed without producing any HTTP response (for example a DNS failure
    /// or a timeout). The exception is rethrown so that it reaches the worker's completion
    /// handler instead of turning into a NullReferenceException.
    /// </exception>
    private void download()
    {
        if (!Directory.Exists(directory))
        {
            MessageBox.Show("Directory to save image not found.");
            return;
        }

        HttpWebRequest probeRequest = (HttpWebRequest)WebRequest.Create(url);

        HttpWebResponse response;
        try
        {
            response = (HttpWebResponse)probeRequest.GetResponse();
        }
        catch (WebException ex)
        {
            // HTTP error statuses (404, 500, ...) still carry a response object;
            // transport-level failures do not.
            response = ex.Response as HttpWebResponse;
            if (response == null)
            {
                throw;
            }
        }

        // Dispose the probe response BEFORE issuing any further request. An unclosed response
        // keeps its connection checked out, and once the per-host connection limit is reached
        // later requests to the same host block indefinitely.
        bool pageAvailable;
        using (response)
        {
            pageAvailable = response.StatusCode == HttpStatusCode.OK;
        }

        if (!pageAvailable)
        {
            return;
        }

        // Decode HTML entities (e.g. "&amp;") in the extracted address.
        string image = WebUtility.HtmlDecode(getImageUrl(url));
        string saveName = Path.Combine(directory, fileName + ".png");

        try
        {
            using (WebClient downloadClient = new WebClient())
            {
                downloadClient.DownloadFile(image, saveName);
            }
        }
        catch (Exception)
        {
            // Best effort: report the failure to the user instead of failing the worker.
            MessageBox.Show("Error while downloading");
        }
    }

    /// <summary>
    /// ProgressChanged handler of the background worker; intentionally empty.
    /// </summary>
    /// <param name="sender">Unused.</param>
    /// <param name="e">Unused.</param>
    private void worker_progressChanged(object sender, ProgressChangedEventArgs e)
    {
        // Nothing to do
    }

    /// <summary>
    /// RunWorkerCompleted handler of the background worker: reports the outcome to the user.
    /// </summary>
    /// <param name="sender">Unused.</param>
    /// <param name="e">Completion data; <c>e.Error</c> is non-null when the work failed with an exception.</param>
    private void worker_runWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
    {
        if (e.Error != null)
        {
            MessageBox.Show("Download cancelled, please check URL.");
            // Do not also log a success line for a download that failed.
            return;
        }

        rtxtStatus.AppendText("\nDownload finished.");
    }



    // NOTE(review): these two statements sit directly in the class body, outside any method,
    // which is not valid C#. Presumably they were displaced from a method when the code was
    // pasted. TODO confirm where they belong.
    // They store `directory` in the SaveFileLocation application setting and save the settings.
    Properties.Settings.Default.SaveFileLocation = directory;
    Properties.Settings.Default.Save();
}

// this method is called from outside on image-object.
/// <summary>
/// Starts the download on the background worker unless one is already in progress.
/// </summary>
/// <param name="_rtxtStatus">Text box that receives the status line once the worker completes.</param>
public void downloadImage(RichTextBox _rtxtStatus)
{
    rtxtStatus = _rtxtStatus;

    // Guard: a BackgroundWorker can only run one operation at a time.
    if (downloadWorker.IsBusy)
    {
        MessageBox.Show("Download already running.");
        return;
    }

    downloadWorker.RunWorkerAsync();
}
/// <summary>
/// Loads the page at <paramref name="websiteUrl"/> and returns the <c>src</c> value of the
/// first <c>&lt;img&gt;</c> element that has one.
/// </summary>
/// <param name="websiteUrl">Address of the page to inspect.</param>
/// <returns>The raw (still HTML-encoded) value of the image's <c>src</c> attribute.</returns>
/// <exception cref="InvalidOperationException">
/// The page contains no <c>&lt;img&gt;</c> element with a <c>src</c> attribute.
/// </exception>
private String getImageUrl(String websiteUrl)
{
    HtmlAgilityPack.HtmlDocument docHtml = new HtmlWeb().Load(websiteUrl);

    // HtmlAgilityPack returns null (not an empty result) when an XPath query matches nothing,
    // so check explicitly instead of indexing into the result. Requiring @src in the query
    // also guarantees that the attribute lookup below cannot be null.
    var firstImage = docHtml.DocumentNode.SelectSingleNode("//img[@src]");
    if (firstImage == null)
    {
        throw new InvalidOperationException(
            "No <img> element with a src attribute was found at " + websiteUrl);
    }

    return firstImage.Attributes["src"].Value;
}

熱門答案

也許您嘗試訪問的網站需要啟用Cookie。請通過HtmlWeb對象的PreRequest事件為請求設置一個CookieContainer,然後再調用Load方法。

// Keep the HtmlWeb instance in a variable so a handler can be attached before loading.
HtmlWeb htmlWeb = new HtmlWeb();
// PreRequest handler: give the outgoing request a fresh CookieContainer.
htmlWeb.PreRequest += request =>
    {
        request.CookieContainer = new System.Net.CookieContainer();
        // NOTE(review): returning true presumably tells HtmlWeb to proceed with the request;
        // confirm against the HtmlAgilityPack documentation.
        return true;
    };
// yourUrl is a placeholder for the address of the page to load.
var htmlDoc = htmlWeb.Load(yourUrl);


許可下: CC-BY-SA with attribution
不隸屬於 Stack Overflow
這個KB合法嗎? 是的,了解原因
許可下: CC-BY-SA with attribution
不隸屬於 Stack Overflow
這個KB合法嗎? 是的,了解原因