코드를 업데이트했습니다(배울 수 있도록 도와주셔서 감사합니다). 코드는 이미 동작하지만, 제가 원하는 올바른 링크를 반환하지 않습니다.
다음은 현재 동작하는 제 코드입니다:
''' <summary>
''' Downloads the hard-coded Google search results page, extracts the anchors whose
''' href contains "test" (via GetLinks) and adds each one to ListBox1.
''' </summary>
Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
    Dim WebSource As String

    ' WebClient implements IDisposable; the Using block releases it even when the download throws.
    ' NOTE(review): everything after '#' in the address is a URI fragment, which HTTP clients
    ' normally do not send to the server - confirm the query string alone returns the intended page.
    Using webClient As New System.Net.WebClient()
        WebSource = webClient.DownloadString("http://www.google.com.ph/search?hl=en&as_q=test&as_epq=&as_oq=&as_eq=&as_nlo=&as_nhi=&lr=&cr=countryCA&as_qdr=all&as_sitesearch=&as_occt=any&safe=images&tbs=ctr%3AcountryCA&as_filetype=&as_rights=#as_qdr=all&cr=countryCA&fp=1&hl=en&lr=&q=test&start=20&tbs=ctr:countryCA")
    End Using

    Dim doc = New HtmlAgilityPack.HtmlDocument()
    doc.LoadHtml(WebSource)

    Dim links = GetLinks(doc, "test")
    ' The loop variable is deliberately not called "Link" so it does not shadow the Link class.
    For Each foundLink In links
        ListBox1.Items.Add(foundLink.ToString())
    Next
End Sub
''' <summary>
''' A hyperlink found in a document: its target address and its text.
''' </summary>
Public Class Link
    ''' <summary>Creates a link from its target address and its text.</summary>
    ''' <param name="Uri">Target address of the link; may be Nothing.</param>
    ''' <param name="Text">Text associated with the link.</param>
    Public Sub New(Uri As Uri, Text As String)
        Me.Uri = Uri
        Me.Text = Text
    End Sub

    ''' <summary>Text associated with the link.</summary>
    Public Property Text As String

    ''' <summary>Target address of the link.</summary>
    Public Property Uri As Uri

    ''' <summary>
    ''' Returns the link's address as a string, or an empty string when Uri is Nothing.
    ''' </summary>
    Public Overrides Function ToString() As String
        ' Return the address directly. Routing it through String.Format would treat the address
        ' itself as a composite format string, so a "{" or "}" in it raises FormatException.
        Return If(Uri Is Nothing, "", Uri.ToString())
    End Function
End Class
''' <summary>
''' Collects the anchors of <paramref name="doc"/> whose href value contains
''' <paramref name="linkContains"/> (ordinal, case-insensitive) and parses as an absolute URI.
''' </summary>
''' <param name="doc">The parsed HTML document to scan.</param>
''' <param name="linkContains">Text that the raw href value must contain.</param>
''' <returns>
''' One Link (absolute Uri plus trimmed inner text) per matching anchor, in the order
''' Descendants() yields them. Anchors whose href is not an absolute URI are skipped.
''' </returns>
''' <exception cref="ArgumentNullException"><paramref name="doc"/> is Nothing.</exception>
Public Function GetLinks(doc As HtmlAgilityPack.HtmlDocument, linkContains As String) As List(Of Link)
    If doc Is Nothing Then Throw New ArgumentNullException("doc")

    Dim Uris As New List(Of Link)()
    For Each node In doc.DocumentNode.Descendants()
        If node.Name <> "a" Then Continue For

        Dim hrefAttribute = node.Attributes("href")
        If hrefAttribute Is Nothing Then Continue For

        Dim url = hrefAttribute.Value
        If url Is Nothing OrElse url.IndexOf(linkContains, StringComparison.OrdinalIgnoreCase) < 0 Then Continue For

        ' TryCreate is a Shared member, so call it on the type and keep the parsed result;
        ' this avoids mutating a captured local inside a query and parsing the same text twice.
        Dim parsed As System.Uri = Nothing
        If System.Uri.TryCreate(url, UriKind.Absolute, parsed) Then
            Uris.Add(New Link(parsed, node.InnerText.Trim()))
        End If
    Next
    Return Uris
End Function
제 주요 목표:
샘플 링크 : http://www.google.com.ph/search?hl=en&as_q=test&as_epq=&as_oq=&as_eq=&as_nlo=&as_nhi=&lr=&cr=countryCA&as_qdr=all&as_sitesearch=&as_occt=any&safe=images&tbs=ctr%3AcountryCA&as_filetype=&as_rights=#as_qdr=all&cr=countryCA&fp=1&hl=en&lr=&q=test&start=20&tbs=ctr:countryCA
"test"라는 단어가 포함된 링크의 예상 출력:
www.copetest.com/
www.testofhumanity.com/
www3.algonquincollege.com/testcentre/
www.lpitest.ca/
testtube.nfb.ca/
www.ieltscanada.ca/testdates.jsp
https://www.awinfosys.com/eassessment/fsa_fieldtest.htm
대신 href 속성을 사용해야 합니다.
또한 .NET의 문자열 비교는 기본적으로 대소문자를 구분합니다.
' List every anchor whose href contains "test" (ordinal, case-insensitive).
' SelectNodes returns Nothing - not an empty collection - when no node matches the XPath,
' so guard before iterating; otherwise For Each throws NullReferenceException.
Dim anchors = htmlDoc.DocumentNode.SelectNodes("//a[@href]")
If anchors IsNot Nothing Then
    For Each link As HtmlNode In anchors
        Dim href = link.Attributes("href").Value
        If href.IndexOf("test", StringComparison.OrdinalIgnoreCase) >= 0 Then
            ' Show the link target ...
            ListBox1.Items.Add(href)
            ' ... or the anchor's text; keep whichever of the two lines you need.
            ListBox1.Items.Add(link.InnerText)
        End If
    Next
End If
다음은 문서의 모든 링크를 List(Of Link)로 반환하는 메서드입니다.
Link는 텍스트(Text)와 Uri에 대한 두 개의 속성(properties)을 가진 사용자 정의 클래스입니다.
''' <summary>
''' Pairs a link's text with its target address.
''' </summary>
Public Class Link
    ''' <summary>Text associated with the link.</summary>
    Public Property Text As String

    ''' <summary>Target address of the link.</summary>
    Public Property Uri As Uri

    ''' <summary>Stores the given address and text in the corresponding properties.</summary>
    Public Sub New(Uri As Uri, Text As String)
        Me.Text = Text
        Me.Uri = Uri
    End Sub

    ''' <summary>
    ''' Formats the link as "Text [address]"; the address part is empty when Uri is Nothing.
    ''' </summary>
    Public Overrides Function ToString() As String
        Dim address As String = ""
        If Uri IsNot Nothing Then
            address = Uri.ToString()
        End If
        Return String.Format("{0} [{1}]", Text, address)
    End Function
End Class
''' <summary>
''' Collects every anchor of <paramref name="doc"/> whose href value parses as an absolute URI.
''' </summary>
''' <param name="doc">The parsed HTML document to scan.</param>
''' <returns>
''' One Link (absolute Uri plus trimmed inner text) per qualifying anchor, in the order
''' Descendants() yields them. Anchors whose href is not an absolute URI are skipped.
''' </returns>
''' <exception cref="ArgumentNullException"><paramref name="doc"/> is Nothing.</exception>
Public Function GetLinks(doc As HtmlAgilityPack.HtmlDocument) As List(Of Link)
    If doc Is Nothing Then Throw New ArgumentNullException("doc")

    Dim Uris As New List(Of Link)()
    For Each node In doc.DocumentNode.Descendants()
        If node.Name <> "a" Then Continue For

        Dim hrefAttribute = node.Attributes("href")
        If hrefAttribute Is Nothing Then Continue For

        ' TryCreate is a Shared member, so call it on the type and reuse the parsed result
        ' instead of mutating a captured local inside a query and parsing the text again.
        Dim parsed As System.Uri = Nothing
        If System.Uri.TryCreate(hrefAttribute.Value, UriKind.Absolute, parsed) Then
            Uris.Add(New Link(parsed, node.InnerText.Trim()))
        End If
    Next
    Return Uris
End Function
다음은 요청하신 오버로드로, URL에 주어진 텍스트가 포함되어 있는지 추가로 확인합니다.
''' <summary>
''' Overload that additionally filters by href content: collects the anchors of
''' <paramref name="doc"/> whose href value contains <paramref name="linkContains"/>
''' (ordinal, case-insensitive) and parses as an absolute URI.
''' </summary>
''' <param name="doc">The parsed HTML document to scan.</param>
''' <param name="linkContains">Text that the raw href value must contain.</param>
''' <returns>
''' One Link (absolute Uri plus trimmed inner text) per matching anchor, in the order
''' Descendants() yields them. Anchors whose href is not an absolute URI are skipped.
''' </returns>
''' <exception cref="ArgumentNullException"><paramref name="doc"/> is Nothing.</exception>
Public Function GetLinks(doc As HtmlAgilityPack.HtmlDocument, linkContains As String) As List(Of Link)
    If doc Is Nothing Then Throw New ArgumentNullException("doc")

    Dim Uris As New List(Of Link)()
    For Each node In doc.DocumentNode.Descendants()
        If node.Name <> "a" Then Continue For

        Dim hrefAttribute = node.Attributes("href")
        If hrefAttribute Is Nothing Then Continue For

        Dim url = hrefAttribute.Value
        If url Is Nothing OrElse url.IndexOf(linkContains, StringComparison.OrdinalIgnoreCase) < 0 Then Continue For

        ' TryCreate is a Shared member, so call it on the type and keep the parsed result;
        ' this avoids mutating a captured local inside a query and parsing the same text twice.
        Dim parsed As System.Uri = Nothing
        If System.Uri.TryCreate(url, UriKind.Absolute, parsed) Then
            Uris.Add(New Link(parsed, node.InnerText.Trim()))
        End If
    Next
    Return Uris
End Function
편집: 이제 작동합니다. 다음과 같이 사용하세요:
' Read the saved page from disk, parse it, and show every extracted link in ListBox1.
Dim html As String = File.ReadAllText("C:\Temp\website_test.htm")
Dim document As New HtmlAgilityPack.HtmlDocument()
document.LoadHtml(html)

For Each found In GetLinks(document)
    ListBox1.Items.Add(found.ToString())
Next