Purpose
The purpose of this project is to demonstrate how to create a simple page crawler; you provide it a URL to crawl and it will retrieve the page for you.
<%@ Import Namespace="System" %>
<%@ Import Namespace="System.IO" %>
<%@ Import Namespace="System.Xml" %>
<%@ Import Namespace="System.Net" %>
<%@ Import Namespace="System.Data" %>
<script runat="server">
' Page load handler: fetches the page at crawlURL with an HTTP request and
' writes the retrieved text straight into this page's response.
'
' sender - event source (not used here).
' e      - event data (not used here).
'
' No exceptions are caught here, so any failure while creating the request,
' getting the response or reading the body propagates to the caller.
Private Sub Page_Load(ByVal sender As System.Object, ByVal e As System.EventArgs)
    Dim crawlOutput As String = Nothing
    Dim crawlMethod As String = "GET"
    Dim crawlURL As String = "http://www.somesite.com/apage.php"
    ' Sent as the User-Agent header of the outgoing request.
    Dim crawlID As String = "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 6.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)"

    Dim myRequest As HttpWebRequest = CType(WebRequest.Create(crawlURL), HttpWebRequest)
    myRequest.UserAgent = crawlID
    myRequest.Method = crawlMethod

    ' Using blocks release the response, its stream and the reader even when
    ' reading throws part-way through; explicit Close() calls after the loop
    ' were skipped on any exception and left the connection open.
    Using myResponse As HttpWebResponse = CType(myRequest.GetResponse(), HttpWebResponse)
        Using streamResponse As Stream = myResponse.GetResponseStream()
            Using streamRead As New StreamReader(streamResponse)
                ' ReadToEnd replaces the manual fixed-size buffer loop, which
                ' rebuilt the whole output string on every chunk.
                crawlOutput = streamRead.ReadToEnd()
            End Using
        End Using
    End Using

    Response.Write(crawlOutput)
End Sub
</script>