using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
namespace WebbrowserTest
{
/// <summary>
/// Sample form that loads a web page in an off-screen <see cref="WebBrowser"/>
/// and writes the target of every link found on that page to the console.
/// </summary>
public partial class WebBrowserSample : Form
{
    /// <summary>Address of the page whose links are listed.</summary>
    private const string TargetUrl = "http://www.cnn.com";

    /// <summary>
    /// Creates the form and its designer-generated controls.
    /// </summary>
    public WebBrowserSample()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Navigates a temporary browser to <see cref="TargetUrl"/>, waits for the
    /// page to finish loading, and prints the "href" of each link to the console.
    /// </summary>
    /// <param name="sender">The control that raised the click event.</param>
    /// <param name="e">Unused event data.</param>
    private void button1_Click(object sender, EventArgs e)
    {
        // Application.DoEvents() below keeps dispatching UI messages, so without
        // this guard a second click would start a nested navigation while the
        // first one is still waiting.
        Control trigger = sender as Control;
        if (trigger != null)
        {
            trigger.Enabled = false;
        }

        try
        {
            List<string> urls;

            // The browser wraps an ActiveX control; dispose it once the links
            // have been copied out so it is not leaked on every click.
            using (WebBrowser web = new WebBrowser())
            {
                web.NewWindow += web_NewWindow;
                web.Navigate(TargetUrl);

                // Wait for the browser object to load the page. The message
                // pump must keep running or the load never completes.
                while (web.ReadyState != WebBrowserReadyState.Complete)
                {
                    Application.DoEvents();
                }

                urls = CollectLinkUrls(web.Document);
            }

            foreach (string url in urls)
            {
                Console.WriteLine(url);
            }
        }
        finally
        {
            // The form may have been closed while messages were being pumped.
            if (trigger != null && !trigger.IsDisposed)
            {
                trigger.Enabled = true;
            }
        }
    }

    /// <summary>
    /// Copies the non-empty "href" attribute of every link in a document.
    /// </summary>
    /// <param name="document">The loaded document; may be null.</param>
    /// <returns>The link targets in document order; empty when there is no document.</returns>
    private static List<string> CollectLinkUrls(HtmlDocument document)
    {
        List<string> urls = new List<string>();
        if (document == null)
        {
            return urls;
        }

        foreach (HtmlElement link in document.Links)
        {
            try
            {
                // GetAttribute reports a missing attribute as an empty string,
                // so a null check alone would let blank entries through.
                string href = link.GetAttribute("href");
                if (!string.IsNullOrEmpty(href))
                {
                    urls.Add(href);
                }
            }
            catch (Exception ex)
            {
                // Best effort: one unreadable element must not abort the whole
                // listing, but the failure is traced instead of being hidden.
                System.Diagnostics.Trace.WriteLine("Skipping link: " + ex.Message);
            }
        }

        return urls;
    }

    /// <summary>
    /// Cancels every new-window request so popups opened by the page are discarded.
    /// </summary>
    /// <param name="sender">The browser requesting the new window.</param>
    /// <param name="e">Event data; <c>Cancel</c> is set to true.</param>
    void web_NewWindow(object sender, CancelEventArgs e)
    {
        e.Cancel = true;
    }
}
}
The 'NewWindow' event is used to discard popups which may open while navigating a web page. The 'Document.Links' property of the WebBrowser is used to get the 'HtmlElementCollection' of all of the links present in a web page. Once you have the collection of links, the links can be processed/viewed using the 'href' attribute of each 'HtmlElement' in the collection.
Even though this is pretty straightforward code, I found it a little slow. I think the reason is that you have to make sure that the WebBrowser object has loaded the entire web page before you can start using it (achieved by the 'while (web.ReadyState != WebBrowserReadyState.Complete)' loop).
Also, if you are thinking about link processing in a non-Windows Forms application, then this program is not for you. For all the Windows Forms application developers, enjoy the code :)