I have a web-crawler developed using webbrowser control. Sometimes it works fine – it reaches login page home page catalog menu page product detail page come back to catalog menu page. But at certain times, it just reaches upto the home page, afterwards it gives a blank page. It doesn’t get the required element ids.
I tried various suggestions as given below… They didn’t help. The interesting fact is this issue is an intermittent issue. Any thoughts what need to be done to overcome this intermittent issue?
Note: I won’t be able to share the url of the site since it is an intranet site.
REFERENCES
- Getting HTML body content in WinForms WebBrowser after body onload event executes
- Webbrowser Control Limitations
- Why is the Webbrowser control DocumentComplete event fired for top level frame first?
- WebBrowser control = wait for page load to complete in an Assembly
CODE
public partial class Form1 : Form
{
private System.Windows.Forms.WebBrowser wb = null;
private ListBox listBox1 = null;
List<string> visitedUrls = new List<string>();
List<string> visitedProducts = new List<string>();
bool isFirstPage = true;
string clickType = String.Empty;
bool isUnvisitedProductExist = true;
private void ExerciseApp(object sender, EventArgs e)
{
#region Listbox Data Filling
if (listBox1.Items.Count == 0)
{
listBox1.Items.Add("Start--" + DateTime.Now.ToString());
}
else
{
if (listBox1.Items.Count == 2)
{
listBox1.Items.RemoveAt(1);
}
listBox1.Items.Add("Now--" + DateTime.Now.ToString());
}
#endregion
WriteLogFunction(" -----------------------------------------------");
#region Login
//Check whether login page
if (isFirstPage)
{
HtmlElement logonId = this.wb.Document.GetElementById("logonId");
HtmlElement password = this.wb.Document.GetElementById("logonPassword");
HtmlElement btnLogin = this.wb.Document.GetElementById("WC_AccountDisplay_links_2");
if (logonId != null && password != null && btnLogin != null)
{
logonId.InnerText = ConfigValues.userName;
password.InnerText = ConfigValues.passwordText;
isFirstPage = false;
//Call click for login
btnLogin.InvokeMember("click");
}
}
#endregion
bool isClickCalled = false;
#region Specific Product Details
int catalogElementIterationCounter = 0;
var elementsToConsider = wb.Document.All;
bool isMenuPage = false;
foreach (HtmlElement e1 in elementsToConsider)
{
catalogElementIterationCounter++;
string x = e1.TagName;
String idStr = e1.GetAttribute("id");
if (!String.IsNullOrWhiteSpace(idStr))
{
//Each Product Navigation
if (idStr.Contains("catalogEntry_img"))
{
isMenuPage = true;
string productUrl = e1.GetAttribute("href");
if (!visitedProducts.Contains(productUrl))
{
WriteLogFunction("p__" + productUrl);
isUnvisitedProductExist = true;
visitedProducts.Add(productUrl);
isClickCalled = true;
clickType = "Product";
e1.InvokeMember("Click");
break;
}
}
}
if (isMenuPage)
{
//Even after traversing the page, there is no unvisited product pending.
//So good to go for next page
if (catalogElementIterationCounter == elementsToConsider.Count - 1)
{
isUnvisitedProductExist = false;
}
}
}
#endregion
#region Menu Page
if (!isClickCalled)
{
#region Time Delay
try
{
DateTime start = DateTime.Now;
if (!wb.IsDisposed)
{
while (wb.ReadyState != WebBrowserReadyState.Complete)
{
System.Windows.Forms.Application.DoEvents();
if (wb.IsDisposed || DateTime.Now.Subtract(start).TotalSeconds > 2)
{
// Time limit break and dispose break
break;
}
}
}
}
catch (Exception ex)
{
WriteLogFunction(ex.Message);
//Supress the exception
}
#endregion
int menuPageIterationCounter = 0;
bool isMatchFound = false;
WriteLogFunction("Count-" + wb.Document.All.Count);
var elementsInMenuPage = wb.Document.All;
foreach (HtmlElement e1 in elementsInMenuPage)
{
menuPageIterationCounter++;
string x = e1.TagName;
String idStr = e1.GetAttribute("id");
WriteLogFunction("Before--"+idStr);
#region time Delay
try
{
DateTime start = DateTime.Now;
if (!wb.IsDisposed)
{
while (wb.ReadyState != WebBrowserReadyState.Complete)
{
System.Windows.Forms.Application.DoEvents();
if (wb.IsDisposed || DateTime.Now.Subtract(start).TotalSeconds > 50)
{
// Time limit break and dispose break
break;
}
}
}
}
catch (Exception ex)
{
WriteLogFunction(ex.Message);
//Supress the exception
}
#endregion
WriteLogFunction("After--" + idStr);
//Main Menu Item Navigation
if (idStr.Contains("WC_CachedHeaderDisplay_links"))
{
WriteLogFunction("*******INSIDE");
string url = e1.GetAttribute("href");
string latestUrl = String.Empty;
if (visitedUrls.Count > 0)
{
latestUrl = visitedUrls[visitedUrls.Count - 1];
}
WriteLogFunction("L__" + latestUrl);
WriteLogFunction("isUnvisitedProductExist__" + isUnvisitedProductExist.ToString());
if (visitedUrls.Contains(url) && isUnvisitedProductExist)
{
if (latestUrl == url)
{
isMatchFound = true;
clickType = "Menu";
WriteLogFunction("u1__" + url);
e1.InvokeMember("Click");
break;
}
}
else if (!visitedUrls.Contains(url))
{
isMatchFound = true;
//Reset visited Products
visitedProducts = new List<string>();
visitedUrls.Add(url);
clickType = "Menu";
WriteLogFunction("u2__" + url);
e1.InvokeMember("Click");
break;
}
if (!isMatchFound && (menuPageIterationCounter == elementsInMenuPage.Count - 1))
{
//wb.Navigate(websiteUrl);
//Application.Exit();
//Environment.Exit(0);
}
}
}
}
#endregion
}
public Form1()
{
// listBox1
listBox1 = new ListBox();
listBox1.Location = new Point(10, 10);
listBox1.Size = new Size(500, 50);
this.Controls.Add(listBox1);
// Web Browser
wb = new WebBrowser();
wb.Location = new Point(10, 80);
wb.Size = new Size(900, 900);
//wb.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(ExerciseApp);
wb.ScriptErrorsSuppressed = true;
wb.Url = new Uri(ConfigValues.websiteUrl);
// Form1
this.Text = "Web Browser Test";
this.Size = new Size(950, 950);
this.Controls.Add(wb);
this.Load += Form1_Load;
}
private void Form1_Load(object sender, EventArgs e)
{
this.wb.DocumentCompleted += delegate
{
// DocumentCompleted is fired before window.onload and body.onload
this.wb.Document.Window.AttachEventHandler("onload", delegate
{
// Defer this to make sure all possible onload event handlers got fired
System.Threading.SynchronizationContext.Current.Post(delegate
{
MessageBox.Show("window.onload was fired, can access DOM!");
ExerciseApp(null, null);
}, null);
});
};
this.wb.Navigate(ConfigValues.websiteUrl);
}
private void WriteLogFunction(string strMessage)
{
using (StreamWriter w = File.AppendText("log.txt"))
{
w.WriteLine("\r\n{0} {1} ", DateTime.Now.ToLongTimeString(), strMessage);
}
}
}