I need to do the following
I have 2 problems
- I don't know how to select the "Special Hospital" and "All Ambulatory Care Facilities **NOTE #2"
- When I manually select those 2 types and then click on some of the hospitals, the URL doesn't become selection specific.
It becomes http://healthapps.state.nj.us/facilities/acFacilityList.aspx after I select the 2 types, then stays that way when I click on the hospitals.
Therefore, I'm not able to write the code that will scrape those pages because I don't know how to specify the URL for each hospital.
I apologize, this has to be a very basic question but I wasn't able to google anything useful on it for Access VBA
here's the code that pulls data from a page, i didn't do the loops yet, so this is just a basic pull of the source data behind a page
Public Function btnGetWebData_Click()
Dim strURL
Dim HTML_Content As HTMLDocument
Dim dados As Object
'Create HTMLFile Object
Set HTML_Content = New HTMLDocument
'Get the WebPage Content to HTMLFile Object
With CreateObject("msxml2.xmlhttp")
.Open "GET", "http://healthapps.state.nj.us/facilities/acFacilityList.aspx", False
'http://healthapps.state.nj.us/facilities/acFacilityList.aspx
.Send
HTML_Content.Body.innerHTML = .responseText
Debug.Print .responseText
Debug.Print HTML_Content.Body.innerHTML
End With
End Function
It navigates to each result page, and back to homepage in between so as to leverage the postBack links through clicks.
Option Explicit
Public Sub VisitPages()
Dim IE As New InternetExplorer
With IE
.Visible = True
.navigate "http://healthapps.state.nj.us/facilities/acSetSearch.aspx?by=county"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.querySelector("#middleContent_cbType_5").Click
.querySelector("#middleContent_cbType_12").Click
.querySelector("#middleContent_btnGetList").Click
End With
While .Busy Or .readyState < 4: DoEvents: Wend
Dim list As Object, i As Long
Set list = .document.querySelectorAll("#main_table [href*=doPostBack]")
For i = 0 To list.Length - 1
list.item(i).Click
While .Busy Or .readyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 3) '<== Delete me later. This is just to demo page changes
'do stuff with new page
.Navigate2 .document.URL '<== back to homepage
While .Busy Or .readyState < 4: DoEvents: Wend
Set list = .document.querySelectorAll("#main_table [href*=doPostBack]") 'reset list (often required in these scenarios)
Next
Stop '<== Delete me later
'.Quit '<== Remember to quit application
End With
End Sub
Same thing with executing the postBacks
Option Explicit
Public Sub VisitPages()
Dim IE As New InternetExplorer
With IE
.Visible = True
.navigate "http://healthapps.state.nj.us/facilities/acSetSearch.aspx?by=county"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.querySelector("#middleContent_cbType_5").Click
.querySelector("#middleContent_cbType_12").Click
.querySelector("#middleContent_btnGetList").Click
End With
While .Busy Or .readyState < 4: DoEvents: Wend
Dim list As Object, i As Long, col As Collection
Set col = New Collection
Set list = .document.querySelectorAll("#main_table [href*=doPostBack]")
For i = 0 To list.Length - 1
col.Add CStr(list.item(i))
Next
For i = 1 To col.Count
.document.parentWindow.execScript col.item(i)
While .Busy Or .readyState < 4: DoEvents: Wend
'Do stuff with page
.Navigate2 .document.URL
While .Busy Or .readyState < 4: DoEvents: Wend
Next
Stop '<== Delete me later
'.Quit '<== Remember to quit application
End With
End Sub