I cant seem able to scrape data form a website tha

2019-08-20 11:02发布

问题:

I cant seem to find the ID when i inspect the source of the website "rofex.primary.ventures". All i want to do is grab all the data below the Ult column and put it into an excel worksheet. Ive used firefox because it shows the HTLM code in a nicer way but i would like to scrape it from chrome using an excel Macro. How would i do this?

Sub Rofex()

Dim appIE As Object
Set appIE = CreateObject("internetexplorer.application")
With appIE
.Navigate "https://rofex.primary.ventures"
.Visible = True
End With
Do While appIE.Busy
DoEvents
Loop

Set allRowOfData = appIE.document.getElementById("rx:DO:2019:01:a")
Dim myValue As String: myValue = allRowOfData.Cells(6).innerHTML

appIE.Quit
Set appIE = Nothing
Range("A1").Value = myValue
End Sub

This is what i have but get all types of errors, im new to coding, needless to say. Thank you!

回答1:

Use the available API. There is a csv format xmlhttp response that you can target to extract this info. Note that the results are in 1000s so, for example, DOEne19 is ult 37,960 and the output is 37.96.

Option Explicit

Public Sub GetInfo()
    Const URL As String = "https://rofex.primary.ventures/api/v1/platform/market/md"
    Dim lines() As String
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", URL, False
        .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
        .send
        lines = Split(.responseText, vbLf)
    End With
    Dim output(), i As Long, rowCounter As Long, arr() As String
    ReDim output(1 To UBound(lines), 1 To 2)
    For i = 1 To UBound(lines)
        If InStr(lines(i), "|") > 0 Then
            rowCounter = rowCounter + 1
            arr = Split(lines(i), "|")
            output(rowCounter, 1) = Replace$(arr(0), "m;", vbNullString)
            output(rowCounter, 2) = arr(6)
        End If
    Next
    output = Application.Transpose(output)
    ReDim Preserve output(1 To 2, 1 To rowCounter)
    output = Application.Transpose(output)

    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1) = "Ticker": .Cells(1, 2) = "1000s"
        .Cells(2, 1).Resize(UBound(output, 1), UBound(output, 2)) = output
    End With
End Sub

Otherwise, you can download as csv as then use loop column A and use split to extract the columns of interest. Download part shown below.

Public Sub DownloadFile()
    Dim http As Object
    Const filepath As String = "C:\Users\User\Desktop\TestDownload.csv"
    Set http = CreateObject("MSXML2.XMLHTTP")
    http.Open "GET", "https://rofex.primary.ventures/api/v1/platform/market/md", False
    http.send
    On Error GoTo errhand
    With CreateObject("ADODB.Stream")
        .Open
        .Type = 1
        .Write http.responseBody
        .SaveToFile filepath '<== specify your path here
        .Close
    End With
    Debug.Print "FileDownloaded"
    TidyFile filepath
    Exit Sub
errhand:
    If Err.Number <> 0 Then
        Debug.Print Err.Number, Err.Description
        MsgBox "Download failed"
    End If
End Sub

Public Sub TidyFile(ByVal filepath As String)
    Dim wb As Workbook, lines(), i As Long, output(), rowCounter As Long, arr() As String
    Set wb = Workbooks.Open(filepath)

    With wb.Sheets(1)
        lines = Application.Transpose(.Range("A2:A" & .Cells(.Rows.Count, "A").End(xlUp).Row).Value)

        ReDim output(1 To UBound(lines), 1 To 2)
        For i = LBound(lines) To UBound(lines)
            If InStr(lines(i), "|") > 0 Then
                rowCounter = rowCounter + 1
                arr = Split(lines(i), "|")
                output(rowCounter, 1) = Replace$(arr(0), "m;", vbNullString)
                output(rowCounter, 2) = arr(6)
            End If
        Next
        output = Application.Transpose(output)
        ReDim Preserve output(1 To 2, 1 To rowCounter)
        output = Application.Transpose(output)
        .Cells.ClearContents
        .Cells(1, 1) = "Ticker": .Cells(1, 2) = "1000s"
        .Cells(2, 1).Resize(UBound(output, 1), UBound(output, 2)) = output
    End With
    wb.Close SaveChanges:=True
End Sub