Modifying the program for parsing

2019-08-19 09:08发布

There is a program that parse a certain table from the site . Works great . I want to parse another table from the site . By the tag number “table” they are the same . I am trying to use the same program , but it gives an error : Run-time error 91 in the line :

     If oRow.Cells(y).Children.Length > 0 Then

New table : http://allscores.ru/soccer/fstats.php?champ=2604&team=439&team2=420&tour=110

Old table : http://allscores.ru/soccer/new_ftour.php?champ=2604&f_team=439

New table : in the attached picture

Function extractTable(Ssilka As String, book1 As Workbook, iLoop As Long)
    Dim oDom As Object, oTable As Object, oRow As Object
    Dim iRows As Integer, iCols As Integer
    Dim x As Integer, y As Integer
    Dim data()
    Dim vata()
    Dim tata()
    Dim oHttp As Object
    Dim oRegEx As Object
    Dim sResponse As String
    Dim oRange As Range
    Dim odRange As Range

   ' get page
    Set oHttp = CreateObject("MSXML2.XMLHTTP")
    oHttp.Open "GET", Ssilka, False
    oHttp.Send

    ' cleanup response
    sResponse = StrConv(oHttp.responseBody, vbUnicode)
    Set oHttp = Nothing

    sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))

    Set oRegEx = CreateObject("vbscript.regexp")
    With oRegEx
        .MultiLine = True
        .Global = True
        .IgnoreCase = False
        .Pattern = "<(script|SCRIPT)[\w\W]+?</\1>"
        sResponse = .Replace(sResponse, "")
    End With
    Set oRegEx = Nothing

    ' create Document from response
    Set oDom = CreateObject("htmlFile")
    oDom.Write sResponse
    DoEvents

    ' table with results, indexes starts with zero
    Set oTable = oDom.getelementsbytagname("table")(3)

    DoEvents

    iRows = oTable.Rows.Length
    iCols = oTable.Rows(1).Cells.Length

    ' first row and first column contain no intresting data
    ReDim data(1 To iRows - 1, 1 To iCols - 1)
    ReDim vata(1 To iRows - 1, 1 To iCols - 1)
    ReDim tata(1 To iRows - 1, 1 To iCols - 1)
    ' fill in data array
    For x = 1 To iRows - 1
        Set oRow = oTable.Rows(x)
        For y = 1 To iCols - 1
            If oRow.Cells(y).Children.Length > 0 Then
                data(x, y) = oRow.Cells(y).getelementsbytagname("a")(0).getattribute("href")
                data(x, y) = Replace(data(x, y), "about:", "http://allscores.ru/soccer/")
                vata(x, y) = oRow.Cells(y).innerText
            End If
        Next y
    Next x

    Set oRow = Nothing
    Set oTable = Nothing
    Set oDom = Nothing

    Set oRange = book1.ActiveSheet.Cells(110, 26 + (iLoop * 21)).Resize(iRows - 1, iCols - 1)
    oRange.NumberFormat = "@"
    oRange.Value = data

    Set odRange = book1.ActiveSheet.Cells(34, 26 + (iLoop * 21)).Resize(iRows - 1, iCols - 1)
    odRange.NumberFormat = "@"
    odRange.Value = vata

    Set oRange = Nothing
    Set odRange = Nothing

End Function

New Table

1条回答
放我归山
2楼-- · 2019-08-19 09:20

This is not particularly robust but does grab the values from the table. iLoop is not used.

Option Explicit
Public Sub test()    
    extractTable "http://allscores.ru/soccer/fstats.php?champ=2604&team=439&team2=420&tour=110", ThisWorkbook, 1    
End Sub

Public Sub extractTable(Ssilka As String, book1 As Workbook)
    Dim oDom As Object, oTable As Object
    Dim oHttp As Object
    Dim oRegEx As Object
    Dim sResponse As String
    Set oHttp = CreateObject("MSXML2.XMLHTTP")
    oHttp.Open "GET", Ssilka, False
    oHttp.send
    sResponse = StrConv(oHttp.responseBody, vbUnicode)
    Set oHttp = Nothing

    sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))

    Set oRegEx = CreateObject("vbscript.regexp")
    With oRegEx
        .MultiLine = True
        .Global = True
        .IgnoreCase = False
        .Pattern = "<(script|SCRIPT)[\w\W]+?</\1>"
        sResponse = .Replace(sResponse, "")
    End With
    Set oRegEx = Nothing
    Set oDom = CreateObject("htmlFile")
    oDom.Write sResponse

    Set oTable = oDom.getElementsByTagName("table")(3)

    Dim b As Object, a As Object
    Set b = oTable.getElementsByTagName("TR")    'DispHTMLElementCollection

    Dim i As Long, y As Long
    With ActiveSheet
        For i = 3 To 17 '17-3 gives the 15 rows of interest. Start at 3 to avoid header and empty row.
            Set a = b(i).ChildNodes
            For y = 1 To a.Length - 1
                .Cells(i - 2, y) = a(y).innerText
            Next y
        Next i
    End With
End Sub
查看更多
登录 后发表回答