VBA Macro to download multiple files from links in

2019-02-20 22:18发布

问题:

I want to download multiple files from a list of links. The website where I find the links is protected. This is why I want to use IE (using the current session/cookie). The target of each link is a xml file. The files are too large to open and then save. So I need to save them directly (right-click, save target as).

The list of links looks like this:

<html>
<body>
<p> <a href="https://example.com/report?_hhhh=XML"Link A</a><br>> </p>
<p> <a href="https://example.com/report?_aaaa=XML"Link B</a><br>> </p>
...
</body>
</html>

I want to loop through all links and save each target. Currently I have problems with the "Save As". I don't really know how to do it. This is my code so far:

Sub DownloadAllLinks()

Dim IE As Object
Dim Document As Object
Dim List As Object
Dim Link As Object

' Before I logged in to the website
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
IE.Navigate ("https:\\......\links.html")

Do While IE.Busy
  DoEvents
Loop

' Detect all links on website
Set Document = IE.Document
Set List = Document.getElementsByTagName("a")

' Loop through all links to download them

For Each Link In List

' Now I need to automate "save target as" / right-click and then "save as"
...

Next Link
End Sub

Do you have any ideas to automate "Save As" for each link?

Any help is appreciated. Many thanks, Uli

回答1:

Below is a quite common example I adapted for your case, it shows the usage of XHR and RegEx to retrieve webpage HTML content, extract all links from it, and download each link's target file:

Option Explicit

Sub Test()
    ' declare vars
    Dim sUrl As String
    Dim sReqProt As String
    Dim sReqAddr As String
    Dim sReqPath As String
    Dim sContent As String
    Dim oLinks As Object
    Dim oMatch As Object
    Dim sHref As String
    Dim sHrefProt As String
    Dim sHrefAddr As String
    Dim sHrefPath As String
    Dim sHrefFull As String
    Dim n As Long
    Dim aContent() As Byte
    ' set source URL
    sUrl = "https:\\......\links.html"
    ' process source URL
    SplitUrl sUrl, sReqProt, sReqAddr, sReqPath
    If sReqProt = "" Then sReqProt = "http:"
    sUrl = sReqProt & "//" & sReqAddr & "/" & sReqPath
    ' retrieve source page HTML content
    With CreateObject("Microsoft.XMLHTTP")
        .Open "GET", sUrl, False
        .Send
        sContent = .ResponseText
    End With
    ' parse source page HTML content to extract all links
    Set oLinks = CreateObject("Scripting.Dictionary")
    With CreateObject("VBScript.RegExp")
        .Global = True
        .MultiLine = True
        .IgnoreCase = True
        .Pattern = "<a.*?href *= *(?:'|"")(.*?)(?:'|"").*?>"
        For Each oMatch In .Execute(sContent)
            sHref = oMatch.subMatches(0)
            SplitUrl sHref, sHrefProt, sHrefAddr, sHrefPath
            If sHrefProt = "" Then sHrefProt = sReqProt
            If sHrefAddr = "" Then sHrefAddr = sReqAddr
            sHrefFull = sHrefProt & "//" & sHrefAddr & "/" & sHrefPath
            oLinks(oLinks.Count) = sHrefFull
        Next
    End With
    ' save each link target into file
    For Each n In oLinks
        sHref = oLinks(n)
        With CreateObject("Microsoft.XMLHTTP")
            .Open "GET", sHref, False
            .Send
            aContent = .ResponseBody
        End With
        With CreateObject("ADODB.Stream")
            .Type = 1 ' adTypeBinary
            .Open
            .Write aContent
            .SaveToFile "C:\Test\" & n & ".xml", 2 ' adSaveCreateOverWrite
            .Close
        End With
    Next
End Sub

Sub SplitUrl(sUrl, sProt, sAddr, sPath)
    ' extract protocol, address and path from URL
    Dim aSplit
    aSplit = Split(sUrl, "//")
    If UBound(aSplit) = 0 Then
        sProt = ""
        sAddr = sUrl
    Else
        sProt = aSplit(0)
        sAddr = aSplit(1)
    End If
    aSplit = Split(sAddr, "/")
    If UBound(aSplit) = 0 Then
        sPath = sAddr
        sAddr = ""
    Else
        sPath = Mid(sAddr, Len(aSplit(0)) + 2)
        sAddr = aSplit(0)
    End If
End Sub

This method doesn't employ IE automation. Usually the IE's cookies which Microsoft.XMLHTTP processes are sufficient to refer to the current session, so if your website doesn't use additional procedures for authentication and generation the list of the links then the method should work for you.