Python URLRetrieve Limit Rate and Resume Partial D

2020-03-26 13:38发布

问题:

I'm using the code from this thread to limit my download rate.

How do I incorporate partial downloads resuming with the rate limiting code? The examples I've found use urlopen instead of urlretrieve, and the RateLimit class depends on urlretrieve.

I'd like to have an external function that controls the partial downloading, without having to change the RateLimit class:

from throttle import TokenBucket, RateLimit

def retrieve_limit_rate(url, filename, rate_limit):
    """Fetch the contents of urls"""
    bucket = TokenBucket(10*rate_limit, rate_limit)

    print "rate limit = %.1f kB/s" % (rate_limit,)

    print 'Downloading %s...' % filename
    rate_limiter = RateLimit(bucket, filename)
    #
    # What do I put here to allow resuming files?
    #
    return urllib.urlretrieve(url, filename, rate_limiter)

回答1:

May be able to use PyCurl instead:

def curl_progress(total, existing, upload_t, upload_d):
    try:
        frac = float(existing)/float(total)
    except:
        frac = 0
    print "Downloaded %d/%d (%0.2f%%)" % (existing, total, frac)

def curl_limit_rate(url, filename, rate_limit):
    """Rate limit in bytes"""
    import pycurl
    c = pycurl.Curl()
    c.setopt(c.URL, url)
    c.setopt(c.MAX_RECV_SPEED_LARGE, rate_limit)
    if os.path.exists(filename):
        file_id = open(filename, "ab")
        c.setopt(c.RESUME_FROM, os.path.getsize(filename))
    else:
        file_id = open(filename, "wb")

    c.setopt(c.WRITEDATA, file_id)
    c.setopt(c.NOPROGRESS, 0)
    c.setopt(c.PROGRESSFUNCTION, curl_progress)
    c.perform()