Python download zip files from a public FTP server

Published 2019-09-10 07:35

Question:

I need to download several (Digital Earth Model) zip files in a folder "C:\DEMDownload" on my PC (windows OS) from the public geodata base of Canada Government.

When I run my code, at the line `ftp.retrbinary('RETR %s' % file, open(local_file, 'wb').write)` I get the following error message:

Traceback (most recent call last):
  File "<input>", line 1, in <module>
  File "C:\Python27\lib\ftplib.py", line 414, in retrbinary
    conn = self.transfercmd(cmd, rest)
  File "C:\Python27\lib\ftplib.py", line 376, in transfercmd
    return self.ntransfercmd(cmd, rest)[0]
  File "C:\Python27\lib\ftplib.py", line 339, in ntransfercmd
    resp = self.sendcmd(cmd)
  File "C:\Python27\lib\ftplib.py", line 249, in sendcmd
    return self.getresp()
  File "C:\Python27\lib\ftplib.py", line 224, in getresp
    raise error_perm, resp
error_perm: 550 Failed to open file.

Second: is it possible to avoid writing out the `available_days` list by hand, and instead build the list of all zip files to download automatically?

import os, ftplib

# Local download folder; a raw string keeps the backslash literal.
destdir = r'C:\DEMDownload'
if not os.path.isdir(destdir):
    os.makedirs(destdir)

ftp = ftplib.FTP('ftp2.cits.rncan.gc.ca')
ftp.login('anonymous', '')

# Every tile requested below starts with '001', and on this server the
# zips are grouped under .../50k_dem/<prefix>/ (e.g. 001/001k11.zip).
# The original 550 "Failed to open file" error came from sending RETR
# while still sitting in the anonymous-login root directory.
# NOTE(review): confirm the '001' sub-folder layout against the server.
ftp.cwd('/pub/geobase/official/cded/50k_dem/001')

available_days = [
    '001k11.zip',
    '001k12.zip',
    '001k13.zip',
    '001k14.zip',
    '001k15.zip',
    '001l13.zip',
    '001l14.zip',
    '001l16.zip',
    '001m01.zip',
    '001m02.zip',
    '001m03.zip',
    '001m04.zip',
    '001m05.zip',
    '001m06.zip',
    '001m07.zip',
    '001m08.zip',
    '001m09.zip',
    '001m10.zip',
    '001m11.zip',
    '001m12.zip',
    '001m13.zip',
    '001m14.zip',
    '001m15.zip',
    '001m16.zip',
    '001n02.zip',
    '001n03.zip',
    '001n04.zip',
    '001n05.zip',
    '001n06.zip',
    '001n07.zip',
    '001n10.zip',
    '001n11.zip',
    '001n12.zip',
    '001n13.zip',
    '001n14.zip',
    '001n15.zip',
]

hdfs = []
# Iterate the list directly; the original
# available_days[available_days.index(day)] was a redundant O(n) lookup
# of the value already in hand, and the per-iteration ftp.cwd('..')
# would have walked up the remote tree on every pass.
for filename in available_days:
    print('downloading ' + filename)
    local_file = os.path.join(destdir, filename)
    # 'wb' + context manager: binary-safe and the handle is always closed,
    # even if the transfer raises.
    with open(local_file, 'wb') as fobj:
        ftp.retrbinary('RETR ' + filename, fobj.write)
    hdfs.append(os.path.abspath(local_file))
ftp.quit()

Answer 1:

I was able to successfully download the zip files with your given url with this:

# connect to ftp (anonymous login) and change into the directory that
# actually holds the zip files -- issuing RETR from the wrong remote
# working directory is what produced the 550 error.
url = urlparse.urlparse("http://ftp2.cits.rncan.gc.ca/pub/geobase/official/cded/50k_dem/")
ftp = ftplib.FTP(url.netloc)
ftp.login()
ftp.cwd(ftp_dirname)  # NOTE: ftp_dirname/filename/basename come from the surrounding code

# 'wb', not 'w': retrbinary delivers raw bytes; text mode would corrupt
# the zip on Windows (and raises TypeError on Python 3).
with open(filename, 'wb') as fobj:
    ftp.retrbinary('RETR %s' % basename, fobj.write)

You can avoid the hard-coded directory and file names by walking the FTP directory tree — much as you would walk a local directory — with some creative use of ftplib.FTP.dir().

Full code below:

# Root of the remote tree to mirror, parsed once so that the host
# (url.netloc) and the path (url.path) are available separately.
url = urlparse.urlparse('http://ftp2.cits.rncan.gc.ca/pub/geobase/official/cded/50k_dem/')

# Local destination root; point this wherever you want the files to land.
local_root = os.path.expanduser("~/ftp_download")

def download(ftp, ftp_path, filename, check_cwd=True):
    """
    Using the given ftp connection, download from ftp_path to
    filename.

    ftp       -- a logged-in ftplib.FTP instance
    ftp_path  -- POSIX path of the remote file
    filename  -- local path to write; parent directories are created
                 as needed
    check_cwd -- if False, assume the ftp connection is already in the
                 correct remote working directory (cwd)
    """
    basename = posixpath.basename(ftp_path)
    dirname = os.path.dirname(filename)
    # Guard against dirname == '' (bare filename): os.makedirs('') raises.
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)
    if check_cwd:
        ftp_dirname = posixpath.dirname(ftp_path)
        if ftp_dirname != ftp.pwd():
            ftp.cwd(ftp_dirname)

    # 'wb', not 'w': retrbinary hands back raw bytes; text mode would
    # mangle zip payloads on Windows and raise TypeError on Python 3.
    with open(filename, 'wb') as fobj:
        ftp.retrbinary('RETR %s' % basename, fobj.write)

def ftp_dir(ftp):
    """
    Return the contents of the ftp connection's current working
    directory as a list of (is_directory, name) 2-tuples.

    is_directory is derived from the leading 'd' of the long-format
    listing line; name is the last whitespace-separated field.
    """
    # ftp.dir() only offers a per-line callback, so collect the raw
    # long-format listing into a list first.
    listing = []
    ftp.dir(listing.append)

    entries = []
    for raw in listing:
        is_dir = raw[0].upper() == 'D'
        name = raw.rsplit()[-1]
        entries.append((is_dir, name))
    return entries

# connect to ftp (anonymous login)
ftp = ftplib.FTP(url.netloc)
ftp.login()

# Depth-first walk of the remote tree: pop a directory, list it, push
# its sub-directories, then download its files.
stack = [url.path]
while stack:
    current = stack.pop()
    ftp.cwd(current)

    # Split the listing into directories and plain files.
    dirs, files = [], []
    for is_dir, name in ftp_dir(ftp):
        (dirs if is_dir else files).append(posixpath.join(current, name))

    # Push in reverse so directories pop off in their listed order.
    stack.extend(reversed(dirs))

    # We just cwd'd into `current`, so skip the per-file cwd check.
    for remote_path in files:
        relative = remote_path.split(url.path, 1)[-1]
        download(ftp, remote_path, os.path.join(local_root, relative),
                 check_cwd=False)

# logout
ftp.quit()

You can condense this further through usage of one of the python ftp wrapper libraries such as ftptool or ftputil