I'm using urllib2 to make a resuming downloader, roughly based on this method. I can end the program and re-start it, and it starts downloading where it left off, downloading the file that ends up the same size as if it were downloaded all at once.
However, I have tested it when disabling and reenabling network, and it doesn't download correctly. The file size ends up longer than the file should be, and the file doesn't work correctly. Is there something I missed, or could this be a urllib2 bug?
import urllib2
opener = urllib2.build_opener();
self.count = 0 # Counts downloaded size.
self.downloading = True
while (not(self.success) and self.downloading):
try:
self.Err = ""
self._netfile = self.opener.open(self.url)
self.filesize = float(self._netfile.info()['Content-Length'])
if (os.path.exists(self.localfile) and os.path.isfile(self.localfile)):
self.count = os.path.getsize(self.localfile)
print self.count,"of",self.filesize,"downloaded."
if self.count >= self.filesize:
#already downloaded
self.downloading = False
self.success = True
self._netfile.close()
return
if (os.path.exists(self.localfile) and os.path.isfile(self.localfile)):
#File already exists, start where it left off:
#This seems to corrupt the file sometimes?
self._netfile.close()
req = urllib2.Request(self.url)
print "file downloading at byte: ",self.count
req.add_header("Range","bytes=%s-" % (self.count))
self._netfile = self.opener.open(req)
if (self.downloading): #Don't do it if cancelled, downloading=false.
next = self._netfile.read(1024)
self._outfile = open(self.localfile,"ab") #to append binary
self._outfile.write(next)
self.readsize = desc(self.filesize) # get size mb/kb
self.count += 1024
while (len(next)>0 and self.downloading):
next = self._netfile.read(1024)
self._outfile.write(next)
self.count += len(next)
self.success = True
except IOError, e:
print e
self.Err=("Download error, retrying in a few seconds: "+str(e))
try:
self._netfile.close()
except Exception:
pass
time.sleep(8) #Then repeat