Invalid URLs throw an exception - python

2019-08-28 21:18发布

问题:

import httplib
import urlparse

def getUrl(url):
    """Send a HEAD request for *url* on port 80 and print the outcome.

    Prints 'Awesome' when the server answers 200 and 'Error' for any other
    status.  Any Exception raised along the way (name resolution failure,
    refused connection, ...) is caught and printed instead of propagating.
    """
    try:
        pieces = urlparse.urlsplit(url)
        host, resource = pieces[1], pieces[2]  # netloc and path components
        conn = httplib.HTTPConnection(host, 80)
        conn.connect()
        conn.putrequest('HEAD', resource)
        conn.putheader('Accept', '*/*')
        conn.endheaders()
        response = conn.getresponse()
        # Fetched but not used further below.
        content_type = response.getheader("content-type", "unknown")
        conn.close()
        if response.status != 200:
            print('Error')
        else:
            print('Awesome')
    except Exception as e:
        print(e)

I wrote the code above to check whether a given URL is valid. But when I test it, it throws an exception for every invalid URL.

>>> getUrl('http://www.idlebrfffain.com')
[Errno -2] Name or service not known

Python version:

chaitu@ubuntu:~$ python -V
Python 2.6.4

Can anyone help me find out where exactly the mistake is?

回答1:

You have to catch socket.error:

import httplib, socket
import urlparse

def getUrl(url):
    """Issue a HEAD request for *url* and print whether it is reachable.

    Prints 'Awesome' for a 200 response and 'Error' for any other status.
    Prints a diagnostic and returns early when the URL has no host, the host
    cannot be resolved, or the TCP connection cannot be established.

    Errors raised once the connection is up (socket.error,
    httplib.HTTPException) are deliberately left to propagate so that the
    traceback shows where they came from; the connection is still closed.
    A non-numeric port in the URL surfaces as ValueError from urlsplit's
    ``port`` attribute.
    """
    parts = urlparse.urlsplit(url)
    server = parts.hostname
    # Honour an explicit port in the URL.  Passing a fixed 80 together with
    # a "host:port" netloc makes httplib try to resolve the whole string.
    port = parts.port or 80
    # The query string is part of the resource being checked, so keep it.
    path = parts.path or '/'
    if parts.query:
        path = '%s?%s' % (path, parts.query)

    if not server:
        # Nothing to look up, e.g. the scheme was omitted and the whole
        # string was parsed as a path.
        print("No host found in URL %r" % (url,))
        return

    obj = httplib.HTTPConnection(server, port)

    # Only the connect step is guarded: these are the failures we expect
    # and know how to report.
    try:
        obj.connect()
    except socket.gaierror:
        print("Host %s does not exist" % server)
        return
    except socket.error:
        print("Cannot connect to %s:%s." % (server, port))
        return

    try:
        obj.putrequest('HEAD', path)
        obj.putheader('Accept', '*/*')
        obj.endheaders()
        response = obj.getresponse()
    finally:
        # Release the socket even when the request itself blows up.
        obj.close()

    if response.status != 200:
        print('Error')
    else:
        print('Awesome')


# Demo calls, one per failure message printed by getUrl.
for url in (
    'http://www.idlebrfffain.com',  # not a registered domain
    'http://8.8.8.8',               # not an HTTP server
):
    getUrl(url)

Only wrap specific lines in try: ... except:, and only when you know which exceptions those lines can raise. Python shows a traceback for every uncaught exception, so you can easily find out where the problem is.



回答2:

That's supposed to happen. An exception is thrown because the host name in the URL cannot be resolved. It is thrown before your if response.status != 200 line is reached, so control passes straight to your except block.

You need to spend some time looking into how exceptions work. Here's an example of something you could try.

def getUrl(url):
    """Skeleton: attempt the request, then report based on the status seen.

    ``status`` stays None unless the try body gets as far as assigning it,
    so every failure path ends up printing "Error".
    """
    status = None
    try:
        # do your normal stuff...
        # (`response` has to be produced by that elided request code)
        status = response.status
    except Exception as e:
        # do whatever you want here...
        pass
    finally:
        # Runs whether or not the try body raised.
        print("Awesome" if status == 200 else "Error")


回答3:

# The following code validates a URL in two steps: first it checks that the domain exists, then it checks the path attached to that domain.
from urlparse import urlparse
import urllib2
import socket
class ValidateURL(object):
    """Validate a URL in two steps: resolve its host, then open the URL.

    Usage: ``ValidateURL(url).startActivity()`` prints a single line that
    describes the outcome.
    """

    def __init__(self, url):
        self._url = url

    def startActivity(self):
        """Run both checks on the stored URL and print the result."""
        self._parts = urlparse(self._url)
        domain = self._parts.netloc
        # Resolve the bare host name: the netloc may also carry a port or
        # user info, which a DNS lookup would reject.
        if not self._checkDomain(self._parts.hostname):
            print("%s  domain does not exist" % (domain,))
        elif self._checkUrl(self._url):
            print("%s  is valid" % (self._url,))
        else:
            print("The path  %s  is not valid" % (self._parts.path,))

    # Checks whether the domain is right or not
    def _checkDomain(self, domain):
        """Return True if *domain* resolves to an address, else False.

        An empty or missing domain is reported as not existing.  Exceptions
        other than the socket errors handled below propagate to the caller
        instead of being reported as success.
        """
        if not domain:
            return False
        try:
            socket.gethostbyname_ex(domain)
        except (socket.gaierror, socket.error):
            return False
        return True

    # Checks whether the path is right or not
    def _checkUrl(self, url):
        """Return True if *url* can be opened, else False.

        urllib2.URLError (which includes HTTP error statuses), socket-level
        errors and the ValueError urllib2 raises for a URL it cannot
        interpret all count as "not valid".  Anything else propagates.
        """
        request = urllib2.Request(url)
        try:
            response = urllib2.urlopen(request)
        except (urllib2.URLError, socket.error, ValueError):
            return False
        # Only reachability matters; release the connection right away.
        response.close()
        return True

if __name__ == "__main__":
    # Run the validator over two sample URLs, in order.
    for sample in (
        'http://stackoverflow.com/questions/invalid-urls-throw-an-exception-python',
        'http://stackoverflow.com/questions/6414417/invalid-urls-throw-an-exception-python',
    ):
        ValidateURL(sample).startActivity()

Hope the solution I derived is sensible.