import httplib
import urlparse
def getUrl(url):
    """Send a HEAD request for *url* and print whether it answered 200.

    Prints 'Awesome' when the response status is 200 and 'Error' for any
    other status.  Every exception raised along the way (for example a
    host name that cannot be resolved) is caught and printed instead of
    propagating, so the function always returns None.

    Only plain HTTP is attempted; the URL scheme is not inspected.
    """
    conn = None
    try:
        parts = urlparse.urlsplit(url)
        # 'http://host' has an empty path, which would produce an invalid
        # request line; fall back to the root resource.
        path = parts[2] or '/'
        if parts[3]:
            # The query string is part of the resource address; keep it.
            path = path + '?' + parts[3]
        # No explicit port argument: HTTPConnection extracts it from a
        # 'host:port' netloc and defaults to 80 otherwise.
        conn = httplib.HTTPConnection(parts[1])
        conn.request('HEAD', path, headers={'Accept': '*/*'})
        response = conn.getresponse()
        if response.status != 200:
            print('Error')
        else:
            print('Awesome')
    except Exception as e:
        print(e)
    finally:
        # Release the socket even when the request failed part-way.
        if conn is not None:
            conn.close()
I wrote the code above to check whether a given URL is valid. However, when I test it, every invalid URL throws an exception:
>>> getUrl('http://www.idlebrfffain.com')
[Errno -2] Name or service not known
Python version:
chaitu@ubuntu:~$ python -V
Python 2.6.4
Can anyone help me find out where exactly the mistake is?
You have to catch socket.error:
import httplib, socket
import urlparse
def getUrl(url):
    """Send a HEAD request for *url* and print whether it answered 200.

    Prints "Host ... does not exist" when the host name cannot be
    resolved, "Cannot connect to ..." when the connection attempt fails
    with another socket error, 'Awesome' for a 200 response and 'Error'
    for any other status.  Errors raised after the connection has been
    established are deliberately left uncaught so their traceback is
    visible.

    Only plain HTTP is attempted; the URL scheme is not inspected.
    """
    parts = urlparse.urlsplit(url)
    server = parts[1]
    # 'http://host' has an empty path, which would produce an invalid
    # request line; fall back to the root resource.
    path = parts[2] or '/'
    if parts[3]:
        # The query string is part of the resource address; keep it.
        path = path + '?' + parts[3]
    try:
        # No explicit port argument: HTTPConnection extracts it from a
        # 'host:port' netloc and defaults to 80 otherwise.
        conn = httplib.HTTPConnection(server)
    except httplib.InvalidURL:
        # Non-numeric port in the netloc; such a netloc can never resolve.
        print("Host %s does not exist" % server)
        return
    try:
        try:
            conn.connect()
        except socket.gaierror:
            print("Host %s does not exist" % conn.host)
            return
        except socket.error:
            print("Cannot connect to %s:%s." % (conn.host, conn.port))
            return
        conn.request('HEAD', path, headers={'Accept': '*/*'})
        response = conn.getresponse()
        if response.status != 200:
            print('Error')
        else:
            print('Awesome')
    finally:
        # Release the socket on every exit path, including early returns
        # and exceptions raised while talking to the server.
        conn.close()
# Demonstrate the two connection-failure branches of getUrl.
getUrl('http://www.idlebrfffain.com') # not a registered domain
getUrl('http://8.8.8.8') # not an HTTP server
Only put try: except: around specific lines, and only if you know which exceptions can occur there. Python will show you tracebacks for uncaught exceptions, so you can easily find out where the problem is.
That's supposed to happen. An exception is being thrown because the URL cannot be resolved. It is thrown before your `if response.status != 200` line is reached, which turns control over to your `except` block.
You need to spend some time looking into how Exceptions work. Here's an example of something you could try.
def getUrl(url):
    """Skeleton: print 'Awesome' only if the request produced status 200."""
    status_code = None  # stays None if the request never completes
    try:
        # do your normal stuff...
        status_code = response.status
    except Exception as e:
        # do whatever you want here...
        pass
    finally:
        # Runs on success and on failure alike, so a result is always printed.
        print("Awesome" if status_code == 200 else "Error")
# The following code validates a URL in two steps: first it validates the domain, then the path attached to that domain.
from urlparse import urlparse
import urllib2
import socket
class ValidateURL(object):
    """Validate a URL in two steps: resolve its domain, then open the URL."""

    def __init__(self, url):
        """Remember *url*; no network access happens until startActivity()."""
        self._url = url

    def startActivity(self):
        """Run both checks and print the outcome.

        Prints that the domain does not exist when the DNS lookup fails,
        that the path is not valid when the URL cannot be opened, and that
        the URL is valid otherwise.
        """
        self._parts = urlparse(self._url)
        # Resolve the bare host name: the netloc may also carry a port or
        # user info, which would make the DNS lookup fail spuriously.
        host = self._parts.hostname or self._parts[1]
        # The doubled spaces in the messages reproduce the separator that
        # the original comma-style print statements emitted.
        if not self._checkDomain(host):
            print('%s  domain does not exist' % self._parts[1])
            return
        if self._checkUrl(self._url):
            print('%s  is valid' % self._url)
        else:
            print('The path  %s  is not valid' % self._parts[2])

    # Checks whether the domain is right or not
    def _checkDomain(self, domain):
        """Return True if *domain* resolves via DNS, False otherwise."""
        try:
            socket.gethostbyname_ex(domain)
        except (socket.error, UnicodeError):
            # socket.gaierror is a subclass of socket.error.  A name that
            # cannot even be encoded for the lookup does not exist either.
            return False
        return True

    # Checks whether the path is right or not
    def _checkUrl(self, url):
        """Return True if *url* can be opened, False otherwise.

        The request object is kept on ``self._req``.
        """
        try:
            self._req = urllib2.Request(url)
            response = urllib2.urlopen(self._req)
        except (urllib2.URLError, ValueError, socket.error):
            # URLError covers HTTP error statuses and unreachable hosts;
            # ValueError is raised for URLs without a usable scheme.
            return False
        # Do not leak the connection opened by a successful request.
        response.close()
        return True
if __name__ == "__main__":
    # Build both validators first; constructing one does no network I/O.
    valid = ValidateURL(
        'http://stackoverflow.com/questions/invalid-urls-throw-an-exception-python')
    valid1 = ValidateURL(
        'http://stackoverflow.com/questions/6414417/invalid-urls-throw-an-exception-python')
    # Then run the checks in the same order as before.
    valid.startActivity()
    valid1.startActivity()
Hope the solution I derived is sensible.