开发者

Invalid URLs throw an exception - python

开发者 https://www.devze.com 2023-03-14 16:12 出处:网络
import httplib import urlparse def getUrl(url): try: parts = urlparse.urlsplit(url) server = parts[1] path = parts[2]
import httplib
import urlparse

def getUrl(url):
   try:
     parts = urlparse.urlsplit(url)
     server = parts[1]
     path = parts[2]
     obj = httplib.HTTPConnection(server,80)
     obj.connect()
     obj.putrequest('HEAD',path)
     obj.putheader('Accept','*/*')
     obj.endheaders()
     response = obj.getresponse()
     contentType = response.getheader("content-type", "unknown")
     obj.close()
     if response.status !=200开发者_如何学Go:
       print 'Error'
     else:
       print 'Awesome'
   except Exception, e:
     print e

I wrote the code above to check if a given URL is valid or not. But somehow when I test it, for every invalid url it throws an exception.

>>> getUrl('http://www.idlebrfffain.com')
[Errno -2] Name or service not known

Python version:

chaitu@ubuntu:~$ python -V
Python 2.6.4

Can anyone help me find out where exactly the mistake is?


You have to catch socket.error:

import httplib, socket
import urlparse

def getUrl(url):
    parts = urlparse.urlsplit(url)
    server = parts[1]
    path = parts[2]
    obj = httplib.HTTPConnection(server,80)

    try:
        obj.connect()
    except socket.gaierror:
        print "Host %s does not exist" % server
        return
    except socket.error:
        print "Cannot connect to %s:%s." % (server, 80)
        return

    obj.putrequest('HEAD',path)
    obj.putheader('Accept','*/*')
    obj.endheaders()
    response = obj.getresponse()
    contentType = response.getheader("content-type", "unknown")
    obj.close()
    if response.status !=200:
        print 'Error'
    else:
        print 'Awesome'


# Demo calls exercising the two failure branches of getUrl().
getUrl('http://www.idlebrfffain.com') # not a registered domain
getUrl('http://8.8.8.8') # not an HTTP server

Wrap only specific lines in try: ... except:, and only when you know which exceptions those lines can raise. Python shows a traceback for every uncaught exception, so you can easily find out where the problem is.


That's supposed to happen. An exception is thrown because the URL cannot be resolved. It is raised before execution reaches your if response.status != 200 line, so control jumps straight to your except block.

You need to spend some time looking into how Exceptions work. Here's an example of something you could try.

def getUrl(url):
    """Skeleton: print the result even when the request code raises.

    The request itself is elided; `response` is expected to be produced
    by the code that replaces the first placeholder comment.
    """
    # Stays None if an exception is raised before the assignment below,
    # in which case the finally block prints "Error".
    status = None
    try:
        # do your normal stuff...
        status = response.status
    except Exception, e:
        # do whatever you want here...
        pass
    finally:
        # Runs whether or not an exception was raised above.
        if status != 200:
            print "Error"
        else:
            print "Awesome"


# The following code validates a URL in two steps: first it validates the domain, then the path attached to that domain.
from urlparse import urlparse
import urllib2
import socket
class ValidateURL:
    def __init__(self, url):
        self._url = url

    def startActivity(self):
        self._parts = urlparse(self._url)
        a = self._checkDomain(self._parts[1])
        if a:
            b = self._checkUrl(self._url)
            if b == 1:
                print self._url,' is valid'
            else:
                print 'The path ',self._parts[2],' is not valid'
        else:
            print self._parts[1],' domain does not exist'

    #Checks whether the domain is right or not
    def _checkDomain(self,domain):
        x = 1
        try:
            socket.gethostbyname_ex(domain)
        except socket.gaierror:
            x = 0
        except socket.error:
            x = 0
        finally:
            return x

    #Checks whether the path is right or not
    def _checkUrl(self,url):
        x = 1
        self._req = urllib2.Request(url)
        try: 
            urllib2.urlopen(self._req)
        except urllib2.URLError, e:
            #print e
            x = 0
        finally:
            return x

if __name__ == "__main__":
    # Same host and slug, but without the numeric question id in the path.
    valid = ValidateURL('http://stackoverflow.com/questions/invalid-urls-throw-an-exception-python')
    valid.startActivity()
    # Same URL with the numeric question id included.
    valid1 = ValidateURL('http://stackoverflow.com/questions/6414417/invalid-urls-throw-an-exception-python')
    valid1.startActivity()

Hope the solution I derived is sensible.

0

精彩评论

暂无评论...
验证码 换一张
取 消

关注公众号