#---------- check_url.py ----------#
from httplib import HTTP
from urlparse import urlparse
def checkURL(url):
    """Issue an HTTP HEAD request for *url* and return the server's reply.

    The URL is split with ``urlparse``; the network location is used as the
    host to connect to and the path (plus any ``;params`` and ``?query``
    parts) is sent as the request target.  Only plain HTTP is spoken,
    whatever scheme the URL carries.

    Returns whatever ``HTTP.getreply()`` returns -- per the httplib
    documentation a ``(status, reason, headers)`` tuple.
    """
    p = urlparse(url)
    # An empty path (e.g. 'http://ibm.com') must be requested as '/';
    # sending '' would yield a malformed request line.
    target = p[2] or '/'
    # Keep the params and query so the resource that was asked for is the
    # one that gets probed.
    if p[3]:
        target += ';' + p[3]
    if p[4]:
        target += '?' + p[4]
    h = HTTP(p[1])
    try:
        h.putrequest('HEAD', target)
        h.endheaders()
        return h.getreply()
    finally:
        # Always release the socket, even if the request fails part-way.
        h.close()
if __name__ == '__main__':
for url in ('http://msnbc.com/nonsense','http://msnbc.com/',
'http://w3c.org/','http://w3c.org/nonsense',
'http://w3c.org/Consortium/','http://ibm.com/',
'http://ibm.com/nonsense'):
print url, checkURL(url)[:2]
------------------------------------------------------------------------
% python check_url.py
http://msnbc.com/nonsense (200, 'OK')
http://msnbc.com/ (302, 'Object moved')
http://w3c.org/ (301, 'Moved Permanently')
http://w3c.org/nonsense (301, 'Moved Permanently')
http://w3c.org/Consortium/ (301, 'Moved Permanently')
http://ibm.com/ (200, 'OK')
http://ibm.com/nonsense (404, 'Not Found')
虽然还有点小问题,不是100%准确(例如上面 http://msnbc.com/nonsense 这个不存在的页面也返回了 200,而 301/302 重定向只是被原样报告,并不会继续跟随)。不过对于大多数情况是没有问题的。
0 comments:
Post a Comment