Package openid :: Module fetchers
[frames] | [no frames]

Source Code for Module openid.fetchers

  1  # -*- test-case-name: openid.test.test_fetchers -*- 
  2  """ 
  3  This module contains the HTTP fetcher interface and several implementations. 
  4  """ 
  5   
  6  __all__ = ['fetch', 'getDefaultFetcher', 'setDefaultFetcher', 'HTTPResponse', 
  7             'HTTPFetcher', 'createHTTPFetcher', 'HTTPFetchingError', 'HTTPError'] 
  8   
  9  import urllib2 
 10  import time 
 11  import cStringIO 
 12  import sys 
 13   
 14  import openid 
 15  import openid.urinorm 
 16   
 17  # try to import pycurl, which will let us use CurlHTTPFetcher 
 18  try: 
 19      import pycurl 
 20  except ImportError: 
 21      pycurl = None 
 22   
 23  USER_AGENT = "python-openid/%s (%s)" % (openid.__version__, sys.platform) 
 24   
def fetch(url, body=None, headers=None):
    """Fetch C{url} with the default fetcher.

    This convenience function is all that most users should need: it
    delegates to whatever fetcher L{getDefaultFetcher} returns.

    @raises Exception: any exceptions that may be raised by the default fetcher
    """
    return getDefaultFetcher().fetch(url, body, headers)
33
def createHTTPFetcher():
    """Create a default HTTP fetcher instance

    prefers Curl to urllib2."""
    if pycurl is not None:
        return CurlHTTPFetcher()
    return Urllib2Fetcher()
44 45 # Contains the currently set HTTP fetcher. If it is set to None, the 46 # library will call createHTTPFetcher() to set it. Do not access this 47 # variable outside of this module. 48 _default_fetcher = None 49
def getDefaultFetcher():
    """Return the default fetcher instance, creating (and installing)
    one first if no fetcher has been set yet.

    @return: the default fetcher
    @rtype: HTTPFetcher
    """
    global _default_fetcher

    if _default_fetcher is None:
        # Lazily construct and install the library default on first use.
        setDefaultFetcher(createHTTPFetcher())

    return _default_fetcher
63
def setDefaultFetcher(fetcher, wrap_exceptions=True):
    """Set the default fetcher

    @param fetcher: The fetcher to use as the default HTTP fetcher
    @type fetcher: HTTPFetcher

    @param wrap_exceptions: Whether to wrap exceptions thrown by the
        fetcher with HTTPFetchingError so that they may be caught
        more easily. By default, exceptions will be wrapped. In
        general, unwrapped fetchers are useful for debugging of
        fetching errors or if your fetcher raises well-known
        exceptions that you would like to catch.
    @type wrap_exceptions: bool
    """
    global _default_fetcher

    if fetcher is not None and wrap_exceptions:
        fetcher = ExceptionWrappingFetcher(fetcher)

    _default_fetcher = fetcher
83
def usingCurl():
    """Whether the currently set HTTP fetcher is a Curl HTTP fetcher."""
    fetcher = getDefaultFetcher()
    return isinstance(fetcher, CurlHTTPFetcher)
87
class HTTPResponse(object):
    """The result of an HTTP fetch.

    @ivar final_url: The URL of the document after following any redirects
    @ivar status: The HTTP status code of the response
    @ivar headers: The response headers, as a dictionary
    @ivar body: The body of the response
    """
    headers = None
    status = None
    body = None
    final_url = None

    def __init__(self, final_url=None, status=None, headers=None, body=None):
        self.final_url = final_url
        self.status = status
        self.headers = headers
        self.body = body

    def __repr__(self):
        return "<%s status %s for %s>" % (
            self.__class__.__name__, self.status, self.final_url)
105
class HTTPFetcher(object):
    """
    This class is the interface for openid HTTP fetchers. This
    interface is only important if you need to write a new fetcher for
    some reason.
    """

    def fetch(self, url, body=None, headers=None):
        """
        Perform an HTTP POST or GET, following redirects along the
        way. The request is a POST when a body is supplied, and a GET
        otherwise.

        @param headers: HTTP headers to include with the request
        @type headers: {str:str}

        @return: An object representing the server's HTTP response. If
            there are network or protocol errors, an exception will be
            raised. HTTP error responses, like 404 or 500, do not
            cause exceptions.

        @rtype: L{HTTPResponse}

        @raise Exception: Different implementations will raise
            different errors based on the underlying HTTP library.
        """
        # Subclasses must override this method.
        raise NotImplementedError
134
135 -def _allowedURL(url):
136 return url.startswith('http://') or url.startswith('https://')
137
class HTTPFetchingError(Exception):
    """Wrapper for any exception raised by the underlying fetcher when
    the ExceptionWrappingFetcher is in use.

    @ivar why: The exception that caused this exception
    """
    def __init__(self, why=None):
        Exception.__init__(self, why)
        # Keep the original exception available for inspection.
        self.why = why
147
class ExceptionWrappingFetcher(HTTPFetcher):
    """Fetcher that delegates to another fetcher, wrapping anything the
    wrapped fetcher raises in an HTTPFetchingError.

    @cvar uncaught_exceptions: Exceptions that should be exposed to the
        user if they are raised by the fetch call
    """

    uncaught_exceptions = (SystemExit, KeyboardInterrupt, MemoryError)

    def __init__(self, fetcher):
        self.fetcher = fetcher

    def fetch(self, *args, **kwargs):
        try:
            return self.fetcher.fetch(*args, **kwargs)
        except self.uncaught_exceptions:
            # Never hide these; re-raise unchanged.
            raise
        except:
            exc_cls, exc_inst = sys.exc_info()[:2]
            # Old-style string exceptions have no instance object; in
            # that case the "class" slot holds the string itself.
            raise HTTPFetchingError(
                why=exc_inst if exc_inst is not None else exc_cls)
172
173 -class Urllib2Fetcher(HTTPFetcher):
174 """An C{L{HTTPFetcher}} that uses urllib2. 175 """
176 - def fetch(self, url, body=None, headers=None):
177 if not _allowedURL(url): 178 raise ValueError('Bad URL scheme: %r' % (url,)) 179 180 if headers is None: 181 headers = {} 182 183 headers.setdefault( 184 'User-Agent', 185 "%s Python-urllib/%s" % (USER_AGENT, urllib2.__version__,)) 186 187 req = urllib2.Request(url, data=body, headers=headers) 188 try: 189 f = urllib2.urlopen(req) 190 try: 191 return self._makeResponse(f) 192 finally: 193 f.close() 194 except urllib2.HTTPError, why: 195 try: 196 return self._makeResponse(why) 197 finally: 198 why.close()
199
200 - def _makeResponse(self, urllib2_response):
201 resp = HTTPResponse() 202 resp.body = urllib2_response.read() 203 resp.final_url = urllib2_response.geturl() 204 resp.headers = dict(urllib2_response.info().items()) 205 206 if hasattr(urllib2_response, 'code'): 207 resp.status = urllib2_response.code 208 else: 209 resp.status = 200 210 211 return resp
212
class HTTPError(HTTPFetchingError):
    """Raised by the C{L{CurlHTTPFetcher}} when it encounters an
    exceptional situation while fetching a URL.
    """
    pass
# XXX: define what we mean by paranoid, and make sure it is.
class CurlHTTPFetcher(HTTPFetcher):
    """
    An C{L{HTTPFetcher}} that uses pycurl for fetching.
    See U{http://pycurl.sourceforge.net/}.
    """
    # Overall time budget for a fetch, including all redirects.
    ALLOWED_TIME = 20 # seconds

    def __init__(self):
        HTTPFetcher.__init__(self)
        if pycurl is None:
            raise RuntimeError('Cannot find pycurl library')

    def _parseHeaders(self, header_file):
        """Parse the raw header data captured by pycurl into a dict of
        lowercased header name -> value.

        @raise HTTPError: if the header block is malformed
        """
        header_file.seek(0)

        # Remove the status line from the beginning of the input
        unused_http_status_line = header_file.readline()
        lines = [line.strip() for line in header_file]

        # and the blank line from the end
        empty_line = lines.pop()
        if empty_line:
            # BUGFIX: this previously interpolated the undefined name
            # 'line', raising NameError instead of the intended HTTPError.
            raise HTTPError("No blank line at end of headers: %r"
                            % (empty_line,))

        headers = {}
        for line in lines:
            try:
                name, value = line.split(':', 1)
            except ValueError:
                raise HTTPError(
                    "Malformed HTTP header line in response: %r" % (line,))

            value = value.strip()

            # HTTP headers are case-insensitive
            name = name.lower()
            headers[name] = value

        return headers

    def _checkURL(self, url):
        # XXX: document that this can be overridden to match desired policy
        # XXX: make sure url is well-formed and routeable
        return _allowedURL(url)

    def fetch(self, url, body=None, headers=None):
        """Fetch url (POST if body is given, otherwise GET), following
        redirects until ALLOWED_TIME seconds have elapsed.

        @raise HTTPError: for disallowed URLs, redirects without a
            Location header, malformed response headers, or timeout
        @rtype: L{HTTPResponse}
        """
        stop = int(time.time()) + self.ALLOWED_TIME
        off = self.ALLOWED_TIME

        if headers is None:
            headers = {}
        else:
            # Work on a copy so the setdefault below does not mutate
            # the caller's dictionary.
            headers = dict(headers)

        headers.setdefault('User-Agent',
                           "%s %s" % (USER_AGENT, pycurl.version,))

        # (Removed a dead 'if headers is not None' guard: headers is
        # always a dict by this point.)
        header_list = []
        for header_name, header_value in headers.iteritems():
            header_list.append('%s: %s' % (header_name, header_value))

        c = pycurl.Curl()
        try:
            c.setopt(pycurl.NOSIGNAL, 1)

            if header_list:
                c.setopt(pycurl.HTTPHEADER, header_list)

            # Presence of a body indicates that we should do a POST
            if body is not None:
                c.setopt(pycurl.POST, 1)
                c.setopt(pycurl.POSTFIELDS, body)

            while off > 0:
                if not self._checkURL(url):
                    raise HTTPError("Fetching URL not allowed: %r" % (url,))

                data = cStringIO.StringIO()
                response_header_data = cStringIO.StringIO()
                c.setopt(pycurl.WRITEFUNCTION, data.write)
                c.setopt(pycurl.HEADERFUNCTION, response_header_data.write)
                # Spend only the remaining time budget on this request.
                c.setopt(pycurl.TIMEOUT, off)
                c.setopt(pycurl.URL, openid.urinorm.urinorm(url))

                c.perform()

                response_headers = self._parseHeaders(response_header_data)
                code = c.getinfo(pycurl.RESPONSE_CODE)
                if code in [301, 302, 303, 307]:
                    url = response_headers.get('location')
                    if url is None:
                        raise HTTPError(
                            'Redirect (%s) returned without a location' % code)

                    # Redirects are always GETs
                    c.setopt(pycurl.POST, 0)

                    # There is no way to reset POSTFIELDS to empty and
                    # reuse the connection, but we only use it once.
                else:
                    resp = HTTPResponse()
                    resp.headers = response_headers
                    resp.status = code
                    resp.final_url = url
                    resp.body = data.getvalue()
                    return resp

                off = stop - int(time.time())

            raise HTTPError("Timed out fetching: %r" % (url,))
        finally:
            c.close()
332