diff --git a/.gitignore b/.gitignore index 21b2f65e..cde75130 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +.cache +web_cache .DS_Store *.pyc *.pyo @@ -10,4 +12,4 @@ include .Python docs/_build build/ -.tox \ No newline at end of file +.tox diff --git a/README.rst b/README.rst index 7063ebfb..098631b0 100644 --- a/README.rst +++ b/README.rst @@ -38,5 +38,5 @@ result in a simple dictionary. For more info, check out the docs_ .. _docs: http://cachecontrol.readthedocs.org/en/latest/ -.. _httplib2: https://github.com/jcgregorio/httplib2 +.. _httplib2: https://github.com/httplib2/httplib2 .. _requests: http://docs.python-requests.org/ diff --git a/cachecontrol/cache.py b/cachecontrol/cache.py index 7389a73f..73e1bf70 100644 --- a/cachecontrol/cache.py +++ b/cachecontrol/cache.py @@ -10,7 +10,7 @@ class BaseCache(object): def get(self, key): raise NotImplemented() - def set(self, key, value): + def set(self, key, value, expires=None): raise NotImplemented() def delete(self, key): @@ -29,7 +29,7 @@ def __init__(self, init_dict=None): def get(self, key): return self.data.get(key, None) - def set(self, key, value): + def set(self, key, value, expires=None): with self.lock: self.data.update({key: value}) diff --git a/cachecontrol/caches/file_cache.py b/cachecontrol/caches/file_cache.py index d4d9e21f..66df9368 100644 --- a/cachecontrol/caches/file_cache.py +++ b/cachecontrol/caches/file_cache.py @@ -101,7 +101,7 @@ def get(self, key): with open(name, 'rb') as fh: return fh.read() - def set(self, key, value): + def set(self, key, value, expires=None): name = self._fn(key) # Make sure the directory exists diff --git a/cachecontrol/caches/redis_cache.py b/cachecontrol/caches/redis_cache.py index a5a15aa8..ca4b7bb6 100644 --- a/cachecontrol/caches/redis_cache.py +++ b/cachecontrol/caches/redis_cache.py @@ -26,9 +26,24 @@ def set(self, key, value, expires=None): if not expires: self.conn.set(key, value) else: - expires = expires - datetime.utcnow() - self.conn.setex(key, total_seconds(expires), value) - + # the keyword arguments are to account for a Redis v StrictRedis issue + # with pyredis being a mess. this is compatible with both. + + redis_class = self.conn.__class__ + if redis_class == 'redis.client.StrictRedis': + # StrictRedis + self.conn.setex(key, expires, value) + elif redis_class == 'redis.client.Redis': + # Redis + self.conn.setex(key, value, expires) + else: + # unknown redis client type. give it a shot. + + try: + self.conn.setex(key, expires, value) + except Exception as e: + # complete failure. give up and don't set a date. + self.conn.set(key, value) def delete(self, key): self.conn.delete(key) diff --git a/cachecontrol/controller.py b/cachecontrol/controller.py index 8436b955..31c5b0f0 100644 --- a/cachecontrol/controller.py +++ b/cachecontrol/controller.py @@ -164,9 +164,14 @@ def cached_request(self, request): return False now = time.time() - date = calendar.timegm( - parsedate_tz(headers['date']) - ) + + if 'date' in headers: + date = calendar.timegm( + parsedate_tz(headers['date']) + ) + else: + date = 0 + current_age = max(0, now - date) logger.debug('Current age based on date: %i', current_age) @@ -242,6 +247,7 @@ def conditional_headers(self, request): def cache_response(self, request, response, body=None, status_codes=None): + """ Algorithm for caching requests. @@ -260,6 +266,16 @@ def cache_response(self, request, response, body=None, response_headers = CaseInsensitiveDict(response.headers) + # what time is it now and from the header + + now = time.time() + if 'date' in response_headers: + date = calendar.timegm( + parsedate_tz(response_headers['date']) + ) + else: + date = 0 + # If we've been given a body, our response has a Content-Length, that # Content-Length is valid then we can check to see if the body we've # been given matches the expected size, and if it doesn't we'll just @@ -291,9 +307,22 @@ def cache_response(self, request, response, body=None, # If we've been given an etag, then keep the response if self.cache_etags and 'etag' in response_headers: logger.debug('Caching due to etag') + + if response_headers.get('expires'): + expires = parsedate_tz(response_headers['expires']) + if expires is not None: + expire_time = calendar.timegm(expires) - date + else: + expire_time = 0 + + expire_time = max(expire_time, 14*86400) + + logger.debug('etag object cached for {0} seconds'.format(expire_time)) + self.cache.set( cache_url, self.serializer.dumps(request, response, body=body), + expires=expire_time ) # Add to the cache any 301s. We do this before looking that @@ -315,16 +344,25 @@ def cache_response(self, request, response, body=None, self.cache.set( cache_url, self.serializer.dumps(request, response, body=body), + expires=cc['max-age'], ) # If the request can expire, it means we should cache it # in the meantime. elif 'expires' in response_headers: if response_headers['expires']: - logger.debug('Caching b/c of expires header') + + expires = parsedate_tz(response_headers['expires']) + if expires is not None: + expire_time = calendar.timegm(expires) - date + else: + expire_time = None + + logger.debug('Caching b/c of expires header. expires in {0} seconds'.format(expire_time)) self.cache.set( cache_url, self.serializer.dumps(request, response, body=body), + expires=expire_time, ) def update_cached_response(self, request, response): diff --git a/tests/test_cache_control.py b/tests/test_cache_control.py index 77b8f90d..d62b815d 100644 --- a/tests/test_cache_control.py +++ b/tests/test_cache_control.py @@ -91,7 +91,7 @@ def test_cache_response_cache_max_age(self, cc): req = self.req() cc.cache_response(req, resp) cc.serializer.dumps.assert_called_with(req, resp, body=None) - cc.cache.set.assert_called_with(self.url, ANY) + cc.cache.set.assert_called_with(self.url, ANY, expires=3600) def test_cache_response_cache_max_age_with_invalid_value_not_cached(self, cc): now = time.strftime(TIME_FMT, time.gmtime())