diff --git a/pybikes/contrib.py b/pybikes/contrib.py index 2a83dc7ec..6df776ab1 100644 --- a/pybikes/contrib.py +++ b/pybikes/contrib.py @@ -34,9 +34,14 @@ def __getitem__(self, key): raise KeyError('%s' % key) if key not in self.store: raise KeyError('%s' % key) + ts_value = self.store[key] - if time.time() - ts_value['ts'] > self.delta: + the_time = time.time() + delta = ts_value.get('delta', self.delta) + + if the_time - ts_value['ts'] > delta: raise KeyError('%s' % key) + return ts_value['value'] def __contains__(self, key): @@ -58,3 +63,11 @@ def __test_key__(self, key): def __transform_key__(self, key): return key + + def set_with_delta(self, key, value, delta): + """ Set a key-value with a specific delta """ + self.store[key] = { + 'value': value, + 'ts': time.time(), + 'delta': delta, + } diff --git a/pybikes/deutschebahn.py b/pybikes/deutschebahn.py index f94026251..62be5877e 100644 --- a/pybikes/deutschebahn.py +++ b/pybikes/deutschebahn.py @@ -14,6 +14,18 @@ class DB(Gbfs): authed = True + cache = True + cache_deltas = { + # 12 hours + 'gbfs': 12 * 60 * 60, + # 1 hour + 'station_information': 60 * 60, + # 60 seconds + 'station_status': 60, + # 12 hours + 'vehicle_types': 12 * 60 * 60, + } + meta = { 'company': ['Deutsche Bahn AG'], 'system': 'deutschebahn', @@ -52,12 +64,5 @@ class Callabike(DB): provider = 'CallABike' - # caches the feed for 60s - cache = TSTCache(delta=60) - def __init__(self, * args, ** kwargs): super(Callabike, self).__init__(* args, provider=Callabike.provider, ** kwargs) - - def update(self, scraper=None): - scraper = scraper or PyBikesScraper(self.cache) - super(Callabike, self).update(scraper) diff --git a/pybikes/gbfs.py b/pybikes/gbfs.py index 1110a5c10..145253767 100644 --- a/pybikes/gbfs.py +++ b/pybikes/gbfs.py @@ -12,6 +12,7 @@ from pybikes import BikeShareSystem, BikeShareStation, exceptions from pybikes.utils import PyBikesScraper, filter_bounds +from pybikes.contrib import TSTCache try: # Python 2 @@ -25,6 +26,18 @@ class Gbfs(BikeShareSystem): station_cls = None + # Specific deltas can be configured here to cache parts of the feed that do + # not change so often, like vehicle_types or station_information + # XXX: Additionally, some responses come with a ttl, which could be + # respected too + cache = False + cache_deltas = { + 'gbfs': None, + 'station_information': None, + 'station_status': None, + 'vehicle_types': None, + } + def __init__( self, tag, @@ -34,6 +47,9 @@ def __init__( station_information=False, station_status=False, ignore_errors=False, + cache=False, + cache_default_delta=60, + cache_deltas=None, retry=None, bbox=None, ): @@ -46,6 +62,9 @@ def __init__( self.retry = retry self.bbox = bbox + self.cache = (self.cache or cache) and TSTCache(delta=cache_default_delta) + self.cache_deltas.update(cache_deltas or {}) + # Allow hardcoding feed urls on initialization self.feeds = {} if station_information: @@ -80,7 +99,11 @@ def get_feeds(self, url, scraper, force_https): if self.feeds: return self.feeds - feed_data = scraper.request(url, raw=True) + feed_data = scraper.request( + url, + raw=True, + cache_with_delta=self.cache_deltas['gbfs'], + ) # do not hide Unauthorized or Too many requests status codes if scraper.last_request.status_code in [401, 429]: @@ -114,23 +137,30 @@ def get_feeds(self, url, scraper, force_https): def update(self, scraper=None): - scraper = scraper or PyBikesScraper() + scraper = scraper or PyBikesScraper(self.cache or None) if self.retry: scraper.retry = True scraper.retry_opts.update(self.retry) feeds = self.get_feeds(self.feed_url, scraper, self.force_https) - # Station Information and Station Status data retrieval - station_information = json.loads( - scraper.request(feeds['station_information']) - )['data']['stations'] - station_status = json.loads( - scraper.request(feeds['station_status']) - )['data']['stations'] + cache_d = self.cache_deltas + + info_rq = scraper.request(feeds['station_information'], + cache_with_delta=cache_d['station_information'], + ) + station_information = json.loads(info_rq)['data']['stations'] + + status_rq = scraper.request(feeds['station_status'], + cache_with_delta=cache_d['station_status'], + ) + station_status = json.loads(status_rq)['data']['stations'] if 'vehicle_types' in feeds: - vehicle_info = json.loads(scraper.request(feeds['vehicle_types'])) + vehicle_rq = scraper.request(feeds['vehicle_types'], + cache_with_delta=cache_d['vehicle_types'], + ) + vehicle_info = json.loads(vehicle_rq) # map vehicle id to vehicle info AND extra info resolver # for direct access vehicles = { diff --git a/pybikes/utils.py b/pybikes/utils.py index 68890aab3..3ff22afc6 100644 --- a/pybikes/utils.py +++ b/pybikes/utils.py @@ -57,7 +57,8 @@ def setUserAgent(self, user_agent): self.headers['User-Agent'] = user_agent def request(self, url, method='GET', params=None, data=None, raw=False, - headers=None, default_encoding='UTF-8', skip_cache=False): + headers=None, default_encoding='UTF-8', skip_cache=False, + cache_with_delta=None): if self.retry: retries = Retry(** self.retry_opts) @@ -66,8 +67,10 @@ def request(self, url, method='GET', params=None, data=None, raw=False, _headers = self.headers.copy() _headers.update(headers or {}) + cached = self.cachedict and url in self.cachedict and not skip_cache + # XXX proper encode arguments for proper call args -> response - if self.cachedict and url in self.cachedict and not skip_cache: + if cached: response = self.cachedict[url] else: response = self.session.request( @@ -99,8 +102,11 @@ def request(self, url, method='GET', params=None, data=None, raw=False, self.headers['Cookie'] = response.headers['set-cookie'] self.last_request = response - if self.cachedict is not None: - self.cachedict[url] = response + if not cached and self.cachedict is not None and response.status_code in [200, 206]: + if cache_with_delta: + self.cachedict.set_with_delta(url, response, delta=cache_with_delta) + else: + self.cachedict[url] = response return data diff --git a/tests/test_instances.py b/tests/test_instances.py index 70030c440..d1358155c 100644 --- a/tests/test_instances.py +++ b/tests/test_instances.py @@ -60,7 +60,7 @@ def test_uses_scraper(self, instance, i_data, cls, mod): def test_update(self, instance, i_data, cls, mod, record_property): scraper = pybikes.PyBikesScraper( # use a simple dict cache for systems that use a single endpoint - cachedict=cache if instance.unifeed else None, + cachedict=cache if (instance.unifeed or instance.cache) else None, # reuse headers per mod headers=headers.setdefault(mod, {}), )