From 810b6d81c8675562e2118402088ec17ce3ed0301 Mon Sep 17 00:00:00 2001 From: eskerda Date: Fri, 13 Oct 2023 23:23:32 +0200 Subject: [PATCH 1/3] [gbfs] add configurable cache deltas for feeds --- pybikes/contrib.py | 15 ++++++++++++++- pybikes/gbfs.py | 27 +++++++++++++++++++++++---- pybikes/utils.py | 14 ++++++++++---- 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/pybikes/contrib.py b/pybikes/contrib.py index 2a83dc7ec..6df776ab1 100644 --- a/pybikes/contrib.py +++ b/pybikes/contrib.py @@ -34,9 +34,14 @@ def __getitem__(self, key): raise KeyError('%s' % key) if key not in self.store: raise KeyError('%s' % key) + ts_value = self.store[key] - if time.time() - ts_value['ts'] > self.delta: + the_time = time.time() + delta = ts_value.get('delta', self.delta) + + if the_time - ts_value['ts'] > delta: raise KeyError('%s' % key) + return ts_value['value'] def __contains__(self, key): @@ -58,3 +63,11 @@ def __test_key__(self, key): def __transform_key__(self, key): return key + + def set_with_delta(self, key, value, delta): + """ Set a key-value with a specific delta """ + self.store[key] = { + 'value': value, + 'ts': time.time(), + 'delta': delta, + } diff --git a/pybikes/gbfs.py b/pybikes/gbfs.py index 1110a5c10..82898b91b 100644 --- a/pybikes/gbfs.py +++ b/pybikes/gbfs.py @@ -25,6 +25,17 @@ class Gbfs(BikeShareSystem): station_cls = None + # Specific deltas can be configured here to cache parts of the feed that do + # not change so often, like vehicle_types or station_information + # XXX: Additionally, some responses come with a ttl, which could be + # respected too + cache_deltas = { + 'gbfs': None, + 'station_information': None, + 'station_status': None, + 'vehicle_types': None, + } + def __init__( self, tag, @@ -80,7 +91,11 @@ def get_feeds(self, url, scraper, force_https): if self.feeds: return self.feeds - feed_data = scraper.request(url, raw=True) + feed_data = scraper.request( + url, + raw=True, + cache_with_delta=self.cache_deltas['gbfs'], + ) # do not hide Unauthorized or Too many requests status codes if scraper.last_request.status_code in [401, 429]: @@ -121,16 +136,20 @@ def update(self, scraper=None): feeds = self.get_feeds(self.feed_url, scraper, self.force_https) + info_delta = self.cache_deltas['station_information'] + status_delta = self.cache_deltas['station_status'] + # Station Information and Station Status data retrieval station_information = json.loads( - scraper.request(feeds['station_information']) + scraper.request(feeds['station_information'], cache_with_delta=info_delta) )['data']['stations'] station_status = json.loads( - scraper.request(feeds['station_status']) + scraper.request(feeds['station_status'], cache_with_delta=status_delta) )['data']['stations'] if 'vehicle_types' in feeds: - vehicle_info = json.loads(scraper.request(feeds['vehicle_types'])) + vehicle_delta = self.cache_deltas['vehicle_types'] + vehicle_info = json.loads(scraper.request(feeds['vehicle_types'], cache_with_delta=vehicle_delta)) # map vehicle id to vehicle info AND extra info resolver # for direct access vehicles = { diff --git a/pybikes/utils.py b/pybikes/utils.py index 68890aab3..3ff22afc6 100644 --- a/pybikes/utils.py +++ b/pybikes/utils.py @@ -57,7 +57,8 @@ def setUserAgent(self, user_agent): self.headers['User-Agent'] = user_agent def request(self, url, method='GET', params=None, data=None, raw=False, - headers=None, default_encoding='UTF-8', skip_cache=False): + headers=None, default_encoding='UTF-8', skip_cache=False, + cache_with_delta=None): if self.retry: retries = Retry(** self.retry_opts) @@ -66,8 +67,10 @@ def request(self, url, method='GET', params=None, data=None, raw=False, _headers = self.headers.copy() _headers.update(headers or {}) + cached = self.cachedict and url in self.cachedict and not skip_cache + # XXX proper encode arguments for proper call args -> response - if self.cachedict and url in self.cachedict and not skip_cache: + if cached: response = self.cachedict[url] else: response = self.session.request( @@ -99,8 +102,11 @@ def request(self, url, method='GET', params=None, data=None, raw=False, self.headers['Cookie'] = response.headers['set-cookie'] self.last_request = response - if self.cachedict is not None: - self.cachedict[url] = response + if not cached and self.cachedict is not None and response.status_code in [200, 206]: + if cache_with_delta: + self.cachedict.set_with_delta(url, response, delta=cache_with_delta) + else: + self.cachedict[url] = response return data From 8dd6cce8e2c67b13aac901df66730fc2ae39b007 Mon Sep 17 00:00:00 2001 From: eskerda Date: Sat, 14 Oct 2023 00:22:16 +0200 Subject: [PATCH 2/3] wip --- pybikes/gbfs.py | 35 +++++++++++++++++++++++------------ tests/test_instances.py | 2 +- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/pybikes/gbfs.py b/pybikes/gbfs.py index 82898b91b..145253767 100644 --- a/pybikes/gbfs.py +++ b/pybikes/gbfs.py @@ -12,6 +12,7 @@ from pybikes import BikeShareSystem, BikeShareStation, exceptions from pybikes.utils import PyBikesScraper, filter_bounds +from pybikes.contrib import TSTCache try: # Python 2 @@ -29,6 +30,7 @@ class Gbfs(BikeShareSystem): # not change so often, like vehicle_types or station_information # XXX: Additionally, some responses come with a ttl, which could be # respected too + cache = False cache_deltas = { 'gbfs': None, 'station_information': None, @@ -45,6 +47,9 @@ def __init__( station_information=False, station_status=False, ignore_errors=False, + cache=False, + cache_default_delta=60, + cache_deltas=None, retry=None, bbox=None, ): @@ -57,6 +62,9 @@ def __init__( self.retry = retry self.bbox = bbox + self.cache = (self.cache or cache) and TSTCache(delta=cache_default_delta) + self.cache_deltas.update(cache_deltas or {}) + # Allow hardcoding feed urls on initialization self.feeds = {} if station_information: @@ -129,27 +137,30 @@ def get_feeds(self, url, scraper, force_https): def update(self, scraper=None): - scraper = scraper or PyBikesScraper() + scraper = scraper or PyBikesScraper(self.cache or None) if self.retry: scraper.retry = True scraper.retry_opts.update(self.retry) feeds = self.get_feeds(self.feed_url, scraper, self.force_https) - info_delta = self.cache_deltas['station_information'] - status_delta = self.cache_deltas['station_status'] + cache_d = self.cache_deltas + + info_rq = scraper.request(feeds['station_information'], + cache_with_delta=cache_d['station_information'], + ) + station_information = json.loads(info_rq)['data']['stations'] - # Station Information and Station Status data retrieval - station_information = json.loads( - scraper.request(feeds['station_information'], cache_with_delta=info_delta) - )['data']['stations'] - station_status = json.loads( - scraper.request(feeds['station_status'], cache_with_delta=status_delta) - )['data']['stations'] + status_rq = scraper.request(feeds['station_status'], + cache_with_delta=cache_d['station_status'], + ) + station_status = json.loads(status_rq)['data']['stations'] if 'vehicle_types' in feeds: - vehicle_delta = self.cache_deltas['vehicle_types'] - vehicle_info = json.loads(scraper.request(feeds['vehicle_types'], cache_with_delta=vehicle_delta)) + vehicle_rq = scraper.request(feeds['vehicle_types'], + cache_with_delta=cache_d['vehicle_types'], + ) + vehicle_info = json.loads(vehicle_rq) # map vehicle id to vehicle info AND extra info resolver # for direct access vehicles = { diff --git a/tests/test_instances.py b/tests/test_instances.py index 70030c440..d1358155c 100644 --- a/tests/test_instances.py +++ b/tests/test_instances.py @@ -60,7 +60,7 @@ def test_uses_scraper(self, instance, i_data, cls, mod): def test_update(self, instance, i_data, cls, mod, record_property): scraper = pybikes.PyBikesScraper( # use a simple dict cache for systems that use a single endpoint - cachedict=cache if instance.unifeed else None, + cachedict=cache if (instance.unifeed or instance.cache) else None, # reuse headers per mod headers=headers.setdefault(mod, {}), ) From 8f9410b00ecb2c09775d6493d7e5e5907cce4960 Mon Sep 17 00:00:00 2001 From: eskerda Date: Sat, 14 Oct 2023 00:22:32 +0200 Subject: [PATCH 3/3] [gbfs] set DB cache deltas --- pybikes/deutschebahn.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/pybikes/deutschebahn.py b/pybikes/deutschebahn.py index f94026251..62be5877e 100644 --- a/pybikes/deutschebahn.py +++ b/pybikes/deutschebahn.py @@ -14,6 +14,18 @@ class DB(Gbfs): authed = True + cache = True + cache_deltas = { + # 12 hours + 'gbfs': 12 * 60 * 60, + # 1 hour + 'station_information': 60 * 60, + # 60 seconds + 'station_status': 60, + # 12 hours + 'vehicle_types': 12 * 60 * 60, + } + meta = { 'company': ['Deutsche Bahn AG'], 'system': 'deutschebahn', @@ -52,12 +64,5 @@ class Callabike(DB): provider = 'CallABike' - # caches the feed for 60s - cache = TSTCache(delta=60) - def __init__(self, * args, ** kwargs): super(Callabike, self).__init__(* args, provider=Callabike.provider, ** kwargs) - - def update(self, scraper=None): - scraper = scraper or PyBikesScraper(self.cache) - super(Callabike, self).update(scraper)