From 8f064c97cf0e694ff83f02e35bf479799044c1d6 Mon Sep 17 00:00:00 2001 From: Pat Phongsvirajati Date: Fri, 8 Mar 2024 09:17:59 -0500 Subject: [PATCH 1/5] Add capability to block user agents --- webservices/rest.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/webservices/rest.py b/webservices/rest.py index 2ba181c1b..f8cfe5236 100644 --- a/webservices/rest.py +++ b/webservices/rest.py @@ -145,9 +145,13 @@ def handle_error(error): RESTRICT_MESSAGE = "We apologize for the inconvenience, but we are temporarily " \ "blocking API traffic. Please contact apiinfo@fec.gov if this is an urgent issue." +# list of blocked user agent strings: ex: Googlebot, Bingbot, etc that will be result in the request +# being blocked if the user-agent header contains any of the specified strings +BLOCKED_USER_AGENTS = utils.split_env_var(env.get_credential('FEC_API_BLOCKED_USER_AGENTS', '')) + @app.before_request -def limit_remote_addr(): +def limit_access_based_on_request(): """ If `FEC_API_USE_PROXY` is set: - Reject all requests that are not routed through the API Umbrella @@ -176,6 +180,10 @@ def limit_remote_addr(): if request_api_key_id not in BYPASS_RESTRICTION_API_KEY_IDS: # Service unavailable abort(503, RESTRICT_MESSAGE) + user_agent = request.headers.get('User-Agent') + for blocked_agent in BLOCKED_USER_AGENTS: + if len(blocked_agent) > 0 and blocked_agent in user_agent: + abort(429) # Too many requests def get_cache_header(url): From e104bdc4ec5fa5ca01d5cdfc178c6151e6857bcd Mon Sep 17 00:00:00 2001 From: Pat Phongsvirajati Date: Fri, 8 Mar 2024 09:58:10 -0500 Subject: [PATCH 2/5] update to 403 and add check for user-agent first --- webservices/rest.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/webservices/rest.py b/webservices/rest.py index f8cfe5236..a7b1f09c0 100644 --- a/webservices/rest.py +++ b/webservices/rest.py @@ -181,9 +181,10 @@ def limit_access_based_on_request(): # Service unavailable abort(503, 
RESTRICT_MESSAGE) user_agent = request.headers.get('User-Agent') - for blocked_agent in BLOCKED_USER_AGENTS: - if len(blocked_agent) > 0 and blocked_agent in user_agent: - abort(429) # Too many requests + if user_agent and BLOCKED_USER_AGENTS: + for blocked_agent in BLOCKED_USER_AGENTS: + if len(blocked_agent) > 0 and blocked_agent in user_agent: + abort(403) # Too many requests def get_cache_header(url): From 1d71e42ea4c9163e8dfc0343f92954f9fd1a9250 Mon Sep 17 00:00:00 2001 From: Laura Beaufort Date: Fri, 8 Mar 2024 10:12:13 -0500 Subject: [PATCH 3/5] Streamline logic --- webservices/rest.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/webservices/rest.py b/webservices/rest.py index a7b1f09c0..b757144ad 100644 --- a/webservices/rest.py +++ b/webservices/rest.py @@ -146,7 +146,7 @@ def handle_error(error): "blocking API traffic. Please contact apiinfo@fec.gov if this is an urgent issue." # list of blocked user agent strings: ex: Googlebot, Bingbot, etc that will be result in the request -# being blocked if the user-agent header contains any of the specified strings +# being blocked if the user-agent header contains any of the specified strings BLOCKED_USER_AGENTS = utils.split_env_var(env.get_credential('FEC_API_BLOCKED_USER_AGENTS', '')) @@ -182,9 +182,8 @@ def limit_access_based_on_request(): abort(503, RESTRICT_MESSAGE) user_agent = request.headers.get('User-Agent') if user_agent and BLOCKED_USER_AGENTS: - for blocked_agent in BLOCKED_USER_AGENTS: - if len(blocked_agent) > 0 and blocked_agent in user_agent: - abort(403) # Too many requests + if user_agent in BLOCKED_USER_AGENTS: + abort(403) # Forbidden def get_cache_header(url): From 20230c4429ad9c94a860c465ae10aedc270150ac Mon Sep 17 00:00:00 2001 From: Laura Beaufort Date: Fri, 8 Mar 2024 10:55:16 -0500 Subject: [PATCH 4/5] Check if agent contains blocked agent keywords --- webservices/rest.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/webservices/rest.py b/webservices/rest.py index b757144ad..1bea41194 100644 --- a/webservices/rest.py +++ b/webservices/rest.py @@ -182,8 +182,10 @@ def limit_access_based_on_request(): abort(503, RESTRICT_MESSAGE) user_agent = request.headers.get('User-Agent') if user_agent and BLOCKED_USER_AGENTS: - if user_agent in BLOCKED_USER_AGENTS: - abort(403) # Forbidden + for blocked_agent in BLOCKED_USER_AGENTS: + # user agent contains blocked agent string + if blocked_agent in user_agent: + abort(403) # Forbidden def get_cache_header(url): From cde0616ab2475298effc8ae69307bc267da23da2 Mon Sep 17 00:00:00 2001 From: Laura Beaufort Date: Fri, 8 Mar 2024 12:08:40 -0500 Subject: [PATCH 5/5] Modify split_env_var to return empty list when env var unset --- tests/test_utils.py | 12 ++++++++++++ webservices/rest.py | 2 +- webservices/utils.py | 10 ++++++++-- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 83eff6acf..bc6b3999c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -326,6 +326,18 @@ def test_env_var_split(self): result = utils.split_env_var(test_case) self.assertEqual(result, expected) + def test_empty_env_var_split(self): + "When env var is blank or falsy, return empty list" + test_cases = [ + None, + "", + [], + ] + expected = [] + for test_case in test_cases: + result = utils.split_env_var(test_case) + self.assertEqual(result, expected) + class TestPercentages(TestCase): def test_get_percentage(self): diff --git a/webservices/rest.py b/webservices/rest.py index 1bea41194..2a720f1a3 100644 --- a/webservices/rest.py +++ b/webservices/rest.py @@ -147,7 +147,7 @@ def handle_error(error): # list of blocked user agent strings: ex: Googlebot, Bingbot, etc that will be result in the request # being blocked if the user-agent header contains any of the specified strings -BLOCKED_USER_AGENTS = utils.split_env_var(env.get_credential('FEC_API_BLOCKED_USER_AGENTS', '')) +BLOCKED_USER_AGENTS = 
utils.split_env_var(env.get_credential('FEC_API_BLOCKED_USER_AGENTS')) @app.before_request diff --git a/webservices/utils.py b/webservices/utils.py index 1540b8613..d0d8f430e 100644 --- a/webservices/utils.py +++ b/webservices/utils.py @@ -572,8 +572,14 @@ def post_to_slack(message, channel): def split_env_var(env_var): - """ Remove whitespace and split to a list based of comma delimiter""" - return env_var.replace(" ", "").split(",") + """ + Remove whitespace and split to a list based on a comma delimiter. + If env var is None or blank string, return empty list + """ + if env_var: + return env_var.replace(" ", "").split(",") + else: + return [] # To display the open_date and close_date of JSON format inside object "mur"