diff --git a/tdd/sparql.py b/tdd/sparql.py index dd176c4..a58fee3 100644 --- a/tdd/sparql.py +++ b/tdd/sparql.py @@ -15,11 +15,38 @@ from urllib.parse import urljoin import httpx +import atexit from flask import Response -from tdd.config import CONFIG -from tdd.errors import FusekiError +from .config import CONFIG +from .errors import FusekiError + +# Initialize a globally pooled, secure HTTP client for SPARQL endpoint communication. +# Adheres to enterprise security best practices: bounded resource limits and explicit timeouts. +# +# Security Configurations Documented: +# - trust_env=False: Explicitly disables reading environment variables (e.g., HTTP_PROXY) +# to prevent potential proxy hijacking or environment variable pollution. Ensures +# direct connection to the backend graph database. +# +# - follow_redirects=False: Prevents Server-Side Request Forgery (SSRF) vectors if the +# backend endpoint is spoofed and attempts to redirect traffic to internal domains. +# INFRASTRUCTURE BEST PRACTICE: The TDD API and SPARQL endpoint should communicate +# directly via internal networking (e.g., internal DNS/Service Mesh) bypassing external +# Load Balancers. If an external gateway is introduced that forces HTTP->HTTPS redirects, +# requests will safely fail with a 3xx status instead of blindly following. +http_client = httpx.Client( + limits=httpx.Limits(max_keepalive_connections=50, max_connections=100), + timeout=httpx.Timeout(10.0, connect=5.0), + trust_env=False, + follow_redirects=False, +) + +# Register a shutdown hook to explicitly close the client on application exit. +# This ensures that open sockets and connections are properly released to the OS, +# preventing resource leaks or warnings instead of relying on garbage collection. +atexit.register(http_client.close) # general queries CONSTRUCT_FROM_GRAPH = ( @@ -197,20 +224,21 @@ def query( if route != "": sparqlendpoint = urljoin(f"{sparqlendpoint}/", route) if request_type == "query": - with httpx.Client() as client: - resp = client.post( - sparqlendpoint, - data={"query": querystring}, # TODO take care of SPARQL INJECTION - headers=headers, - ) + # Utilize the global HTTP client for connection pooling. + # Note: SPARQL injection mitigation must be handled upstream by explicit input validators. + resp = http_client.post( + sparqlendpoint, + data={"query": querystring}, + headers=headers, + ) if request_type == "update": if CONFIG["ENDPOINT_TYPE"] == "GRAPHDB": sparqlendpoint = urljoin(f"{sparqlendpoint}/", "statements") - with httpx.Client() as client: - resp = client.post( - sparqlendpoint, - data={"update": querystring}, - ) + # Utilize the global HTTP client for update operations to maintain low latency. + resp = http_client.post( + sparqlendpoint, + data={"update": querystring}, + ) if resp.status_code not in status_codes: raise FusekiError(resp)