Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 41 additions & 13 deletions tdd/sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,38 @@

from urllib.parse import urljoin
import httpx
import atexit
from flask import Response


from tdd.config import CONFIG
from tdd.errors import FusekiError
from .config import CONFIG
from .errors import FusekiError

# Initialize a globally pooled, secure HTTP client for SPARQL endpoint communication.
# Adheres to enterprise security best practices: bounded resource limits and explicit timeouts.
#
# Security Configurations Documented:
# - trust_env=False: Explicitly disables reading environment variables (e.g., HTTP_PROXY)
# to prevent potential proxy hijacking or environment variable pollution. Ensures
# direct connection to the backend graph database.
#
# - follow_redirects=False: Prevents Server-Side Request Forgery (SSRF) vectors if the
# backend endpoint is spoofed and attempts to redirect traffic to internal domains.
# INFRASTRUCTURE BEST PRACTICE: The TDD API and SPARQL endpoint should communicate
# directly via internal networking (e.g., internal DNS/Service Mesh) bypassing external
# Load Balancers. If an external gateway is introduced that forces HTTP->HTTPS redirects,
# requests will safely fail with a 3xx status instead of blindly following.
http_client = httpx.Client(
limits=httpx.Limits(max_keepalive_connections=50, max_connections=100),
timeout=httpx.Timeout(10.0, connect=5.0),
trust_env=False,
follow_redirects=False,
)

# Register a shutdown hook to explicitly close the client on application exit.
# This ensures that open sockets and connections are properly released to the OS,
# preventing resource leaks or warnings instead of relying on garbage collection.
atexit.register(http_client.close)

# general queries
CONSTRUCT_FROM_GRAPH = (
Expand Down Expand Up @@ -197,20 +224,21 @@ def query(
if route != "":
sparqlendpoint = urljoin(f"{sparqlendpoint}/", route)
if request_type == "query":
with httpx.Client() as client:
resp = client.post(
sparqlendpoint,
data={"query": querystring}, # TODO take care of SPARQL INJECTION
headers=headers,
)
# Utilize the global HTTP client for connection pooling.
# Note: SPARQL injection mitigation must be handled upstream by explicit input validators.
resp = http_client.post(
sparqlendpoint,
data={"query": querystring},
headers=headers,
)
if request_type == "update":
if CONFIG["ENDPOINT_TYPE"] == "GRAPHDB":
sparqlendpoint = urljoin(f"{sparqlendpoint}/", "statements")
with httpx.Client() as client:
resp = client.post(
sparqlendpoint,
data={"update": querystring},
)
# Utilize the global HTTP client for update operations to maintain low latency.
resp = http_client.post(
sparqlendpoint,
data={"update": querystring},
)

if resp.status_code not in status_codes:
raise FusekiError(resp)
Expand Down
Loading