Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions tdd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
get_check_schema_from_url_params,
)
from tdd.sparql import query, sparql_query
from .validators import validate_sort_order
from tdd.utils import (
POSSIBLE_MIMETYPES,
create_link_params,
Expand Down Expand Up @@ -285,12 +286,15 @@ def describe_tds():

sort_by = request.args.get("sort_by")
sort_order = request.args.get("sort_order")
if sort_order is not None:
sort_order = validate_sort_order(sort_order)

number_total = get_total_number()

sort_params = {}
if sort_order:
sort_params["sort_order"] = sort_order
if sort_order is not None:
# Use lowercase for URL parameters (API convention)
sort_params["sort_order"] = sort_order.lower()
if sort_by:
sort_params["sort_by"] = sort_by

Expand Down
1 change: 1 addition & 0 deletions tdd/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def frame_nt_content(nt_content, frame):
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
encoding="utf-8",
)
p.stdin.write(input_data)
p.stdin.flush()
Expand Down
12 changes: 12 additions & 0 deletions tdd/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,3 +212,15 @@ def __init__(self, provided_mimetype):

class IncorrectlyDefinedParameter(AppException):
title = "Incorrectly defined parameter"


class SecurityValidationError(AppException):
    """Raised when user-supplied input fails security validation (HTTP 400).

    Carries the error message in English, French and German so the API
    can answer in the client's preferred language, matching the other
    AppException subclasses in this module.
    """

    title = "Security Validation Error"
    status_code = 400

    # Fixed translations of the default English message.
    _MESSAGE_FR = "Entrée mal formée ou non sécurisée détectée."
    _MESSAGE_DE = "Fehlerhafte oder unsichere Eingabe erkannt."

    def __init__(self, message="Malformed or unsafe input detected."):
        """Initialize with an optional custom English message."""
        super().__init__(
            message=message,
            message_fr=self._MESSAGE_FR,
            message_de=self._MESSAGE_DE,
        )
9 changes: 7 additions & 2 deletions tdd/registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from tdd.errors import TTLMandatoryError
from tdd.utils import TDD
from tdd.validators import validate_uri


def validate_ttl(ld_content, mandate_ttl):
Expand All @@ -30,11 +31,13 @@ def validate_ttl(ld_content, mandate_ttl):


def get_registration_dict(uri, rdf_graph):
# Upstream validation: Secure the URI before placing it in the SPARQL query string
safe_uri = validate_uri(uri)
registration_query = (
"PREFIX discovery: <https://www.w3.org/2022/wot/discovery-ontology#>"
"SELECT DISTINCT ?created ?modified ?expires ?ttl "
"WHERE {"
f" <{uri}> discovery:hasRegistrationInformation ?reg."
f" <{safe_uri}> discovery:hasRegistrationInformation ?reg."
" OPTIONAL{?reg discovery:dateCreated ?created}"
" OPTIONAL{?reg discovery:dateModified ?modified}"
" OPTIONAL{?reg discovery:expires ?expires}"
Expand Down Expand Up @@ -66,7 +69,9 @@ def get_registration_dict(uri, rdf_graph):


def delete_registration_information(uri, rdf_graph):
rdf_graph.remove((URIRef(uri), TDD.hasRegistrationInformation, None))
# Sanitize before processing
safe_uri = validate_uri(uri)
rdf_graph.remove((URIRef(safe_uri), TDD.hasRegistrationInformation, None))
rdf_graph.remove((None, TDD.dateCreated, None))
rdf_graph.remove((None, TDD.dateModified, None))
rdf_graph.remove((None, TDD.expires, None))
Expand Down
18 changes: 15 additions & 3 deletions tdd/sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import atexit
from flask import Response


from .config import CONFIG
from .errors import FusekiError

Expand Down Expand Up @@ -223,27 +222,40 @@ def query(

if route != "":
sparqlendpoint = urljoin(f"{sparqlendpoint}/", route)

if request_type == "query":
# Utilize the global HTTP client for connection pooling.
# Note: SPARQL injection mitigation must be handled upstream by explicit input validators.
resp = http_client.post(
sparqlendpoint,
data={"query": querystring},
headers=headers,
)
if request_type == "update":
elif request_type == "update":
if CONFIG["ENDPOINT_TYPE"] == "GRAPHDB":
sparqlendpoint = urljoin(f"{sparqlendpoint}/", "statements")
# Utilize the global HTTP client for update operations to maintain low latency.
resp = http_client.post(
sparqlendpoint,
data={"update": querystring},
)
else:
raise ValueError(f"Invalid request_type: {request_type}")

if resp.status_code not in status_codes:
raise FusekiError(resp)
return resp


def delete_named_graph(named_graph):
    """
    Drop a single named graph on the SPARQL endpoint.

    Args:
        named_graph: URI of the graph to drop. Callers pass graph URIs
            obtained from internal database queries (values that already
            passed validation when they were stored), never raw user
            input, so no additional sanitization is applied here.
    """
    # DROP SILENT: the update succeeds even if the graph does not exist.
    drop_statement = f"DROP SILENT GRAPH <{named_graph}>"
    query(drop_statement, request_type="update")
87 changes: 70 additions & 17 deletions tdd/td.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
frame_nt_content,
get_id_description,
)
from .validators import validate_uri

with files(__package__).joinpath("data/td-json-schema-validation.json").open() as strm:
schema = json.load(strm)
Expand Down Expand Up @@ -107,7 +108,7 @@ def use_custom_context(ld_content):
# No need for now, since the published context is up to date
overwrite_thing_context(ld_content)

# replace discovery context uri witht the fixed discovery context
# replace discovery context uri with the fixed discovery context
overwrite_discovery_context(ld_content)

return ld_content
Expand Down Expand Up @@ -161,8 +162,10 @@ def validate_tds(tds):


def get_already_existing_td(uri):
# Upstream validation: Ensure URI is safe before injecting into SPARQL template
safe_uri = validate_uri(uri)
resp = query(
GET_TD_CREATION_DATE.format(uri=uri),
GET_TD_CREATION_DATE.format(uri=safe_uri),
)
if resp.status_code == 200:
if len(resp.json()["results"]["bindings"]) > 0:
Expand All @@ -183,6 +186,8 @@ def put_td_rdf_in_sparql(
if uri is None:
raise RDFValidationError(f"Did not find any {TD['Thing']}")

safe_uri = validate_uri(uri)

if check_schema:
ontology_graph = create_binded_graph()
with path("tdd.data", "td.ttl") as onto_path:
Expand All @@ -200,37 +205,38 @@ def put_td_rdf_in_sparql(
raise RDFValidationError(
"The RDF triples are not conform with the SHACL validation : \n"
f" {text_reports}",
td_id=uri,
td_id=safe_uri,
errors=graph_reports,
td_graph=g,
)

registration = get_registration_dict(uri, g)
delete_registration_information(uri, g)
registration = get_registration_dict(safe_uri, g)
delete_registration_information(safe_uri, g)

created_date = get_already_existing_td(uri)
created_date = get_already_existing_td(safe_uri)
registration = update_registration(registration, created_date, CONFIG["MAX_TTL"])
for triple in yield_registration_triples(uri, registration):
for triple in yield_registration_triples(safe_uri, registration):
g.add(triple)
put_rdf_in_sparql(
g,
uri,
safe_uri,
[DEFAULT_THING_CONTEXT_URI, DEFAULT_DISCOVERY_CONTEXT_URI],
delete_if_exists,
ONTOLOGY,
forced_type=TYPE,
)
return (created_date is not None, uri)
return (created_date is not None, safe_uri)


def get_td_description(id, content_type="application/td+json", context=None):
safe_id = validate_uri(id)
if not content_type.endswith("json"):
return get_id_description(id, content_type, ONTOLOGY)
content = get_id_description(id, "application/n-triples", ONTOLOGY)
return get_id_description(safe_id, content_type, ONTOLOGY)
content = get_id_description(safe_id, "application/n-triples", ONTOLOGY)
if not context:
context = get_context(id, ONTOLOGY)
context = get_context(safe_id, ONTOLOGY)
try:
td_description = frame_td_nt_content(id, content, context)
td_description = frame_td_nt_content(safe_id, content, context)
return td_description
except ExpireTDError:
return ""
Expand All @@ -245,7 +251,8 @@ def put_td_json_in_sparql(td_content, uri=None, delete_if_exists=True):
registration = td_content.get("registration", {})
td_content = sanitize_td(td_content)
original_context = copy(td_content["@context"])
uri = uri if uri is not None else td_content["id"]
# Upstream validation: Sanitize the URI whether it comes from args or the payload ID
uri = validate_uri(uri if uri is not None else td_content["id"])
td_content = use_custom_context(td_content)

created_date = get_already_existing_td(uri)
Expand All @@ -260,6 +267,23 @@ def put_td_json_in_sparql(td_content, uri=None, delete_if_exists=True):


def delete_graphs(ids):
"""
Delete multiple graphs by their IDs.

Args:
ids: List of graph IDs to delete

Note:
This function is called with IDs from internal database queries
(e.g., expired TDs from clear_expired_td()). These IDs are trusted
internal values, not user input, so no external validation is needed.

Applying validate_uri() here would be incorrect because:
1. These URIs already passed validation when originally stored
2. Legitimate stored URIs might contain characters outside the strict
allowlist (e.g., certain URN formats)
3. Validation should only occur at the trust boundary (user input)
"""
graph_ids_str = ", ".join([f"<{graph_id}>" for graph_id in ids])
delete_td_query = DELETE_GRAPHS.format(graph_ids_str=graph_ids_str)
resp = query(delete_td_query, request_type="update")
Expand Down Expand Up @@ -322,18 +346,43 @@ def get_total_number():


def get_paginated_tds(limit, offset, sort_by, sort_order):
all_tds = []
"""
Get a paginated list of Thing Descriptions.

Args:
limit (int): Maximum number of TDs to return (pre-validated at controller layer)
offset (int): Offset for pagination (pre-validated at controller layer)
sort_by (str): Field to sort by (pre-validated at controller layer)
sort_order (str): Sort direction "ASC" or "DESC" (pre-validated at controller layer)

Returns:
List[dict]: List of Thing Description dictionaries in the order specified by SPARQL query

Note:
All parameters are assumed to be pre-validated and type-converted at the
controller layer (__init__.py). No redundant validation is performed here.

Thread Safety:
Uses ThreadPoolExecutor for concurrent TD retrieval. Results are collected
in the main thread in the original task submission order to preserve the
SPARQL ORDER BY sequence.
"""
tasks = []

def send_request(id, context):
td = get_td_description(id, context=context)
all_tds.append(td)
"""
Fetch a single TD description.

Returns the TD instead of appending to a shared list for thread safety.
"""
return get_td_description(id, context=context)

contexts = get_all_contexts()

if sort_by is not None and sort_by not in ORDERBY:
raise OrderbyError(sort_by)

# No redundant validation - parameters already validated in __init__.py
resp = query(
GET_URI_BY_ONTOLOGY.format(
limit=limit,
Expand Down Expand Up @@ -366,6 +415,10 @@ def send_request(id, context):
contexts[result["graph"]["value"]],
)
)
# Wait for all tasks to complete in submission order to preserve SPARQL ORDER BY
all_tds = []
for task in tasks:
all_tds.append(task.result())

return all_tds

Expand Down
Loading
Loading