Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
b9f8b59
NRL-1215 Add json duplicate checker and tests, use it for request
axelkrastek1-nhs Jan 8, 2025
cd586dc
NRL-1215 Fix implementation, refine type hints
axelkrastek1-nhs Jan 9, 2025
2d38b6a
NRL-1215 Add extra edge case
axelkrastek1-nhs Jan 9, 2025
4e77c0a
NRL-1215 Add tests for duplicates of each field in DocumentReference
axelkrastek1-nhs Jan 9, 2025
d079bce
NRL-1215 Fix linting issues
axelkrastek1-nhs Jan 9, 2025
bfbbf8a
NRL-1215 Fix error code for duplicates
axelkrastek1-nhs Jan 9, 2025
4fab5ab
NRL-1215 Fix possible intermittent failure related to dict order
axelkrastek1-nhs Jan 13, 2025
af54474
NRL-1215 Add logging for duplicate key checks and integration tests f…
axelkrastek1-nhs Jan 13, 2025
03a820c
NRL-1215 Add test for edge case with duplicate keys in JSON arrays
axelkrastek1-nhs Jan 13, 2025
4467cf7
NRL-1215 Enhance duplicate detection for JSON arrays and add correspo…
axelkrastek1-nhs Jan 14, 2025
705b372
NRL-1215 Remove commented-out code for duplicate detection in JSON ar…
axelkrastek1-nhs Jan 14, 2025
fce0d9a
NRL-1215 Fix linting issues
axelkrastek1-nhs Jan 14, 2025
b07d007
[NRL-1215] WIP - Using hooks instead of class for dupe detection
mattdean3-nhs Jan 16, 2025
20ba00e
NRL-1215 Alternative implementation passing all tests
axelkrastek1-nhs Jan 22, 2025
633c565
NRL-1215 Fix linting, remove redundant code
axelkrastek1-nhs Jan 22, 2025
688b2a7
NRL-1215 Simplify implementation
axelkrastek1-nhs Jan 23, 2025
f017a8f
NRL-1215 Add extra docstring
axelkrastek1-nhs Jan 23, 2025
8c54886
NRL-1215 Fix case sensitive detection
axelkrastek1-nhs Jan 23, 2025
8bf8776
NRL-1215 Add more prints for test report
axelkrastek1-nhs Jan 23, 2025
5dbbd92
NRL-1215 Clean up prints
axelkrastek1-nhs Jan 23, 2025
5bc99cb
NRL-1215 Remove extra prints
axelkrastek1-nhs Jan 24, 2025
0235c76
NRL-1215 Add docstrings
axelkrastek1-nhs Jan 24, 2025
ef8f45a
NRL-1215 Log when duplicate key in post body
axelkrastek1-nhs Jan 24, 2025
023e514
[NRL-1215] Switch root to DocumentReference in json dupe checker and …
mattdean3-nhs Jan 28, 2025
9601c3f
[NRL-1215] Add __str__ to OperationOutcomeError to allow logging of e…
mattdean3-nhs Jan 28, 2025
e8d35e4
[NRL-1215] Fixup integ tests for root. fix
mattdean3-nhs Jan 28, 2025
67af076
Merge branch 'develop' into feature/axkr1-NRL-1215-duplicate-fields
mattdean3-nhs Jan 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions layer/nrlf/core/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ def response(self) -> Response:
body=self.operation_outcome.model_dump_json(exclude_none=True, indent=2),
)

def __str__(self):
    """Return a loggable representation that includes the full operation outcome."""
    return "OperationOutcomeError: {}".format(self.operation_outcome)


class ParseError(Exception):
issues: List[OperationOutcomeIssue]
Expand Down
74 changes: 74 additions & 0 deletions layer/nrlf/core/json_duplicate_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import json
from typing import Any


def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict[str, Any]:
    """Custom JSON object_pairs_hook that checks for duplicate keys.

    Keeps the first occurrence of each key (compared case-insensitively) and
    records the value of every later occurrence under a synthetic
    ``__duplicates__`` entry, keyed by the duplicate's original spelling.

    Args:
        pairs: The (key, value) pairs of a single JSON object, in input order.

    Returns:
        The de-duplicated object, with a ``__duplicates__`` dict added when
        any duplicate keys were present.

    NOTE(review): an input object that legitimately contains a
    ``__duplicates__`` key would collide with the marker entry — confirm this
    cannot occur in FHIR payloads.
    """
    result: dict[str, Any] = {}
    dupes: dict[str, list[Any]] = {}
    # A set gives O(1) membership checks; the original list scan was O(n)
    # per key, i.e. O(n^2) per object.
    seen: set[str] = set()

    for key, value in pairs:
        normalized_key = key.lower()
        if normalized_key in seen:
            dupes.setdefault(key, []).append(value)
        else:
            result[key] = value
            seen.add(normalized_key)

    if dupes:
        result["__duplicates__"] = dupes

    return result


def flatten_duplicates(data: dict[str, Any] | list[Any]) -> list[str]:
"""Flattens a JSON structure and returns a list of duplicate paths."""
duplicates: list[str] = []
items = data.items() if isinstance(data, dict) else enumerate(data)

for key, value in items:
if key == "__duplicates__":
duplicates.extend(value.keys())
elif isinstance(value, (dict, list)):
path = f"{key}" if isinstance(data, dict) else f"[{key}]"
dupes = flatten_duplicates(value)
duplicates.extend([f"{path}.{dupe}" for dupe in dupes])

return duplicates


def format_path(path: str) -> str:
    """Transform a path like ``key1.[2].key2`` into ``key1[2].key2``.

    Array-index segments (``[n]``) are fused onto the preceding key so the
    result reads like a FHIRPath expression. A leading index segment (from a
    top-level JSON array) is kept as its own part instead of raising
    IndexError, which the original did via ``formatted_parts[-1]`` on an
    empty list.
    """
    formatted_parts: list[str] = []
    for part in path.split("."):
        if part.startswith("[") and formatted_parts:
            formatted_parts[-1] += part
        else:
            formatted_parts.append(part)
    return ".".join(formatted_parts)


def check_duplicate_keys(json_content: str) -> tuple[list[str], list[str]]:
    """Find all duplicate keys in a JSON string.

    Traverses the entire JSON structure and reports:
    - List of keys that appear multiple times at the same level
    - Full paths to each duplicate key occurrence

    A key is considered duplicate if it appears multiple times within
    the same object, regardless of nesting level or array position.

    Raises:
        ValueError: if ``json_content`` is not valid JSON.
    """
    try:
        dupe_data = json.loads(json_content, object_pairs_hook=check_for_duplicate_keys)
    except json.JSONDecodeError as err:
        # Chain the decode error so position/context information is preserved.
        raise ValueError("Error: Invalid JSON format") from err

    # Paths are reported relative to the DocumentReference root resource.
    duplicate_paths = [
        f"DocumentReference.{format_path(path)}"
        for path in flatten_duplicates(dupe_data)
    ]
    # Last path segment is the key name; dict.fromkeys de-duplicates while
    # preserving first-seen order.
    duplicate_keys = list(
        dict.fromkeys(path.split(".")[-1] for path in duplicate_paths)
    )
    return duplicate_keys, duplicate_paths
2 changes: 2 additions & 0 deletions layer/nrlf/core/log_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ class LogReference(Enum):
)
HANDLER016 = _Reference("INFO", "Set response headers")
HANDLER017 = _Reference("WARN", "Correlation ID not found in request headers")
HANDLER018 = _Reference("INFO", "Checking for duplicate keys in request body")
HANDLER019 = _Reference("ERROR", "Duplicate keys found in the request body")
HANDLER999 = _Reference("INFO", "Request handler returned successfully")

# Error Logs
Expand Down
20 changes: 20 additions & 0 deletions layer/nrlf/core/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from nrlf.core.codes import SpineErrorConcept
from nrlf.core.constants import CLIENT_RP_DETAILS, CONNECTION_METADATA
from nrlf.core.errors import OperationOutcomeError, ParseError
from nrlf.core.json_duplicate_checker import check_duplicate_keys
from nrlf.core.logger import LogReference, logger
from nrlf.core.model import ClientRpDetails, ConnectionMetadata

Expand Down Expand Up @@ -88,6 +89,7 @@ def parse_body(

try:
result = model.model_validate_json(body)
raise_when_duplicate_keys(body)
logger.log(LogReference.HANDLER009, parsed_body=result.model_dump())
return result

Expand All @@ -99,6 +101,24 @@ def parse_body(
) from None


def raise_when_duplicate_keys(json_content: str) -> None:
    """
    Raises an error if duplicate keys are found in the JSON content.
    """
    logger.log(LogReference.HANDLER018)
    duplicate_keys, duplicate_paths = check_duplicate_keys(json_content)
    if not duplicate_keys:
        return

    duplicates = duplicate_keys
    outcome_error = OperationOutcomeError(
        severity="error",
        code="invalid",
        details=SpineErrorConcept.from_code("MESSAGE_NOT_WELL_FORMED"),
        diagnostics=f"Duplicate keys found in FHIR document: {duplicates}",
        expression=duplicate_paths,
    )
    logger.log(LogReference.HANDLER019, error=str(outcome_error))
    raise outcome_error


def parse_path(
model: Type[BaseModel] | None,
path_params: Dict[str, str] | None,
Expand Down
Loading