From b9f8b59818ffb3f150d616c9d9727be8d6452728 Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." Date: Wed, 8 Jan 2025 07:47:07 +0000 Subject: [PATCH 01/26] NRL-1215 Add json duplicate checker and tests, use it for request --- layer/nrlf/core/json_duplicate_checker.py | 104 +++++++ layer/nrlf/core/request.py | 17 ++ .../core/tests/test_json_duplicate_checker.py | 287 ++++++++++++++++++ layer/nrlf/core/tests/test_request.py | 35 +++ 4 files changed, 443 insertions(+) create mode 100644 layer/nrlf/core/json_duplicate_checker.py create mode 100644 layer/nrlf/core/tests/test_json_duplicate_checker.py diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py new file mode 100644 index 000000000..698db1bb9 --- /dev/null +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -0,0 +1,104 @@ +import json +from typing import List, Tuple, Set, Any, Dict, Union +from nrlf.core.errors import OperationOutcomeError +from nrlf.core.response import SpineErrorConcept + +JsonValue = List[Any] | Tuple[Any, ...] | Any +JsonPair = Tuple[str, JsonValue] + +class DuplicateKeyChecker: + """JSON structure duplicate key detector. + + Tracks duplicate keys by maintaining path context during traversal. + Paths are recorded in dot notation with array indices: + - Objects: parent.child + - Arrays: parent.array[0] + - Nested: parent.array[0].child[1].key + """ + + def __init__(self): + self.duplicate_keys: Set[str] = set() + self.duplicate_paths: Set[str] = set() + # Track keys at each path level to detect duplicates + self.key_registry: Dict[str, Dict[str, bool]] = {} + + def check_key(self, key: str, path: List[str]) -> None: + """Check if a key at the current path is a duplicate. + + A duplicate occurs when the same key appears twice at the same + nesting level, even if the values differ. + """ + current_level = '.'.join(path) + + if current_level not in self.key_registry: + self.key_registry[current_level] = {} + + if key in self.key_registry[current_level]: + self.duplicate_keys.add(key) + full_path = '.'.join(path + [key]) + self.duplicate_paths.add(full_path) + print(f"Found duplicate key: {key} at path: {full_path}") + else: + self.key_registry[current_level][key] = True + + def traverse_json(self, data: List[JsonPair], path: List[str]) -> None: + """Traverse JSON structure and check for duplicate keys. + + Handles both objects and arrays, maintaining proper path context + during traversal. + """ + for key, value in data: + print(f"Processing key: {key}, value: {value}") + self.check_key(key, path) + + if isinstance(value, (list, tuple)): + if value and isinstance(value[0], tuple): + # Handle nested object + self.traverse_json(value, path + [key]) + else: + # Handle array + self.traverse_array(value, path + [key]) + + def traverse_array(self, items: List[Any], path: List[str]) -> None: + """Process array items while tracking their indices in the path.""" + array_path = path[-1] + base_path = path[:-1] + + for idx, item in enumerate(items): + if not isinstance(item, (tuple, list)): + continue + + current_path = base_path + [f"{array_path}[{idx}]"] + if item and isinstance(item[0], tuple): + # Handle object in array + pairs = [item] if isinstance(item, tuple) else item + self.traverse_json(pairs, current_path) + else: + # Handle nested array + self.traverse_array(item, current_path) + +def check_duplicate_keys(json_content: str) -> Tuple[List[str], List[str]]: + """Find all duplicate keys in a JSON string. 
+ + Traverses the entire JSON structure and reports: + - List of keys that appear multiple times at the same level + - Full paths to each duplicate key occurrence + + A key is considered duplicate if it appears multiple times within + the same object, regardless of nesting level or array position. + """ + try: + parsed_data = json.loads(json_content, object_pairs_hook=lambda pairs: pairs) + print("Parsed JSON:", parsed_data) + except json.JSONDecodeError: + raise ValueError("Error: Invalid JSON format") + + checker = DuplicateKeyChecker() + checker.traverse_json(parsed_data, ['root']) + + duplicates = list(checker.duplicate_keys) + paths = list(checker.duplicate_paths) + print("Final duplicates:", duplicates) + print("Final paths:", paths) + + return duplicates, paths diff --git a/layer/nrlf/core/request.py b/layer/nrlf/core/request.py index 7878711be..9e2576a38 100644 --- a/layer/nrlf/core/request.py +++ b/layer/nrlf/core/request.py @@ -8,6 +8,7 @@ from nrlf.core.errors import OperationOutcomeError, ParseError from nrlf.core.logger import LogReference, logger from nrlf.core.model import ClientRpDetails, ConnectionMetadata +from nrlf.core.json_duplicate_checker import check_duplicate_keys def parse_headers(headers: Dict[str, str]) -> ConnectionMetadata: @@ -88,6 +89,7 @@ def parse_body( try: result = model.model_validate_json(body) + raise_when_duplicate_keys(body) logger.log(LogReference.HANDLER009, parsed_body=result.model_dump()) return result @@ -98,6 +100,20 @@ def parse_body( msg="Request body could not be parsed", ) from None +def raise_when_duplicate_keys(json_content: str) -> None: + """ + Raises an error if duplicate keys are found in the JSON content. + """ + duplicates, paths = check_duplicate_keys(json_content) + if duplicates: + raise OperationOutcomeError( + severity="error", + code="required", + details=SpineErrorConcept.from_code("MESSAGE_NOT_WELL_FORMED"), + diagnostics=f"Duplicate keys found in FHIR document: {duplicates}", + expression=paths, + ) + def parse_path( model: Type[BaseModel] | None, @@ -123,3 +139,4 @@ def parse_path( details=SpineErrorConcept.from_code("INVALID_PARAMETER"), msg="Invalid path parameter", ) from None + diff --git a/layer/nrlf/core/tests/test_json_duplicate_checker.py b/layer/nrlf/core/tests/test_json_duplicate_checker.py new file mode 100644 index 000000000..6e6125f52 --- /dev/null +++ b/layer/nrlf/core/tests/test_json_duplicate_checker.py @@ -0,0 +1,287 @@ +import unittest +from json_duplicate_checker import check_duplicate_keys + +class TestJsonDuplicateChecker(unittest.TestCase): + def test_no_duplicates(self): + json_content = '{"a": 1, "b": 2, "c": {"d": 3, "e": 4}}' + duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual(duplicates, []) + self.assertEqual(paths, []) + + def test_simple_duplicates(self): + json_content = '{"a": 1, "b": 2, "a": 3}' + duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual(duplicates, ['a']) + self.assertEqual(paths, ['root.a']) + + def test_nested_duplicates(self): + # This JSON has no duplicates because the 'b' keys are at different levels + json_content = '{"a": {"b": 1}, "c": {"b": 2}, "d": {"e": {"b": 3}}}' + duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual(duplicates, []) + self.assertEqual(paths, []) + + def test_same_level_duplicates(self): + # This JSON has duplicates because there are two 'b' keys at the same level + json_content = '{"a": {"b": 1, "b": 2}, "c": {"d": 3}}' + duplicates, paths = check_duplicate_keys(json_content) + 
self.assertEqual(duplicates, ['b']) + self.assertEqual(paths, ['root.a.b']) + + def test_multiple_level_duplicates(self): + # This JSON has duplicates at multiple levels + json_content = '{"a": 1, "b": {"c": 2, "c": 3}, "a": 4}' + duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual(sorted(duplicates), sorted(['a', 'c'])) + self.assertEqual(sorted(paths), sorted(['root.a', 'root.b.c'])) + + def test_invalid_json(self): + json_content = '{invalid json}' + with self.assertRaises(ValueError): + check_duplicate_keys(json_content) + + def test_complex_nested_duplicates(self): + json_content = '{"a": {"b": 1, "c": {"d": 2, "c": 3}}, "a": {"e": 4}}' + duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual(sorted(duplicates), sorted(['a'])) + self.assertEqual(sorted(paths), sorted(['root.a'])) + + def test_multiple_duplicates_same_path(self): + json_content = ''' + { + "a": 1, + "b": { + "c": 2, + "c": 3, + "d": { + "e": 4, + "e": 5, + "f": { + "g": 6, + "g": 7 + } + } + }, + "b": { + "h": 8 + } + } + ''' + duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual(sorted(duplicates), sorted(['b', 'c', 'e', 'g'])) + self.assertEqual(sorted(paths), sorted(['root.b', 'root.b.c', 'root.b.d.e', 'root.b.d.f.g'])) + + def test_no_duplicates_deeply_nested(self): + json_content = ''' + { + "a": { + "b": { + "c": 1 + }, + "d": { + "e": 2 + } + }, + "f": { + "g": { + "h": 3 + } + } + } + ''' + duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual(duplicates, []) + self.assertEqual(paths, []) + + def test_duplicates_with_arrays(self): + json_content = ''' + { + "a": [ + {"b": 1, "b": 2}, + {"c": 3, "c": 4} + ], + "d": 5 + } + ''' + duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual(sorted(duplicates), sorted(['b', 'c'])) + self.assertEqual(sorted(paths), sorted(['root.a[0].b', 'root.a[1].c'])) + + def test_large_json_with_mixed_duplicates(self): + json_content = ''' + { + "a": 1, + "b": { + "c": 2, + "d": 3, + "c": 4, + "e": { + "f": 5, + "f": 6, + "g": { + "h": 7, + "h": 8 + } + } + }, + "i": { + "j": 10, + "j": 11 + } + } + ''' + duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual(sorted(duplicates), sorted(['c', 'f', 'h', 'j'])) + self.assertEqual(sorted(paths), sorted(['root.b.c', 'root.b.e.f', 'root.b.e.g.h', 'root.i.j'])) + + def test_complex_nested_arrays_with_duplicates(self): + json_content = ''' + { + "level1": { + "arrays": [ + { + "a": 1, + "a": 2, + "nested": { + "b": [ + {"c": 3, "c": 4}, + {"d": 5} + ], + "b": "duplicate" + } + }, + { + "mixed": [ + {"e": 6}, + {"e": 7, "f": [ + {"g": 8, "g": 9}, + {"h": {"i": 10, "i": 11}} + ]} + ], + "mixed": "duplicate" + } + ], + "arrays": "duplicate_at_parent" + } + } + ''' + duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual( + sorted(duplicates), + sorted(['a', 'b', 'c', 'g', 'i', 'mixed', 'arrays']) + ) + self.assertEqual( + sorted(paths), + sorted([ + 'root.level1.arrays', + 'root.level1.arrays[0].a', + 'root.level1.arrays[0].nested.b', + 'root.level1.arrays[0].nested.b[0].c', + 'root.level1.arrays[1].mixed', + 'root.level1.arrays[1].mixed[1].f[0].g', + 'root.level1.arrays[1].mixed[1].f[1].h.i' + ]) + ) + + def test_deep_nested_array_object_duplicates(self): + json_content = ''' + { + "root": { + "level1": [ + { + "level2": [ + [ + { + "data": 1, + "data": 2, + "unique": 3 + } + ], + [ + { + "other": 4, + "other": 5 + } + ] + ] + } + ] + } + } + ''' + duplicates, paths = check_duplicate_keys(json_content) + 
self.assertEqual( + sorted(duplicates), + sorted(['data', 'other']) + ) + self.assertEqual( + sorted(paths), + sorted([ + 'root.root.level1[0].level2[0][0].data', + 'root.root.level1[0].level2[1][0].other' + ]) + ) + + def generate_nested_json(self, depth, current_level=0): + """Helper function to generate nested JSON with duplicates at each level.""" + if depth == 0: + return '{"key": 1, "key": 2}' + + next_json = self.generate_nested_json(depth - 1, current_level + 1) + return f'''{{ + "level{current_level}": [ + {{ + "array_obj": {next_json}, + "array_obj": "duplicate_at_depth_{current_level}" + }} + ] + }}''' + + def get_expected_duplicates(self, max_depth): + """Helper function to get expected duplicate keys.""" + duplicates = ['array_obj'] # array_obj appears at each level + duplicates.extend(['key']) # key appears at the innermost level + return sorted(list(set(duplicates))) + + def get_expected_paths(self, max_depth): + """Helper function to get expected duplicate paths.""" + paths = [] + current_path = 'root' + + # Start from level0 and increment + for i in range(max_depth): + current_path += f'.level{i}[0]' + paths.append(f'{current_path}.array_obj') + if i < max_depth - 1: # If not at the last level + current_path += '.array_obj' # Navigate into the nested object + + # Add the key duplicate at the innermost level + if max_depth > 0: + paths.append(f'{current_path}.array_obj.key') + + return sorted(paths) + + def test_parametrized_nested_arrays(self): + """Test different depths of nested arrays with duplicates at each level.""" + for depth in range(1, 11): # Test depths 1 through 10 + with self.subTest(depth=depth): + json_content = self.generate_nested_json(depth) + print(f"\n=== Testing depth {depth} ===") + print("Generated JSON:") + print(json_content) + + duplicates, paths = check_duplicate_keys(json_content) + + expected_duplicates = self.get_expected_duplicates(depth) + expected_paths = self.get_expected_paths(depth) + + print("\nActual duplicates:", sorted(duplicates)) + print("Expected duplicates:", expected_duplicates) + print("\nActual paths:", sorted(paths)) + print("Expected paths:", expected_paths) + + self.assertEqual(sorted(duplicates), expected_duplicates, + f"Failed for depth {depth} - duplicates mismatch") + self.assertEqual(sorted(paths), expected_paths, + f"Failed for depth {depth} - paths mismatch") + print("=== Test passed for depth", depth, "===\n") diff --git a/layer/nrlf/core/tests/test_request.py b/layer/nrlf/core/tests/test_request.py index 6f4ec8d92..7bc661af9 100644 --- a/layer/nrlf/core/tests/test_request.py +++ b/layer/nrlf/core/tests/test_request.py @@ -150,6 +150,41 @@ def test_parse_body_valid_docref(): assert isinstance(result, DocumentReference) +# another test similar to test_parse_body_valid_docref but with a duplicate key +def test_parse_body_valid_docref_with_duplicate_key(): + model = DocumentReference + docref_body = load_document_reference_data("Y05868-736253002-Valid") + + str_to_duplicate = '"docStatus": "final",' + docref_body = docref_body.replace(str_to_duplicate, str_to_duplicate*2) + + with pytest.raises(OperationOutcomeError) as error: + parse_body(model, docref_body) + + response = error.value.response + + assert response.statusCode == "400" + assert json.loads(response.body) == { + "resourceType": "OperationOutcome", + "issue": [ + { + "severity": "error", + "code": "required", + "details": { + "coding": [ + { + "system": "https://fhir.nhs.uk/ValueSet/Spine-ErrorOrWarningCode-1", + "code": "MESSAGE_NOT_WELL_FORMED", + 
"display": "Message not well formed", + } + ] + }, + "diagnostics": "Duplicate keys found in FHIR document: ['docStatus']", + "expression": ["root.docStatus"], + } + ], + } + def test_parse_body_no_body(): model = DocumentReference From cd586dc26d5e1132d9793081347fab5f0fd15044 Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." Date: Thu, 9 Jan 2025 15:39:57 +0000 Subject: [PATCH 02/26] NRL-1215 Fix implementation, refine type hints --- layer/nrlf/core/json_duplicate_checker.py | 81 ++++++++++++----------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index 698db1bb9..dd2d3a2fa 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -1,10 +1,11 @@ import json -from typing import List, Tuple, Set, Any, Dict, Union -from nrlf.core.errors import OperationOutcomeError -from nrlf.core.response import SpineErrorConcept +from typing import List, Tuple, Set, Dict -JsonValue = List[Any] | Tuple[Any, ...] | Any -JsonPair = Tuple[str, JsonValue] +JsonPrimitive = str | int | float | bool | None +type JsonValue = JsonPrimitive | JsonObject | JsonArray +JsonPair = tuple[str, JsonValue] +JsonObject = list[JsonPair] +JsonArray = list[JsonValue] class DuplicateKeyChecker: """JSON structure duplicate key detector. @@ -21,6 +22,20 @@ def __init__(self): self.duplicate_paths: Set[str] = set() # Track keys at each path level to detect duplicates self.key_registry: Dict[str, Dict[str, bool]] = {} + self.current_duplicate_index: Dict[str, int] = {} + + def get_path_with_index(self, path: List[str], key: str) -> List[str]: + current_level = '.'.join(path) + index_map = self.current_duplicate_index.setdefault(current_level, {}) + count = index_map.get(key, 0) + index_map[key] = count + 1 + + # If it's the first occurrence, keep the key as is. + # Subsequent occurrences get bracket-indexed. + if count == 0: + return path + [key] + else: + return path + [f"{key}[{count - 1}]"] def check_key(self, key: str, path: List[str]) -> None: """Check if a key at the current path is a duplicate. @@ -29,53 +44,39 @@ def check_key(self, key: str, path: List[str]) -> None: nesting level, even if the values differ. """ current_level = '.'.join(path) - - if current_level not in self.key_registry: - self.key_registry[current_level] = {} - - if key in self.key_registry[current_level]: + current_keys = self.key_registry.setdefault(current_level, {}) + if key in current_keys: self.duplicate_keys.add(key) - full_path = '.'.join(path + [key]) - self.duplicate_paths.add(full_path) - print(f"Found duplicate key: {key} at path: {full_path}") + self.duplicate_paths.add('.'.join(path + [key])) + print(f"Found duplicate key: {key} at path: {'.'.join(path + [key])}") else: - self.key_registry[current_level][key] = True + current_keys[key] = True - def traverse_json(self, data: List[JsonPair], path: List[str]) -> None: - """Traverse JSON structure and check for duplicate keys. - - Handles both objects and arrays, maintaining proper path context - during traversal. 
- """ + def process_collection(self, value: JsonObject | JsonArray, path: list[str], key: str) -> None: + """Determine if the given 'value' is an object or an array and handle it.""" + new_path = self.get_path_with_index(path, key) + if value and isinstance(value[0], tuple): + self.traverse_json(value, new_path) + else: + self.traverse_array(value, new_path) + + def traverse_json(self, data: JsonObject, path: list[str]) -> None: + """Traverse JSON object and check for duplicate keys.""" for key, value in data: print(f"Processing key: {key}, value: {value}") self.check_key(key, path) - if isinstance(value, (list, tuple)): - if value and isinstance(value[0], tuple): - # Handle nested object - self.traverse_json(value, path + [key]) - else: - # Handle array - self.traverse_array(value, path + [key]) + self.process_collection(value, path, key) - def traverse_array(self, items: List[Any], path: List[str]) -> None: - """Process array items while tracking their indices in the path.""" + def traverse_array(self, items: JsonArray, path: list[str]) -> None: + """Process JSON array items while updating the path for duplicates.""" array_path = path[-1] base_path = path[:-1] - + for idx, item in enumerate(items): - if not isinstance(item, (tuple, list)): + if not isinstance(item, (list, tuple)): continue - - current_path = base_path + [f"{array_path}[{idx}]"] - if item and isinstance(item[0], tuple): - # Handle object in array - pairs = [item] if isinstance(item, tuple) else item - self.traverse_json(pairs, current_path) - else: - # Handle nested array - self.traverse_array(item, current_path) + self.process_collection(item, base_path, f"{array_path}[{idx}]") def check_duplicate_keys(json_content: str) -> Tuple[List[str], List[str]]: """Find all duplicate keys in a JSON string. From 2d38b6a15bc2b8809255091258724124642fb727 Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." Date: Thu, 9 Jan 2025 15:40:20 +0000 Subject: [PATCH 03/26] NRL-1215 Add extra edge case --- layer/nrlf/core/tests/test_json_duplicate_checker.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/layer/nrlf/core/tests/test_json_duplicate_checker.py b/layer/nrlf/core/tests/test_json_duplicate_checker.py index 6e6125f52..704172e0a 100644 --- a/layer/nrlf/core/tests/test_json_duplicate_checker.py +++ b/layer/nrlf/core/tests/test_json_duplicate_checker.py @@ -28,6 +28,14 @@ def test_same_level_duplicates(self): self.assertEqual(duplicates, ['b']) self.assertEqual(paths, ['root.a.b']) + def test_same_level_duplicates_objects(self): + # This JSON has duplicates because there are two 'b' keys at the same level + # The difference with above is that the 'b' keys are objects and every element in the object is the same + json_content = '{"a": {"b": { "f": 4, "g": 5 }, "b": { "f": 4, "g": 5 } }, "c": {"d": 3}}' + duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual(duplicates, ['b']) + self.assertEqual(paths, ['root.a.b']) + def test_multiple_level_duplicates(self): # This JSON has duplicates at multiple levels json_content = '{"a": 1, "b": {"c": 2, "c": 3}, "a": 4}' From 4e77c0a3e00b5d6fc98f0630d3fcbe767627f5b8 Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." 
Date: Thu, 9 Jan 2025 15:41:11 +0000 Subject: [PATCH 04/26] NRL-1215 Add tests for duplicates of each field in DocumentReference --- .../tests/test_json_duplicate_checker_nrlf.py | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 layer/nrlf/core/tests/test_json_duplicate_checker_nrlf.py diff --git a/layer/nrlf/core/tests/test_json_duplicate_checker_nrlf.py b/layer/nrlf/core/tests/test_json_duplicate_checker_nrlf.py new file mode 100644 index 000000000..7f0d994f6 --- /dev/null +++ b/layer/nrlf/core/tests/test_json_duplicate_checker_nrlf.py @@ -0,0 +1,55 @@ +import json +import pytest +from json_duplicate_checker import check_duplicate_keys +from layer.nrlf.tests.data import load_document_reference_data + + +def get_all_fields_from_json(json_str): + def extract_fields(data, parent_key=''): + fields = [] + if isinstance(data, dict): + for k, v in data.items(): + full_key = f"{parent_key}.{k}" if parent_key else k + fields.append(full_key) + fields.extend(extract_fields(v, full_key)) + elif isinstance(data, list): + for i, item in enumerate(data): + full_key = f"{parent_key}[{i}]" + fields.extend(extract_fields(item, full_key)) + return fields + + data = json.loads(json_str) + return extract_fields(data) + +def duplicate_field_in_json(json_str, field_path): + data = json.loads(json_str) + path = field_path.replace(']', '').replace('[', '.').split('.') + current = data + for key in path[:-1]: + current = current[int(key) if key.isdigit() else key] + field = path[-1] + + if field in current: + duplicate_field = f"{field}_duplicate" + current[duplicate_field] = current[field] + modified_json_str = json.dumps(data) + # Replace the duplicate field name with the original field name to simulate duplication + duplicated_json_str = modified_json_str.replace(f'"{duplicate_field}":', f'"{field}":', 1) + return duplicated_json_str + return json_str + +def load_document_reference_data_with_all_fields(): + docref_body = load_document_reference_data("Y05868-736253002-Valid") + return get_all_fields_from_json(docref_body) + +@pytest.mark.parametrize("field", load_document_reference_data_with_all_fields()) +def test_parse_body_valid_docref_with_duplicate_keys(field): + docref_body = load_document_reference_data("Y05868-736253002-Valid") + + docref_body = duplicate_field_in_json(docref_body, field) + + result = check_duplicate_keys(docref_body) + + node = field.split(".")[-1] + assert result[0] == [node] + assert result[1] == [f"root.{field}"] \ No newline at end of file From d079bced8c927dd0bce32d5e665859bdb8440275 Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." 
Date: Thu, 9 Jan 2025 16:55:11 +0000 Subject: [PATCH 05/26] NRL-1215 Fix linting issues --- layer/nrlf/core/json_duplicate_checker.py | 28 +-- layer/nrlf/core/request.py | 4 +- .../core/tests/test_json_duplicate_checker.py | 167 ++++++++++-------- .../tests/test_json_duplicate_checker_nrlf.py | 17 +- layer/nrlf/core/tests/test_request.py | 3 +- 5 files changed, 123 insertions(+), 96 deletions(-) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index dd2d3a2fa..978eb48f5 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -1,5 +1,5 @@ import json -from typing import List, Tuple, Set, Dict +from typing import Dict, List, Set, Tuple JsonPrimitive = str | int | float | bool | None type JsonValue = JsonPrimitive | JsonObject | JsonArray @@ -7,9 +7,10 @@ JsonObject = list[JsonPair] JsonArray = list[JsonValue] + class DuplicateKeyChecker: """JSON structure duplicate key detector. - + Tracks duplicate keys by maintaining path context during traversal. Paths are recorded in dot notation with array indices: - Objects: parent.child @@ -25,7 +26,7 @@ def __init__(self): self.current_duplicate_index: Dict[str, int] = {} def get_path_with_index(self, path: List[str], key: str) -> List[str]: - current_level = '.'.join(path) + current_level = ".".join(path) index_map = self.current_duplicate_index.setdefault(current_level, {}) count = index_map.get(key, 0) index_map[key] = count + 1 @@ -39,20 +40,22 @@ def get_path_with_index(self, path: List[str], key: str) -> List[str]: def check_key(self, key: str, path: List[str]) -> None: """Check if a key at the current path is a duplicate. - + A duplicate occurs when the same key appears twice at the same nesting level, even if the values differ. """ - current_level = '.'.join(path) + current_level = ".".join(path) current_keys = self.key_registry.setdefault(current_level, {}) if key in current_keys: self.duplicate_keys.add(key) - self.duplicate_paths.add('.'.join(path + [key])) + self.duplicate_paths.add(".".join(path + [key])) print(f"Found duplicate key: {key} at path: {'.'.join(path + [key])}") else: current_keys[key] = True - def process_collection(self, value: JsonObject | JsonArray, path: list[str], key: str) -> None: + def process_collection( + self, value: JsonObject | JsonArray, path: list[str], key: str + ) -> None: """Determine if the given 'value' is an object or an array and handle it.""" new_path = self.get_path_with_index(path, key) if value and isinstance(value[0], tuple): @@ -78,13 +81,14 @@ def traverse_array(self, items: JsonArray, path: list[str]) -> None: continue self.process_collection(item, base_path, f"{array_path}[{idx}]") + def check_duplicate_keys(json_content: str) -> Tuple[List[str], List[str]]: """Find all duplicate keys in a JSON string. - + Traverses the entire JSON structure and reports: - List of keys that appear multiple times at the same level - Full paths to each duplicate key occurrence - + A key is considered duplicate if it appears multiple times within the same object, regardless of nesting level or array position. 
""" @@ -93,10 +97,10 @@ def check_duplicate_keys(json_content: str) -> Tuple[List[str], List[str]]: print("Parsed JSON:", parsed_data) except json.JSONDecodeError: raise ValueError("Error: Invalid JSON format") - + checker = DuplicateKeyChecker() - checker.traverse_json(parsed_data, ['root']) - + checker.traverse_json(parsed_data, ["root"]) + duplicates = list(checker.duplicate_keys) paths = list(checker.duplicate_paths) print("Final duplicates:", duplicates) diff --git a/layer/nrlf/core/request.py b/layer/nrlf/core/request.py index 9e2576a38..578408a60 100644 --- a/layer/nrlf/core/request.py +++ b/layer/nrlf/core/request.py @@ -6,9 +6,9 @@ from nrlf.core.codes import SpineErrorConcept from nrlf.core.constants import CLIENT_RP_DETAILS, CONNECTION_METADATA from nrlf.core.errors import OperationOutcomeError, ParseError +from nrlf.core.json_duplicate_checker import check_duplicate_keys from nrlf.core.logger import LogReference, logger from nrlf.core.model import ClientRpDetails, ConnectionMetadata -from nrlf.core.json_duplicate_checker import check_duplicate_keys def parse_headers(headers: Dict[str, str]) -> ConnectionMetadata: @@ -100,6 +100,7 @@ def parse_body( msg="Request body could not be parsed", ) from None + def raise_when_duplicate_keys(json_content: str) -> None: """ Raises an error if duplicate keys are found in the JSON content. @@ -139,4 +140,3 @@ def parse_path( details=SpineErrorConcept.from_code("INVALID_PARAMETER"), msg="Invalid path parameter", ) from None - diff --git a/layer/nrlf/core/tests/test_json_duplicate_checker.py b/layer/nrlf/core/tests/test_json_duplicate_checker.py index 704172e0a..493ff94c2 100644 --- a/layer/nrlf/core/tests/test_json_duplicate_checker.py +++ b/layer/nrlf/core/tests/test_json_duplicate_checker.py @@ -1,6 +1,8 @@ import unittest + from json_duplicate_checker import check_duplicate_keys + class TestJsonDuplicateChecker(unittest.TestCase): def test_no_duplicates(self): json_content = '{"a": 1, "b": 2, "c": {"d": 3, "e": 4}}' @@ -11,8 +13,8 @@ def test_no_duplicates(self): def test_simple_duplicates(self): json_content = '{"a": 1, "b": 2, "a": 3}' duplicates, paths = check_duplicate_keys(json_content) - self.assertEqual(duplicates, ['a']) - self.assertEqual(paths, ['root.a']) + self.assertEqual(duplicates, ["a"]) + self.assertEqual(paths, ["root.a"]) def test_nested_duplicates(self): # This JSON has no duplicates because the 'b' keys are at different levels @@ -25,37 +27,39 @@ def test_same_level_duplicates(self): # This JSON has duplicates because there are two 'b' keys at the same level json_content = '{"a": {"b": 1, "b": 2}, "c": {"d": 3}}' duplicates, paths = check_duplicate_keys(json_content) - self.assertEqual(duplicates, ['b']) - self.assertEqual(paths, ['root.a.b']) + self.assertEqual(duplicates, ["b"]) + self.assertEqual(paths, ["root.a.b"]) def test_same_level_duplicates_objects(self): # This JSON has duplicates because there are two 'b' keys at the same level # The difference with above is that the 'b' keys are objects and every element in the object is the same - json_content = '{"a": {"b": { "f": 4, "g": 5 }, "b": { "f": 4, "g": 5 } }, "c": {"d": 3}}' + json_content = ( + '{"a": {"b": { "f": 4, "g": 5 }, "b": { "f": 4, "g": 5 } }, "c": {"d": 3}}' + ) duplicates, paths = check_duplicate_keys(json_content) - self.assertEqual(duplicates, ['b']) - self.assertEqual(paths, ['root.a.b']) + self.assertEqual(duplicates, ["b"]) + self.assertEqual(paths, ["root.a.b"]) def test_multiple_level_duplicates(self): # This JSON has duplicates at multiple 
levels json_content = '{"a": 1, "b": {"c": 2, "c": 3}, "a": 4}' duplicates, paths = check_duplicate_keys(json_content) - self.assertEqual(sorted(duplicates), sorted(['a', 'c'])) - self.assertEqual(sorted(paths), sorted(['root.a', 'root.b.c'])) + self.assertEqual(sorted(duplicates), sorted(["a", "c"])) + self.assertEqual(sorted(paths), sorted(["root.a", "root.b.c"])) def test_invalid_json(self): - json_content = '{invalid json}' + json_content = "{invalid json}" with self.assertRaises(ValueError): check_duplicate_keys(json_content) def test_complex_nested_duplicates(self): json_content = '{"a": {"b": 1, "c": {"d": 2, "c": 3}}, "a": {"e": 4}}' duplicates, paths = check_duplicate_keys(json_content) - self.assertEqual(sorted(duplicates), sorted(['a'])) - self.assertEqual(sorted(paths), sorted(['root.a'])) - + self.assertEqual(sorted(duplicates), sorted(["a"])) + self.assertEqual(sorted(paths), sorted(["root.a"])) + def test_multiple_duplicates_same_path(self): - json_content = ''' + json_content = """ { "a": 1, "b": { @@ -74,13 +78,15 @@ def test_multiple_duplicates_same_path(self): "h": 8 } } - ''' + """ duplicates, paths = check_duplicate_keys(json_content) - self.assertEqual(sorted(duplicates), sorted(['b', 'c', 'e', 'g'])) - self.assertEqual(sorted(paths), sorted(['root.b', 'root.b.c', 'root.b.d.e', 'root.b.d.f.g'])) - + self.assertEqual(sorted(duplicates), sorted(["b", "c", "e", "g"])) + self.assertEqual( + sorted(paths), sorted(["root.b", "root.b.c", "root.b.d.e", "root.b.d.f.g"]) + ) + def test_no_duplicates_deeply_nested(self): - json_content = ''' + json_content = """ { "a": { "b": { @@ -96,13 +102,13 @@ def test_no_duplicates_deeply_nested(self): } } } - ''' + """ duplicates, paths = check_duplicate_keys(json_content) self.assertEqual(duplicates, []) self.assertEqual(paths, []) - + def test_duplicates_with_arrays(self): - json_content = ''' + json_content = """ { "a": [ {"b": 1, "b": 2}, @@ -110,13 +116,13 @@ def test_duplicates_with_arrays(self): ], "d": 5 } - ''' + """ duplicates, paths = check_duplicate_keys(json_content) - self.assertEqual(sorted(duplicates), sorted(['b', 'c'])) - self.assertEqual(sorted(paths), sorted(['root.a[0].b', 'root.a[1].c'])) - + self.assertEqual(sorted(duplicates), sorted(["b", "c"])) + self.assertEqual(sorted(paths), sorted(["root.a[0].b", "root.a[1].c"])) + def test_large_json_with_mixed_duplicates(self): - json_content = ''' + json_content = """ { "a": 1, "b": { @@ -137,13 +143,16 @@ def test_large_json_with_mixed_duplicates(self): "j": 11 } } - ''' + """ duplicates, paths = check_duplicate_keys(json_content) - self.assertEqual(sorted(duplicates), sorted(['c', 'f', 'h', 'j'])) - self.assertEqual(sorted(paths), sorted(['root.b.c', 'root.b.e.f', 'root.b.e.g.h', 'root.i.j'])) + self.assertEqual(sorted(duplicates), sorted(["c", "f", "h", "j"])) + self.assertEqual( + sorted(paths), + sorted(["root.b.c", "root.b.e.f", "root.b.e.g.h", "root.i.j"]), + ) def test_complex_nested_arrays_with_duplicates(self): - json_content = ''' + json_content = """ { "level1": { "arrays": [ @@ -172,27 +181,28 @@ def test_complex_nested_arrays_with_duplicates(self): "arrays": "duplicate_at_parent" } } - ''' + """ duplicates, paths = check_duplicate_keys(json_content) self.assertEqual( - sorted(duplicates), - sorted(['a', 'b', 'c', 'g', 'i', 'mixed', 'arrays']) + sorted(duplicates), sorted(["a", "b", "c", "g", "i", "mixed", "arrays"]) ) self.assertEqual( - sorted(paths), - sorted([ - 'root.level1.arrays', - 'root.level1.arrays[0].a', - 'root.level1.arrays[0].nested.b', - 
'root.level1.arrays[0].nested.b[0].c', - 'root.level1.arrays[1].mixed', - 'root.level1.arrays[1].mixed[1].f[0].g', - 'root.level1.arrays[1].mixed[1].f[1].h.i' - ]) + sorted(paths), + sorted( + [ + "root.level1.arrays", + "root.level1.arrays[0].a", + "root.level1.arrays[0].nested.b", + "root.level1.arrays[0].nested.b[0].c", + "root.level1.arrays[1].mixed", + "root.level1.arrays[1].mixed[1].f[0].g", + "root.level1.arrays[1].mixed[1].f[1].h.i", + ] + ), ) def test_deep_nested_array_object_duplicates(self): - json_content = ''' + json_content = """ { "root": { "level1": [ @@ -216,57 +226,56 @@ def test_deep_nested_array_object_duplicates(self): ] } } - ''' + """ duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual(sorted(duplicates), sorted(["data", "other"])) self.assertEqual( - sorted(duplicates), - sorted(['data', 'other']) - ) - self.assertEqual( - sorted(paths), - sorted([ - 'root.root.level1[0].level2[0][0].data', - 'root.root.level1[0].level2[1][0].other' - ]) + sorted(paths), + sorted( + [ + "root.root.level1[0].level2[0][0].data", + "root.root.level1[0].level2[1][0].other", + ] + ), ) def generate_nested_json(self, depth, current_level=0): """Helper function to generate nested JSON with duplicates at each level.""" if depth == 0: return '{"key": 1, "key": 2}' - + next_json = self.generate_nested_json(depth - 1, current_level + 1) - return f'''{{ + return f"""{{ "level{current_level}": [ {{ "array_obj": {next_json}, "array_obj": "duplicate_at_depth_{current_level}" }} ] - }}''' + }}""" def get_expected_duplicates(self, max_depth): """Helper function to get expected duplicate keys.""" - duplicates = ['array_obj'] # array_obj appears at each level - duplicates.extend(['key']) # key appears at the innermost level + duplicates = ["array_obj"] # array_obj appears at each level + duplicates.extend(["key"]) # key appears at the innermost level return sorted(list(set(duplicates))) def get_expected_paths(self, max_depth): """Helper function to get expected duplicate paths.""" paths = [] - current_path = 'root' - + current_path = "root" + # Start from level0 and increment for i in range(max_depth): - current_path += f'.level{i}[0]' - paths.append(f'{current_path}.array_obj') + current_path += f".level{i}[0]" + paths.append(f"{current_path}.array_obj") if i < max_depth - 1: # If not at the last level - current_path += '.array_obj' # Navigate into the nested object - + current_path += ".array_obj" # Navigate into the nested object + # Add the key duplicate at the innermost level if max_depth > 0: - paths.append(f'{current_path}.array_obj.key') - + paths.append(f"{current_path}.array_obj.key") + return sorted(paths) def test_parametrized_nested_arrays(self): @@ -277,19 +286,25 @@ def test_parametrized_nested_arrays(self): print(f"\n=== Testing depth {depth} ===") print("Generated JSON:") print(json_content) - + duplicates, paths = check_duplicate_keys(json_content) - + expected_duplicates = self.get_expected_duplicates(depth) expected_paths = self.get_expected_paths(depth) - + print("\nActual duplicates:", sorted(duplicates)) print("Expected duplicates:", expected_duplicates) print("\nActual paths:", sorted(paths)) print("Expected paths:", expected_paths) - - self.assertEqual(sorted(duplicates), expected_duplicates, - f"Failed for depth {depth} - duplicates mismatch") - self.assertEqual(sorted(paths), expected_paths, - f"Failed for depth {depth} - paths mismatch") + + self.assertEqual( + sorted(duplicates), + expected_duplicates, + f"Failed for depth {depth} - duplicates 
mismatch", + ) + self.assertEqual( + sorted(paths), + expected_paths, + f"Failed for depth {depth} - paths mismatch", + ) print("=== Test passed for depth", depth, "===\n") diff --git a/layer/nrlf/core/tests/test_json_duplicate_checker_nrlf.py b/layer/nrlf/core/tests/test_json_duplicate_checker_nrlf.py index 7f0d994f6..f3da51748 100644 --- a/layer/nrlf/core/tests/test_json_duplicate_checker_nrlf.py +++ b/layer/nrlf/core/tests/test_json_duplicate_checker_nrlf.py @@ -1,11 +1,13 @@ import json + import pytest from json_duplicate_checker import check_duplicate_keys + from layer.nrlf.tests.data import load_document_reference_data def get_all_fields_from_json(json_str): - def extract_fields(data, parent_key=''): + def extract_fields(data, parent_key=""): fields = [] if isinstance(data, dict): for k, v in data.items(): @@ -21,9 +23,10 @@ def extract_fields(data, parent_key=''): data = json.loads(json_str) return extract_fields(data) + def duplicate_field_in_json(json_str, field_path): data = json.loads(json_str) - path = field_path.replace(']', '').replace('[', '.').split('.') + path = field_path.replace("]", "").replace("[", ".").split(".") current = data for key in path[:-1]: current = current[int(key) if key.isdigit() else key] @@ -34,16 +37,20 @@ def duplicate_field_in_json(json_str, field_path): current[duplicate_field] = current[field] modified_json_str = json.dumps(data) # Replace the duplicate field name with the original field name to simulate duplication - duplicated_json_str = modified_json_str.replace(f'"{duplicate_field}":', f'"{field}":', 1) + duplicated_json_str = modified_json_str.replace( + f'"{duplicate_field}":', f'"{field}":', 1 + ) return duplicated_json_str return json_str + def load_document_reference_data_with_all_fields(): docref_body = load_document_reference_data("Y05868-736253002-Valid") return get_all_fields_from_json(docref_body) + @pytest.mark.parametrize("field", load_document_reference_data_with_all_fields()) -def test_parse_body_valid_docref_with_duplicate_keys(field): +def test_parse_body_valid_docref_with_duplicate_keys(field): docref_body = load_document_reference_data("Y05868-736253002-Valid") docref_body = duplicate_field_in_json(docref_body, field) @@ -52,4 +59,4 @@ def test_parse_body_valid_docref_with_duplicate_keys(field): node = field.split(".")[-1] assert result[0] == [node] - assert result[1] == [f"root.{field}"] \ No newline at end of file + assert result[1] == [f"root.{field}"] diff --git a/layer/nrlf/core/tests/test_request.py b/layer/nrlf/core/tests/test_request.py index 7bc661af9..17d8f8bd7 100644 --- a/layer/nrlf/core/tests/test_request.py +++ b/layer/nrlf/core/tests/test_request.py @@ -150,13 +150,14 @@ def test_parse_body_valid_docref(): assert isinstance(result, DocumentReference) + # another test similar to test_parse_body_valid_docref but with a duplicate key def test_parse_body_valid_docref_with_duplicate_key(): model = DocumentReference docref_body = load_document_reference_data("Y05868-736253002-Valid") str_to_duplicate = '"docStatus": "final",' - docref_body = docref_body.replace(str_to_duplicate, str_to_duplicate*2) + docref_body = docref_body.replace(str_to_duplicate, str_to_duplicate * 2) with pytest.raises(OperationOutcomeError) as error: parse_body(model, docref_body) From bfbbf8ad4d76907110959adca8a576034a4c57d7 Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." 
Date: Thu, 9 Jan 2025 17:10:32 +0000 Subject: [PATCH 06/26] NRL-1215 Fix error code for duplicates --- layer/nrlf/core/request.py | 2 +- layer/nrlf/core/tests/test_request.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/layer/nrlf/core/request.py b/layer/nrlf/core/request.py index 578408a60..2ff21ad89 100644 --- a/layer/nrlf/core/request.py +++ b/layer/nrlf/core/request.py @@ -109,7 +109,7 @@ def raise_when_duplicate_keys(json_content: str) -> None: if duplicates: raise OperationOutcomeError( severity="error", - code="required", + code="invalid", details=SpineErrorConcept.from_code("MESSAGE_NOT_WELL_FORMED"), diagnostics=f"Duplicate keys found in FHIR document: {duplicates}", expression=paths, diff --git a/layer/nrlf/core/tests/test_request.py b/layer/nrlf/core/tests/test_request.py index 17d8f8bd7..d05337676 100644 --- a/layer/nrlf/core/tests/test_request.py +++ b/layer/nrlf/core/tests/test_request.py @@ -170,7 +170,7 @@ def test_parse_body_valid_docref_with_duplicate_key(): "issue": [ { "severity": "error", - "code": "required", + "code": "invalid", "details": { "coding": [ { From 4fab5abf588ccaadb8b8dbbe12549d6c8fae0af2 Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." Date: Mon, 13 Jan 2025 09:56:17 +0000 Subject: [PATCH 07/26] NRL-1215 Fix possible intermittent related to dict order --- layer/nrlf/core/json_duplicate_checker.py | 16 +++++++++------- .../core/tests/test_json_duplicate_checker.py | 8 ++++---- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index 978eb48f5..2250a6edc 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -1,5 +1,6 @@ import json -from typing import Dict, List, Set, Tuple +from collections import OrderedDict +from typing import Dict, List, Tuple JsonPrimitive = str | int | float | bool | None type JsonValue = JsonPrimitive | JsonObject | JsonArray @@ -19,8 +20,8 @@ class DuplicateKeyChecker: """ def __init__(self): - self.duplicate_keys: Set[str] = set() - self.duplicate_paths: Set[str] = set() + # Here a list of paths because the same key name could be at different levels + self.duplicate_keys_and_paths: OrderedDict[str, list[str]] = OrderedDict() # Track keys at each path level to detect duplicates self.key_registry: Dict[str, Dict[str, bool]] = {} self.current_duplicate_index: Dict[str, int] = {} @@ -47,8 +48,8 @@ def check_key(self, key: str, path: List[str]) -> None: current_level = ".".join(path) current_keys = self.key_registry.setdefault(current_level, {}) if key in current_keys: - self.duplicate_keys.add(key) - self.duplicate_paths.add(".".join(path + [key])) + duplicate_path = ".".join(path + [key]) + self.duplicate_keys_and_paths.setdefault(key, []).append(duplicate_path) print(f"Found duplicate key: {key} at path: {'.'.join(path + [key])}") else: current_keys[key] = True @@ -101,8 +102,9 @@ def check_duplicate_keys(json_content: str) -> Tuple[List[str], List[str]]: checker = DuplicateKeyChecker() checker.traverse_json(parsed_data, ["root"]) - duplicates = list(checker.duplicate_keys) - paths = list(checker.duplicate_paths) + duplicates = list(checker.duplicate_keys_and_paths.keys()) + # flatten the list of paths + paths = sum(checker.duplicate_keys_and_paths.values(), []) print("Final duplicates:", duplicates) print("Final paths:", paths) diff --git a/layer/nrlf/core/tests/test_json_duplicate_checker.py b/layer/nrlf/core/tests/test_json_duplicate_checker.py index 
493ff94c2..b45c6b481 100644 --- a/layer/nrlf/core/tests/test_json_duplicate_checker.py +++ b/layer/nrlf/core/tests/test_json_duplicate_checker.py @@ -292,19 +292,19 @@ def test_parametrized_nested_arrays(self): expected_duplicates = self.get_expected_duplicates(depth) expected_paths = self.get_expected_paths(depth) - print("\nActual duplicates:", sorted(duplicates)) + print("\nActual duplicates:", duplicates) print("Expected duplicates:", expected_duplicates) - print("\nActual paths:", sorted(paths)) + print("\nActual paths:", paths) print("Expected paths:", expected_paths) self.assertEqual( sorted(duplicates), - expected_duplicates, + sorted(expected_duplicates), f"Failed for depth {depth} - duplicates mismatch", ) self.assertEqual( sorted(paths), - expected_paths, + sorted(expected_paths), f"Failed for depth {depth} - paths mismatch", ) print("=== Test passed for depth", depth, "===\n") From af54474b56c7e8208326f4fb70119ddb29970b98 Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." Date: Mon, 13 Jan 2025 17:19:26 +0000 Subject: [PATCH 08/26] NRL-1215 Add logging for duplicate key checks and integration tests for create, upsert, update --- layer/nrlf/core/log_references.py | 1 + layer/nrlf/core/request.py | 1 + ...teDocumentReference-duplicateField.feature | 128 +++++++++++++++ ...teDocumentReference-duplicateField.feature | 150 ++++++++++++++++++ ...rtDocumentReference-duplicateField.feature | 128 +++++++++++++++ tests/features/steps/2_request.py | 20 +++ tests/utilities/api_clients.py | 8 + 7 files changed, 436 insertions(+) create mode 100644 tests/features/producer/createDocumentReference-duplicateField.feature create mode 100644 tests/features/producer/updateDocumentReference-duplicateField.feature create mode 100644 tests/features/producer/upsertDocumentReference-duplicateField.feature diff --git a/layer/nrlf/core/log_references.py b/layer/nrlf/core/log_references.py index 295cea4ac..698dcd691 100644 --- a/layer/nrlf/core/log_references.py +++ b/layer/nrlf/core/log_references.py @@ -35,6 +35,7 @@ class LogReference(Enum): ) HANDLER016 = _Reference("INFO", "Set response headers") HANDLER017 = _Reference("WARN", "Correlation ID not found in request headers") + HANDLER018 = _Reference("INFO", "Checking for duplicate keys in request body") HANDLER999 = _Reference("INFO", "Request handler returned successfully") # Error Logs diff --git a/layer/nrlf/core/request.py b/layer/nrlf/core/request.py index 2ff21ad89..22826e60e 100644 --- a/layer/nrlf/core/request.py +++ b/layer/nrlf/core/request.py @@ -105,6 +105,7 @@ def raise_when_duplicate_keys(json_content: str) -> None: """ Raises an error if duplicate keys are found in the JSON content. 
""" + logger.log(LogReference.HANDLER018) duplicates, paths = check_duplicate_keys(json_content) if duplicates: raise OperationOutcomeError( diff --git a/tests/features/producer/createDocumentReference-duplicateField.feature b/tests/features/producer/createDocumentReference-duplicateField.feature new file mode 100644 index 000000000..b9a25b114 --- /dev/null +++ b/tests/features/producer/createDocumentReference-duplicateField.feature @@ -0,0 +1,128 @@ +Feature: Producer - createDocumentReference - Duplicate Field Scenarios + + Scenario: Duplicate url field in attachment + Given the application 'DataShare' (ID 'z00z-y11y-x22x') is registered to access the API + And the organisation 'TSTCUS' is authorised to access pointer types: + | system | value | + | http://snomed.info/sct | 736253002 | + When producer 'TSTCUS' requests creation of a DocumentReference with default test values except 'content' is: + """ + "content": [ + { + "attachment": { + "contentType": "application/pdf", + "url": "https://example.org/my-doc.pdf", + "url": "https://example.org/duplicate-url.pdf" + }, + "format": { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode", + "code": "urn:nhs-ic:unstructured", + "display": "Unstructured Document" + }, + "extension": [ + { + "url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-ContentStability", + "valueCodeableConcept": { + "coding": [ + { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLContentStability", + "code": "static", + "display": "Static" + } + ] + } + } + ] + } + ] + """ + Then the response status code is 400 + And the response is an OperationOutcome with 1 issue + And the OperationOutcome contains the issue: + """ + { + "severity": "error", + "code": "invalid", + "details": { + "coding": [ + { + "system": "https://fhir.nhs.uk/ValueSet/Spine-ErrorOrWarningCode-1", + "code": "MESSAGE_NOT_WELL_FORMED", + "display": "Message not well formed" + } + ] + }, + "diagnostics": "Duplicate keys found in FHIR document: ['url']", + "expression": [ + "root.content[0].attachment.url" + ] + } + """ + + Scenario: Duplicate format and attachement field in content + Given the application 'DataShare' (ID 'z00z-y11y-x22x') is registered to access the API + And the organisation 'TSTCUS' is authorised to access pointer types: + | system | value | + | http://snomed.info/sct | 736253002 | + When producer 'TSTCUS' requests creation of a DocumentReference with default test values except 'content' is: + """ + "content": [ + { + "attachment": { + "contentType": "application/pdf", + "url": "https://example.org/my-doc.pdf" + }, + "attachment": { + "contentType": "text/html", + "url": "https://example.org/contact-details.html" + }, + "format": { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode", + "code": "urn:nhs-ic:unstructured", + "display": "Unstructured Document" + }, + "format": { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode", + "code": "urn:nhs-ic:record-contact", + "display": "Contact details (HTTP Unsecured)" + }, + "extension": [ + { + "url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-ContentStability", + "valueCodeableConcept": { + "coding": [ + { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLContentStability", + "code": "static", + "display": "Static" + } + ] + } + } + ] + } + ] + """ + Then the response status code is 400 + And the response is an OperationOutcome with 1 issue + And the OperationOutcome contains the issue: + """ + { + 
"severity": "error", + "code": "invalid", + "details": { + "coding": [ + { + "system": "https://fhir.nhs.uk/ValueSet/Spine-ErrorOrWarningCode-1", + "code": "MESSAGE_NOT_WELL_FORMED", + "display": "Message not well formed" + } + ] + }, + "diagnostics": "Duplicate keys found in FHIR document: ['attachment', 'format']", + "expression": [ + "root.content[0].attachment", + "root.content[0].format" + ] + } + """ diff --git a/tests/features/producer/updateDocumentReference-duplicateField.feature b/tests/features/producer/updateDocumentReference-duplicateField.feature new file mode 100644 index 000000000..3209c6857 --- /dev/null +++ b/tests/features/producer/updateDocumentReference-duplicateField.feature @@ -0,0 +1,150 @@ +Feature: Producer - updateDocumentReference - Duplicate Field Scenarios + + Scenario: Duplicate url field in attachment + Given the application 'DataShare' (ID 'z00z-y11y-x22x') is registered to access the API + And the organisation 'TSTCUS' is authorised to access pointer types: + | system | value | + | http://snomed.info/sct | 736253002 | + And a DocumentReference resource exists with values: + | property | value | + | id | TSTCUS-updateDuplicateTest-1234 | + | subject | 9999999999 | + | status | current | + | type | 736253002 | + | category | 734163000 | + | contentType | application/pdf | + | url | https://example.org/my-doc.pdf | + | custodian | TSTCUS | + | author | TSTCUS | + When producer 'TSTCUS' requests update of a DocumentReference with pointerId 'TSTCUS-updateDuplicateTest-1234' but replacing 'content': + """ + [ + { + "attachment": { + "contentType": "application/pdf", + "url": "https://example.org/my-doc.pdf", + "url": "https://example.org/duplicate-url.pdf" + }, + "format": { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode", + "code": "urn:nhs-ic:unstructured", + "display": "Unstructured Document" + }, + "extension": [ + { + "url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-ContentStability", + "valueCodeableConcept": { + "coding": [ + { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLContentStability", + "code": "static", + "display": "Static" + } + ] + } + } + ] + } + ] + """ + Then the response status code is 400 + And the response is an OperationOutcome with 1 issue + And the OperationOutcome contains the issue: + """ + { + "severity": "error", + "code": "invalid", + "details": { + "coding": [ + { + "system": "https://fhir.nhs.uk/ValueSet/Spine-ErrorOrWarningCode-1", + "code": "MESSAGE_NOT_WELL_FORMED", + "display": "Message not well formed" + } + ] + }, + "diagnostics": "Duplicate keys found in FHIR document: ['url']", + "expression": [ + "root.content[0].attachment.url" + ] + } + """ + + Scenario: Duplicate format and attachment field in content + Given the application 'DataShare' (ID 'z00z-y11y-x22x') is registered to access the API + And the organisation 'TSTCUS' is authorised to access pointer types: + | system | value | + | http://snomed.info/sct | 736253002 | + And a DocumentReference resource exists with values: + | property | value | + | id | TSTCUS-updateDuplicateTest-1235 | + | subject | 9999999999 | + | status | current | + | type | 736253002 | + | category | 734163000 | + | contentType | application/pdf | + | url | https://example.org/my-doc.pdf | + | custodian | TSTCUS | + | author | TSTCUS | + When producer 'TSTCUS' requests update of a DocumentReference with pointerId 'TSTCUS-updateDuplicateTest-1235' but replacing 'content': + """ + [ + { + "attachment": { + "contentType": 
"application/pdf", + "url": "https://example.org/my-doc.pdf" + }, + "attachment": { + "contentType": "text/html", + "url": "https://example.org/contact-details.html" + }, + "format": { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode", + "code": "urn:nhs-ic:unstructured", + "display": "Unstructured Document" + }, + "format": { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode", + "code": "urn:nhs-ic:record-contact", + "display": "Contact details (HTTP Unsecured)" + }, + "extension": [ + { + "url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-ContentStability", + "valueCodeableConcept": { + "coding": [ + { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLContentStability", + "code": "static", + "display": "Static" + } + ] + } + } + ] + } + ] + """ + Then the response status code is 400 + And the response is an OperationOutcome with 1 issue + And the OperationOutcome contains the issue: + """ + { + "severity": "error", + "code": "invalid", + "details": { + "coding": [ + { + "system": "https://fhir.nhs.uk/ValueSet/Spine-ErrorOrWarningCode-1", + "code": "MESSAGE_NOT_WELL_FORMED", + "display": "Message not well formed" + } + ] + }, + "diagnostics": "Duplicate keys found in FHIR document: ['attachment', 'format']", + "expression": [ + "root.content[0].attachment", + "root.content[0].format" + ] + } + """ diff --git a/tests/features/producer/upsertDocumentReference-duplicateField.feature b/tests/features/producer/upsertDocumentReference-duplicateField.feature new file mode 100644 index 000000000..94230df9d --- /dev/null +++ b/tests/features/producer/upsertDocumentReference-duplicateField.feature @@ -0,0 +1,128 @@ +Feature: Producer - upsertDocumentReference - Duplicate Field Scenarios + + Scenario: Duplicate url field in attachment + Given the application 'DataShare' (ID 'z00z-y11y-x22x') is registered to access the API + And the organisation 'TSTCUS' is authorised to access pointer types: + | system | value | + | http://snomed.info/sct | 736253002 | + When producer 'TSTCUS' requests upsert of a DocumentReference with pointerId 'TSTCUS-testduplicates-upsert-0001-0001' and default test values except 'content' is: + """ + "content": [ + { + "attachment": { + "contentType": "application/pdf", + "url": "https://example.org/my-doc.pdf", + "url": "https://example.org/duplicate-url.pdf" + }, + "format": { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode", + "code": "urn:nhs-ic:unstructured", + "display": "Unstructured Document" + }, + "extension": [ + { + "url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-ContentStability", + "valueCodeableConcept": { + "coding": [ + { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLContentStability", + "code": "static", + "display": "Static" + } + ] + } + } + ] + } + ] + """ + Then the response status code is 400 + And the response is an OperationOutcome with 1 issue + And the OperationOutcome contains the issue: + """ + { + "severity": "error", + "code": "invalid", + "details": { + "coding": [ + { + "system": "https://fhir.nhs.uk/ValueSet/Spine-ErrorOrWarningCode-1", + "code": "MESSAGE_NOT_WELL_FORMED", + "display": "Message not well formed" + } + ] + }, + "diagnostics": "Duplicate keys found in FHIR document: ['url']", + "expression": [ + "root.content[0].attachment.url" + ] + } + """ + + Scenario: Duplicate format and attachment field in content + Given the application 'DataShare' (ID 'z00z-y11y-x22x') is registered to 
access the API + And the organisation 'TSTCUS' is authorised to access pointer types: + | system | value | + | http://snomed.info/sct | 736253002 | + When producer 'TSTCUS' requests upsert of a DocumentReference with pointerId 'TSTCUS-testduplicates-upsert-0001-0002' and default test values except 'content' is: + """ + "content": [ + { + "attachment": { + "contentType": "application/pdf", + "url": "https://example.org/my-doc.pdf" + }, + "attachment": { + "contentType": "text/html", + "url": "https://example.org/contact-details.html" + }, + "format": { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode", + "code": "urn:nhs-ic:unstructured", + "display": "Unstructured Document" + }, + "format": { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode", + "code": "urn:nhs-ic:record-contact", + "display": "Contact details (HTTP Unsecured)" + }, + "extension": [ + { + "url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-ContentStability", + "valueCodeableConcept": { + "coding": [ + { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLContentStability", + "code": "static", + "display": "Static" + } + ] + } + } + ] + } + ] + """ + Then the response status code is 400 + And the response is an OperationOutcome with 1 issue + And the OperationOutcome contains the issue: + """ + { + "severity": "error", + "code": "invalid", + "details": { + "coding": [ + { + "system": "https://fhir.nhs.uk/ValueSet/Spine-ErrorOrWarningCode-1", + "code": "MESSAGE_NOT_WELL_FORMED", + "display": "Message not well formed" + } + ] + }, + "diagnostics": "Duplicate keys found in FHIR document: ['attachment', 'format']", + "expression": [ + "root.content[0].attachment", + "root.content[0].format" + ] + } + """ diff --git a/tests/features/steps/2_request.py b/tests/features/steps/2_request.py index 993fae1c7..261f524d3 100644 --- a/tests/features/steps/2_request.py +++ b/tests/features/steps/2_request.py @@ -133,6 +133,26 @@ def create_post_body_step(context: Context, section: str): def upsert_post_body_step(context: Context, section: str, pointer_id: str): _create_or_upsert_body_step(context, "upsert_text", section, pointer_id) +@when( + "producer 'TSTCUS' requests update of a DocumentReference with pointerId '{pointer_id}' but replacing '{section}'" +) +def update_post_body_step(context: Context, section: str, pointer_id: str): + """ This can only update top level fields """ + consumer_client = consumer_client_from_context(context, "TSTCUS") + context.response = consumer_client.read(pointer_id) + + if context.response.status_code != 200: + raise ValueError(f"Failed to read existing pointer: {context.response.text}") + + doc_ref = context.response.json() + doc_ref[section] = "placeholder" + doc_ref_text = json.dumps(doc_ref) + doc_ref_text = doc_ref_text.replace('"placeholder"', context.text) + print(doc_ref_text) + + producer_client = producer_client_from_context(context, "TSTCUS") + context.response = producer_client.update_text(doc_ref_text, pointer_id) + @when( "producer 'TSTCUS' requests update of a DocumentReference with pointerId '{pointer_id}' and only changing" diff --git a/tests/utilities/api_clients.py b/tests/utilities/api_clients.py index 3b1b78e9f..762b1c1b4 100644 --- a/tests/utilities/api_clients.py +++ b/tests/utilities/api_clients.py @@ -229,6 +229,14 @@ def update(self, doc_ref, doc_ref_id: str): cert=self.config.client_cert, ) + def update_text(self, doc_ref, doc_ref_id: str): + return requests.put( + 
f"{self.api_url}/DocumentReference/{doc_ref_id}", + data=doc_ref, + headers=self.request_headers, + cert=self.config.client_cert, + ) + def delete(self, doc_ref_id: str): return requests.delete( f"{self.api_url}/DocumentReference/{doc_ref_id}", From 03a820c9cabdd245de7ecb176468d61be292bf1a Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." Date: Mon, 13 Jan 2025 18:01:50 +0000 Subject: [PATCH 09/26] NRL-1215 Add test for edge case with duplicate keys in JSON arrays --- .../core/tests/test_json_duplicate_checker.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/layer/nrlf/core/tests/test_json_duplicate_checker.py b/layer/nrlf/core/tests/test_json_duplicate_checker.py index b45c6b481..870643a48 100644 --- a/layer/nrlf/core/tests/test_json_duplicate_checker.py +++ b/layer/nrlf/core/tests/test_json_duplicate_checker.py @@ -308,3 +308,19 @@ def test_parametrized_nested_arrays(self): f"Failed for depth {depth} - paths mismatch", ) print("=== Test passed for depth", depth, "===\n") + + def test_array_edge_case_duplicate(self): + json_content = """ + { + "array": [ + 1, + "string", + {"key": "value"}, + [1, 2, 3] + ], + "array": "duplicate" + } + """ + duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual(duplicates, ["array"]) + self.assertEqual(paths, ["root.array"]) \ No newline at end of file From 4467cf741b8a0ba4f0b196f05a5385d2dd6bb92f Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." Date: Tue, 14 Jan 2025 09:29:11 +0000 Subject: [PATCH 10/26] NRL-1215 Enhance duplicate detection for JSON arrays and add corresponding tests --- layer/nrlf/core/json_duplicate_checker.py | 18 +++++++ .../core/tests/test_json_duplicate_checker.py | 54 ++++++++++++++++++- 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index 2250a6edc..2b16045cd 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -25,6 +25,8 @@ def __init__(self): # Track keys at each path level to detect duplicates self.key_registry: Dict[str, Dict[str, bool]] = {} self.current_duplicate_index: Dict[str, int] = {} + # Track seen array elements to detect duplicates + self.seen_array_elements: Dict[str, List[JsonValue]] = {} def get_path_with_index(self, path: List[str], key: str) -> List[str]: current_level = ".".join(path) @@ -76,8 +78,24 @@ def traverse_array(self, items: JsonArray, path: list[str]) -> None: """Process JSON array items while updating the path for duplicates.""" array_path = path[-1] base_path = path[:-1] + seen_elements = self.seen_array_elements.setdefault(".".join(path), set()) for idx, item in enumerate(items): + serialized_item = json.dumps(item, sort_keys=True) + if serialized_item in seen_elements: + element = f"{array_path}[{idx}]" + duplicate_path = ".".join(base_path + [element]) + self.duplicate_keys_and_paths.setdefault(element, []).append(duplicate_path) + print(f"Found duplicate array element at path: {duplicate_path}") + else: + seen_elements.add(serialized_item) + + # if item in seen_elements: + # duplicate_path = f"{array_path}[{idx}]" + # self.duplicate_keys_and_paths.setdefault(duplicate_path, []).append(f"{base_path[0]}.{duplicate_path}") + # print(f"Found duplicate array element at path: {duplicate_path}") + # else: + # seen_elements.append(item) if not isinstance(item, (list, tuple)): continue self.process_collection(item, base_path, f"{array_path}[{idx}]") diff --git 
a/layer/nrlf/core/tests/test_json_duplicate_checker.py b/layer/nrlf/core/tests/test_json_duplicate_checker.py index 870643a48..7337999ac 100644 --- a/layer/nrlf/core/tests/test_json_duplicate_checker.py +++ b/layer/nrlf/core/tests/test_json_duplicate_checker.py @@ -323,4 +323,56 @@ def test_array_edge_case_duplicate(self): """ duplicates, paths = check_duplicate_keys(json_content) self.assertEqual(duplicates, ["array"]) - self.assertEqual(paths, ["root.array"]) \ No newline at end of file + self.assertEqual(paths, ["root.array"]) + + def test_array_element_duplicate(self): + json_content = """ + { + "array": [ + 1, + 2, + 3, + 1 + ] + } + """ + duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual(duplicates, ["array[3]"]) + self.assertEqual(paths, ["root.array[3]"]) + + # deeply nested object with a deeply nested array with a duplicate + def test_deeply_nested_object_with_deeply_nested_array_duplicate(self): + json_content = """ + { + "root": { + "level1": { + "level2": { + "level3": { + "level4": { + "level5": { + "level6": { + "level7": { + "level8": { + "level9": { + "level10": { + "array": [ + {"key1": 1, "key2": 2}, + {"key1": 1, "key2": 2} + ] + } + } + } + } + } + } + } + } + } + } + } + } + """ + duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual(duplicates, ["array[1]"]) + # duplicate root here needs fixing in traverse_array loop + self.assertEqual(paths, ["root.root.level1.level2.level3.level4.level5.level6.level7.level8.level9.level10.array[1]"]) From 705b372d4244487b588f731436690e81a552dab5 Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." Date: Tue, 14 Jan 2025 09:29:45 +0000 Subject: [PATCH 11/26] NRL-1215 Remove commented-out code for duplicate detection in JSON arrays --- layer/nrlf/core/json_duplicate_checker.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index 2b16045cd..65a067058 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -90,12 +90,6 @@ def traverse_array(self, items: JsonArray, path: list[str]) -> None: else: seen_elements.add(serialized_item) - # if item in seen_elements: - # duplicate_path = f"{array_path}[{idx}]" - # self.duplicate_keys_and_paths.setdefault(duplicate_path, []).append(f"{base_path[0]}.{duplicate_path}") - # print(f"Found duplicate array element at path: {duplicate_path}") - # else: - # seen_elements.append(item) if not isinstance(item, (list, tuple)): continue self.process_collection(item, base_path, f"{array_path}[{idx}]") From fce0d9a4ef8a1391779b20624cc9192481584995 Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." 
Date: Tue, 14 Jan 2025 09:43:51 +0000 Subject: [PATCH 12/26] NRL-1215 Fix linting issues --- layer/nrlf/core/json_duplicate_checker.py | 4 +- .../core/tests/test_json_duplicate_checker.py | 7 +- ...teDocumentReference-duplicateField.feature | 124 +++++++++--------- tests/features/steps/2_request.py | 5 +- 4 files changed, 74 insertions(+), 66 deletions(-) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index 65a067058..c2cec9620 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -85,7 +85,9 @@ def traverse_array(self, items: JsonArray, path: list[str]) -> None: if serialized_item in seen_elements: element = f"{array_path}[{idx}]" duplicate_path = ".".join(base_path + [element]) - self.duplicate_keys_and_paths.setdefault(element, []).append(duplicate_path) + self.duplicate_keys_and_paths.setdefault(element, []).append( + duplicate_path + ) print(f"Found duplicate array element at path: {duplicate_path}") else: seen_elements.add(serialized_item) diff --git a/layer/nrlf/core/tests/test_json_duplicate_checker.py b/layer/nrlf/core/tests/test_json_duplicate_checker.py index 7337999ac..7fb857cf3 100644 --- a/layer/nrlf/core/tests/test_json_duplicate_checker.py +++ b/layer/nrlf/core/tests/test_json_duplicate_checker.py @@ -375,4 +375,9 @@ def test_deeply_nested_object_with_deeply_nested_array_duplicate(self): duplicates, paths = check_duplicate_keys(json_content) self.assertEqual(duplicates, ["array[1]"]) # duplicate root here needs fixing in traverse_array loop - self.assertEqual(paths, ["root.root.level1.level2.level3.level4.level5.level6.level7.level8.level9.level10.array[1]"]) + self.assertEqual( + paths, + [ + "root.root.level1.level2.level3.level4.level5.level6.level7.level8.level9.level10.array[1]" + ], + ) diff --git a/tests/features/producer/updateDocumentReference-duplicateField.feature b/tests/features/producer/updateDocumentReference-duplicateField.feature index 3209c6857..298c56e9a 100644 --- a/tests/features/producer/updateDocumentReference-duplicateField.feature +++ b/tests/features/producer/updateDocumentReference-duplicateField.feature @@ -18,34 +18,34 @@ Feature: Producer - updateDocumentReference - Duplicate Field Scenarios | author | TSTCUS | When producer 'TSTCUS' requests update of a DocumentReference with pointerId 'TSTCUS-updateDuplicateTest-1234' but replacing 'content': """ - [ - { - "attachment": { - "contentType": "application/pdf", - "url": "https://example.org/my-doc.pdf", - "url": "https://example.org/duplicate-url.pdf" - }, - "format": { - "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode", - "code": "urn:nhs-ic:unstructured", - "display": "Unstructured Document" - }, - "extension": [ - { - "url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-ContentStability", - "valueCodeableConcept": { - "coding": [ - { - "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLContentStability", - "code": "static", - "display": "Static" - } - ] - } + [ + { + "attachment": { + "contentType": "application/pdf", + "url": "https://example.org/my-doc.pdf", + "url": "https://example.org/duplicate-url.pdf" + }, + "format": { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode", + "code": "urn:nhs-ic:unstructured", + "display": "Unstructured Document" + }, + "extension": [ + { + "url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-ContentStability", + "valueCodeableConcept": { + "coding": [ + { 
+ "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLContentStability", + "code": "static", + "display": "Static" + } + ] } - ] - } - ] + } + ] + } + ] """ Then the response status code is 400 And the response is an OperationOutcome with 1 issue @@ -88,42 +88,42 @@ Feature: Producer - updateDocumentReference - Duplicate Field Scenarios | author | TSTCUS | When producer 'TSTCUS' requests update of a DocumentReference with pointerId 'TSTCUS-updateDuplicateTest-1235' but replacing 'content': """ - [ - { - "attachment": { - "contentType": "application/pdf", - "url": "https://example.org/my-doc.pdf" - }, - "attachment": { - "contentType": "text/html", - "url": "https://example.org/contact-details.html" - }, - "format": { - "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode", - "code": "urn:nhs-ic:unstructured", - "display": "Unstructured Document" - }, - "format": { - "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode", - "code": "urn:nhs-ic:record-contact", - "display": "Contact details (HTTP Unsecured)" - }, - "extension": [ - { - "url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-ContentStability", - "valueCodeableConcept": { - "coding": [ - { - "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLContentStability", - "code": "static", - "display": "Static" - } - ] - } + [ + { + "attachment": { + "contentType": "application/pdf", + "url": "https://example.org/my-doc.pdf" + }, + "attachment": { + "contentType": "text/html", + "url": "https://example.org/contact-details.html" + }, + "format": { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode", + "code": "urn:nhs-ic:unstructured", + "display": "Unstructured Document" + }, + "format": { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode", + "code": "urn:nhs-ic:record-contact", + "display": "Contact details (HTTP Unsecured)" + }, + "extension": [ + { + "url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-ContentStability", + "valueCodeableConcept": { + "coding": [ + { + "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLContentStability", + "code": "static", + "display": "Static" + } + ] } - ] - } - ] + } + ] + } + ] """ Then the response status code is 400 And the response is an OperationOutcome with 1 issue diff --git a/tests/features/steps/2_request.py b/tests/features/steps/2_request.py index 261f524d3..3897f7547 100644 --- a/tests/features/steps/2_request.py +++ b/tests/features/steps/2_request.py @@ -133,17 +133,18 @@ def create_post_body_step(context: Context, section: str): def upsert_post_body_step(context: Context, section: str, pointer_id: str): _create_or_upsert_body_step(context, "upsert_text", section, pointer_id) + @when( "producer 'TSTCUS' requests update of a DocumentReference with pointerId '{pointer_id}' but replacing '{section}'" ) def update_post_body_step(context: Context, section: str, pointer_id: str): - """ This can only update top level fields """ + """This can only update top level fields""" consumer_client = consumer_client_from_context(context, "TSTCUS") context.response = consumer_client.read(pointer_id) if context.response.status_code != 200: raise ValueError(f"Failed to read existing pointer: {context.response.text}") - + doc_ref = context.response.json() doc_ref[section] = "placeholder" doc_ref_text = json.dumps(doc_ref) From b07d0074541fee8f1af17f4d7b4feeba74410458 Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Thu, 16 Jan 2025 13:56:23 +0000 Subject: 
[PATCH 13/26] [NRL-1215] WIP - Using hooks instead of class for dupe detection --- layer/nrlf/core/json_duplicate_checker.py | 54 ++++++++++++++++++- .../core/tests/test_json_duplicate_checker.py | 28 +++++----- 2 files changed, 67 insertions(+), 15 deletions(-) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index c2cec9620..4af78ce08 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -1,6 +1,6 @@ import json from collections import OrderedDict -from typing import Dict, List, Tuple +from typing import Any, Dict, List, Tuple JsonPrimitive = str | int | float | bool | None type JsonValue = JsonPrimitive | JsonObject | JsonArray @@ -97,6 +97,44 @@ def traverse_array(self, items: JsonArray, path: list[str]) -> None: self.process_collection(item, base_path, f"{array_path}[{idx}]") +def check_for_duplicate_keys(pairs: list[tuple[str, Any]]): + keys = {} + dupes = {} + for key, value in pairs: + print(f"Processing key: {key}, value: {value}") + if key in keys: + if key not in dupes: + dupes[key] = [] + dupes[key].append(value) + continue + + keys[key] = value + + if dupes: + keys["__duplicates__"] = dupes + + return keys + + +def flatten_duplicates(data: dict | list) -> list[str]: + duplicates = [] + + for key, value in data.items() if isinstance(data, dict) else enumerate(data): + if key == "__duplicates__": + duplicates.extend([f"{dupe_key}" for dupe_key in value.keys()]) + continue + + if isinstance(value, (dict, list)): + dupes = flatten_duplicates(value) + + path = f"{key}" if isinstance(data, dict) else f"[{key}]" + duplicates.extend([f"{path_key}.{dupe}" for dupe in dupes]) + + print(f"flatten_duplicates data={data} dupes={duplicates}") + + return duplicates + + def check_duplicate_keys(json_content: str) -> Tuple[List[str], List[str]]: """Find all duplicate keys in a JSON string. @@ -107,6 +145,20 @@ def check_duplicate_keys(json_content: str) -> Tuple[List[str], List[str]]: A key is considered duplicate if it appears multiple times within the same object, regardless of nesting level or array position. 
""" + + use_hooks_approach = True + + if use_hooks_approach: + try: + dupe_data = json.loads( + json_content, object_pairs_hook=check_for_duplicate_keys + ) + duplicate_paths = [f"root.{path}" for path in flatten_duplicates(dupe_data)] + duplicate_keys = [key.split(".")[-1] for key in duplicate_paths] + return duplicate_keys, duplicate_paths + except json.JSONDecodeError: + raise ValueError("Error: Invalid JSON format") + try: parsed_data = json.loads(json_content, object_pairs_hook=lambda pairs: pairs) print("Parsed JSON:", parsed_data) diff --git a/layer/nrlf/core/tests/test_json_duplicate_checker.py b/layer/nrlf/core/tests/test_json_duplicate_checker.py index 7fb857cf3..b7bddd4d5 100644 --- a/layer/nrlf/core/tests/test_json_duplicate_checker.py +++ b/layer/nrlf/core/tests/test_json_duplicate_checker.py @@ -325,20 +325,20 @@ def test_array_edge_case_duplicate(self): self.assertEqual(duplicates, ["array"]) self.assertEqual(paths, ["root.array"]) - def test_array_element_duplicate(self): - json_content = """ - { - "array": [ - 1, - 2, - 3, - 1 - ] - } - """ - duplicates, paths = check_duplicate_keys(json_content) - self.assertEqual(duplicates, ["array[3]"]) - self.assertEqual(paths, ["root.array[3]"]) + # def test_array_element_duplicate(self): + # json_content = """ + # { + # "array": [ + # 1, + # 2, + # 3, + # 1 + # ] + # } + # """ + # duplicates, paths = check_duplicate_keys(json_content) + # self.assertEqual(duplicates, ["array[3]"]) + # self.assertEqual(paths, ["root.array[3]"]) # deeply nested object with a deeply nested array with a duplicate def test_deeply_nested_object_with_deeply_nested_array_duplicate(self): From 20ba00eca0355451e22382721c69843593cece2e Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." Date: Wed, 22 Jan 2025 12:13:26 +0000 Subject: [PATCH 14/26] NRL-1215 Alternative implementation passing all tests --- layer/nrlf/core/json_duplicate_checker.py | 136 +++--------------- .../core/tests/test_json_duplicate_checker.py | 59 +------- 2 files changed, 18 insertions(+), 177 deletions(-) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index 4af78ce08..a0694bd4d 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -1,101 +1,5 @@ import json -from collections import OrderedDict -from typing import Any, Dict, List, Tuple - -JsonPrimitive = str | int | float | bool | None -type JsonValue = JsonPrimitive | JsonObject | JsonArray -JsonPair = tuple[str, JsonValue] -JsonObject = list[JsonPair] -JsonArray = list[JsonValue] - - -class DuplicateKeyChecker: - """JSON structure duplicate key detector. - - Tracks duplicate keys by maintaining path context during traversal. 
- Paths are recorded in dot notation with array indices: - - Objects: parent.child - - Arrays: parent.array[0] - - Nested: parent.array[0].child[1].key - """ - - def __init__(self): - # Here a list of paths because the same key name could be at different levels - self.duplicate_keys_and_paths: OrderedDict[str, list[str]] = OrderedDict() - # Track keys at each path level to detect duplicates - self.key_registry: Dict[str, Dict[str, bool]] = {} - self.current_duplicate_index: Dict[str, int] = {} - # Track seen array elements to detect duplicates - self.seen_array_elements: Dict[str, List[JsonValue]] = {} - - def get_path_with_index(self, path: List[str], key: str) -> List[str]: - current_level = ".".join(path) - index_map = self.current_duplicate_index.setdefault(current_level, {}) - count = index_map.get(key, 0) - index_map[key] = count + 1 - - # If it's the first occurrence, keep the key as is. - # Subsequent occurrences get bracket-indexed. - if count == 0: - return path + [key] - else: - return path + [f"{key}[{count - 1}]"] - - def check_key(self, key: str, path: List[str]) -> None: - """Check if a key at the current path is a duplicate. - - A duplicate occurs when the same key appears twice at the same - nesting level, even if the values differ. - """ - current_level = ".".join(path) - current_keys = self.key_registry.setdefault(current_level, {}) - if key in current_keys: - duplicate_path = ".".join(path + [key]) - self.duplicate_keys_and_paths.setdefault(key, []).append(duplicate_path) - print(f"Found duplicate key: {key} at path: {'.'.join(path + [key])}") - else: - current_keys[key] = True - - def process_collection( - self, value: JsonObject | JsonArray, path: list[str], key: str - ) -> None: - """Determine if the given 'value' is an object or an array and handle it.""" - new_path = self.get_path_with_index(path, key) - if value and isinstance(value[0], tuple): - self.traverse_json(value, new_path) - else: - self.traverse_array(value, new_path) - - def traverse_json(self, data: JsonObject, path: list[str]) -> None: - """Traverse JSON object and check for duplicate keys.""" - for key, value in data: - print(f"Processing key: {key}, value: {value}") - self.check_key(key, path) - if isinstance(value, (list, tuple)): - self.process_collection(value, path, key) - - def traverse_array(self, items: JsonArray, path: list[str]) -> None: - """Process JSON array items while updating the path for duplicates.""" - array_path = path[-1] - base_path = path[:-1] - seen_elements = self.seen_array_elements.setdefault(".".join(path), set()) - - for idx, item in enumerate(items): - serialized_item = json.dumps(item, sort_keys=True) - if serialized_item in seen_elements: - element = f"{array_path}[{idx}]" - duplicate_path = ".".join(base_path + [element]) - self.duplicate_keys_and_paths.setdefault(element, []).append( - duplicate_path - ) - print(f"Found duplicate array element at path: {duplicate_path}") - else: - seen_elements.add(serialized_item) - - if not isinstance(item, (list, tuple)): - continue - self.process_collection(item, base_path, f"{array_path}[{idx}]") - +from typing import Any def check_for_duplicate_keys(pairs: list[tuple[str, Any]]): keys = {} @@ -128,14 +32,25 @@ def flatten_duplicates(data: dict | list) -> list[str]: dupes = flatten_duplicates(value) path = f"{key}" if isinstance(data, dict) else f"[{key}]" - duplicates.extend([f"{path_key}.{dupe}" for dupe in dupes]) + duplicates.extend([f"{path}.{dupe}" for dupe in dupes]) print(f"flatten_duplicates data={data} 
dupes={duplicates}") return duplicates -def check_duplicate_keys(json_content: str) -> Tuple[List[str], List[str]]: +def format_path(path): + parts = path.split('.') + formatted_parts = [] + for part in parts: + if part.startswith('['): + formatted_parts[-1] += part + else: + formatted_parts.append(part) + return '.'.join(formatted_parts) + + +def check_duplicate_keys(json_content: str) -> tuple[list[str], list[str]]: """Find all duplicate keys in a JSON string. Traverses the entire JSON structure and reports: @@ -153,25 +68,8 @@ def check_duplicate_keys(json_content: str) -> Tuple[List[str], List[str]]: dupe_data = json.loads( json_content, object_pairs_hook=check_for_duplicate_keys ) - duplicate_paths = [f"root.{path}" for path in flatten_duplicates(dupe_data)] - duplicate_keys = [key.split(".")[-1] for key in duplicate_paths] + duplicate_paths = [f"root.{format_path(path)}" for path in flatten_duplicates(dupe_data)] + duplicate_keys = list(dict.fromkeys([key.split(".")[-1] for key in duplicate_paths])) return duplicate_keys, duplicate_paths except json.JSONDecodeError: - raise ValueError("Error: Invalid JSON format") - - try: - parsed_data = json.loads(json_content, object_pairs_hook=lambda pairs: pairs) - print("Parsed JSON:", parsed_data) - except json.JSONDecodeError: - raise ValueError("Error: Invalid JSON format") - - checker = DuplicateKeyChecker() - checker.traverse_json(parsed_data, ["root"]) - - duplicates = list(checker.duplicate_keys_and_paths.keys()) - # flatten the list of paths - paths = sum(checker.duplicate_keys_and_paths.values(), []) - print("Final duplicates:", duplicates) - print("Final paths:", paths) - - return duplicates, paths + raise ValueError("Error: Invalid JSON format") \ No newline at end of file diff --git a/layer/nrlf/core/tests/test_json_duplicate_checker.py b/layer/nrlf/core/tests/test_json_duplicate_checker.py index b7bddd4d5..870643a48 100644 --- a/layer/nrlf/core/tests/test_json_duplicate_checker.py +++ b/layer/nrlf/core/tests/test_json_duplicate_checker.py @@ -323,61 +323,4 @@ def test_array_edge_case_duplicate(self): """ duplicates, paths = check_duplicate_keys(json_content) self.assertEqual(duplicates, ["array"]) - self.assertEqual(paths, ["root.array"]) - - # def test_array_element_duplicate(self): - # json_content = """ - # { - # "array": [ - # 1, - # 2, - # 3, - # 1 - # ] - # } - # """ - # duplicates, paths = check_duplicate_keys(json_content) - # self.assertEqual(duplicates, ["array[3]"]) - # self.assertEqual(paths, ["root.array[3]"]) - - # deeply nested object with a deeply nested array with a duplicate - def test_deeply_nested_object_with_deeply_nested_array_duplicate(self): - json_content = """ - { - "root": { - "level1": { - "level2": { - "level3": { - "level4": { - "level5": { - "level6": { - "level7": { - "level8": { - "level9": { - "level10": { - "array": [ - {"key1": 1, "key2": 2}, - {"key1": 1, "key2": 2} - ] - } - } - } - } - } - } - } - } - } - } - } - } - """ - duplicates, paths = check_duplicate_keys(json_content) - self.assertEqual(duplicates, ["array[1]"]) - # duplicate root here needs fixing in traverse_array loop - self.assertEqual( - paths, - [ - "root.root.level1.level2.level3.level4.level5.level6.level7.level8.level9.level10.array[1]" - ], - ) + self.assertEqual(paths, ["root.array"]) \ No newline at end of file From 633c565dfdf61fbaaebda51b6bb9a6f8b7026042 Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." 
Date: Wed, 22 Jan 2025 12:19:59 +0000 Subject: [PATCH 15/26] NRL-1215 Fix linting, remove redundant code --- layer/nrlf/core/json_duplicate_checker.py | 41 ++++++++----------- .../core/tests/test_json_duplicate_checker.py | 2 +- 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index a0694bd4d..22b9f7234 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -1,7 +1,8 @@ import json from typing import Any -def check_for_duplicate_keys(pairs: list[tuple[str, Any]]): + +def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict: keys = {} dupes = {} for key, value in pairs: @@ -11,7 +12,6 @@ def check_for_duplicate_keys(pairs: list[tuple[str, Any]]): dupes[key] = [] dupes[key].append(value) continue - keys[key] = value if dupes: @@ -22,32 +22,27 @@ def check_for_duplicate_keys(pairs: list[tuple[str, Any]]): def flatten_duplicates(data: dict | list) -> list[str]: duplicates = [] - for key, value in data.items() if isinstance(data, dict) else enumerate(data): if key == "__duplicates__": duplicates.extend([f"{dupe_key}" for dupe_key in value.keys()]) continue - if isinstance(value, (dict, list)): dupes = flatten_duplicates(value) - path = f"{key}" if isinstance(data, dict) else f"[{key}]" duplicates.extend([f"{path}.{dupe}" for dupe in dupes]) - print(f"flatten_duplicates data={data} dupes={duplicates}") - return duplicates -def format_path(path): - parts = path.split('.') +def format_path(path: str) -> str: + parts = path.split(".") formatted_parts = [] for part in parts: - if part.startswith('['): + if part.startswith("["): formatted_parts[-1] += part else: formatted_parts.append(part) - return '.'.join(formatted_parts) + return ".".join(formatted_parts) def check_duplicate_keys(json_content: str) -> tuple[list[str], list[str]]: @@ -60,16 +55,14 @@ def check_duplicate_keys(json_content: str) -> tuple[list[str], list[str]]: A key is considered duplicate if it appears multiple times within the same object, regardless of nesting level or array position. 
""" - - use_hooks_approach = True - - if use_hooks_approach: - try: - dupe_data = json.loads( - json_content, object_pairs_hook=check_for_duplicate_keys - ) - duplicate_paths = [f"root.{format_path(path)}" for path in flatten_duplicates(dupe_data)] - duplicate_keys = list(dict.fromkeys([key.split(".")[-1] for key in duplicate_paths])) - return duplicate_keys, duplicate_paths - except json.JSONDecodeError: - raise ValueError("Error: Invalid JSON format") \ No newline at end of file + try: + dupe_data = json.loads(json_content, object_pairs_hook=check_for_duplicate_keys) + duplicate_paths = [ + f"root.{format_path(path)}" for path in flatten_duplicates(dupe_data) + ] + duplicate_keys = list( + dict.fromkeys([key.split(".")[-1] for key in duplicate_paths]) + ) + return duplicate_keys, duplicate_paths + except json.JSONDecodeError: + raise ValueError("Error: Invalid JSON format") diff --git a/layer/nrlf/core/tests/test_json_duplicate_checker.py b/layer/nrlf/core/tests/test_json_duplicate_checker.py index 870643a48..65f357c22 100644 --- a/layer/nrlf/core/tests/test_json_duplicate_checker.py +++ b/layer/nrlf/core/tests/test_json_duplicate_checker.py @@ -323,4 +323,4 @@ def test_array_edge_case_duplicate(self): """ duplicates, paths = check_duplicate_keys(json_content) self.assertEqual(duplicates, ["array"]) - self.assertEqual(paths, ["root.array"]) \ No newline at end of file + self.assertEqual(paths, ["root.array"]) From 688b2a7839386280732e90a2f0f94deeb67b1969 Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." Date: Thu, 23 Jan 2025 09:07:22 +0000 Subject: [PATCH 16/26] NRL-1215 Simplify implementation --- layer/nrlf/core/json_duplicate_checker.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index 22b9f7234..004faecea 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -5,14 +5,13 @@ def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict: keys = {} dupes = {} + for key, value in pairs: print(f"Processing key: {key}, value: {value}") if key in keys: - if key not in dupes: - dupes[key] = [] - dupes[key].append(value) - continue - keys[key] = value + dupes.setdefault(key, []).append(value) + else: + keys[key] = value if dupes: keys["__duplicates__"] = dupes @@ -22,14 +21,16 @@ def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict: def flatten_duplicates(data: dict | list) -> list[str]: duplicates = [] - for key, value in data.items() if isinstance(data, dict) else enumerate(data): + items = data.items() if isinstance(data, dict) else enumerate(data) + + for key, value in items: if key == "__duplicates__": - duplicates.extend([f"{dupe_key}" for dupe_key in value.keys()]) - continue - if isinstance(value, (dict, list)): - dupes = flatten_duplicates(value) + duplicates.extend(value.keys()) + elif isinstance(value, (dict, list)): path = f"{key}" if isinstance(data, dict) else f"[{key}]" + dupes = flatten_duplicates(value) duplicates.extend([f"{path}.{dupe}" for dupe in dupes]) + print(f"flatten_duplicates data={data} dupes={duplicates}") return duplicates From f017a8f3b40d95a3fc15d874a167886ced55c860 Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." 
Date: Thu, 23 Jan 2025 10:55:10 +0000 Subject: [PATCH 17/26] NRL-1215 Add extra docstring --- layer/nrlf/core/json_duplicate_checker.py | 1 + 1 file changed, 1 insertion(+) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index 004faecea..f6cbef2ad 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -36,6 +36,7 @@ def flatten_duplicates(data: dict | list) -> list[str]: def format_path(path: str) -> str: + """Transforms a path like root.key1.[2].key2 into root.key1[2].key2""" parts = path.split(".") formatted_parts = [] for part in parts: From 8c54886ac79896c79d8d6916f79aa4c5e944a70e Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." Date: Thu, 23 Jan 2025 16:48:43 +0000 Subject: [PATCH 18/26] NRL-1215 Fix case sensitive detection --- layer/nrlf/core/json_duplicate_checker.py | 5 ++++- layer/nrlf/core/tests/test_json_duplicate_checker.py | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index f6cbef2ad..ac48a398e 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -5,13 +5,16 @@ def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict: keys = {} dupes = {} + normalized_keys = [] for key, value in pairs: + normalized_key = key.lower() print(f"Processing key: {key}, value: {value}") - if key in keys: + if normalized_key in normalized_keys: dupes.setdefault(key, []).append(value) else: keys[key] = value + normalized_keys += [normalized_key] if dupes: keys["__duplicates__"] = dupes diff --git a/layer/nrlf/core/tests/test_json_duplicate_checker.py b/layer/nrlf/core/tests/test_json_duplicate_checker.py index 65f357c22..3cb58d358 100644 --- a/layer/nrlf/core/tests/test_json_duplicate_checker.py +++ b/layer/nrlf/core/tests/test_json_duplicate_checker.py @@ -324,3 +324,9 @@ def test_array_edge_case_duplicate(self): duplicates, paths = check_duplicate_keys(json_content) self.assertEqual(duplicates, ["array"]) self.assertEqual(paths, ["root.array"]) + + def test_case_sensitive_keys(self): + json_content = '{"a": 1, "A": 2, "aA": 3, "Aa": 4}' + duplicates, paths = check_duplicate_keys(json_content) + self.assertEqual(duplicates, ["A", "Aa"]) + self.assertEqual(paths, ["root.A", "root.Aa"]) From 8bf87760f56ab88bc6d42847b793ea8be44e7bce Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." Date: Thu, 23 Jan 2025 17:47:39 +0000 Subject: [PATCH 19/26] NRL-1215 Add more prints for test report --- layer/nrlf/core/json_duplicate_checker.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index ac48a398e..b7e250d43 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -12,6 +12,7 @@ def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict: print(f"Processing key: {key}, value: {value}") if normalized_key in normalized_keys: dupes.setdefault(key, []).append(value) + print(f"Duplicate key found: {key}") else: keys[key] = value normalized_keys += [normalized_key] @@ -61,6 +62,9 @@ def check_duplicate_keys(json_content: str) -> tuple[list[str], list[str]]: the same object, regardless of nesting level or array position. 
""" try: + print("JSON content to be processed:") + print(json_content) + print("================================") dupe_data = json.loads(json_content, object_pairs_hook=check_for_duplicate_keys) duplicate_paths = [ f"root.{format_path(path)}" for path in flatten_duplicates(dupe_data) @@ -68,6 +72,9 @@ def check_duplicate_keys(json_content: str) -> tuple[list[str], list[str]]: duplicate_keys = list( dict.fromkeys([key.split(".")[-1] for key in duplicate_paths]) ) + print("================================") + print(f"Duplicate keys: {duplicate_keys}") + print(f"Duplicate paths: {duplicate_paths}") return duplicate_keys, duplicate_paths except json.JSONDecodeError: raise ValueError("Error: Invalid JSON format") From 5dbbd920bae28ef7e5954a1912c1c57fcffff78c Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." Date: Thu, 23 Jan 2025 17:50:53 +0000 Subject: [PATCH 20/26] NRL-1215 Clean up prints --- layer/nrlf/core/json_duplicate_checker.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index b7e250d43..c034fb9ee 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -9,10 +9,8 @@ def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict: for key, value in pairs: normalized_key = key.lower() - print(f"Processing key: {key}, value: {value}") if normalized_key in normalized_keys: dupes.setdefault(key, []).append(value) - print(f"Duplicate key found: {key}") else: keys[key] = value normalized_keys += [normalized_key] @@ -35,7 +33,6 @@ def flatten_duplicates(data: dict | list) -> list[str]: dupes = flatten_duplicates(value) duplicates.extend([f"{path}.{dupe}" for dupe in dupes]) - print(f"flatten_duplicates data={data} dupes={duplicates}") return duplicates @@ -62,9 +59,6 @@ def check_duplicate_keys(json_content: str) -> tuple[list[str], list[str]]: the same object, regardless of nesting level or array position. """ try: - print("JSON content to be processed:") - print(json_content) - print("================================") dupe_data = json.loads(json_content, object_pairs_hook=check_for_duplicate_keys) duplicate_paths = [ f"root.{format_path(path)}" for path in flatten_duplicates(dupe_data) @@ -72,9 +66,6 @@ def check_duplicate_keys(json_content: str) -> tuple[list[str], list[str]]: duplicate_keys = list( dict.fromkeys([key.split(".")[-1] for key in duplicate_paths]) ) - print("================================") - print(f"Duplicate keys: {duplicate_keys}") - print(f"Duplicate paths: {duplicate_paths}") return duplicate_keys, duplicate_paths except json.JSONDecodeError: raise ValueError("Error: Invalid JSON format") From 5bc99cb3b7f38c458026660408d50c4bba6bc11c Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." 
Date: Fri, 24 Jan 2025 11:47:56 +0000 Subject: [PATCH 21/26] NRL-1215 Fix remove extra prints --- layer/nrlf/core/tests/test_json_duplicate_checker.py | 9 --------- tests/features/steps/2_request.py | 1 - 2 files changed, 10 deletions(-) diff --git a/layer/nrlf/core/tests/test_json_duplicate_checker.py b/layer/nrlf/core/tests/test_json_duplicate_checker.py index 3cb58d358..9968118b3 100644 --- a/layer/nrlf/core/tests/test_json_duplicate_checker.py +++ b/layer/nrlf/core/tests/test_json_duplicate_checker.py @@ -283,20 +283,12 @@ def test_parametrized_nested_arrays(self): for depth in range(1, 11): # Test depths 1 through 10 with self.subTest(depth=depth): json_content = self.generate_nested_json(depth) - print(f"\n=== Testing depth {depth} ===") - print("Generated JSON:") - print(json_content) duplicates, paths = check_duplicate_keys(json_content) expected_duplicates = self.get_expected_duplicates(depth) expected_paths = self.get_expected_paths(depth) - print("\nActual duplicates:", duplicates) - print("Expected duplicates:", expected_duplicates) - print("\nActual paths:", paths) - print("Expected paths:", expected_paths) - self.assertEqual( sorted(duplicates), sorted(expected_duplicates), @@ -307,7 +299,6 @@ def test_parametrized_nested_arrays(self): sorted(expected_paths), f"Failed for depth {depth} - paths mismatch", ) - print("=== Test passed for depth", depth, "===\n") def test_array_edge_case_duplicate(self): json_content = """ diff --git a/tests/features/steps/2_request.py b/tests/features/steps/2_request.py index 3897f7547..a904982a7 100644 --- a/tests/features/steps/2_request.py +++ b/tests/features/steps/2_request.py @@ -149,7 +149,6 @@ def update_post_body_step(context: Context, section: str, pointer_id: str): doc_ref[section] = "placeholder" doc_ref_text = json.dumps(doc_ref) doc_ref_text = doc_ref_text.replace('"placeholder"', context.text) - print(doc_ref_text) producer_client = producer_client_from_context(context, "TSTCUS") context.response = producer_client.update_text(doc_ref_text, pointer_id) From 0235c76efd0b637f2d0ab36ff8f3cf6b07e63b47 Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." Date: Fri, 24 Jan 2025 11:51:36 +0000 Subject: [PATCH 22/26] NRL-1215 Add docstrings --- layer/nrlf/core/json_duplicate_checker.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index c034fb9ee..0713c2eec 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -3,6 +3,7 @@ def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict: + """Custom JSON object_pairs_hook that checks for duplicate keys.""" keys = {} dupes = {} normalized_keys = [] @@ -22,6 +23,7 @@ def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict: def flatten_duplicates(data: dict | list) -> list[str]: + """Flattens a JSON structure and returns a list of duplicate paths.""" duplicates = [] items = data.items() if isinstance(data, dict) else enumerate(data) From ef8f45a5f916e321a1bbccbadb1ccd925df83077 Mon Sep 17 00:00:00 2001 From: "Axel Garcia K." 
Date: Fri, 24 Jan 2025 12:10:48 +0000 Subject: [PATCH 23/26] NRL-1215 Log when duplicate key in post body --- layer/nrlf/core/log_references.py | 1 + layer/nrlf/core/request.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/layer/nrlf/core/log_references.py b/layer/nrlf/core/log_references.py index 698dcd691..5c812c66a 100644 --- a/layer/nrlf/core/log_references.py +++ b/layer/nrlf/core/log_references.py @@ -36,6 +36,7 @@ class LogReference(Enum): HANDLER016 = _Reference("INFO", "Set response headers") HANDLER017 = _Reference("WARN", "Correlation ID not found in request headers") HANDLER018 = _Reference("INFO", "Checking for duplicate keys in request body") + HANDLER019 = _Reference("ERROR", "Duplicate keys found in the request body") HANDLER999 = _Reference("INFO", "Request handler returned successfully") # Error Logs diff --git a/layer/nrlf/core/request.py b/layer/nrlf/core/request.py index 22826e60e..499e40889 100644 --- a/layer/nrlf/core/request.py +++ b/layer/nrlf/core/request.py @@ -108,13 +108,15 @@ def raise_when_duplicate_keys(json_content: str) -> None: logger.log(LogReference.HANDLER018) duplicates, paths = check_duplicate_keys(json_content) if duplicates: - raise OperationOutcomeError( + error = OperationOutcomeError( severity="error", code="invalid", details=SpineErrorConcept.from_code("MESSAGE_NOT_WELL_FORMED"), diagnostics=f"Duplicate keys found in FHIR document: {duplicates}", expression=paths, ) + logger.log(LogReference.HANDLER019, error=str(error)) + raise error def parse_path( From 023e514afc8a6f7a66e84ecf545001531f3ada82 Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Tue, 28 Jan 2025 14:18:05 +0000 Subject: [PATCH 24/26] [NRL-1215] Switch root to DocumentReference in json dupe checker and add typing --- layer/nrlf/core/json_duplicate_checker.py | 21 +++---- .../core/tests/test_json_duplicate_checker.py | 60 ++++++++++++------- .../tests/test_json_duplicate_checker_nrlf.py | 2 +- layer/nrlf/core/tests/test_request.py | 2 +- 4 files changed, 53 insertions(+), 32 deletions(-) diff --git a/layer/nrlf/core/json_duplicate_checker.py b/layer/nrlf/core/json_duplicate_checker.py index 0713c2eec..7c9d8de6c 100644 --- a/layer/nrlf/core/json_duplicate_checker.py +++ b/layer/nrlf/core/json_duplicate_checker.py @@ -2,11 +2,11 @@ from typing import Any -def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict: +def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict[str, Any]: """Custom JSON object_pairs_hook that checks for duplicate keys.""" - keys = {} - dupes = {} - normalized_keys = [] + keys: dict[str, Any] = {} + dupes: dict[str, Any] = {} + normalized_keys: list[str] = [] for key, value in pairs: normalized_key = key.lower() @@ -22,9 +22,9 @@ def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict: return keys -def flatten_duplicates(data: dict | list) -> list[str]: +def flatten_duplicates(data: dict[str, Any] | list[Any]) -> list[str]: """Flattens a JSON structure and returns a list of duplicate paths.""" - duplicates = [] + duplicates: list[str] = [] items = data.items() if isinstance(data, dict) else enumerate(data) for key, value in items: @@ -39,9 +39,9 @@ def flatten_duplicates(data: dict | list) -> list[str]: def format_path(path: str) -> str: - """Transforms a path like root.key1.[2].key2 into root.key1[2].key2""" + """Transforms a path like key1.[2].key2 into key1[2].key2""" parts = path.split(".") - formatted_parts = [] + formatted_parts: list[str] = [] for part in parts: if part.startswith("["): 
formatted_parts[-1] += part @@ -55,7 +55,7 @@ def check_duplicate_keys(json_content: str) -> tuple[list[str], list[str]]: Traverses the entire JSON structure and reports: - List of keys that appear multiple times at the same level - - Full paths to each duplicate key occurrence + - Full paths to each duplicate key occurrkeysence A key is considered duplicate if it appears multiple times within the same object, regardless of nesting level or array position. @@ -63,7 +63,8 @@ def check_duplicate_keys(json_content: str) -> tuple[list[str], list[str]]: try: dupe_data = json.loads(json_content, object_pairs_hook=check_for_duplicate_keys) duplicate_paths = [ - f"root.{format_path(path)}" for path in flatten_duplicates(dupe_data) + f"DocumentReference.{format_path(path)}" + for path in flatten_duplicates(dupe_data) ] duplicate_keys = list( dict.fromkeys([key.split(".")[-1] for key in duplicate_paths]) diff --git a/layer/nrlf/core/tests/test_json_duplicate_checker.py b/layer/nrlf/core/tests/test_json_duplicate_checker.py index 9968118b3..c8c0e6bf4 100644 --- a/layer/nrlf/core/tests/test_json_duplicate_checker.py +++ b/layer/nrlf/core/tests/test_json_duplicate_checker.py @@ -14,7 +14,7 @@ def test_simple_duplicates(self): json_content = '{"a": 1, "b": 2, "a": 3}' duplicates, paths = check_duplicate_keys(json_content) self.assertEqual(duplicates, ["a"]) - self.assertEqual(paths, ["root.a"]) + self.assertEqual(paths, ["DocumentReference.a"]) def test_nested_duplicates(self): # This JSON has no duplicates because the 'b' keys are at different levels @@ -28,7 +28,7 @@ def test_same_level_duplicates(self): json_content = '{"a": {"b": 1, "b": 2}, "c": {"d": 3}}' duplicates, paths = check_duplicate_keys(json_content) self.assertEqual(duplicates, ["b"]) - self.assertEqual(paths, ["root.a.b"]) + self.assertEqual(paths, ["DocumentReference.a.b"]) def test_same_level_duplicates_objects(self): # This JSON has duplicates because there are two 'b' keys at the same level @@ -38,14 +38,16 @@ def test_same_level_duplicates_objects(self): ) duplicates, paths = check_duplicate_keys(json_content) self.assertEqual(duplicates, ["b"]) - self.assertEqual(paths, ["root.a.b"]) + self.assertEqual(paths, ["DocumentReference.a.b"]) def test_multiple_level_duplicates(self): # This JSON has duplicates at multiple levels json_content = '{"a": 1, "b": {"c": 2, "c": 3}, "a": 4}' duplicates, paths = check_duplicate_keys(json_content) self.assertEqual(sorted(duplicates), sorted(["a", "c"])) - self.assertEqual(sorted(paths), sorted(["root.a", "root.b.c"])) + self.assertEqual( + sorted(paths), sorted(["DocumentReference.a", "DocumentReference.b.c"]) + ) def test_invalid_json(self): json_content = "{invalid json}" @@ -56,7 +58,7 @@ def test_complex_nested_duplicates(self): json_content = '{"a": {"b": 1, "c": {"d": 2, "c": 3}}, "a": {"e": 4}}' duplicates, paths = check_duplicate_keys(json_content) self.assertEqual(sorted(duplicates), sorted(["a"])) - self.assertEqual(sorted(paths), sorted(["root.a"])) + self.assertEqual(sorted(paths), sorted(["DocumentReference.a"])) def test_multiple_duplicates_same_path(self): json_content = """ @@ -82,7 +84,15 @@ def test_multiple_duplicates_same_path(self): duplicates, paths = check_duplicate_keys(json_content) self.assertEqual(sorted(duplicates), sorted(["b", "c", "e", "g"])) self.assertEqual( - sorted(paths), sorted(["root.b", "root.b.c", "root.b.d.e", "root.b.d.f.g"]) + sorted(paths), + sorted( + [ + "DocumentReference.b", + "DocumentReference.b.c", + "DocumentReference.b.d.e", + 
"DocumentReference.b.d.f.g", + ] + ), ) def test_no_duplicates_deeply_nested(self): @@ -119,7 +129,10 @@ def test_duplicates_with_arrays(self): """ duplicates, paths = check_duplicate_keys(json_content) self.assertEqual(sorted(duplicates), sorted(["b", "c"])) - self.assertEqual(sorted(paths), sorted(["root.a[0].b", "root.a[1].c"])) + self.assertEqual( + sorted(paths), + sorted(["DocumentReference.a[0].b", "DocumentReference.a[1].c"]), + ) def test_large_json_with_mixed_duplicates(self): json_content = """ @@ -148,7 +161,14 @@ def test_large_json_with_mixed_duplicates(self): self.assertEqual(sorted(duplicates), sorted(["c", "f", "h", "j"])) self.assertEqual( sorted(paths), - sorted(["root.b.c", "root.b.e.f", "root.b.e.g.h", "root.i.j"]), + sorted( + [ + "DocumentReference.b.c", + "DocumentReference.b.e.f", + "DocumentReference.b.e.g.h", + "DocumentReference.i.j", + ] + ), ) def test_complex_nested_arrays_with_duplicates(self): @@ -190,13 +210,13 @@ def test_complex_nested_arrays_with_duplicates(self): sorted(paths), sorted( [ - "root.level1.arrays", - "root.level1.arrays[0].a", - "root.level1.arrays[0].nested.b", - "root.level1.arrays[0].nested.b[0].c", - "root.level1.arrays[1].mixed", - "root.level1.arrays[1].mixed[1].f[0].g", - "root.level1.arrays[1].mixed[1].f[1].h.i", + "DocumentReference.level1.arrays", + "DocumentReference.level1.arrays[0].a", + "DocumentReference.level1.arrays[0].nested.b", + "DocumentReference.level1.arrays[0].nested.b[0].c", + "DocumentReference.level1.arrays[1].mixed", + "DocumentReference.level1.arrays[1].mixed[1].f[0].g", + "DocumentReference.level1.arrays[1].mixed[1].f[1].h.i", ] ), ) @@ -233,8 +253,8 @@ def test_deep_nested_array_object_duplicates(self): sorted(paths), sorted( [ - "root.root.level1[0].level2[0][0].data", - "root.root.level1[0].level2[1][0].other", + "DocumentReference.root.level1[0].level2[0][0].data", + "DocumentReference.root.level1[0].level2[1][0].other", ] ), ) @@ -263,7 +283,7 @@ def get_expected_duplicates(self, max_depth): def get_expected_paths(self, max_depth): """Helper function to get expected duplicate paths.""" paths = [] - current_path = "root" + current_path = "DocumentReference" # Start from level0 and increment for i in range(max_depth): @@ -314,10 +334,10 @@ def test_array_edge_case_duplicate(self): """ duplicates, paths = check_duplicate_keys(json_content) self.assertEqual(duplicates, ["array"]) - self.assertEqual(paths, ["root.array"]) + self.assertEqual(paths, ["DocumentReference.array"]) def test_case_sensitive_keys(self): json_content = '{"a": 1, "A": 2, "aA": 3, "Aa": 4}' duplicates, paths = check_duplicate_keys(json_content) self.assertEqual(duplicates, ["A", "Aa"]) - self.assertEqual(paths, ["root.A", "root.Aa"]) + self.assertEqual(paths, ["DocumentReference.A", "DocumentReference.Aa"]) diff --git a/layer/nrlf/core/tests/test_json_duplicate_checker_nrlf.py b/layer/nrlf/core/tests/test_json_duplicate_checker_nrlf.py index f3da51748..66595a909 100644 --- a/layer/nrlf/core/tests/test_json_duplicate_checker_nrlf.py +++ b/layer/nrlf/core/tests/test_json_duplicate_checker_nrlf.py @@ -59,4 +59,4 @@ def test_parse_body_valid_docref_with_duplicate_keys(field): node = field.split(".")[-1] assert result[0] == [node] - assert result[1] == [f"root.{field}"] + assert result[1] == [f"DocumentReference.{field}"] diff --git a/layer/nrlf/core/tests/test_request.py b/layer/nrlf/core/tests/test_request.py index d05337676..e9d8706a5 100644 --- a/layer/nrlf/core/tests/test_request.py +++ b/layer/nrlf/core/tests/test_request.py @@ -181,7 
+181,7 @@ def test_parse_body_valid_docref_with_duplicate_key(): ] }, "diagnostics": "Duplicate keys found in FHIR document: ['docStatus']", - "expression": ["root.docStatus"], + "expression": ["DocumentReference.docStatus"], } ], } From 9601c3fb39e947bce49d165af826502387fe3175 Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Tue, 28 Jan 2025 14:28:39 +0000 Subject: [PATCH 25/26] [NRL-1215] Add __str__ to OperationOutcomeError to allow logging of errors --- layer/nrlf/core/errors.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/layer/nrlf/core/errors.py b/layer/nrlf/core/errors.py index 615ef9a2a..f47df92bc 100644 --- a/layer/nrlf/core/errors.py +++ b/layer/nrlf/core/errors.py @@ -79,6 +79,9 @@ def response(self) -> Response: body=self.operation_outcome.model_dump_json(exclude_none=True, indent=2), ) + def __str__(self): + return f"OperationOutcomeError: {self.operation_outcome}" + class ParseError(Exception): issues: List[OperationOutcomeIssue] From e8d35e47e37b8367196a25789d587dc321d9bba9 Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Tue, 28 Jan 2025 15:04:06 +0000 Subject: [PATCH 26/26] [NRL-1215] Fixup integ tests for root. fix --- .../producer/createDocumentReference-duplicateField.feature | 6 +++--- .../producer/updateDocumentReference-duplicateField.feature | 6 +++--- .../producer/upsertDocumentReference-duplicateField.feature | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/features/producer/createDocumentReference-duplicateField.feature b/tests/features/producer/createDocumentReference-duplicateField.feature index b9a25b114..beed7225c 100644 --- a/tests/features/producer/createDocumentReference-duplicateField.feature +++ b/tests/features/producer/createDocumentReference-duplicateField.feature @@ -54,7 +54,7 @@ Feature: Producer - createDocumentReference - Duplicate Field Scenarios }, "diagnostics": "Duplicate keys found in FHIR document: ['url']", "expression": [ - "root.content[0].attachment.url" + "DocumentReference.content[0].attachment.url" ] } """ @@ -121,8 +121,8 @@ Feature: Producer - createDocumentReference - Duplicate Field Scenarios }, "diagnostics": "Duplicate keys found in FHIR document: ['attachment', 'format']", "expression": [ - "root.content[0].attachment", - "root.content[0].format" + "DocumentReference.content[0].attachment", + "DocumentReference.content[0].format" ] } """ diff --git a/tests/features/producer/updateDocumentReference-duplicateField.feature b/tests/features/producer/updateDocumentReference-duplicateField.feature index 298c56e9a..89c73c843 100644 --- a/tests/features/producer/updateDocumentReference-duplicateField.feature +++ b/tests/features/producer/updateDocumentReference-duplicateField.feature @@ -65,7 +65,7 @@ Feature: Producer - updateDocumentReference - Duplicate Field Scenarios }, "diagnostics": "Duplicate keys found in FHIR document: ['url']", "expression": [ - "root.content[0].attachment.url" + "DocumentReference.content[0].attachment.url" ] } """ @@ -143,8 +143,8 @@ Feature: Producer - updateDocumentReference - Duplicate Field Scenarios }, "diagnostics": "Duplicate keys found in FHIR document: ['attachment', 'format']", "expression": [ - "root.content[0].attachment", - "root.content[0].format" + "DocumentReference.content[0].attachment", + "DocumentReference.content[0].format" ] } """ diff --git a/tests/features/producer/upsertDocumentReference-duplicateField.feature b/tests/features/producer/upsertDocumentReference-duplicateField.feature index 94230df9d..5fcada22f 100644 --- 
a/tests/features/producer/upsertDocumentReference-duplicateField.feature +++ b/tests/features/producer/upsertDocumentReference-duplicateField.feature @@ -54,7 +54,7 @@ Feature: Producer - upsertDocumentReference - Duplicate Field Scenarios }, "diagnostics": "Duplicate keys found in FHIR document: ['url']", "expression": [ - "root.content[0].attachment.url" + "DocumentReference.content[0].attachment.url" ] } """ @@ -121,8 +121,8 @@ Feature: Producer - upsertDocumentReference - Duplicate Field Scenarios }, "diagnostics": "Duplicate keys found in FHIR document: ['attachment', 'format']", "expression": [ - "root.content[0].attachment", - "root.content[0].format" + "DocumentReference.content[0].attachment", + "DocumentReference.content[0].format" ] } """
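A minimal usage sketch of the checker this series ends up with (assuming the nrlf.core.json_duplicate_checker module added above is importable); the sample payload and the expected output below mirror the behaviour asserted by the unit tests and the duplicate-field feature files, they are not additional recorded results:

    # Sketch only: exercises check_duplicate_keys as it stands after PATCH 24,
    # when duplicate paths are rooted at "DocumentReference" rather than "root".
    from nrlf.core.json_duplicate_checker import check_duplicate_keys

    payload = """
    {
      "content": [
        {
          "attachment": {
            "contentType": "application/pdf",
            "url": "https://example.org/my-doc.pdf",
            "url": "https://example.org/duplicate-url.pdf"
          }
        }
      ]
    }
    """

    duplicates, paths = check_duplicate_keys(payload)
    print(duplicates)  # ['url']
    print(paths)       # ['DocumentReference.content[0].attachment.url']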