From c96335f3d66c178b5f44a78726280893c419c151 Mon Sep 17 00:00:00 2001 From: Marques Johansson Date: Thu, 28 Aug 2025 09:51:28 -0400 Subject: [PATCH 1/5] feat: support $sourceDescriptions operationId references Signed-off-by: Marques Johansson --- .../executor/operation_finder.py | 41 +++++++++++++++++ .../tests/executor/test_operation_finder.py | 44 +++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/runner/arazzo_runner/executor/operation_finder.py b/runner/arazzo_runner/executor/operation_finder.py index 8823bb2..599a463 100644 --- a/runner/arazzo_runner/executor/operation_finder.py +++ b/runner/arazzo_runner/executor/operation_finder.py @@ -39,6 +39,47 @@ def find_by_id(self, operation_id: str) -> dict | None: Returns: Dictionary with operation details or None if not found """ + # Special handling: support references in Arazzo specs like + # $sourceDescriptions.. + if isinstance(operation_id, str) and operation_id.startswith("$sourceDescriptions."): + # parse into three parts: prefix, source_name, operation_name + parts = operation_id.split(".", 2) + if len(parts) == 3: + _, source_name, target_op = parts + source_desc = self.source_descriptions.get(source_name) + if source_desc: + paths = source_desc.get("paths", {}) + for path, path_item in paths.items(): + for method, operation in path_item.items(): + if ( + method in ["get", "post", "put", "delete", "patch"] + and operation.get("operationId") == target_op + ): + try: + servers = source_desc.get("servers") + if not servers or not isinstance(servers, list): + raise ValueError( + "Missing or invalid 'servers' list in OpenAPI spec." + ) + base_url = servers[0].get("url") + if not base_url or not isinstance(base_url, str): + raise ValueError( + "Missing or invalid 'url' in the first server object." 
+ ) + except (IndexError, ValueError) as e: + raise ValueError( + f"Could not determine base URL from OpenAPI spec servers: {e}" + ) from e + + return { + "source": source_name, + "path": path, + "method": method, + "url": base_url + path, + "operation": operation, + } + + # Default: search all source descriptions for an operation with matching operationId for source_name, source_desc in self.source_descriptions.items(): # Search through paths and operations paths = source_desc.get("paths", {}) diff --git a/runner/tests/executor/test_operation_finder.py b/runner/tests/executor/test_operation_finder.py index 206000a..eb376bd 100644 --- a/runner/tests/executor/test_operation_finder.py +++ b/runner/tests/executor/test_operation_finder.py @@ -371,5 +371,49 @@ def test_get_security_requirements_for_openapi_operation_basic(): ] +def test_find_by_id_with_source_descriptions_reference(): + """Ensure operationId references using $sourceDescriptions.. resolve to the correct source.""" + source_descriptions = { + "pet-coupons": { + "servers": [{"url": "http://pet.example"}], + "paths": { + "/pet/{petId}/coupons": { + "get": {"operationId": "findPetsByTags", "summary": "Find pets"} + } + }, + }, + "other": { + "servers": [{"url": "http://other.example"}], + "paths": {"/items": {"get": {"operationId": "listItems"}}}, + }, + } + + finder = OperationFinder(source_descriptions) + + op_ref = "$sourceDescriptions.pet-coupons.findPetsByTags" + op_info = finder.find_by_id(op_ref) + assert op_info is not None + assert op_info["source"] == "pet-coupons" + assert op_info["path"] == "/pet/{petId}/coupons" + assert op_info["method"] == "get" + + +def test_find_by_id_with_missing_source_falls_back(): + """If the referenced source doesn't exist, find_by_id should fall back to global search or return None.""" + source_descriptions = { + "api": { + "servers": [{"url": "http://localhost"}], + "paths": {"/foo": {"get": {"operationId": "op1"}}}, + } + } + + finder = 
OperationFinder(source_descriptions) + + # Reference a non-existent source; should not raise, and should try global search (no match -> None) + op_ref = "$sourceDescriptions.nonexistent.op1" + op_info = finder.find_by_id(op_ref) + assert op_info is None + + if __name__ == "__main__": unittest.main() From a4f46ac3af131ce7c707a4cbe65e9125405be306 Mon Sep 17 00:00:00 2001 From: Marques Johansson Date: Thu, 28 Aug 2025 10:03:35 -0400 Subject: [PATCH 2/5] feat: support sourceDescriptions with curly braces for operationPath refs --- .../executor/operation_finder.py | 100 +++++++++++++++++- .../tests/executor/test_operation_finder.py | 58 ++++++++++ 2 files changed, 153 insertions(+), 5 deletions(-) diff --git a/runner/arazzo_runner/executor/operation_finder.py b/runner/arazzo_runner/executor/operation_finder.py index 599a463..4bc1e21 100644 --- a/runner/arazzo_runner/executor/operation_finder.py +++ b/runner/arazzo_runner/executor/operation_finder.py @@ -352,6 +352,9 @@ def _extract_path_method_with_regex( # Decode the path (replace ~1 with / and ~0 with ~) decoded_path = encoded_path.replace("~1", "/").replace("~0", "~") + # Normalize leading slashes: decoded_path may start with multiple slashes + # because encoded_path often begins with a leading '/'. Ensure a single leading slash. + decoded_path = "/" + decoded_path.lstrip("/") logger.debug(f"Decoded path: {decoded_path}") # Try to find the operation in the source description @@ -790,11 +793,98 @@ def get_operations_for_workflow(self, workflow: dict) -> list[dict]: if op_info: operations.append(op_info) elif "operationPath" in step: - # operationPath format: # - match = re.match(r"([^#]+)#(.+)", step["operationPath"]) - if match: - source_url, json_pointer = match.groups() - op_info = self.find_by_path(source_url, json_pointer) + # operationPath may be either # or a runtime + # expression referencing a sourceDescription. 
Examples: + # $sourceDescriptions.#/paths/~1pet~1{petId}/get + # '{$sourceDescriptions.petstoreDescription.url}#/paths/~1pet~1findByStatus/get' + op_path = step["operationPath"] + + if not isinstance(op_path, str): + continue + + # Split into left and json pointer parts if '#' present + if "#" in op_path: + left, json_pointer = op_path.split("#", 1) + else: + left, json_pointer = op_path, "" + + def _evaluate_runtime_expressions(s: str) -> str: + """Evaluate simple runtime expressions wrapped in { }. + + Supported forms: + - $sourceDescriptions. + - $sourceDescriptions..url + Any other expression is left as-is. + """ + out = s + while "{" in out and "}" in out: + i = out.find("{") + j = out.find("}", i) + if j == -1: + break + expr = out[i + 1 : j].strip() + val = None + if expr.startswith("$sourceDescriptions."): + parts = expr.split(".") + if len(parts) >= 2: + src_name = parts[1] + src = self.source_descriptions.get(src_name) + if src: + # support .$url attribute to return the url string + if len(parts) >= 3 and parts[2] == "url": + val = src.get("url") + else: + # default to the source name if no attribute requested + val = src_name + # If we couldn't evaluate, keep the original expr text + if val is None: + val = expr + out = out[:i] + str(val) + out[j + 1 :] + return out + + # If left contains a braced expression, evaluate it into a string + resolved_left = left.strip() + if "{" in resolved_left and "}" in resolved_left: + resolved_left = _evaluate_runtime_expressions(resolved_left) + + # If the resolved left still starts with $sourceDescriptions., parse as before + if resolved_left.startswith("$sourceDescriptions."): + parts = resolved_left.split(".", 2) + if len(parts) >= 2: + source_name = parts[1] + op_info = self.find_by_path(source_name, json_pointer) + if op_info: + operations.append(op_info) + continue + + # Otherwise, try to map the resolved_left (which may be a URL or file path) + # back to a source description name by comparing against each 
source's + # declared `url` attribute. If that fails, fall back to treating + # resolved_left as a source identifier. + source_candidate = None + for name, desc in self.source_descriptions.items(): + src_url = desc.get("url") + if not src_url: + continue + try: + if src_url == resolved_left or resolved_left.endswith(src_url) or src_url in resolved_left or resolved_left in src_url: + source_candidate = name + break + except Exception: + continue + + if source_candidate: + op_info = self.find_by_path(source_candidate, json_pointer) if op_info: operations.append(op_info) + else: + # Fallback: treat resolved_left as a source identifier or URL + match = re.match(r"([^#]+)#?(.+)?", resolved_left) + if match: + source_url = match.group(1) + # prefer json_pointer from the original op_path split if present + ptr = json_pointer + op_info = self.find_by_path(source_url, ptr) + if op_info: + operations.append(op_info) return operations diff --git a/runner/tests/executor/test_operation_finder.py b/runner/tests/executor/test_operation_finder.py index eb376bd..0776bc8 100644 --- a/runner/tests/executor/test_operation_finder.py +++ b/runner/tests/executor/test_operation_finder.py @@ -415,5 +415,63 @@ def test_find_by_id_with_missing_source_falls_back(): assert op_info is None +def test_get_operations_for_workflow_with_operationPath_runtime_expression(): + """Ensure get_operations_for_workflow handles operationPath runtime expressions + that reference a sourceDescriptions entry combined with a JSON Pointer. 
+ """ + source_descriptions = { + "pet-coupons": { + "servers": [{"url": "http://pet.example"}], + "paths": { + "/pet/findByTags": { + "get": {"operationId": "findPetsByTags", "summary": "Find pets"} + } + }, + } + } + + finder = OperationFinder(source_descriptions) + + wf = {"steps": [{"operationPath": "$sourceDescriptions.pet-coupons#/paths/~1pet~1findByTags/get"}]} + + ops = finder.get_operations_for_workflow(wf) + assert isinstance(ops, list) + assert len(ops) == 1 + op = ops[0] + assert op["source"] == "pet-coupons" + assert op["path"] == "/pet/findByTags" + assert op["method"] == "get" + + +def test_get_operations_for_workflow_with_braced_runtime_expression(): + """Ensure get_operations_for_workflow evaluates braced runtime expressions + embedded in operationPath, e.g. '{$sourceDescriptions.pet-coupons.url}#/paths/~1pet~1findByTags/get' + """ + source_descriptions = { + "pet-coupons": { + "url": "pet.example", # using a simple url attribute to exercise .url evaluation + "servers": [{"url": "http://pet.example"}], + "paths": { + "/pet/findByTags": { + "get": {"operationId": "findPetsByTags", "summary": "Find pets"} + } + }, + } + } + + finder = OperationFinder(source_descriptions) + + # Example operationPath with braced runtime expression referencing the sourceDescriptions url + wf = {"steps": [{"operationPath": "{$sourceDescriptions.pet-coupons.url}#/paths/~1pet~1findByTags/get"}]} + + ops = finder.get_operations_for_workflow(wf) + assert isinstance(ops, list) + assert len(ops) == 1 + op = ops[0] + assert op["source"] == "pet-coupons" + assert op["path"] == "/pet/findByTags" + assert op["method"] == "get" + + if __name__ == "__main__": unittest.main() From fe220448df3fd43cf098c1e40a4375aa66f07f70 Mon Sep 17 00:00:00 2001 From: Marques Johansson Date: Thu, 28 Aug 2025 16:53:35 -0400 Subject: [PATCH 3/5] use ExpressionEvaluator in sourceDescriptions evals Signed-off-by: Marques Johansson --- .../executor/operation_finder.py | 54 +++++++------------ 1 file 
changed, 18 insertions(+), 36 deletions(-) diff --git a/runner/arazzo_runner/executor/operation_finder.py b/runner/arazzo_runner/executor/operation_finder.py index 4bc1e21..158b686 100644 --- a/runner/arazzo_runner/executor/operation_finder.py +++ b/runner/arazzo_runner/executor/operation_finder.py @@ -12,6 +12,8 @@ import jsonpointer from arazzo_runner.auth.models import SecurityOption, SecurityRequirement +from arazzo_runner.evaluator import ExpressionEvaluator +from arazzo_runner.models import ExecutionState # Configure logging logger = logging.getLogger("arazzo-runner.executor") @@ -808,44 +810,24 @@ def get_operations_for_workflow(self, workflow: dict) -> list[dict]: else: left, json_pointer = op_path, "" - def _evaluate_runtime_expressions(s: str) -> str: - """Evaluate simple runtime expressions wrapped in { }. - - Supported forms: - - $sourceDescriptions. - - $sourceDescriptions..url - Any other expression is left as-is. - """ - out = s - while "{" in out and "}" in out: - i = out.find("{") - j = out.find("}", i) - if j == -1: - break - expr = out[i + 1 : j].strip() - val = None - if expr.startswith("$sourceDescriptions."): - parts = expr.split(".") - if len(parts) >= 2: - src_name = parts[1] - src = self.source_descriptions.get(src_name) - if src: - # support .$url attribute to return the url string - if len(parts) >= 3 and parts[2] == "url": - val = src.get("url") - else: - # default to the source name if no attribute requested - val = src_name - # If we couldn't evaluate, keep the original expr text - if val is None: - val = expr - out = out[:i] + str(val) + out[j + 1 :] - return out - - # If left contains a braced expression, evaluate it into a string + # If left contains a braced expression, evaluate it using the shared ExpressionEvaluator resolved_left = left.strip() if "{" in resolved_left and "}" in resolved_left: - resolved_left = _evaluate_runtime_expressions(resolved_left) + # Use a minimal ExecutionState for expression evaluation (only 
sourceDescriptions are needed here) + eval_state = ExecutionState(workflow_id="__internal__") + + def _replace_braced(match: re.Match) -> str: + expr = match.group(1) + try: + val = ExpressionEvaluator.evaluate_expression( + expr, eval_state, self.source_descriptions + ) + except Exception: + val = None + # Coerce to string for embedding into operationPath; keep 'None' as empty + return "" if val is None else str(val) + + resolved_left = re.sub(r"\{(\$[^}]+)\}", _replace_braced, resolved_left) # If the resolved left still starts with $sourceDescriptions., parse as before if resolved_left.startswith("$sourceDescriptions."): From 7691a5b36ad62f4216b35b883c4901e42e47621d Mon Sep 17 00:00:00 2001 From: Marques Johansson Date: Thu, 28 Aug 2025 17:56:26 -0400 Subject: [PATCH 4/5] optimize for line-count in handling sourceDescriptions Signed-off-by: Marques Johansson --- .../executor/operation_finder.py | 88 ++++++++----------- 1 file changed, 37 insertions(+), 51 deletions(-) diff --git a/runner/arazzo_runner/executor/operation_finder.py b/runner/arazzo_runner/executor/operation_finder.py index 158b686..4557758 100644 --- a/runner/arazzo_runner/executor/operation_finder.py +++ b/runner/arazzo_runner/executor/operation_finder.py @@ -795,78 +795,64 @@ def get_operations_for_workflow(self, workflow: dict) -> list[dict]: if op_info: operations.append(op_info) elif "operationPath" in step: - # operationPath may be either # or a runtime - # expression referencing a sourceDescription. Examples: - # $sourceDescriptions.#/paths/~1pet~1{petId}/get - # '{$sourceDescriptions.petstoreDescription.url}#/paths/~1pet~1findByStatus/get' + # operationPath may be # or a runtime expression + # referencing a sourceDescription. We evaluate any braced expressions + # using the shared ExpressionEvaluator and then map the resolved + # left-hand value to a source name (by name or by matching the + # source's declared `url`) before delegating to find_by_path. 
op_path = step["operationPath"] - if not isinstance(op_path, str): continue - # Split into left and json pointer parts if '#' present - if "#" in op_path: - left, json_pointer = op_path.split("#", 1) - else: - left, json_pointer = op_path, "" - - # If left contains a braced expression, evaluate it using the shared ExpressionEvaluator + left, json_pointer = (op_path.split("#", 1) + [""])[0:2] resolved_left = left.strip() + + # Evaluate any embedded braced runtime expressions with the + # project's ExpressionEvaluator to preserve semantics. if "{" in resolved_left and "}" in resolved_left: - # Use a minimal ExecutionState for expression evaluation (only sourceDescriptions are needed here) eval_state = ExecutionState(workflow_id="__internal__") - def _replace_braced(match: re.Match) -> str: - expr = match.group(1) + def _repl(m: re.Match) -> str: + expr = m.group(1) try: val = ExpressionEvaluator.evaluate_expression( expr, eval_state, self.source_descriptions ) except Exception: val = None - # Coerce to string for embedding into operationPath; keep 'None' as empty return "" if val is None else str(val) - resolved_left = re.sub(r"\{(\$[^}]+)\}", _replace_braced, resolved_left) + resolved_left = re.sub(r"\{(\$[^}]+)\}", _repl, resolved_left) - # If the resolved left still starts with $sourceDescriptions., parse as before + source_name = None + # Direct $sourceDescriptions. reference if resolved_left.startswith("$sourceDescriptions."): parts = resolved_left.split(".", 2) if len(parts) >= 2: source_name = parts[1] - op_info = self.find_by_path(source_name, json_pointer) - if op_info: - operations.append(op_info) - continue - - # Otherwise, try to map the resolved_left (which may be a URL or file path) - # back to a source description name by comparing against each source's - # declared `url` attribute. If that fails, fall back to treating - # resolved_left as a source identifier. 
- source_candidate = None - for name, desc in self.source_descriptions.items(): - src_url = desc.get("url") - if not src_url: - continue - try: - if src_url == resolved_left or resolved_left.endswith(src_url) or src_url in resolved_left or resolved_left in src_url: - source_candidate = name + else: + # Try to match by declared url or by exact name + for name, desc in self.source_descriptions.items(): + src_url = desc.get("url") + if src_url and ( + src_url == resolved_left + or resolved_left.endswith(src_url) + or src_url in resolved_left + or resolved_left in src_url + ): + source_name = name break - except Exception: - continue - - if source_candidate: - op_info = self.find_by_path(source_candidate, json_pointer) - if op_info: - operations.append(op_info) + if source_name is None and resolved_left in self.source_descriptions: + source_name = resolved_left + + # Prefer using the resolved source_name, otherwise fall back to + # using the resolved_left directly (it may be a URL or identifier). 
+ op_info = None + if source_name: + op_info = self.find_by_path(source_name, json_pointer) else: - # Fallback: treat resolved_left as a source identifier or URL - match = re.match(r"([^#]+)#?(.+)?", resolved_left) - if match: - source_url = match.group(1) - # prefer json_pointer from the original op_path split if present - ptr = json_pointer - op_info = self.find_by_path(source_url, ptr) - if op_info: - operations.append(op_info) + op_info = self.find_by_path(resolved_left, json_pointer) + + if op_info: + operations.append(op_info) return operations From 66ba2e964f7dea9e537b6d48817536a5ad385b47 Mon Sep 17 00:00:00 2001 From: Marques Johansson Date: Thu, 28 Aug 2025 18:08:27 -0400 Subject: [PATCH 5/5] optimize readability and diff-size in sourceDescriptions handling Signed-off-by: Marques Johansson --- .../executor/operation_finder.py | 46 ++++++++----------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/runner/arazzo_runner/executor/operation_finder.py b/runner/arazzo_runner/executor/operation_finder.py index 4557758..db8b70a 100644 --- a/runner/arazzo_runner/executor/operation_finder.py +++ b/runner/arazzo_runner/executor/operation_finder.py @@ -807,12 +807,11 @@ def get_operations_for_workflow(self, workflow: dict) -> list[dict]: left, json_pointer = (op_path.split("#", 1) + [""])[0:2] resolved_left = left.strip() - # Evaluate any embedded braced runtime expressions with the - # project's ExpressionEvaluator to preserve semantics. 
+ # Evaluate embedded braced runtime expressions using central evaluator if "{" in resolved_left and "}" in resolved_left: eval_state = ExecutionState(workflow_id="__internal__") - def _repl(m: re.Match) -> str: + def _eval_braced(m: re.Match) -> str: expr = m.group(1) try: val = ExpressionEvaluator.evaluate_expression( @@ -822,37 +821,32 @@ def _repl(m: re.Match) -> str: val = None return "" if val is None else str(val) - resolved_left = re.sub(r"\{(\$[^}]+)\}", _repl, resolved_left) + resolved_left = re.sub(r"\{(\$[^}]+)\}", _eval_braced, resolved_left) + # Resolve a source name: prefer explicit $sourceDescriptions., + # otherwise match by declared url or exact name source_name = None - # Direct $sourceDescriptions. reference if resolved_left.startswith("$sourceDescriptions."): parts = resolved_left.split(".", 2) - if len(parts) >= 2: - source_name = parts[1] + source_name = parts[1] if len(parts) >= 2 else None else: - # Try to match by declared url or by exact name - for name, desc in self.source_descriptions.items(): - src_url = desc.get("url") - if src_url and ( - src_url == resolved_left - or resolved_left.endswith(src_url) - or src_url in resolved_left - or resolved_left in src_url - ): - source_name = name - break + source_name = next( + ( + name + for name, desc in self.source_descriptions.items() + if desc.get("url") and ( + desc.get("url") == resolved_left + or resolved_left.endswith(desc.get("url")) + or desc.get("url") in resolved_left + or resolved_left in desc.get("url") + ) + ), + None, + ) if source_name is None and resolved_left in self.source_descriptions: source_name = resolved_left - # Prefer using the resolved source_name, otherwise fall back to - # using the resolved_left directly (it may be a URL or identifier). 
- op_info = None - if source_name: - op_info = self.find_by_path(source_name, json_pointer) - else: - op_info = self.find_by_path(resolved_left, json_pointer) - + op_info = self.find_by_path(source_name or resolved_left, json_pointer) if op_info: operations.append(op_info) return operations