From c0319ec673053bfe094332cd91c7cfc6c1e5a9a8 Mon Sep 17 00:00:00 2001 From: sophie-jentic <212357613+sophie-jentic@users.noreply.github.com> Date: Tue, 10 Mar 2026 16:32:02 +0000 Subject: [PATCH 1/8] Fix for file dict in multipart payload processing. Sanitised output. Go to step in runner. --- runner/arazzo_runner/__main__.py | 35 +++++++++++- .../executor/parameter_processor.py | 27 +++++++++ runner/arazzo_runner/http.py | 35 ++++++++++-- runner/arazzo_runner/models.py | 2 + runner/arazzo_runner/runner.py | 57 ++++++++++++++++--- 5 files changed, 142 insertions(+), 14 deletions(-) diff --git a/runner/arazzo_runner/__main__.py b/runner/arazzo_runner/__main__.py index 3bb0a83..d963e39 100644 --- a/runner/arazzo_runner/__main__.py +++ b/runner/arazzo_runner/__main__.py @@ -15,6 +15,31 @@ logger = logging.getLogger("arazzo-runner-cli") +# Maximum length for string values in printed workflow result (avoids dumping HTML/large bodies) +_MAX_DISPLAY_STRING_LEN = 300 + +# Keys that typically hold HTML or large binary/text content - show only length in output +_OMIT_CONTENT_KEYS = frozenset( + {"htmlContent", "fileContent", "body", "content", "responseBody", "data"} +) + + +def _truncate_for_display( + obj: Any, max_len: int = _MAX_DISPLAY_STRING_LEN, parent_key: str = "" +) -> Any: + """Recursively truncate long strings and binary in dicts/lists for terminal display.""" + if isinstance(obj, bytes): + return f"<{len(obj)} bytes>" + if isinstance(obj, str): + if parent_key in _OMIT_CONTENT_KEYS or len(obj) > max_len: + return f"<{len(obj)} chars>" + return obj + if isinstance(obj, dict): + return {k: _truncate_for_display(v, max_len, k) for k, v in obj.items()} + if isinstance(obj, list): + return [_truncate_for_display(v, max_len, parent_key) for v in obj] + return obj + def parse_inputs(inputs_str: str) -> dict[str, Any]: """Parse input string into a dictionary.""" @@ -238,9 +263,15 @@ async def handle_execute_workflow(runner: ArazzoRunner | None, args: argparse.Na logger.error(f"Failed to execute workflow: {e}", exc_info=True) sys.exit(1) - # Print outputs and determine success/failure + # Print outputs and determine success/failure (truncate long strings e.g. HTML) print(f"\n=== Completed workflow: {args.workflow_id} ===") - print(f"Outputs: {result}") + display_outputs = _truncate_for_display(result.outputs) if result.outputs else result.outputs + display_step_outputs = ( + _truncate_for_display(result.step_outputs) if result.step_outputs else result.step_outputs + ) + print(f"Outputs: {display_outputs}") + if display_step_outputs: + print(f"Step outputs: {display_step_outputs}") # Check for failure in outputs (if possible) try: diff --git a/runner/arazzo_runner/executor/parameter_processor.py b/runner/arazzo_runner/executor/parameter_processor.py index dbbdce2..745dcc5 100644 --- a/runner/arazzo_runner/executor/parameter_processor.py +++ b/runner/arazzo_runner/executor/parameter_processor.py @@ -95,6 +95,23 @@ def _process_multipart_payload(self, payload: dict[str, Any]) -> dict[str, Any]: # Handle blob references if isinstance(value, dict) and "blob_ref" in value: processed_payload[key] = self._rehydrate_blob_reference(value, key) + # Handle already-formatted file dicts (has "content" and "file_name"/"filename") + # This must come before the generic dict check to avoid JSON serialization + elif isinstance(value, dict) and "content" in value: + # Check if this is already a file dict (has file_name or filename) + has_file_name = "file_name" in value or "filename" in value + if has_file_name: + logger.debug(f"File dict already formatted for field '{key}', using as-is.") + # Ensure file_name exists (even if None, it will be set to default in HTTP executor) + if "file_name" not in value: + value["file_name"] = value.get("filename") + processed_payload[key] = value + else: + # Dict with "content" but no file_name - might be a regular dict, serialize it + try: + processed_payload[key] = json.dumps(value, separators=(",", ":")) + except (TypeError, ValueError): + processed_payload[key] = str(value) elif isinstance(value, bytes | bytearray): logger.debug(f"Wrapping binary data in field '{key}' for multipart upload.") processed_payload[key] = { @@ -579,6 +596,16 @@ def prepare_parameters(self, step: dict, state: ExecutionState) -> dict[str, Any value = ExpressionEvaluator.evaluate_expression( value, state, self.source_descriptions ) + elif re.search(r"\$inputs\.\w+|\$steps\.\w+", value): + # Substitute $inputs.x and $steps.stepId.outputs.x inside strings (e.g. q param) + def replace_embedded(match): + expr = match.group(0) + eval_val = ExpressionEvaluator.evaluate_expression( + expr, state, self.source_descriptions + ) + return "" if eval_val is None else str(eval_val) + + value = re.sub(r"\$inputs\.[\w.]+|\$steps\.[\w.]+", replace_embedded, value) elif "{" in value and "}" in value: # Template with expressions def replace_expr(match): diff --git a/runner/arazzo_runner/http.py b/runner/arazzo_runner/http.py index 52d5cd6..b762c6b 100644 --- a/runner/arazzo_runner/http.py +++ b/runner/arazzo_runner/http.py @@ -187,14 +187,41 @@ def execute_request( data = {} for key, value in payload.items(): # A field is treated as a file upload if its value is an object - # containing 'content' and 'filename' keys. - if isinstance(value, dict) and "content" in value and "filename" in value: + # containing 'content' and either 'file_name' or 'filename' keys. + # Support both 'file_name' (new) and 'filename' (legacy) for backward compatibility + has_file_name = ( + isinstance(value, dict) + and "content" in value + and ("file_name" in value or "filename" in value) + ) + if has_file_name: # requests expects a tuple: (filename, file_data, content_type) file_content = value["content"] - file_name = value["filename"] if value.get("filename") else "attachment" + # Support both 'file_name' (preferred) and 'filename' (legacy) + file_name = value.get("file_name") or value.get("filename") or "attachment" file_type = value.get("contentType", "application/octet-stream") + + # Validate that file_content is bytes/bytearray + if not isinstance(file_content, bytes | bytearray): + if file_content is None: + logger.error( + f"File content for field '{key}' is None. Cannot upload file." + ) + raise ValueError( + f"File content for field '{key}' is None. Ensure the file content expression evaluates to bytes." + ) + else: + logger.error( + f"File content for field '{key}' is {type(file_content).__name__}, expected bytes. Value: {str(file_content)[:100]}" + ) + raise ValueError( + f"File content for field '{key}' must be bytes or bytearray, got {type(file_content).__name__}" + ) + files[key] = (file_name, file_content, file_type) - logger.debug(f"Preparing file '{file_name}' for upload.") + logger.debug( + f"Preparing file '{file_name}' for upload ({len(file_content)} bytes)." + ) elif isinstance(value, bytes | bytearray): # Fallback: treat raw bytes as a file with a generic name files[key] = ("attachment", value, "application/octet-stream") diff --git a/runner/arazzo_runner/models.py b/runner/arazzo_runner/models.py index e94e504..ea5ae66 100644 --- a/runner/arazzo_runner/models.py +++ b/runner/arazzo_runner/models.py @@ -81,6 +81,8 @@ class ExecutionState: workflow_id: str current_step_id: str | None = None + """When set after a GOTO, the next step to run is this one (not the step after it).""" + pending_goto_step_id: str | None = None inputs: dict[str, Any] = None step_outputs: dict[str, dict[str, Any]] = None workflow_outputs: dict[str, Any] = None diff --git a/runner/arazzo_runner/runner.py b/runner/arazzo_runner/runner.py index 8b0a0f7..558b49a 100644 --- a/runner/arazzo_runner/runner.py +++ b/runner/arazzo_runner/runner.py @@ -193,8 +193,21 @@ def register_callback(self, event_type: str, callback: Callable[..., None]) -> N else: logger.warning(f"Unknown event type: {event_type}") + def _sanitize_for_json(self, obj: Any) -> Any: + """Recursively replace non-JSON-serializable values (e.g. bytes) with placeholders.""" + if isinstance(obj, bytes | bytearray): + return f"<{len(obj)} bytes>" + if isinstance(obj, dict): + return {k: self._sanitize_for_json(v) for k, v in obj.items()} + if isinstance(obj, list): + return [self._sanitize_for_json(v) for v in obj] + return obj + def _trigger_event(self, event_type: str, **kwargs: Any) -> None: """Trigger registered callbacks for an event""" + # Sanitize outputs for step_complete so callbacks can safely json.dumps them + if event_type == "step_complete" and "outputs" in kwargs and kwargs["outputs"]: + kwargs = {**kwargs, "outputs": self._sanitize_for_json(kwargs["outputs"])} for callback in self.event_callbacks.get(event_type, []): try: callback(**kwargs) @@ -405,7 +418,15 @@ def execute_next_step(self, execution_id: str) -> dict[str, Any]: next_step = None next_step_idx = 0 - if state.current_step_id is None: + if state.pending_goto_step_id is not None: + # After a GOTO, run the target step (not the step after current) + for idx, step in enumerate(steps): + if step.get("stepId") == state.pending_goto_step_id: + next_step = step + next_step_idx = idx + state.pending_goto_step_id = None + break + elif state.current_step_id is None: # First step in the workflow if steps: next_step = steps[0] @@ -552,17 +573,29 @@ def execute_next_step(self, execution_id: str) -> dict[str, Any]: } elif "step_id" in next_action: # Go to a specific step in the current workflow - # Find the step index + target_step_id = next_action["step_id"] for idx, step in enumerate(steps): - if step.get("stepId") == next_action["step_id"]: + if step.get("stepId") == target_step_id: next_step_idx = idx break - - # Update current step - state.current_step_id = steps[next_step_idx].get("stepId") + else: + logger.warning( + f"GOTO target step '{target_step_id}' not found, continuing to next step" + ) + state.current_step_id = step_id + return { + "status": WorkflowExecutionStatus.STEP_COMPLETE, + "step_id": step_id, + "success": success, + "outputs": step_result.get("outputs", {}), + } + + # Next run should execute the target step, not the step after it + state.pending_goto_step_id = steps[next_step_idx].get("stepId") + state.current_step_id = step_id # step we just completed return { "status": WorkflowExecutionStatus.GOTO_STEP, - "step_id": state.current_step_id, + "step_id": state.pending_goto_step_id, } elif next_action["type"] == ActionType.RETRY: # Retry the current step @@ -793,10 +826,18 @@ def generate_env_mappings( - 'auth': Environment variable mappings for authentication - 'servers': Environment variable mappings for server URLs (only included if server variables exist) """ + # Normalize: accept single doc or list; auth_processor expects list of doc dicts + if arazzo_docs is None: + arazzo_specs: list[dict[str, Any]] = [] + elif isinstance(arazzo_docs, dict) and "workflows" in arazzo_docs: + arazzo_specs = [arazzo_docs] + else: + arazzo_specs = list(arazzo_docs) if arazzo_docs else [] + auth_processor = AuthProcessor() auth_config = auth_processor.process_api_auth( openapi_specs=source_descriptions or {}, - arazzo_specs=arazzo_docs or [], + arazzo_specs=arazzo_specs, ) auth_env_mappings = auth_config.get("env_mappings", {}) From c324494c95851d11b03dc7191adb9e53b81dd296 Mon Sep 17 00:00:00 2001 From: sophie-jentic <212357613+sophie-jentic@users.noreply.github.com> Date: Tue, 10 Mar 2026 17:01:28 +0000 Subject: [PATCH 2/8] Test config now uses go to steps so more API calls than previous config expectation. --- runner/tests/fixtures/bnpl/test_config.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/runner/tests/fixtures/bnpl/test_config.yaml b/runner/tests/fixtures/bnpl/test_config.yaml index f1623f3..42fe523 100644 --- a/runner/tests/fixtures/bnpl/test_config.yaml +++ b/runner/tests/fixtures/bnpl/test_config.yaml @@ -51,8 +51,9 @@ workflows: "customer": "https://api.bnpl-example.com/bnpl/v1/customers/CUST1001" "redirectAuthToken": "eda8c851-f36e-4d78-b832-2d1411e1414b" "loanTransactionResourceUrl": "https://api.bnpl-example.com/bnpl/v1/loan-transactions/LOAN1001" - # The workflow makes 4 API calls because the auth step is skipped - expected_api_calls: 4 + # With GOTO handling: 6 API calls (products, terms, customers, loan-transactions, + # retrieveFinalizedPaymentPlan, updateOrderStatus); auth step is skipped. + expected_api_calls: 6 # Despite the issues, all steps that run actually succeed expect_success: true custom_mocks: From a4220b6fc81093ede6b8961f135a324846242906 Mon Sep 17 00:00:00 2001 From: sophie-jentic <212357613+sophie-jentic@users.noreply.github.com> Date: Wed, 11 Mar 2026 11:07:23 +0000 Subject: [PATCH 3/8] Revert changes to separate PR concerns. --- runner/arazzo_runner/models.py | 4 +-- runner/arazzo_runner/runner.py | 50 ++++++---------------------------- 2 files changed, 9 insertions(+), 45 deletions(-) diff --git a/runner/arazzo_runner/models.py b/runner/arazzo_runner/models.py index ea5ae66..d62a815 100644 --- a/runner/arazzo_runner/models.py +++ b/runner/arazzo_runner/models.py @@ -81,8 +81,6 @@ class ExecutionState: workflow_id: str current_step_id: str | None = None - """When set after a GOTO, the next step to run is this one (not the step after it).""" - pending_goto_step_id: str | None = None inputs: dict[str, Any] = None step_outputs: dict[str, dict[str, Any]] = None workflow_outputs: dict[str, Any] = None @@ -132,4 +130,4 @@ class RuntimeParams(BaseModel): servers: dict[str, str] | None = Field( default=None, description="Server variable overrides for server resolution." - ) + ) \ No newline at end of file diff --git a/runner/arazzo_runner/runner.py b/runner/arazzo_runner/runner.py index 558b49a..3c9e19a 100644 --- a/runner/arazzo_runner/runner.py +++ b/runner/arazzo_runner/runner.py @@ -193,21 +193,8 @@ def register_callback(self, event_type: str, callback: Callable[..., None]) -> N else: logger.warning(f"Unknown event type: {event_type}") - def _sanitize_for_json(self, obj: Any) -> Any: - """Recursively replace non-JSON-serializable values (e.g. bytes) with placeholders.""" - if isinstance(obj, bytes | bytearray): - return f"<{len(obj)} bytes>" - if isinstance(obj, dict): - return {k: self._sanitize_for_json(v) for k, v in obj.items()} - if isinstance(obj, list): - return [self._sanitize_for_json(v) for v in obj] - return obj - def _trigger_event(self, event_type: str, **kwargs: Any) -> None: """Trigger registered callbacks for an event""" - # Sanitize outputs for step_complete so callbacks can safely json.dumps them - if event_type == "step_complete" and "outputs" in kwargs and kwargs["outputs"]: - kwargs = {**kwargs, "outputs": self._sanitize_for_json(kwargs["outputs"])} for callback in self.event_callbacks.get(event_type, []): try: callback(**kwargs) @@ -418,15 +405,7 @@ def execute_next_step(self, execution_id: str) -> dict[str, Any]: next_step = None next_step_idx = 0 - if state.pending_goto_step_id is not None: - # After a GOTO, run the target step (not the step after current) - for idx, step in enumerate(steps): - if step.get("stepId") == state.pending_goto_step_id: - next_step = step - next_step_idx = idx - state.pending_goto_step_id = None - break - elif state.current_step_id is None: + if state.current_step_id is None: # First step in the workflow if steps: next_step = steps[0] @@ -573,29 +552,17 @@ def execute_next_step(self, execution_id: str) -> dict[str, Any]: } elif "step_id" in next_action: # Go to a specific step in the current workflow - target_step_id = next_action["step_id"] + # Find the step index for idx, step in enumerate(steps): - if step.get("stepId") == target_step_id: + if step.get("stepId") == next_action["step_id"]: next_step_idx = idx break - else: - logger.warning( - f"GOTO target step '{target_step_id}' not found, continuing to next step" - ) - state.current_step_id = step_id - return { - "status": WorkflowExecutionStatus.STEP_COMPLETE, - "step_id": step_id, - "success": success, - "outputs": step_result.get("outputs", {}), - } - - # Next run should execute the target step, not the step after it - state.pending_goto_step_id = steps[next_step_idx].get("stepId") - state.current_step_id = step_id # step we just completed + + # Update current step + state.current_step_id = steps[next_step_idx].get("stepId") return { "status": WorkflowExecutionStatus.GOTO_STEP, - "step_id": state.pending_goto_step_id, + "step_id": state.current_step_id, } elif next_action["type"] == ActionType.RETRY: # Retry the current step @@ -833,7 +800,6 @@ def generate_env_mappings( arazzo_specs = [arazzo_docs] else: arazzo_specs = list(arazzo_docs) if arazzo_docs else [] - auth_processor = AuthProcessor() auth_config = auth_processor.process_api_auth( openapi_specs=source_descriptions or {}, @@ -846,4 +812,4 @@ def generate_env_mappings( result = {"auth": auth_env_mappings} if server_env_mappings: result["servers"] = server_env_mappings - return result + return result \ No newline at end of file From 5440f082059e81eade759abc38f7360e257ae565 Mon Sep 17 00:00:00 2001 From: sophie-jentic <212357613+sophie-jentic@users.noreply.github.com> Date: Thu, 12 Mar 2026 10:59:23 +0000 Subject: [PATCH 4/8] Revert sanitisation change for other PR. --- runner/arazzo_runner/__main__.py | 37 +++----------------------------- 1 file changed, 3 insertions(+), 34 deletions(-) diff --git a/runner/arazzo_runner/__main__.py b/runner/arazzo_runner/__main__.py index d963e39..34fbefa 100644 --- a/runner/arazzo_runner/__main__.py +++ b/runner/arazzo_runner/__main__.py @@ -15,31 +15,6 @@ logger = logging.getLogger("arazzo-runner-cli") -# Maximum length for string values in printed workflow result (avoids dumping HTML/large bodies) -_MAX_DISPLAY_STRING_LEN = 300 - -# Keys that typically hold HTML or large binary/text content - show only length in output -_OMIT_CONTENT_KEYS = frozenset( - {"htmlContent", "fileContent", "body", "content", "responseBody", "data"} -) - - -def _truncate_for_display( - obj: Any, max_len: int = _MAX_DISPLAY_STRING_LEN, parent_key: str = "" -) -> Any: - """Recursively truncate long strings and binary in dicts/lists for terminal display.""" - if isinstance(obj, bytes): - return f"<{len(obj)} bytes>" - if isinstance(obj, str): - if parent_key in _OMIT_CONTENT_KEYS or len(obj) > max_len: - return f"<{len(obj)} chars>" - return obj - if isinstance(obj, dict): - return {k: _truncate_for_display(v, max_len, k) for k, v in obj.items()} - if isinstance(obj, list): - return [_truncate_for_display(v, max_len, parent_key) for v in obj] - return obj - def parse_inputs(inputs_str: str) -> dict[str, Any]: """Parse input string into a dictionary.""" @@ -263,15 +238,9 @@ async def handle_execute_workflow(runner: ArazzoRunner | None, args: argparse.Na logger.error(f"Failed to execute workflow: {e}", exc_info=True) sys.exit(1) - # Print outputs and determine success/failure (truncate long strings e.g. HTML) + # Print outputs and determine success/failure print(f"\n=== Completed workflow: {args.workflow_id} ===") - display_outputs = _truncate_for_display(result.outputs) if result.outputs else result.outputs - display_step_outputs = ( - _truncate_for_display(result.step_outputs) if result.step_outputs else result.step_outputs - ) - print(f"Outputs: {display_outputs}") - if display_step_outputs: - print(f"Step outputs: {display_step_outputs}") + print(f"Outputs: {result}") # Check for failure in outputs (if possible) try: @@ -545,4 +514,4 @@ def run_main(): if __name__ == "__main__": - run_main() + run_main() \ No newline at end of file From e51cf8213eae1b8015541fed9960ea214263279c Mon Sep 17 00:00:00 2001 From: sophie-jentic <212357613+sophie-jentic@users.noreply.github.com> Date: Thu, 12 Mar 2026 11:04:18 +0000 Subject: [PATCH 5/8] expression substitution resulting in none fix --- runner/arazzo_runner/executor/parameter_processor.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/runner/arazzo_runner/executor/parameter_processor.py b/runner/arazzo_runner/executor/parameter_processor.py index 745dcc5..9a35392 100644 --- a/runner/arazzo_runner/executor/parameter_processor.py +++ b/runner/arazzo_runner/executor/parameter_processor.py @@ -603,7 +603,12 @@ def replace_embedded(match): eval_val = ExpressionEvaluator.evaluate_expression( expr, state, self.source_descriptions ) - return "" if eval_val is None else str(eval_val) + if eval_val is None: + logger.warning( + f"Embedded expression {expr} evaluated to None - keeping original substring" + ) + return expr + return str(eval_val) value = re.sub(r"\$inputs\.[\w.]+|\$steps\.[\w.]+", replace_embedded, value) elif "{" in value and "}" in value: From 4c6861ac69e0651bf91dc5a4743ceec621f5f2c4 Mon Sep 17 00:00:00 2001 From: sophie-jentic <212357613+sophie-jentic@users.noreply.github.com> Date: Thu, 12 Mar 2026 13:30:46 +0000 Subject: [PATCH 6/8] Test config reverted. file_name canonical key --- runner/arazzo_runner/http.py | 11 +-- runner/tests/fixtures/bnpl/test_config.yaml | 4 +- runner/tests/test_http_client.py | 100 +++++++++++++++++++- 3 files changed, 103 insertions(+), 12 deletions(-) diff --git a/runner/arazzo_runner/http.py b/runner/arazzo_runner/http.py index b762c6b..4f0e2f9 100644 --- a/runner/arazzo_runner/http.py +++ b/runner/arazzo_runner/http.py @@ -187,18 +187,15 @@ def execute_request( data = {} for key, value in payload.items(): # A field is treated as a file upload if its value is an object - # containing 'content' and either 'file_name' or 'filename' keys. - # Support both 'file_name' (new) and 'filename' (legacy) for backward compatibility + # containing 'content' and 'file_name' (canonical key; parameter_processor + # normalizes filename -> file_name before payload reaches here). has_file_name = ( - isinstance(value, dict) - and "content" in value - and ("file_name" in value or "filename" in value) + isinstance(value, dict) and "content" in value and "file_name" in value ) if has_file_name: # requests expects a tuple: (filename, file_data, content_type) file_content = value["content"] - # Support both 'file_name' (preferred) and 'filename' (legacy) - file_name = value.get("file_name") or value.get("filename") or "attachment" + file_name = value.get("file_name") or "attachment" file_type = value.get("contentType", "application/octet-stream") # Validate that file_content is bytes/bytearray diff --git a/runner/tests/fixtures/bnpl/test_config.yaml b/runner/tests/fixtures/bnpl/test_config.yaml index 42fe523..a145550 100644 --- a/runner/tests/fixtures/bnpl/test_config.yaml +++ b/runner/tests/fixtures/bnpl/test_config.yaml @@ -51,9 +51,7 @@ workflows: "customer": "https://api.bnpl-example.com/bnpl/v1/customers/CUST1001" "redirectAuthToken": "eda8c851-f36e-4d78-b832-2d1411e1414b" "loanTransactionResourceUrl": "https://api.bnpl-example.com/bnpl/v1/loan-transactions/LOAN1001" - # With GOTO handling: 6 API calls (products, terms, customers, loan-transactions, - # retrieveFinalizedPaymentPlan, updateOrderStatus); auth step is skipped. - expected_api_calls: 6 + expected_api_calls: 4 # Despite the issues, all steps that run actually succeed expect_success: true custom_mocks: diff --git a/runner/tests/test_http_client.py b/runner/tests/test_http_client.py index cf3572c..cb55e90 100644 --- a/runner/tests/test_http_client.py +++ b/runner/tests/test_http_client.py @@ -320,7 +320,7 @@ def test_apply_auth_multiple_apis(http_client: HTTPExecutor): def test_execute_request_multipart(http_client: HTTPExecutor): - """Test executing a multipart/form-data request.""" + """Test executing a multipart/form-data request (canonical file_name key).""" mock_response = MagicMock() mock_response.status_code = 200 mock_response.headers = {"Content-Type": "application/json"} @@ -331,7 +331,7 @@ def test_execute_request_multipart(http_client: HTTPExecutor): "payload": { "file": { "content": b"file content", - "filename": "test.txt", + "file_name": "test.txt", "contentType": "text/plain", }, "description": "A test file", @@ -717,6 +717,102 @@ def test_execute_request_raw_with_unserializable_payload(http_client: HTTPExecut assert kwargs["headers"]["Content-Type"] == "text/plain" +def test_execute_request_multipart_file_dict_with_file_name(http_client: HTTPExecutor): + """Test multipart upload with file dict using canonical file_name key.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"Content-Type": "application/json"} + mock_response.json.return_value = {"status": "ok"} + + request_body = { + "contentType": "multipart/form-data", + "payload": { + "file": { + "content": b"binary file content", + "file_name": "document.pdf", + }, + "description": "A PDF document", + }, + } + + with patch("requests.Session.request", return_value=mock_response) as mock_request: + http_client.execute_request( + method="POST", + url="http://test.com/upload", + parameters={}, + request_body=request_body, + security_options=None, + source_name=None, + ) + + mock_request.assert_called_once() + args, kwargs = mock_request.call_args + assert kwargs["method"] == "POST" + assert kwargs["url"] == "http://test.com/upload" + assert "files" in kwargs + assert kwargs["files"]["file"] == ( + "document.pdf", + b"binary file content", + "application/octet-stream", + ) + assert "data" in kwargs + assert kwargs["data"]["description"] == "A PDF document" + assert "Content-Type" not in kwargs["headers"] + + +def test_execute_request_multipart_file_dict_content_none_raises(http_client: HTTPExecutor): + """Test that a file dict with content=None raises ValueError.""" + request_body = { + "contentType": "multipart/form-data", + "payload": { + "file": { + "content": None, + "file_name": "test.txt", + }, + }, + } + + with pytest.raises(ValueError) as exc_info: + http_client.execute_request( + method="POST", + url="http://test.com/upload", + parameters={}, + request_body=request_body, + security_options=None, + source_name=None, + ) + + assert "File content for field 'file' is None" in str(exc_info.value) + assert "bytes" in str(exc_info.value).lower() + + +def test_execute_request_multipart_file_dict_content_str_raises(http_client: HTTPExecutor): + """Test that a file dict with content as str (non-bytes) raises ValueError.""" + request_body = { + "contentType": "multipart/form-data", + "payload": { + "file": { + "content": "not bytes", + "file_name": "test.txt", + }, + }, + } + + with pytest.raises(ValueError) as exc_info: + http_client.execute_request( + method="POST", + url="http://test.com/upload", + parameters={}, + request_body=request_body, + security_options=None, + source_name=None, + ) + + assert "File content for field 'file'" in str(exc_info.value) + assert "must be bytes or bytearray" in str(exc_info.value) + assert "str" in str(exc_info.value).lower() + + def test_execute_request_multipart_missing_content_key(http_client: HTTPExecutor): """Test multipart processing with a file dict missing 'content': should treat it as regular field.""" mock_response = MagicMock() From f1eb54024c381b9d25a3ef792ec0c912b187d2cd Mon Sep 17 00:00:00 2001 From: sophie-jentic <212357613+sophie-jentic@users.noreply.github.com> Date: Thu, 12 Mar 2026 13:42:27 +0000 Subject: [PATCH 7/8] Param processor test. Lint --- runner/arazzo_runner/__main__.py | 2 +- runner/arazzo_runner/models.py | 2 +- runner/arazzo_runner/runner.py | 2 +- .../executor/test_parameter_processor.py | 26 +++++++++++++++++++ 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/runner/arazzo_runner/__main__.py b/runner/arazzo_runner/__main__.py index 34fbefa..3bb0a83 100644 --- a/runner/arazzo_runner/__main__.py +++ b/runner/arazzo_runner/__main__.py @@ -514,4 +514,4 @@ def run_main(): if __name__ == "__main__": - run_main() \ No newline at end of file + run_main() diff --git a/runner/arazzo_runner/models.py b/runner/arazzo_runner/models.py index d62a815..e94e504 100644 --- a/runner/arazzo_runner/models.py +++ b/runner/arazzo_runner/models.py @@ -130,4 +130,4 @@ class RuntimeParams(BaseModel): servers: dict[str, str] | None = Field( default=None, description="Server variable overrides for server resolution." - ) \ No newline at end of file + ) diff --git a/runner/arazzo_runner/runner.py b/runner/arazzo_runner/runner.py index 3c9e19a..26646e5 100644 --- a/runner/arazzo_runner/runner.py +++ b/runner/arazzo_runner/runner.py @@ -812,4 +812,4 @@ def generate_env_mappings( result = {"auth": auth_env_mappings} if server_env_mappings: result["servers"] = server_env_mappings - return result \ No newline at end of file + return result diff --git a/runner/tests/executor/test_parameter_processor.py b/runner/tests/executor/test_parameter_processor.py index e42d131..0a25871 100644 --- a/runner/tests/executor/test_parameter_processor.py +++ b/runner/tests/executor/test_parameter_processor.py @@ -467,6 +467,32 @@ def test_prepare_multipart_form_body_with_mixed_types(self): self.assertEqual(result["body"]["contentType"], "multipart/form-data") self.assertEqual(result["body"]["payload"], expected_payload) + def test_process_multipart_payload_preserves_file_dict_not_json_serialized(self): + """Dict with content + file_name (or filename) is treated as file object and preserved, not json.dumps'd.""" + binary_content = b"PDF binary \x00\x01\x02" + payload = { + "file": { + "content": binary_content, + "file_name": "document.pdf", + "contentType": "application/pdf", + }, + "purpose": "ocr", + } + result = self.processor._process_multipart_payload(payload) + self.assertIn("file", result) + out = result["file"] + # Preserved as file dict with canonical file_name + self.assertIsInstance(out, dict) + self.assertIn("content", out) + self.assertIn("file_name", out) + # Content must still be bytes (not JSON-serialized string) + self.assertIsInstance(out["content"], bytes) + self.assertEqual(out["content"], binary_content) + self.assertEqual(out["file_name"], "document.pdf") + self.assertEqual(out["contentType"], "application/pdf") + # Scalar field unchanged + self.assertEqual(result["purpose"], "ocr") + if __name__ == "__main__": unittest.main() From 471bf4ddf47bc3441ce8766dcab59d4b65aeb054 Mon Sep 17 00:00:00 2001 From: sophie-jentic <212357613+sophie-jentic@users.noreply.github.com> Date: Thu, 12 Mar 2026 13:50:01 +0000 Subject: [PATCH 8/8] Use file_name as key name --- runner/arazzo_runner/executor/parameter_processor.py | 2 +- runner/tests/executor/test_parameter_processor.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/runner/arazzo_runner/executor/parameter_processor.py b/runner/arazzo_runner/executor/parameter_processor.py index 9a35392..479e372 100644 --- a/runner/arazzo_runner/executor/parameter_processor.py +++ b/runner/arazzo_runner/executor/parameter_processor.py @@ -116,7 +116,7 @@ def _process_multipart_payload(self, payload: dict[str, Any]) -> dict[str, Any]: logger.debug(f"Wrapping binary data in field '{key}' for multipart upload.") processed_payload[key] = { "content": value, - "filename": "attachment", # Using a generic filename + "file_name": "attachment", "contentType": "application/octet-stream", } else: diff --git a/runner/tests/executor/test_parameter_processor.py b/runner/tests/executor/test_parameter_processor.py index 0a25871..978df7e 100644 --- a/runner/tests/executor/test_parameter_processor.py +++ b/runner/tests/executor/test_parameter_processor.py @@ -388,7 +388,7 @@ def test_prepare_multipart_form_body(self): "payload": { "file": { "content": b"file content", - "filename": "attachment", + "file_name": "attachment", "contentType": "application/octet-stream", }, "description": "this is a file", @@ -408,7 +408,7 @@ def test_prepare_multipart_form_body_with_bytearray(self): "payload": { "file": { "content": bytearray(b"file content"), - "filename": "attachment", + "file_name": "attachment", "contentType": "application/octet-stream", }, "description": "a bytearray file", @@ -453,12 +453,12 @@ def test_prepare_multipart_form_body_with_mixed_types(self): expected_payload = { "file_bytes": { "content": b"this is bytes", - "filename": "attachment", + "file_name": "attachment", "contentType": "application/octet-stream", }, "file_bytearray": { "content": bytearray(b"this is bytearray"), - "filename": "attachment", + "file_name": "attachment", "contentType": "application/octet-stream", }, "description": "mixed payload",