From 3efcda87de0525d5db31523557665e8c8a2f41d8 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 2 Sep 2025 13:57:30 +0200 Subject: [PATCH 01/42] resolve inputs and outputs from a Haystack pipeline YAML definition --- src/hayhooks/server/utils/yaml_utils.py | 125 ++++++++++++++++++ .../yaml/inputs_outputs_pipeline.yml | 59 +++++++++ tests/test_yaml_inputs_outputs.py | 44 ++++++ 3 files changed, 228 insertions(+) create mode 100644 src/hayhooks/server/utils/yaml_utils.py create mode 100644 tests/test_files/yaml/inputs_outputs_pipeline.yml create mode 100644 tests/test_yaml_inputs_outputs.py diff --git a/src/hayhooks/server/utils/yaml_utils.py b/src/hayhooks/server/utils/yaml_utils.py new file mode 100644 index 0000000..31722be --- /dev/null +++ b/src/hayhooks/server/utils/yaml_utils.py @@ -0,0 +1,125 @@ +from typing import Any, Union + +import yaml +from pydantic import BaseModel + + +class BaseInputOutputResolution(BaseModel): + path: str + component: str + name: str + type: Any + + +class InputResolution(BaseInputOutputResolution): + required: bool + + +class OutputResolution(BaseInputOutputResolution): + pass + + +def _normalize_declared_path(value: Any) -> Union[str, None]: + """ + Normalize a declared path value. + + A declared IO path in YAML can be provided either as a string (e.g. "comp.field") + or as a one-item list of strings. This helper normalizes both cases to a single + string, or None if the value cannot be normalized. + + Args: + value: Declared path value from YAML (string or list of strings). + + Returns: + The normalized "component.field" string, or None when not available. + """ + if isinstance(value, list): + return value[0] if value else None + return value + + +def _resolve_declared_io( + declared_map: dict[str, Any], + pipeline_meta: dict[str, dict[str, Any]], +) -> dict[str, BaseInputOutputResolution]: + """ + Resolve declared IO entries using the pipeline metadata. + + Auto-detects input vs output based on metadata: inputs generally expose + "is_mandatory" or "default_value", while outputs do not. + + Args: + declared_map: Mapping from declared IO name to path (string or list). + pipeline_meta: Pipeline metadata as returned by Haystack (inputs/outputs). + + Returns: + A mapping from declared IO name to `InputResolution` or `OutputResolution`. + """ + resolutions: dict[str, BaseInputOutputResolution] = {} + for io_name, declared_path in declared_map.items(): + normalized_path = _normalize_declared_path(declared_path) + if not isinstance(normalized_path, str) or "." not in normalized_path: + continue + + component_name, field_name = normalized_path.split(".", 1) + meta = (pipeline_meta.get(component_name, {}) or {}).get(field_name, {}) or {} + resolved_type = meta.get("type") + + # inputs metadata expose "is_mandatory"/"default_value" + is_input_like = "is_mandatory" in meta or "default_value" in meta + if is_input_like: + resolutions[io_name] = InputResolution( + path=f"{component_name}.{field_name}", + component=component_name, + name=field_name, + type=resolved_type, + required=bool(meta.get("is_mandatory", False)), + ) + else: + resolutions[io_name] = OutputResolution( + path=f"{component_name}.{field_name}", + component=component_name, + name=field_name, + type=resolved_type, + ) + + return resolutions + + +def get_inputs_outputs_from_yaml(yaml_source_code: str) -> dict[str, dict[str, BaseInputOutputResolution]]: + """ + Resolve inputs and outputs from a Haystack pipeline YAML. 
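+
+    A minimal declaration in the YAML (as exercised by the tests) looks like:
+
+        inputs:
+          query:
+            - prompt.query
+        outputs:
+          replies: llm.replies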
+ + This function aligns the YAML-declared inputs and outputs with the pipeline + metadata returned by Haystack, producing for each declared IO its path, + component, field name, resolved type, and (for inputs) the required flag. + + Args: + yaml_source_code: Pipeline YAML source code. + + Returns: + A dictionary with two keys: "inputs" and "outputs". Each value is a mapping + from the declared IO name to a resolution model (`InputResolution` for inputs, + `OutputResolution` for outputs). + + Raises: + ValueError: If both inputs and outputs are missing from the YAML definition. + """ + yaml_dict = yaml.safe_load(yaml_source_code) or {} + declared_inputs = yaml_dict.get("inputs", {}) or {} + declared_outputs = yaml_dict.get("outputs", {}) or {} + + if not declared_inputs and not declared_outputs: + msg = "YAML pipeline must declare at least one of 'inputs' or 'outputs'." + raise ValueError(msg) + + from haystack import Pipeline + + pipeline = Pipeline.loads(yaml_source_code) + pipeline_inputs = pipeline.inputs() + pipeline_outputs = pipeline.outputs() + + input_resolutions = _resolve_declared_io(declared_inputs, pipeline_inputs) + output_resolutions = _resolve_declared_io(declared_outputs, pipeline_outputs) + + return {"inputs": input_resolutions, "outputs": output_resolutions} diff --git a/tests/test_files/yaml/inputs_outputs_pipeline.yml b/tests/test_files/yaml/inputs_outputs_pipeline.yml new file mode 100644 index 0000000..cb94864 --- /dev/null +++ b/tests/test_files/yaml/inputs_outputs_pipeline.yml @@ -0,0 +1,59 @@ +components: + converter: + type: haystack.components.converters.html.HTMLToDocument + init_parameters: + extraction_kwargs: null + + fetcher: + init_parameters: + raise_on_failure: true + retry_attempts: 2 + timeout: 3 + user_agents: + - haystack/LinkContentFetcher/2.0.0b8 + type: haystack.components.fetchers.link_content.LinkContentFetcher + + llm: + init_parameters: + api_base_url: null + api_key: + env_vars: + - OPENAI_API_KEY + strict: true + type: env_var + generation_kwargs: {} + model: gpt-4o-mini + streaming_callback: null + system_prompt: null + type: haystack.components.generators.openai.OpenAIGenerator + + prompt: + init_parameters: + template: | + "According to the contents of this website: + {% for document in documents %} + {{document.content}} + {% endfor %} + Answer the given question: {{query}} + Answer: + " + type: haystack.components.builders.prompt_builder.PromptBuilder + +connections: + - receiver: converter.sources + sender: fetcher.streams + - receiver: prompt.documents + sender: converter.documents + - receiver: llm.prompt + sender: prompt.prompt + +metadata: {} + +inputs: + urls: + - fetcher.urls + query: + - prompt.query + +outputs: + replies: llm.replies diff --git a/tests/test_yaml_inputs_outputs.py b/tests/test_yaml_inputs_outputs.py new file mode 100644 index 0000000..1d7ccb0 --- /dev/null +++ b/tests/test_yaml_inputs_outputs.py @@ -0,0 +1,44 @@ +from pathlib import Path +from typing import Any + +import pytest + +from hayhooks.server.utils.yaml_utils import InputResolution, OutputResolution, get_inputs_outputs_from_yaml + + +def test_get_inputs_outputs_from_yaml_matches_pipeline_metadata(): + yaml_path = Path(__file__).parent / "test_files" / "yaml" / "inputs_outputs_pipeline.yml" + yaml_source = yaml_path.read_text() + + result = get_inputs_outputs_from_yaml(yaml_source) + + assert set(result.keys()) == {"inputs", "outputs"} + assert set(result["inputs"].keys()) == {"urls", "query"} + assert set(result["outputs"].keys()) == {"replies"} + + 
assert isinstance(result["inputs"]["urls"], InputResolution) + assert result["inputs"]["urls"].path == "fetcher.urls" + assert result["inputs"]["urls"].component == "fetcher" + assert result["inputs"]["urls"].name == "urls" + assert result["inputs"]["urls"].type == list[str] + assert result["inputs"]["urls"].required is True + + assert isinstance(result["inputs"]["query"], InputResolution) + assert result["inputs"]["query"].path == "prompt.query" + assert result["inputs"]["query"].component == "prompt" + assert result["inputs"]["query"].name == "query" + assert result["inputs"]["query"].type == Any + + assert isinstance(result["outputs"]["replies"], OutputResolution) + assert result["outputs"]["replies"].path == "llm.replies" + assert result["outputs"]["replies"].component == "llm" + assert result["outputs"]["replies"].name == "replies" + assert result["outputs"]["replies"].type == list[str] + + +def test_get_inputs_outputs_from_yaml_raises_when_missing_inputs_outputs(): + yaml_path = Path(__file__).parent / "test_files" / "mixed" / "chat_with_website" / "chat_with_website.yml" + yaml_source = yaml_path.read_text() + + with pytest.raises(ValueError, match="YAML pipeline must declare at least one of 'inputs' or 'outputs'."): + get_inputs_outputs_from_yaml(yaml_source) From a855065bd92ee60d36b4190ef85f84ab194b2801 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 2 Sep 2025 15:48:57 +0200 Subject: [PATCH 02/42] Add method to add YAML pipeline to registry --- src/hayhooks/server/pipelines/models.py | 46 ++++++++++++++++ src/hayhooks/server/utils/deploy_utils.py | 66 +++++++++++++++++++++++ tests/test_deploy_yaml.py | 54 +++++++++++++++++++ 3 files changed, 166 insertions(+) create mode 100644 tests/test_deploy_yaml.py diff --git a/src/hayhooks/server/pipelines/models.py b/src/hayhooks/server/pipelines/models.py index fb27cde..488bbe7 100644 --- a/src/hayhooks/server/pipelines/models.py +++ b/src/hayhooks/server/pipelines/models.py @@ -4,6 +4,7 @@ from pydantic import BaseModel, ConfigDict, create_model from hayhooks.server.utils.create_valid_type import handle_unsupported_types +from hayhooks.server.utils.yaml_utils import InputResolution, OutputResolution class PipelineDefinition(BaseModel): @@ -76,6 +77,51 @@ def get_response_model(pipeline_name: str, pipeline_outputs: dict[str, dict[str, return create_model(f"{pipeline_name.capitalize()}RunResponse", **response_model, __config__=config) +def get_request_model_from_resolved_io( + pipeline_name: str, declared_inputs: dict[str, InputResolution] +) -> type[BaseModel]: + """ + Create a flat Pydantic request model from declared inputs resolved by yaml_utils. + + Args: + pipeline_name: Name of the pipeline used for model naming. + declared_inputs: Mapping of declared input name to InputResolution. + + Returns: + A Pydantic model with top-level fields matching declared input names. + """ + fields: dict[str, Any] = {} + + for input_name, resolution in declared_inputs.items(): + input_type = resolution.type + default_value = ... if resolution.required else None + fields[input_name] = (input_type, default_value) + + return create_model(f"{pipeline_name.capitalize()}RunRequest", **fields) + + +def get_response_model_from_resolved_io( + pipeline_name: str, declared_outputs: dict[str, OutputResolution] +) -> type[BaseModel]: + """ + Create a flat Pydantic response model from declared outputs resolved by yaml_utils. + + Args: + pipeline_name: Name of the pipeline used for model naming. 
+ declared_outputs: Mapping of declared output name to OutputResolution. + + Returns: + A Pydantic model with top-level fields matching declared output names. + """ + fields: dict[str, Any] = {} + + for output_name, resolution in declared_outputs.items(): + output_type = resolution.type + fields[output_name] = (output_type, ...) + + return create_model(f"{pipeline_name.capitalize()}RunResponse", **fields) + + def convert_value_to_dict(value: Any) -> Union[str, int, float, bool, None, dict[str, Any], list[Any]]: """Convert a single value to a dictionary if possible""" if hasattr(value, "to_dict"): diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index c499800..a9bc7bd 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -29,9 +29,12 @@ PipelineDefinition, convert_component_output, get_request_model, + get_request_model_from_resolved_io, get_response_model, + get_response_model_from_resolved_io, ) from hayhooks.server.utils.base_pipeline_wrapper import BasePipelineWrapper +from hayhooks.server.utils.yaml_utils import get_inputs_outputs_from_yaml from hayhooks.settings import settings @@ -397,6 +400,69 @@ def deploy_pipeline_files( return {"name": pipeline_name} +def add_yaml_pipeline_to_registry( + pipeline_name: str, + source_code: str, + overwrite: bool = False, + description: Optional[str] = None, + skip_mcp: Optional[bool] = False, +) -> None: + """ + Add a YAML pipeline to the registry. + + Args: + pipeline_name: Name of the pipeline to deploy + source_code: Source code of the pipeline + """ + + log.debug(f"Checking if YAML pipeline '{pipeline_name}' already exists: {registry.get(pipeline_name)}") + if registry.get(pipeline_name): + if overwrite: + log.debug(f"Clearing existing YAML pipeline '{pipeline_name}'") + registry.remove(pipeline_name) + else: + msg = f"YAML pipeline '{pipeline_name}' already exists" + raise PipelineAlreadyExistsError(msg) + + clog = log.bind(pipeline_name=pipeline_name, type="yaml") + + clog.debug("Creating request/response models from declared YAML inputs/outputs") + + # Build request/response models from declared YAML inputs/outputs using resolved IO types + try: + resolved_io = get_inputs_outputs_from_yaml(source_code) + + pipeline_inputs = resolved_io.get("inputs", {}) + pipeline_outputs = resolved_io.get("outputs", {}) + + # Prefer resolved IO-based flat models for API schema + request_model = get_request_model_from_resolved_io(pipeline_name, pipeline_inputs) + response_model = get_response_model_from_resolved_io(pipeline_name, pipeline_outputs) + except Exception as e: + clog.error(f"Failed creating request/response models for YAML pipeline: {e!s}") + raise + + metadata = { + "description": description or pipeline_name, + "request_model": request_model, + "response_model": response_model, + "skip_mcp": bool(skip_mcp), + } + + clog.debug(f"Adding YAML pipeline to registry with metadata: {metadata}") + + # Store the instantiated pipeline together with its metadata + try: + from haystack import Pipeline + + pipeline = Pipeline.loads(source_code) + except Exception as e: + msg = f"Unable to parse Haystack Pipeline {pipeline_name}: {e!s}" + raise ValueError(msg) from e + + registry.add(pipeline_name, pipeline, metadata=metadata) + + def add_pipeline_to_registry( pipeline_name: str, files: dict[str, str], save_files: bool = True, overwrite: bool = False ) -> BasePipelineWrapper: diff --git a/tests/test_deploy_yaml.py b/tests/test_deploy_yaml.py new file mode 
100644 index 0000000..c66c6a0 --- /dev/null +++ b/tests/test_deploy_yaml.py @@ -0,0 +1,54 @@ +from pathlib import Path + +from hayhooks.server.pipelines.registry import registry +from hayhooks.server.utils.deploy_utils import add_yaml_pipeline_to_registry + + +def test_deploy_pipeline_with_inputs_outputs(): + pipeline_file = Path(__file__).parent / "test_files/yaml/inputs_outputs_pipeline.yml" + pipeline_data = { + "name": pipeline_file.stem, + "source_code": pipeline_file.read_text(), + } + + add_yaml_pipeline_to_registry( + pipeline_name=pipeline_data["name"], + source_code=pipeline_data["source_code"], + ) + + assert registry.get(pipeline_data["name"]) is not None + + metadata = registry.get_metadata(pipeline_data["name"]) + assert metadata is not None + assert metadata["request_model"] is not None + assert metadata["response_model"] is not None + + assert metadata["request_model"].model_json_schema() == { + "properties": { + "urls": { + "title": "Urls", + "type": "array", + "items": {"type": "string"}, + }, + "query": { + "default": None, + "title": "Query", + }, + }, + "required": ["urls"], + "title": "Inputs_outputs_pipelineRunRequest", + "type": "object", + } + + assert metadata["response_model"].model_json_schema() == { + "properties": { + "replies": { + "type": "array", + "items": {"type": "string"}, + "title": "Replies", + }, + }, + "required": ["replies"], + "type": "object", + "title": "Inputs_outputs_pipelineRunResponse", + } From d2ad83313c3eeacc2b3680dc712b6a50e11b315e Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 2 Sep 2025 16:17:58 +0200 Subject: [PATCH 03/42] Better types handling --- src/hayhooks/server/utils/deploy_utils.py | 4 +- src/hayhooks/server/utils/yaml_utils.py | 84 +++++++++++++++-------- 2 files changed, 57 insertions(+), 31 deletions(-) diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index a9bc7bd..5fcbbe0 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -432,8 +432,8 @@ def add_yaml_pipeline_to_registry( try: resolved_io = get_inputs_outputs_from_yaml(source_code) - pipeline_inputs = resolved_io.get("inputs", {}) - pipeline_outputs = resolved_io.get("outputs", {}) + pipeline_inputs = resolved_io["inputs"] + pipeline_outputs = resolved_io["outputs"] # Prefer resolved IO-based flat models for API schema request_model = get_request_model_from_resolved_io(pipeline_name, pipeline_inputs) diff --git a/src/hayhooks/server/utils/yaml_utils.py b/src/hayhooks/server/utils/yaml_utils.py index 31722be..04cc743 100644 --- a/src/hayhooks/server/utils/yaml_utils.py +++ b/src/hayhooks/server/utils/yaml_utils.py @@ -1,4 +1,4 @@ -from typing import Any, Union +from typing import Any, TypedDict, Union import yaml from pydantic import BaseModel @@ -19,6 +19,11 @@ class OutputResolution(BaseInputOutputResolution): pass +class ResolvedIO(TypedDict): + inputs: dict[str, InputResolution] + outputs: dict[str, OutputResolution] + + def _normalize_declared_path(value: Any) -> Union[str, None]: """ Normalize a declared path value. @@ -38,24 +43,56 @@ def _normalize_declared_path(value: Any) -> Union[str, None]: return value -def _resolve_declared_io( +def _resolve_declared_inputs( declared_map: dict[str, Any], pipeline_meta: dict[str, dict[str, Any]], -) -> dict[str, BaseInputOutputResolution]: +) -> dict[str, InputResolution]: + """ + Resolve declared input entries using the pipeline metadata. 
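+
+    A declared path such as "prompt.query" (or the one-item list form ["prompt.query"]) is
+    split into component and field, then matched against the component metadata to recover
+    the resolved type and the "is_mandatory" flag.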
+ + Args: + declared_map: Mapping from declared IO name to path (string or list). + pipeline_meta: Pipeline inputs metadata as returned by Haystack. + + Returns: + A mapping from declared IO name to `InputResolution`. """ - Resolve declared IO entries using the pipeline metadata. + resolutions: dict[str, InputResolution] = {} + for io_name, declared_path in declared_map.items(): + normalized_path = _normalize_declared_path(declared_path) + if not isinstance(normalized_path, str) or "." not in normalized_path: + continue - Auto-detects input vs output based on metadata: inputs generally expose - "is_mandatory" or "default_value", while outputs do not. + component_name, field_name = normalized_path.split(".", 1) + meta = (pipeline_meta.get(component_name, {}) or {}).get(field_name, {}) or {} + resolved_type = meta.get("type") + + resolutions[io_name] = InputResolution( + path=f"{component_name}.{field_name}", + component=component_name, + name=field_name, + type=resolved_type, + required=bool(meta.get("is_mandatory", False)), + ) + + return resolutions + + +def _resolve_declared_outputs( + declared_map: dict[str, Any], + pipeline_meta: dict[str, dict[str, Any]], +) -> dict[str, OutputResolution]: + """ + Resolve declared output entries using the pipeline metadata. Args: declared_map: Mapping from declared IO name to path (string or list). - pipeline_meta: Pipeline metadata as returned by Haystack (inputs/outputs). + pipeline_meta: Pipeline outputs metadata as returned by Haystack. Returns: - A mapping from declared IO name to `InputResolution` or `OutputResolution`. + A mapping from declared IO name to `OutputResolution`. """ - resolutions: dict[str, BaseInputOutputResolution] = {} + resolutions: dict[str, OutputResolution] = {} for io_name, declared_path in declared_map.items(): normalized_path = _normalize_declared_path(declared_path) if not isinstance(normalized_path, str) or "." not in normalized_path: @@ -65,28 +102,17 @@ def _resolve_declared_io( meta = (pipeline_meta.get(component_name, {}) or {}).get(field_name, {}) or {} resolved_type = meta.get("type") - # inputs metadata expose "is_mandatory"/"default_value" - is_input_like = "is_mandatory" in meta or "default_value" in meta - if is_input_like: - resolutions[io_name] = InputResolution( - path=f"{component_name}.{field_name}", - component=component_name, - name=field_name, - type=resolved_type, - required=bool(meta.get("is_mandatory", False)), - ) - else: - resolutions[io_name] = OutputResolution( - path=f"{component_name}.{field_name}", - component=component_name, - name=field_name, - type=resolved_type, - ) + resolutions[io_name] = OutputResolution( + path=f"{component_name}.{field_name}", + component=component_name, + name=field_name, + type=resolved_type, + ) return resolutions -def get_inputs_outputs_from_yaml(yaml_source_code: str) -> dict[str, dict[str, BaseInputOutputResolution]]: +def get_inputs_outputs_from_yaml(yaml_source_code: str) -> ResolvedIO: """ Resolve inputs and outputs from a Haystack pipeline YAML. 
@@ -119,7 +145,7 @@ def get_inputs_outputs_from_yaml(yaml_source_code: str) -> dict[str, dict[str, B pipeline_inputs = pipeline.inputs() pipeline_outputs = pipeline.outputs() - input_resolutions = _resolve_declared_io(declared_inputs, pipeline_inputs) - output_resolutions = _resolve_declared_io(declared_outputs, pipeline_outputs) + input_resolutions = _resolve_declared_inputs(declared_inputs, pipeline_inputs) + output_resolutions = _resolve_declared_outputs(declared_outputs, pipeline_outputs) return {"inputs": input_resolutions, "outputs": output_resolutions} From d55195fb7340804abe896b20493d1ed8962018b7 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Wed, 10 Sep 2025 21:53:03 +0200 Subject: [PATCH 04/42] Add deploy_pipeline_yaml ; Add route for YAML pipeline ; Add /deploy-yaml ; refactoring --- src/hayhooks/server/routers/deploy.py | 60 +++++++ src/hayhooks/server/utils/deploy_utils.py | 147 +++++++++++++++++- src/hayhooks/server/utils/mcp_utils.py | 6 +- tests/test_deploy_utils.py | 4 +- .../test_files/yaml/sample_calc_pipeline.yml | 19 +++ tests/test_files/yaml/sample_pipeline.yml | 19 +++ tests/test_it_deploy_yaml_route.py | 48 ++++++ tests/test_it_mcp_server.py | 6 +- tests/test_mcp.py | 8 +- 9 files changed, 302 insertions(+), 15 deletions(-) create mode 100644 tests/test_files/yaml/sample_calc_pipeline.yml create mode 100644 tests/test_files/yaml/sample_pipeline.yml create mode 100644 tests/test_it_deploy_yaml_route.py diff --git a/src/hayhooks/server/routers/deploy.py b/src/hayhooks/server/routers/deploy.py index f22bb61..252811b 100644 --- a/src/hayhooks/server/routers/deploy.py +++ b/src/hayhooks/server/routers/deploy.py @@ -9,6 +9,7 @@ PipelineWrapperError, deploy_pipeline_def, deploy_pipeline_files, + deploy_pipeline_yaml, ) router = APIRouter() @@ -86,6 +87,65 @@ async def deploy(pipeline_def: PipelineDefinition, request: Request) -> DeployRe return DeployResponse(name=result["name"], success=True, endpoint=f"/{result['name']}/run") +class YamlDeployRequest(BaseModel): + name: str = Field(description="Name of the pipeline to deploy") + source_code: str = Field(description="YAML pipeline definition source code") + overwrite: bool = Field(default=False, description="Whether to overwrite an existing pipeline with the same name") + description: str | None = Field(default=None, description="Optional description for the pipeline") + skip_mcp: bool | None = Field(default=None, description="Whether to skip MCP integration for this pipeline") + save_file: bool | None = Field(default=True, description="Whether to save YAML under pipelines/{name}.yml") + + model_config = { + "json_schema_extra": { + "description": "Request model for deploying a YAML pipeline", + "examples": [ + { + "name": "inputs_outputs_pipeline", + "source_code": "{yaml source}", + "overwrite": False, + "description": "My pipeline", + "skip_mcp": False, + } + ], + } + } + + +@router.post( + "/deploy-yaml", + tags=["config"], + operation_id="yaml_pipeline_deploy", + response_model=DeployResponse, + summary="Deploy a pipeline from a YAML definition (preferred)", + description=( + "Deploys a Haystack pipeline from a YAML string. Builds request/response schemas from declared " + "inputs/outputs. Returns 409 if the pipeline already exists and overwrite is false." 
+ ), +) +async def deploy_yaml(yaml_request: YamlDeployRequest, request: Request) -> DeployResponse: + try: + result = deploy_pipeline_yaml( + app=request.app, + pipeline_name=yaml_request.name, + source_code=yaml_request.source_code, + overwrite=yaml_request.overwrite, + options={ + "description": yaml_request.description, + "skip_mcp": yaml_request.skip_mcp, + "save_file": yaml_request.save_file, + }, + ) + return DeployResponse(name=result["name"], success=True, endpoint=f"/{result['name']}/run") + except PipelineModuleLoadError as e: + raise HTTPException(status_code=422, detail=str(e)) from e + except PipelineWrapperError as e: + raise HTTPException(status_code=422, detail=str(e)) from e + except PipelineAlreadyExistsError as e: + raise HTTPException(status_code=409, detail=str(e)) from e + except Exception as e: + raise HTTPException(status_code=500, detail=f"Unexpected error deploying YAML pipeline: {e!s}") from e + + @router.post( "/deploy_files", tags=["config"], diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index 5fcbbe0..52c2ab9 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -127,6 +127,33 @@ def save_pipeline_files(pipeline_name: str, files: dict[str, str], pipelines_dir raise PipelineFilesError(msg) from e +def save_yaml_pipeline_file(pipeline_name: str, source_code: str, pipelines_dir: str) -> str: + """ + Save a single YAML pipeline file in the pipelines directory as {name}.yml. + + Args: + pipeline_name: Name of the pipeline + source_code: YAML content + pipelines_dir: Path to the pipelines directory + + Returns: + The saved file path as string + + Raises: + PipelineFilesError: If there are any issues saving the file + """ + try: + pipelines_dir_path = Path(pipelines_dir) + pipelines_dir_path.mkdir(parents=True, exist_ok=True) + file_path = pipelines_dir_path / f"{pipeline_name}.yml" + log.debug(f"Saving YAML pipeline file: {file_path}") + file_path.write_text(source_code) + return str(file_path) + except Exception as e: + msg = f"Failed to save YAML pipeline file: {e!s}" + raise PipelineFilesError(msg) from e + + def remove_pipeline_files(pipeline_name: str, pipelines_dir: str) -> None: """ Remove pipeline files from disk. @@ -308,7 +335,7 @@ async def run_endpoint_without_files(run_req: request_model) -> response_model: return run_endpoint_with_files if requires_files else run_endpoint_without_files -def add_pipeline_api_route(app: FastAPI, pipeline_name: str, pipeline_wrapper: BasePipelineWrapper) -> None: +def add_pipeline_wrapper_api_route(app: FastAPI, pipeline_name: str, pipeline_wrapper: BasePipelineWrapper) -> None: clog = log.bind(pipeline_name=pipeline_name) # Determine which run_api method to use (prefer async if available) @@ -373,6 +400,75 @@ def add_pipeline_api_route(app: FastAPI, pipeline_name: str, pipeline_wrapper: B app.setup() +def add_pipeline_yaml_api_route(app: FastAPI, pipeline_name: str, source_code: str) -> None: + """ + Create or replace the YAML pipeline run endpoint at /{pipeline_name}/run. + + Builds the flat request/response models from declared YAML inputs/outputs and wires a handler that + maps the flat body into the nested structure required by Haystack Pipeline.run. 
+ """ + pipeline = registry.get(pipeline_name) + if pipeline is None: + msg = f"Pipeline '{pipeline_name}' not found after registration" + raise HTTPException(status_code=500, detail=msg) + + # Compute IO resolution to map flat request fields to pipeline.run nested inputs + resolved_io = get_inputs_outputs_from_yaml(source_code) + declared_inputs = resolved_io["inputs"] + declared_outputs = resolved_io["outputs"] + + metadata = registry.get_metadata(pipeline_name) or {} + PipelineRunRequest = metadata.get("request_model") # type: ignore[assignment] + PipelineRunResponse = metadata.get("response_model") # type: ignore[assignment] + + if PipelineRunRequest is None or PipelineRunResponse is None: + msg = f"Missing request/response models for YAML pipeline '{pipeline_name}'" + raise HTTPException(status_code=500, detail=msg) + + @handle_pipeline_exceptions() + async def pipeline_run(run_req: PipelineRunRequest) -> PipelineRunResponse: # type: ignore[valid-type] + # Map flat declared inputs to the nested structure expected by Haystack Pipeline.run + payload = {} + req_dict = run_req.model_dump() + for input_name, resolution in declared_inputs.items(): + value = req_dict.get(input_name) + if value is None: + continue + component_inputs = payload.setdefault(resolution.component, {}) + component_inputs[resolution.name] = value + + # Execute the pipeline + result = await run_in_threadpool(pipeline.run, data=payload) # type: ignore[attr-defined] + + # Map pipeline outputs back to declared outputs (flat) + final_output: dict[str, Any] = {} + for output_name, resolution in declared_outputs.items(): + component_result = (result or {}).get(resolution.component, {}) + raw_value = component_result.get(resolution.name) + final_output[output_name] = convert_component_output(raw_value) + + return PipelineRunResponse(**final_output) # type: ignore[call-arg] + + # Clear existing YAML run route if it exists (old or new path) + for route in list(app.routes): + if isinstance(route, APIRoute) and route.path in (f"/{pipeline_name}", f"/{pipeline_name}/run"): + app.routes.remove(route) + + # Register the run endpoint at /{pipeline_name}/run + app.add_api_route( + path=f"/{pipeline_name}/run", + endpoint=pipeline_run, + methods=["POST"], + name=f"{pipeline_name}_run", + response_model=PipelineRunResponse, + tags=["pipelines"], + ) + + # Invalidate OpenAPI cache + app.openapi_schema = None + app.setup() + + def deploy_pipeline_files( pipeline_name: str, files: dict[str, str], @@ -392,10 +488,53 @@ def deploy_pipeline_files( save_files: Whether to save the pipeline files to disk overwrite: Whether to overwrite an existing pipeline """ - pipeline_wrapper = add_pipeline_to_registry(pipeline_name, files, save_files, overwrite) + pipeline_wrapper = add_pipeline_wrapper_to_registry(pipeline_name, files, save_files, overwrite) if app: - add_pipeline_api_route(app, pipeline_name, pipeline_wrapper) + add_pipeline_wrapper_api_route(app, pipeline_name, pipeline_wrapper) + + return {"name": pipeline_name} + + +def deploy_pipeline_yaml( + app: FastAPI, + pipeline_name: str, + source_code: str, + overwrite: bool = False, + options: Optional[dict[str, Any]] = None, +) -> dict[str, str]: + """ + Deploy a YAML pipeline to the FastAPI application with IO declared in the YAML. + + This will add the pipeline to the registry, create flat request/response models based on + declared inputs/outputs, and set up the API route at /{pipeline_name}/run. 
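+
+    The run endpoint accepts a flat request body (one field per declared input) and returns a
+    flat response (one field per declared output). When ``options["save_file"]`` is true (the
+    default), the YAML source is also saved under pipelines/{name}.yml.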
+ + Args: + app: FastAPI application instance + pipeline_name: Name of the pipeline + source_code: YAML pipeline source code + overwrite: Whether to overwrite an existing pipeline + options: Optional dict with additional deployment options. Supported keys: + - description: Optional[str] + - skip_mcp: Optional[bool] + """ + + # Optionally save YAML to disk as pipelines/{name}.yml (default True) + save_file: bool = True if options is None else bool(options.get("save_file", True)) + if save_file: + save_yaml_pipeline_file(pipeline_name, source_code, settings.pipelines_dir) + + # Add pipeline to the registry and build metadata (request/response models) + add_yaml_pipeline_to_registry( + pipeline_name=pipeline_name, + source_code=source_code, + overwrite=overwrite, + description=(options or {}).get("description"), + skip_mcp=(options or {}).get("skip_mcp"), + ) + + # Add the YAML pipeline API route at /{pipeline_name}/run + add_pipeline_yaml_api_route(app, pipeline_name, source_code) return {"name": pipeline_name} @@ -463,7 +602,7 @@ def add_yaml_pipeline_to_registry( registry.add(pipeline_name, pipeline, metadata=metadata) -def add_pipeline_to_registry( +def add_pipeline_wrapper_to_registry( pipeline_name: str, files: dict[str, str], save_files: bool = True, overwrite: bool = False ) -> BasePipelineWrapper: """ diff --git a/src/hayhooks/server/utils/mcp_utils.py b/src/hayhooks/server/utils/mcp_utils.py index 363e55f..928e25a 100644 --- a/src/hayhooks/server/utils/mcp_utils.py +++ b/src/hayhooks/server/utils/mcp_utils.py @@ -20,7 +20,7 @@ from hayhooks.server.routers.deploy import PipelineFilesRequest from hayhooks.server.utils.base_pipeline_wrapper import BasePipelineWrapper from hayhooks.server.utils.deploy_utils import ( - add_pipeline_to_registry, + add_pipeline_wrapper_to_registry, deploy_pipeline_files, read_pipeline_files_from_dir, undeploy_pipeline, @@ -63,7 +63,9 @@ def deploy_pipelines() -> None: log.debug(f"Deploying pipeline from {pipeline_dir}") try: - add_pipeline_to_registry(pipeline_name=pipeline_dir.name, files=read_pipeline_files_from_dir(pipeline_dir)) + add_pipeline_wrapper_to_registry( + pipeline_name=pipeline_dir.name, files=read_pipeline_files_from_dir(pipeline_dir) + ) except Exception as e: log.warning(f"Skipping pipeline directory {pipeline_dir}: {e!s}") continue diff --git a/tests/test_deploy_utils.py b/tests/test_deploy_utils.py index 670debe..4dc8b06 100644 --- a/tests/test_deploy_utils.py +++ b/tests/test_deploy_utils.py @@ -12,7 +12,7 @@ from hayhooks.server.pipelines import registry from hayhooks.server.utils.base_pipeline_wrapper import BasePipelineWrapper from hayhooks.server.utils.deploy_utils import ( - add_pipeline_to_registry, + add_pipeline_wrapper_to_registry, create_pipeline_wrapper_instance, create_request_model_from_callable, create_response_model_from_callable, @@ -393,7 +393,7 @@ def test_add_pipeline_to_registry_with_async_run_api(): "question_answer.yml": pipeline_yml_path.read_text(), } - pipeline_wrapper = add_pipeline_to_registry(pipeline_name=pipeline_name, files=files, save_files=False) + pipeline_wrapper = add_pipeline_wrapper_to_registry(pipeline_name=pipeline_name, files=files, save_files=False) assert registry.get(pipeline_name) == pipeline_wrapper metadata = registry.get_metadata(pipeline_name) diff --git a/tests/test_files/yaml/sample_calc_pipeline.yml b/tests/test_files/yaml/sample_calc_pipeline.yml new file mode 100644 index 0000000..6d26bf3 --- /dev/null +++ b/tests/test_files/yaml/sample_calc_pipeline.yml @@ -0,0 +1,19 @@ 
+components: + first_addition: + init_parameters: + add: 2 + type: haystack.testing.sample_components.add_value.AddFixedValue + double: + init_parameters: {} + type: haystack.testing.sample_components.double.Double +connections: +- receiver: double.value + sender: first_addition.result + +metadata: {} + +inputs: + value: first_addition.value + +outputs: + result: double.value diff --git a/tests/test_files/yaml/sample_pipeline.yml b/tests/test_files/yaml/sample_pipeline.yml new file mode 100644 index 0000000..6d26bf3 --- /dev/null +++ b/tests/test_files/yaml/sample_pipeline.yml @@ -0,0 +1,19 @@ +components: + first_addition: + init_parameters: + add: 2 + type: haystack.testing.sample_components.add_value.AddFixedValue + double: + init_parameters: {} + type: haystack.testing.sample_components.double.Double +connections: +- receiver: double.value + sender: first_addition.result + +metadata: {} + +inputs: + value: first_addition.value + +outputs: + result: double.value diff --git a/tests/test_it_deploy_yaml_route.py b/tests/test_it_deploy_yaml_route.py new file mode 100644 index 0000000..b76bc35 --- /dev/null +++ b/tests/test_it_deploy_yaml_route.py @@ -0,0 +1,48 @@ +from pathlib import Path + +from hayhooks.server.routers.deploy import DeployResponse + +SAMPLE_CALC_PIPELINE_PATH = Path(__file__).parent / "test_files" / "yaml" / "sample_calc_pipeline.yml" + + +def test_deploy_yaml_route_and_run_ok(client): + yaml_source = SAMPLE_CALC_PIPELINE_PATH.read_text().strip() + + # Deploy via the new route + response = client.post("/deploy-yaml", json={"name": "calc", "source_code": yaml_source, "overwrite": True}) + assert response.status_code == 200 + assert response.json() == DeployResponse(name="calc", success=True, endpoint="/calc/run").model_dump() + + # /status should include the pipeline + status_response = client.get("/status/calc") + assert status_response.status_code == 200 + assert status_response.json()["pipeline"] == "calc" + + # OpenAPI docs should render + docs_response = client.get("/docs") + assert docs_response.status_code == 200 + + # Run the flat endpoint using declared inputs + run_response = client.post("/calc/run", json={"value": 3}) + assert run_response.status_code == 200 + + # (3 + 2) * 2 = 10 + assert run_response.json() == {"result": 10} + + +def test_deploy_yaml_saves_file(client, test_settings): + yaml_source = SAMPLE_CALC_PIPELINE_PATH.read_text().strip() + response = client.post( + "/deploy-yaml", + json={ + "name": "save_me", + "source_code": yaml_source, + "overwrite": True, + "save_file": True, + }, + ) + assert response.status_code == 200 + + file_path = Path(test_settings.pipelines_dir) / "save_me.yml" + assert file_path.exists() + assert file_path.read_text() == yaml_source diff --git a/tests/test_it_mcp_server.py b/tests/test_it_mcp_server.py index f567137..fb35c22 100644 --- a/tests/test_it_mcp_server.py +++ b/tests/test_it_mcp_server.py @@ -5,7 +5,7 @@ import pytest from hayhooks.server.pipelines import registry -from hayhooks.server.utils.deploy_utils import add_pipeline_to_registry +from hayhooks.server.utils.deploy_utils import add_pipeline_wrapper_to_registry from hayhooks.server.utils.mcp_utils import CoreTools, create_mcp_server MCP_AVAILABLE = importlib.util.find_spec("mcp") is not None @@ -38,7 +38,7 @@ def deploy_chat_with_website_mcp_pipeline(): "pipeline_wrapper.py": pipeline_wrapper_path.read_text(), "chat_with_website.yml": pipeline_yml_path.read_text(), } - add_pipeline_to_registry(pipeline_name=pipeline_name, files=files) + 
add_pipeline_wrapper_to_registry(pipeline_name=pipeline_name, files=files) return pipeline_name @@ -51,7 +51,7 @@ def deploy_async_question_answer_mcp_pipeline(): "pipeline_wrapper.py": pipeline_wrapper_path.read_text(), "question_answer.yml": pipeline_yml_path.read_text(), } - add_pipeline_to_registry(pipeline_name=pipeline_name, files=files) + add_pipeline_wrapper_to_registry(pipeline_name=pipeline_name, files=files) return pipeline_name diff --git a/tests/test_mcp.py b/tests/test_mcp.py index 5f1608c..048e6f1 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -5,7 +5,7 @@ from hayhooks.server.pipelines import registry from hayhooks.server.routers.deploy import PipelineFilesRequest -from hayhooks.server.utils.deploy_utils import add_pipeline_to_registry +from hayhooks.server.utils.deploy_utils import add_pipeline_wrapper_to_registry from hayhooks.server.utils.mcp_utils import ( PIPELINE_NAME_SCHEMA, CoreTools, @@ -37,7 +37,7 @@ def deploy_chat_with_website_mcp(): "pipeline_wrapper.py": pipeline_wrapper_path.read_text(), "chat_with_website.yml": pipeline_yml_path.read_text(), } - add_pipeline_to_registry(pipeline_name="chat_with_website", files=files) + add_pipeline_wrapper_to_registry(pipeline_name="chat_with_website", files=files) @pytest.fixture @@ -46,7 +46,7 @@ def deploy_chat_with_website_mcp_skip(): files = { "pipeline_wrapper.py": pipeline_wrapper_path.read_text(), } - add_pipeline_to_registry(pipeline_name="chat_with_website_mcp_skip", files=files) + add_pipeline_wrapper_to_registry(pipeline_name="chat_with_website_mcp_skip", files=files) @pytest.mark.asyncio @@ -79,7 +79,7 @@ async def test_list_pipeline_without_description(): "pipeline_wrapper.py": Path("tests/test_files/files/chat_with_website/pipeline_wrapper.py").read_text(), "chat_with_website.yml": Path("tests/test_files/files/chat_with_website/chat_with_website.yml").read_text(), } - add_pipeline_to_registry(pipeline_name="chat_with_website", files=files) + add_pipeline_wrapper_to_registry(pipeline_name="chat_with_website", files=files) tools = await list_pipelines_as_tools() From 28d6ca79254a4f25eb0c0a3cb5309e9c01bc0b63 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Thu, 11 Sep 2025 11:32:30 +0200 Subject: [PATCH 05/42] Fix types --- src/hayhooks/server/utils/deploy_utils.py | 40 ++++++++++++++--------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index 52c2ab9..b7d69ab 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -7,7 +7,7 @@ from functools import wraps from pathlib import Path from types import ModuleType -from typing import Any, Callable, Optional, Union +from typing import Any, Callable, Optional, Union, cast import docstring_parser from docstring_parser.common import Docstring @@ -15,6 +15,7 @@ from fastapi.concurrency import run_in_threadpool from fastapi.responses import JSONResponse from fastapi.routing import APIRoute +from haystack import AsyncPipeline, Pipeline from pydantic import BaseModel, Field, create_model from hayhooks.server.exceptions import ( @@ -407,47 +408,54 @@ def add_pipeline_yaml_api_route(app: FastAPI, pipeline_name: str, source_code: s Builds the flat request/response models from declared YAML inputs/outputs and wires a handler that maps the flat body into the nested structure required by Haystack Pipeline.run. 
""" - pipeline = registry.get(pipeline_name) - if pipeline is None: + pipeline_instance = registry.get(pipeline_name) + if pipeline_instance is None: msg = f"Pipeline '{pipeline_name}' not found after registration" raise HTTPException(status_code=500, detail=msg) + # Ensure the registered object is a Haystack Pipeline, not a wrapper + if not isinstance(pipeline_instance, (Pipeline, AsyncPipeline)): + msg = f"Pipeline '{pipeline_name}' is not a Haystack Pipeline instance" + raise HTTPException(status_code=500, detail=msg) + + pipeline: Union[Pipeline, AsyncPipeline] = pipeline_instance + # Compute IO resolution to map flat request fields to pipeline.run nested inputs resolved_io = get_inputs_outputs_from_yaml(source_code) declared_inputs = resolved_io["inputs"] declared_outputs = resolved_io["outputs"] metadata = registry.get_metadata(pipeline_name) or {} - PipelineRunRequest = metadata.get("request_model") # type: ignore[assignment] - PipelineRunResponse = metadata.get("response_model") # type: ignore[assignment] + PipelineRunRequest = metadata.get("request_model") + PipelineRunResponse = metadata.get("response_model") if PipelineRunRequest is None or PipelineRunResponse is None: msg = f"Missing request/response models for YAML pipeline '{pipeline_name}'" raise HTTPException(status_code=500, detail=msg) @handle_pipeline_exceptions() - async def pipeline_run(run_req: PipelineRunRequest) -> PipelineRunResponse: # type: ignore[valid-type] + async def pipeline_run(run_req: PipelineRunRequest) -> PipelineRunResponse: # type: ignore # Map flat declared inputs to the nested structure expected by Haystack Pipeline.run - payload = {} - req_dict = run_req.model_dump() - for input_name, resolution in declared_inputs.items(): + payload: dict[str, dict[str, Any]] = {} + req_dict = run_req.model_dump() # type: ignore[attr-defined] + for input_name, in_resolution in declared_inputs.items(): value = req_dict.get(input_name) if value is None: continue - component_inputs = payload.setdefault(resolution.component, {}) - component_inputs[resolution.name] = value + component_inputs = payload.setdefault(in_resolution.component, {}) + component_inputs[in_resolution.name] = value # Execute the pipeline - result = await run_in_threadpool(pipeline.run, data=payload) # type: ignore[attr-defined] + result = await run_in_threadpool(pipeline.run, data=payload) # Map pipeline outputs back to declared outputs (flat) final_output: dict[str, Any] = {} - for output_name, resolution in declared_outputs.items(): - component_result = (result or {}).get(resolution.component, {}) - raw_value = component_result.get(resolution.name) + for output_name, out_resolution in declared_outputs.items(): + component_result = (result or {}).get(out_resolution.component, {}) + raw_value = component_result.get(out_resolution.name) final_output[output_name] = convert_component_output(raw_value) - return PipelineRunResponse(**final_output) # type: ignore[call-arg] + return PipelineRunResponse(**final_output) # Clear existing YAML run route if it exists (old or new path) for route in list(app.routes): From bf52b0de332d52e8b3aba97b3da52e5b3c01b657 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Thu, 11 Sep 2025 14:56:01 +0200 Subject: [PATCH 06/42] Fix lint --- src/hayhooks/server/utils/deploy_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index b7d69ab..69de641 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ 
b/src/hayhooks/server/utils/deploy_utils.py @@ -7,7 +7,7 @@ from functools import wraps from pathlib import Path from types import ModuleType -from typing import Any, Callable, Optional, Union, cast +from typing import Any, Callable, Optional, Union import docstring_parser from docstring_parser.common import Docstring From 787bd3764f026fc6604800b61e395ed716503301 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Thu, 11 Sep 2025 15:10:32 +0200 Subject: [PATCH 07/42] Fix for python 3.9 --- src/hayhooks/server/routers/deploy.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/hayhooks/server/routers/deploy.py b/src/hayhooks/server/routers/deploy.py index 252811b..1962678 100644 --- a/src/hayhooks/server/routers/deploy.py +++ b/src/hayhooks/server/routers/deploy.py @@ -1,3 +1,5 @@ +from typing import Optional + from fastapi import APIRouter, HTTPException, Request from pydantic import BaseModel, Field, field_validator @@ -91,9 +93,9 @@ class YamlDeployRequest(BaseModel): name: str = Field(description="Name of the pipeline to deploy") source_code: str = Field(description="YAML pipeline definition source code") overwrite: bool = Field(default=False, description="Whether to overwrite an existing pipeline with the same name") - description: str | None = Field(default=None, description="Optional description for the pipeline") - skip_mcp: bool | None = Field(default=None, description="Whether to skip MCP integration for this pipeline") - save_file: bool | None = Field(default=True, description="Whether to save YAML under pipelines/{name}.yml") + description: Optional[str] = Field(default=None, description="Optional description for the pipeline") + skip_mcp: Optional[bool] = Field(default=None, description="Whether to skip MCP integration for this pipeline") + save_file: Optional[bool] = Field(default=True, description="Whether to save YAML under pipelines/{name}.yml") model_config = { "json_schema_extra": { From 28d08404fdf6f6d362676ac97cadd5df4871b44e Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Thu, 11 Sep 2025 15:20:04 +0200 Subject: [PATCH 08/42] Fix for last ruff version --- tests/test_deploy_utils.py | 13 ++++++++----- tests/test_yaml_inputs_outputs.py | 5 ++++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/test_deploy_utils.py b/tests/test_deploy_utils.py index 4dc8b06..ebd8780 100644 --- a/tests/test_deploy_utils.py +++ b/tests/test_deploy_utils.py @@ -1,4 +1,5 @@ import inspect +import re import shutil from pathlib import Path from typing import Callable @@ -64,7 +65,7 @@ def test_load_pipeline_wrong_dir(): with pytest.raises( PipelineModuleLoadError, - match="Required file 'tests/test_files/files/wrong_dir/pipeline_wrapper.py' not found", + match=re.escape("Required file 'tests/test_files/files/wrong_dir/pipeline_wrapper.py' not found"), ): load_pipeline_module(pipeline_name, pipeline_dir_path) @@ -75,7 +76,7 @@ def test_load_pipeline_no_wrapper(): with pytest.raises( PipelineModuleLoadError, - match="Required file 'tests/test_files/files/no_wrapper/pipeline_wrapper.py' not found", + match=re.escape("Required file 'tests/test_files/files/no_wrapper/pipeline_wrapper.py' not found"), ): load_pipeline_module(pipeline_name, pipeline_dir_path) @@ -277,7 +278,7 @@ def run_api(self): module = type("Module", (), {"PipelineWrapper": BrokenSetupWrapper}) with pytest.raises( - PipelineWrapperError, match="Failed to call setup\\(\\) on pipeline wrapper instance: Setup error" + PipelineWrapperError, match=re.escape("Failed to call 
setup\\(\\) on pipeline wrapper instance: Setup error") ): create_pipeline_wrapper_instance(module) @@ -291,7 +292,7 @@ def setup(self): with pytest.raises( PipelineWrapperError, - match="At least one of run_api, run_api_async, run_chat_completion, or run_chat_completion_async", + match=re.escape("At least one of run_api, run_api_async, run_chat_completion, or run_chat_completion_async"), ): create_pipeline_wrapper_instance(module) @@ -416,5 +417,7 @@ def test_deploy_pipeline_files_without_return_type(test_settings, mocker): test_file_path = Path("tests/test_files/files/no_return_type/pipeline_wrapper.py") files = {"pipeline_wrapper.py": test_file_path.read_text()} - with pytest.raises(PipelineWrapperError, match="Pipeline wrapper is missing a return type for 'run_api' method"): + with pytest.raises( + PipelineWrapperError, match=re.escape("Pipeline wrapper is missing a return type for 'run_api' method") + ): deploy_pipeline_files(app=mock_app, pipeline_name="test_pipeline_no_return_type", files=files, save_files=False) diff --git a/tests/test_yaml_inputs_outputs.py b/tests/test_yaml_inputs_outputs.py index 1d7ccb0..be5909d 100644 --- a/tests/test_yaml_inputs_outputs.py +++ b/tests/test_yaml_inputs_outputs.py @@ -1,3 +1,4 @@ +import re from pathlib import Path from typing import Any @@ -40,5 +41,7 @@ def test_get_inputs_outputs_from_yaml_raises_when_missing_inputs_outputs(): yaml_path = Path(__file__).parent / "test_files" / "mixed" / "chat_with_website" / "chat_with_website.yml" yaml_source = yaml_path.read_text() - with pytest.raises(ValueError, match="YAML pipeline must declare at least one of 'inputs' or 'outputs'."): + with pytest.raises( + ValueError, match=re.escape("YAML pipeline must declare at least one of 'inputs' or 'outputs'.") + ): get_inputs_outputs_from_yaml(yaml_source) From 1cf600f97cc948aca99a80d2d30f8aa183daf19b Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Thu, 11 Sep 2025 15:27:58 +0200 Subject: [PATCH 09/42] Fix tests --- tests/test_deploy_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_deploy_utils.py b/tests/test_deploy_utils.py index ebd8780..62c119e 100644 --- a/tests/test_deploy_utils.py +++ b/tests/test_deploy_utils.py @@ -278,7 +278,7 @@ def run_api(self): module = type("Module", (), {"PipelineWrapper": BrokenSetupWrapper}) with pytest.raises( - PipelineWrapperError, match=re.escape("Failed to call setup\\(\\) on pipeline wrapper instance: Setup error") + PipelineWrapperError, match=re.escape("Failed to call setup() on pipeline wrapper instance: Setup error") ): create_pipeline_wrapper_instance(module) From 5b7fe4b882560ac7bd77b6cd077ab825eb594f06 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Thu, 11 Sep 2025 15:57:41 +0200 Subject: [PATCH 10/42] Add route for YAML if app is present --- src/hayhooks/server/utils/deploy_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index 69de641..e160a79 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -505,9 +505,9 @@ def deploy_pipeline_files( def deploy_pipeline_yaml( - app: FastAPI, pipeline_name: str, source_code: str, + app: Optional[FastAPI] = None, overwrite: bool = False, options: Optional[dict[str, Any]] = None, ) -> dict[str, str]: @@ -518,7 +518,7 @@ def deploy_pipeline_yaml( declared inputs/outputs, and set up the API route at /{pipeline_name}/run. 
Args: - app: FastAPI application instance + app: Optional FastAPI application instance. If provided, the API route will be added. pipeline_name: Name of the pipeline source_code: YAML pipeline source code overwrite: Whether to overwrite an existing pipeline @@ -541,8 +541,8 @@ def deploy_pipeline_yaml( skip_mcp=(options or {}).get("skip_mcp"), ) - # Add the YAML pipeline API route at /{pipeline_name}/run - add_pipeline_yaml_api_route(app, pipeline_name, source_code) + if app: + add_pipeline_yaml_api_route(app, pipeline_name, source_code) return {"name": pipeline_name} From fd9c44db163c56e49438af4c713bc192b27e5094 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Thu, 11 Sep 2025 16:41:47 +0200 Subject: [PATCH 11/42] Introduced InvalidYamlIOError 422 error for YAML deployments with missing inputs/outputs --- src/hayhooks/server/exceptions.py | 6 ++++++ src/hayhooks/server/routers/deploy.py | 4 +++- src/hayhooks/server/utils/yaml_utils.py | 6 ++++-- tests/test_it_deploy_yaml_route.py | 22 ++++++++++++++++++++++ tests/test_yaml_inputs_outputs.py | 3 ++- 5 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/hayhooks/server/exceptions.py b/src/hayhooks/server/exceptions.py index 0018c97..92c5b80 100644 --- a/src/hayhooks/server/exceptions.py +++ b/src/hayhooks/server/exceptions.py @@ -24,3 +24,9 @@ class PipelineNotFoundError(Exception): """Exception for errors when a pipeline is not found.""" pass + + +class InvalidYamlIOError(Exception): + """Exception for invalid or missing YAML inputs/outputs declarations.""" + + pass diff --git a/src/hayhooks/server/routers/deploy.py b/src/hayhooks/server/routers/deploy.py index 1962678..1495584 100644 --- a/src/hayhooks/server/routers/deploy.py +++ b/src/hayhooks/server/routers/deploy.py @@ -3,7 +3,7 @@ from fastapi import APIRouter, HTTPException, Request from pydantic import BaseModel, Field, field_validator -from hayhooks.server.exceptions import PipelineAlreadyExistsError +from hayhooks.server.exceptions import InvalidYamlIOError, PipelineAlreadyExistsError from hayhooks.server.utils.deploy_utils import ( PipelineDefinition, PipelineFilesError, @@ -138,6 +138,8 @@ async def deploy_yaml(yaml_request: YamlDeployRequest, request: Request) -> Depl }, ) return DeployResponse(name=result["name"], success=True, endpoint=f"/{result['name']}/run") + except InvalidYamlIOError as e: + raise HTTPException(status_code=422, detail=str(e)) from e except PipelineModuleLoadError as e: raise HTTPException(status_code=422, detail=str(e)) from e except PipelineWrapperError as e: diff --git a/src/hayhooks/server/utils/yaml_utils.py b/src/hayhooks/server/utils/yaml_utils.py index 04cc743..da656e1 100644 --- a/src/hayhooks/server/utils/yaml_utils.py +++ b/src/hayhooks/server/utils/yaml_utils.py @@ -3,6 +3,8 @@ import yaml from pydantic import BaseModel +from hayhooks.server.exceptions import InvalidYamlIOError + class BaseInputOutputResolution(BaseModel): path: str @@ -129,7 +131,7 @@ def get_inputs_outputs_from_yaml(yaml_source_code: str) -> ResolvedIO: `OutputResolution` for outputs). Raises: - ValueError: If both inputs and outputs are missing from the YAML definition. + InvalidYamlIOError: If both inputs and outputs are missing from the YAML definition. 
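+
+    Example:
+        For a YAML declaring ``inputs: {query: prompt.query}``, the result contains
+        ``resolved["inputs"]["query"]`` with ``path == "prompt.query"``,
+        ``component == "prompt"`` and ``name == "query"``.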
""" yaml_dict = yaml.safe_load(yaml_source_code) or {} declared_inputs = yaml_dict.get("inputs", {}) or {} @@ -137,7 +139,7 @@ def get_inputs_outputs_from_yaml(yaml_source_code: str) -> ResolvedIO: if not declared_inputs and not declared_outputs: msg = "YAML pipeline must declare at least one of 'inputs' or 'outputs'." - raise ValueError(msg) + raise InvalidYamlIOError(msg) from haystack import Pipeline diff --git a/tests/test_it_deploy_yaml_route.py b/tests/test_it_deploy_yaml_route.py index b76bc35..5e0d74a 100644 --- a/tests/test_it_deploy_yaml_route.py +++ b/tests/test_it_deploy_yaml_route.py @@ -46,3 +46,25 @@ def test_deploy_yaml_saves_file(client, test_settings): file_path = Path(test_settings.pipelines_dir) / "save_me.yml" assert file_path.exists() assert file_path.read_text() == yaml_source + + +def test_deploy_yaml_missing_io_returns_422(client): + yaml_source = """ +components: + first_addition: + init_parameters: + add: 2 + type: haystack.testing.sample_components.add_value.AddFixedValue + double: + init_parameters: {} + type: haystack.testing.sample_components.double.Double +connections: +- receiver: double.value + sender: first_addition.result + +metadata: {} +""".strip() + + response = client.post("/deploy-yaml", json={"name": "no_io", "source_code": yaml_source, "overwrite": True}) + assert response.status_code == 422 + assert response.json()["detail"] == "YAML pipeline must declare at least one of 'inputs' or 'outputs'." diff --git a/tests/test_yaml_inputs_outputs.py b/tests/test_yaml_inputs_outputs.py index be5909d..2148e67 100644 --- a/tests/test_yaml_inputs_outputs.py +++ b/tests/test_yaml_inputs_outputs.py @@ -4,6 +4,7 @@ import pytest +from hayhooks.server.exceptions import InvalidYamlIOError from hayhooks.server.utils.yaml_utils import InputResolution, OutputResolution, get_inputs_outputs_from_yaml @@ -42,6 +43,6 @@ def test_get_inputs_outputs_from_yaml_raises_when_missing_inputs_outputs(): yaml_source = yaml_path.read_text() with pytest.raises( - ValueError, match=re.escape("YAML pipeline must declare at least one of 'inputs' or 'outputs'.") + InvalidYamlIOError, match=re.escape("YAML pipeline must declare at least one of 'inputs' or 'outputs'.") ): get_inputs_outputs_from_yaml(yaml_source) From 924bd3a3151c66f113e82275acbbeb3750c72418 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Thu, 11 Sep 2025 17:44:49 +0200 Subject: [PATCH 12/42] Add deploy-yaml CLI command --- src/hayhooks/cli/pipeline.py | 47 ++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/hayhooks/cli/pipeline.py b/src/hayhooks/cli/pipeline.py index 83da7d7..f1dd03d 100644 --- a/src/hayhooks/cli/pipeline.py +++ b/src/hayhooks/cli/pipeline.py @@ -57,6 +57,53 @@ def deploy( _deploy_with_progress(ctx=ctx, name=name, endpoint="deploy", payload=payload) +@pipeline.command(name="deploy-yaml") +def deploy_yaml( # noqa: PLR0913 + ctx: typer.Context, + pipeline_file: Path = typer.Argument( # noqa: B008 + help="The path to the YAML pipeline file to deploy." 
+ ), + name: Annotated[Optional[str], typer.Option("--name", "-n", help="The name of the pipeline to deploy.")] = None, + overwrite: Annotated[ + bool, typer.Option("--overwrite", "-o", help="Whether to overwrite the pipeline if it already exists.") + ] = False, + description: Annotated[ + Optional[str], typer.Option("--description", help="Optional description for the pipeline.") + ] = None, + skip_mcp: Annotated[ + bool, typer.Option("--skip-mcp", help="If set, skip MCP integration for this pipeline.") + ] = False, + save_file: Annotated[ + bool, + typer.Option( + "--save-file/--no-save-file", + help="Whether to save the YAML under pipelines/{name}.yml on the server.", + ), + ] = True, +) -> None: + """Deploy a YAML pipeline using the preferred /deploy-yaml endpoint.""" + if not pipeline_file.exists(): + show_error_and_abort("Pipeline file does not exist.", str(pipeline_file)) + + if name is None: + name = pipeline_file.stem + + payload = { + "name": name, + "source_code": pipeline_file.read_text(), + "overwrite": overwrite, + "save_file": save_file, + } + + if description is not None: + payload["description"] = description + + # Always include skip_mcp flag (defaults to False) + payload["skip_mcp"] = skip_mcp + + _deploy_with_progress(ctx=ctx, name=name, endpoint="deploy-yaml", payload=payload) + + @pipeline.command() def deploy_files( ctx: typer.Context, From eee69a355a50b5fdbf7a192b96c969566ec57bf8 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Thu, 11 Sep 2025 18:02:25 +0200 Subject: [PATCH 13/42] Ensure inputs / outputs YAML pipelines are deployed at startup (so not using old YAML deploy logic) --- src/hayhooks/server/app.py | 10 ++-------- src/hayhooks/server/utils/deploy_utils.py | 1 + 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/hayhooks/server/app.py b/src/hayhooks/server/app.py index 366b3ae..eaca830 100644 --- a/src/hayhooks/server/app.py +++ b/src/hayhooks/server/app.py @@ -11,12 +11,7 @@ from hayhooks.server.logger import log from hayhooks.server.routers import deploy_router, draw_router, openai_router, status_router, undeploy_router -from hayhooks.server.utils.deploy_utils import ( - PipelineDefinition, - deploy_pipeline_def, - deploy_pipeline_files, - read_pipeline_files_from_dir, -) +from hayhooks.server.utils.deploy_utils import deploy_pipeline_files, deploy_pipeline_yaml, read_pipeline_files_from_dir from hayhooks.settings import APP_DESCRIPTION, APP_TITLE, check_cors_settings, settings @@ -35,8 +30,7 @@ def deploy_yaml_pipeline(app: FastAPI, pipeline_file_path: Path) -> dict: with open(pipeline_file_path) as pipeline_file: source_code = pipeline_file.read() - pipeline_definition = PipelineDefinition(name=name, source_code=source_code) - deployed_pipeline = deploy_pipeline_def(app, pipeline_definition) + deployed_pipeline = deploy_pipeline_yaml(pipeline_name=name, source_code=source_code, app=app) log.info(f"Deployed pipeline from yaml: {deployed_pipeline['name']}") return deployed_pipeline diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index e160a79..17eb0a3 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -608,6 +608,7 @@ def add_yaml_pipeline_to_registry( raise ValueError(msg) from e registry.add(pipeline_name, pipeline, metadata=metadata) + log.success(f"YAML pipeline '{pipeline_name}' successfully added to registry") def add_pipeline_wrapper_to_registry( From 01761704f8baa6083f0330aefd88ab5a8f84aabe Mon Sep 17 00:00:00 2001 From: 
Michele Pangrazzi Date: Thu, 11 Sep 2025 21:02:08 +0200 Subject: [PATCH 14/42] Skip tests with old YAML logic --- tests/test_deploy_at_startup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_deploy_at_startup.py b/tests/test_deploy_at_startup.py index 38f0157..f0e94a1 100644 --- a/tests/test_deploy_at_startup.py +++ b/tests/test_deploy_at_startup.py @@ -84,6 +84,7 @@ def test_app_loads_pipeline_from_files_directory(test_client_files, test_files_p assert "chat_with_website" in pipelines +@pytest.mark.skip(reason="To be reviewed when old YAML deployment is removed") def test_app_loads_pipeline_from_yaml_directory(test_client_yaml, test_yaml_pipelines_dir): response = test_client_yaml.get("/status") assert response.status_code == 200 @@ -92,6 +93,7 @@ def test_app_loads_pipeline_from_yaml_directory(test_client_yaml, test_yaml_pipe assert len(pipelines) == len(list(test_yaml_pipelines_dir.rglob("*"))) +@pytest.mark.skip(reason="To be reviewed when old YAML deployment is removed") def test_app_loads_pipeline_from_mixed_directory(test_client_mixed, test_mixed_pipelines_dir): response = test_client_mixed.get("/status") assert response.status_code == 200 From 9aadb12791a0faccce57cf024f1c4519debcaac2 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Fri, 12 Sep 2025 10:45:38 +0200 Subject: [PATCH 15/42] Cleanup of old YAML handling logic to avoid confusion --- src/hayhooks/server/exceptions.py | 6 + src/hayhooks/server/pipelines/models.py | 141 +++++---------- src/hayhooks/server/routers/deploy.py | 27 +-- .../server/utils/create_valid_type.py | 49 ----- src/hayhooks/server/utils/deploy_utils.py | 154 ++-------------- tests/conftest.py | 8 +- tests/test_convert_component_output.py | 44 ----- tests/test_deploy_at_startup.py | 2 +- tests/test_deploy_yaml.py | 11 +- .../yaml/{ => broken}/broken_rag_pipeline.yml | 6 + tests/test_files/yaml/sample_pipeline.yml | 19 -- .../working_pipelines/basic_rag_pipeline.yml | 72 -------- .../working_pipelines/chat_with_website.yml | 51 ------ .../working_pipelines/minimal_retriever.yml | 79 -------- .../working_pipelines/pipeline_qdrant.yml | 169 ------------------ .../working_pipelines/pipeline_qdrant_2.yml | 46 ----- .../yaml/working_pipelines/st_retriever.yml | 46 ----- .../working_pipelines/test_pipeline_01.yml | 13 -- .../working_pipelines/test_pipeline_02.yml | 14 -- tests/test_handle_callable_type.py | 53 ------ tests/test_handle_unsupported_types.py | 42 ----- tests/test_it_deploy.py | 16 +- tests/test_it_deploy_yaml_route.py | 6 +- tests/test_it_handling_deploy_exceptions.py | 10 +- tests/test_it_status.py | 6 +- tests/test_registry.py | 2 +- tests/test_undeploy.py | 23 +-- 27 files changed, 116 insertions(+), 999 deletions(-) delete mode 100644 src/hayhooks/server/utils/create_valid_type.py delete mode 100644 tests/test_convert_component_output.py rename tests/test_files/yaml/{ => broken}/broken_rag_pipeline.yml (91%) delete mode 100644 tests/test_files/yaml/sample_pipeline.yml delete mode 100644 tests/test_files/yaml/working_pipelines/basic_rag_pipeline.yml delete mode 100644 tests/test_files/yaml/working_pipelines/chat_with_website.yml delete mode 100644 tests/test_files/yaml/working_pipelines/minimal_retriever.yml delete mode 100644 tests/test_files/yaml/working_pipelines/pipeline_qdrant.yml delete mode 100644 tests/test_files/yaml/working_pipelines/pipeline_qdrant_2.yml delete mode 100644 tests/test_files/yaml/working_pipelines/st_retriever.yml delete mode 100644 tests/test_files/yaml/working_pipelines/test_pipeline_01.yml 
delete mode 100644 tests/test_files/yaml/working_pipelines/test_pipeline_02.yml delete mode 100644 tests/test_handle_callable_type.py delete mode 100644 tests/test_handle_unsupported_types.py diff --git a/src/hayhooks/server/exceptions.py b/src/hayhooks/server/exceptions.py index 92c5b80..2de5f39 100644 --- a/src/hayhooks/server/exceptions.py +++ b/src/hayhooks/server/exceptions.py @@ -10,6 +10,12 @@ class PipelineWrapperError(Exception): pass +class PipelineYamlError(Exception): + """Exception for errors loading pipeline YAML.""" + + pass + + class PipelineModuleLoadError(Exception): """Exception for errors loading pipeline module.""" diff --git a/src/hayhooks/server/pipelines/models.py b/src/hayhooks/server/pipelines/models.py index 488bbe7..9c05cdf 100644 --- a/src/hayhooks/server/pipelines/models.py +++ b/src/hayhooks/server/pipelines/models.py @@ -1,82 +1,13 @@ -from typing import Any, Union +import inspect +from typing import Any, Callable -from haystack import Document -from pydantic import BaseModel, ConfigDict, create_model +from docstring_parser.common import Docstring +from pydantic import BaseModel, Field, create_model -from hayhooks.server.utils.create_valid_type import handle_unsupported_types +from hayhooks.server.exceptions import PipelineWrapperError from hayhooks.server.utils.yaml_utils import InputResolution, OutputResolution -class PipelineDefinition(BaseModel): - name: str - source_code: str - - -DEFAULT_TYPES_MAPPING = { - Document: dict, -} - - -def get_request_model(pipeline_name: str, pipeline_inputs: dict[str, dict[str, Any]]) -> type[BaseModel]: - """ - Inputs have the form below. - - { - 'first_addition': { <-- Component Name - 'value': {'type': , 'is_mandatory': True}, <-- Input - 'add': {'type': typing.Optional[int], 'is_mandatory': False, 'default_value': None}, <-- Input - }, - 'second_addition': {'add': {'type': typing.Optional[int], 'is_mandatory': False}}, - } - """ - request_model: dict[str, Any] = {} - config = ConfigDict(arbitrary_types_allowed=True) - - for component_name, inputs in pipeline_inputs.items(): - component_model: dict[str, Any] = {} - for name, typedef in inputs.items(): - if isinstance(typedef, dict) and "type" in typedef: - try: - input_type = handle_unsupported_types(type_=typedef["type"], types_mapping=DEFAULT_TYPES_MAPPING) - except TypeError as e: - raise e - - if input_type is not None: - component_model[name] = ( - input_type, - typedef.get("default_value", ...), - ) - request_model[component_name] = (create_model("ComponentParams", **component_model, __config__=config), ...) - - return create_model(f"{pipeline_name.capitalize()}RunRequest", **request_model, __config__=config) - - -def get_response_model(pipeline_name: str, pipeline_outputs: dict[str, dict[str, Any]]) -> type[BaseModel]: - """ - Outputs have the form below. - - { - 'second_addition': { <-- Component Name - 'result': {'type': ""} <-- Output - }, - } - """ - response_model: dict[str, Any] = {} - config = ConfigDict(arbitrary_types_allowed=True) - for component_name, outputs in pipeline_outputs.items(): - component_model: dict[str, Any] = {} - for name, typedef in outputs.items(): - if isinstance(typedef, dict) and "type" in typedef: - output_type = typedef["type"] - component_model[name] = ( - handle_unsupported_types(type_=output_type, types_mapping=DEFAULT_TYPES_MAPPING), - ..., - ) - response_model[component_name] = (create_model("ComponentParams", **component_model, __config__=config), ...) 
- - return create_model(f"{pipeline_name.capitalize()}RunResponse", **response_model, __config__=config) - - def get_request_model_from_resolved_io( pipeline_name: str, declared_inputs: dict[str, InputResolution] ) -> type[BaseModel]: @@ -119,38 +50,54 @@ def get_response_model_from_resolved_io( output_type = resolution.type fields[output_name] = (output_type, ...) - return create_model(f"{pipeline_name.capitalize()}RunResponse", **fields) + return create_model( + f"{pipeline_name.capitalize()}RunResponse", result=(dict, Field(..., description="Pipeline result")) + ) -def convert_value_to_dict(value: Any) -> Union[str, int, float, bool, None, dict[str, Any], list[Any]]: - """Convert a single value to a dictionary if possible""" - if hasattr(value, "to_dict"): - if "init_parameters" in value.to_dict(): - return value.to_dict()["init_parameters"] - return value.to_dict() - elif hasattr(value, "model_dump"): - return value.model_dump() - elif isinstance(value, dict): - return {k: convert_value_to_dict(v) for k, v in value.items()} - elif isinstance(value, list): - return [convert_value_to_dict(item) for item in value] - else: - return value +def create_request_model_from_callable(func: Callable, model_name: str, docstring: Docstring) -> type[BaseModel]: + """ + Create a dynamic Pydantic model based on callable's signature. + Args: + func: The callable (function/method) to analyze + model_name: Name for the generated model -def convert_component_output(component_output: Any) -> Union[str, int, float, bool, None, dict[str, Any], list[Any]]: + Returns: + Pydantic model class for request """ - Converts component outputs to dictionaries that can be validated against response model. - Handles nested structures recursively. + params = inspect.signature(func).parameters + param_docs = {p.arg_name: p.description for p in docstring.params} + + fields: dict[str, Any] = {} + for name, param in params.items(): + default_value = ... if param.default == param.empty else param.default + description = param_docs.get(name) or f"Parameter '{name}'" + field_info = Field(default=default_value, description=description) + fields[name] = (param.annotation, field_info) + + return create_model(f"{model_name}Request", **fields) + + +def create_response_model_from_callable(func: Callable, model_name: str, docstring: Docstring) -> type[BaseModel]: + """ + Create a dynamic Pydantic model based on callable's return type. 
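As a usage sketch for the relocated `create_request_model_from_callable` helper above (illustrative only, not part of the patch — the `run_api` function and its docstring are assumptions): pairing a typed callable with its parsed docstring yields a Pydantic request model whose fields mirror the signature.

```python
import docstring_parser

from hayhooks.server.pipelines.models import create_request_model_from_callable


def run_api(urls: list[str], question: str = "What is this page about?") -> str:
    """Answer a question about the given URLs.

    :param urls: Web pages to fetch and read.
    :param question: Question to answer about their content.
    :return: The generated answer.
    """
    return ""


docstring = docstring_parser.parse(run_api.__doc__)
RequestModel = create_request_model_from_callable(run_api, "ChatWithWebsite", docstring)

# A model named "ChatWithWebsiteRequest": `urls` is required, `question`
# keeps its default, and both carry the descriptions parsed from the docstring.
print(RequestModel.model_json_schema())
```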
Args: - component_output: Dict with component outputs + func: The callable (function/method) to analyze + model_name: Name for the generated model Returns: - Dict with all nested objects converted to dictionaries + Pydantic model class for response """ - if isinstance(component_output, dict): - return {name: convert_value_to_dict(data) for name, data in component_output.items()} - return convert_value_to_dict(component_output) + return_type = inspect.signature(func).return_annotation + + if return_type is inspect.Signature.empty: + msg = f"Pipeline wrapper is missing a return type for '{func.__name__}' method" + raise PipelineWrapperError(msg) + + return_description = docstring.returns.description if docstring.returns else None + + return create_model(f"{model_name}Response", result=(return_type, Field(..., description=return_description))) diff --git a/src/hayhooks/server/routers/deploy.py b/src/hayhooks/server/routers/deploy.py index 1495584..6fe53f4 100644 --- a/src/hayhooks/server/routers/deploy.py +++ b/src/hayhooks/server/routers/deploy.py @@ -3,13 +3,11 @@ from fastapi import APIRouter, HTTPException, Request from pydantic import BaseModel, Field, field_validator -from hayhooks.server.exceptions import InvalidYamlIOError, PipelineAlreadyExistsError +from hayhooks.server.exceptions import InvalidYamlIOError, PipelineAlreadyExistsError, PipelineYamlError from hayhooks.server.utils.deploy_utils import ( - PipelineDefinition, PipelineFilesError, PipelineModuleLoadError, PipelineWrapperError, - deploy_pipeline_def, deploy_pipeline_files, deploy_pipeline_yaml, ) @@ -70,25 +68,6 @@ class DeployResponse(BaseModel): model_config = {"json_schema_extra": {"description": "Response model for pipeline deployment operations"}} -@router.post( - "/deploy", - tags=["config"], - response_model=DeployResponse, - operation_id="legacy_yaml_deploy", - summary="Deploy a pipeline from YAML definition (Not Maintained)", - description=( - "[DEPRECATED] This route is no longer maintained and will be removed in a future version. " - "Please use /deploy_files endpoint instead. " - "Deploys a pipeline from a PipelineDefinition object. " - "Returns 409 if the pipeline already exists and overwrite is false." 
- ), - deprecated=True, -) -async def deploy(pipeline_def: PipelineDefinition, request: Request) -> DeployResponse: - result = deploy_pipeline_def(request.app, pipeline_def) - return DeployResponse(name=result["name"], success=True, endpoint=f"/{result['name']}/run") - - class YamlDeployRequest(BaseModel): name: str = Field(description="Name of the pipeline to deploy") source_code: str = Field(description="YAML pipeline definition source code") @@ -140,9 +119,7 @@ async def deploy_yaml(yaml_request: YamlDeployRequest, request: Request) -> Depl return DeployResponse(name=result["name"], success=True, endpoint=f"/{result['name']}/run") except InvalidYamlIOError as e: raise HTTPException(status_code=422, detail=str(e)) from e - except PipelineModuleLoadError as e: - raise HTTPException(status_code=422, detail=str(e)) from e - except PipelineWrapperError as e: + except PipelineYamlError as e: raise HTTPException(status_code=422, detail=str(e)) from e except PipelineAlreadyExistsError as e: raise HTTPException(status_code=409, detail=str(e)) from e diff --git a/src/hayhooks/server/utils/create_valid_type.py b/src/hayhooks/server/utils/create_valid_type.py deleted file mode 100644 index 5b67938..0000000 --- a/src/hayhooks/server/utils/create_valid_type.py +++ /dev/null @@ -1,49 +0,0 @@ -from collections.abc import Callable as CallableABC -from types import GenericAlias -from typing import Callable, Optional, Union, get_args, get_origin - -from loguru import logger - - -def is_callable_type(t): - """Check if a type is any form of callable""" - if t in (Callable, CallableABC): - return True - - # Check origin type - origin = get_origin(t) - if origin in (Callable, CallableABC): - return True - - # Handle Optional/Union types - if origin in (Union, type(Optional[int])): # type(Optional[int]) handles runtime Optional type - args = get_args(t) - return any(is_callable_type(arg) for arg in args) - - return False - - -def handle_unsupported_types( - type_: type, types_mapping: dict, skip_callables: bool = True -) -> Union[GenericAlias, type, None]: - logger.debug(f"Handling unsupported type: {type_}") - - if skip_callables and is_callable_type(type_): - logger.warning(f"Skipping callable type: {type_}") - return None - - # Handle generic types (like List, Optional, etc.) 
- origin = get_origin(type_) - if origin is not None: - args = get_args(type_) - # Map the inner types using the same mapping - mapped_args = tuple(handle_unsupported_types(arg, types_mapping, skip_callables) or arg for arg in args) - # Reconstruct the generic type with mapped arguments - return origin[mapped_args] - - if type_ in types_mapping: - logger.debug(f"Mapping type: {type_} to {types_mapping[type_]}") - return types_mapping[type_] - - logger.debug(f"Returning original type: {type_}") - return type_ diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index 17eb0a3..c732161 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -10,28 +10,26 @@ from typing import Any, Callable, Optional, Union import docstring_parser -from docstring_parser.common import Docstring from fastapi import FastAPI, Form, HTTPException from fastapi.concurrency import run_in_threadpool -from fastapi.responses import JSONResponse from fastapi.routing import APIRoute from haystack import AsyncPipeline, Pipeline -from pydantic import BaseModel, Field, create_model +from pydantic import BaseModel from hayhooks.server.exceptions import ( PipelineAlreadyExistsError, PipelineFilesError, PipelineModuleLoadError, + PipelineNotFoundError, PipelineWrapperError, + PipelineYamlError, ) from hayhooks.server.logger import log from hayhooks.server.pipelines import registry from hayhooks.server.pipelines.models import ( - PipelineDefinition, - convert_component_output, - get_request_model, + create_request_model_from_callable, + create_response_model_from_callable, get_request_model_from_resolved_io, - get_response_model, get_response_model_from_resolved_io, ) from hayhooks.server.utils.base_pipeline_wrapper import BasePipelineWrapper @@ -39,56 +37,6 @@ from hayhooks.settings import settings -def deploy_pipeline_def(app: FastAPI, pipeline_def: PipelineDefinition) -> dict[str, str]: - """ - Deploy a pipeline definition to the FastAPI application. - - NOTE: This is a legacy method which is used in YAML-only based deployments. - It's not maintained anymore and will be removed in a future version. - - Args: - app: FastAPI application instance - pipeline_def: PipelineDefinition instance - """ - try: - pipe = registry.add(pipeline_def.name, pipeline_def.source_code) - except ValueError as e: - raise HTTPException(status_code=409, detail=f"{e}") from e - except Exception as e: - raise HTTPException(status_code=500, detail=f"{e}") from e - - if isinstance(pipe, BasePipelineWrapper): - msg = "Pipelines of type BasePipelineWrapper are not supported" - raise ValueError(msg) - - PipelineRunRequest = get_request_model(pipeline_def.name, pipe.inputs()) - PipelineRunResponse = get_response_model(pipeline_def.name, pipe.outputs()) - - # There's no way in FastAPI to define the type of the request body other than annotating - # the endpoint handler. We have to ignore the type here to make FastAPI happy while - # silencing static type checkers (that would have good reasons to trigger!). 
- async def pipeline_run(pipeline_run_req: PipelineRunRequest) -> JSONResponse: # type:ignore[valid-type] - result = await run_in_threadpool(pipe.run, data=pipeline_run_req.dict()) # type:ignore[attr-defined] - final_output = {} - for component_name, output in result.items(): - final_output[component_name] = convert_component_output(output) - - return JSONResponse(PipelineRunResponse(**final_output).model_dump(), status_code=200) - - app.add_api_route( - path=f"/{pipeline_def.name}", - endpoint=pipeline_run, - methods=["POST"], - name=pipeline_def.name, - response_model=PipelineRunResponse, - tags=["pipelines"], - ) - app.openapi_schema = None - app.setup() - - return {"name": pipeline_def.name} - - def save_pipeline_files(pipeline_name: str, files: dict[str, str], pipelines_dir: str) -> dict[str, str]: """ Save pipeline files to disk and return their paths. @@ -222,54 +170,6 @@ def load_pipeline_module(pipeline_name: str, dir_path: Union[Path, str]) -> Modu raise PipelineModuleLoadError(error_msg) from e -def create_request_model_from_callable(func: Callable, model_name: str, docstring: Docstring) -> type[BaseModel]: - """ - Create a dynamic Pydantic model based on callable's signature. - - Args: - func: The callable (function/method) to analyze - model_name: Name for the generated model - - Returns: - Pydantic model class for request - """ - - params = inspect.signature(func).parameters - param_docs = {p.arg_name: p.description for p in docstring.params} - - fields: dict[str, Any] = {} - for name, param in params.items(): - default_value = ... if param.default == param.empty else param.default - description = param_docs.get(name) or f"Parameter '{name}'" - field_info = Field(default=default_value, description=description) - fields[name] = (param.annotation, field_info) - - return create_model(f"{model_name}Request", **fields) - - -def create_response_model_from_callable(func: Callable, model_name: str, docstring: Docstring) -> type[BaseModel]: - """ - Create a dynamic Pydantic model based on callable's return type. - - Args: - func: The callable (function/method) to analyze - model_name: Name for the generated model - - Returns: - Pydantic model class for response - """ - - return_type = inspect.signature(func).return_annotation - - if return_type is inspect.Signature.empty: - msg = f"Pipeline wrapper is missing a return type for '{func.__name__}' method" - raise PipelineWrapperError(msg) - - return_description = docstring.returns.description if docstring.returns else None - - return create_model(f"{model_name}Response", result=(return_type, Field(..., description=return_description))) - - def handle_pipeline_exceptions() -> Callable: """Decorator to handle pipeline execution exceptions.""" @@ -401,7 +301,7 @@ def add_pipeline_wrapper_api_route(app: FastAPI, pipeline_name: str, pipeline_wr app.setup() -def add_pipeline_yaml_api_route(app: FastAPI, pipeline_name: str, source_code: str) -> None: +def add_pipeline_yaml_api_route(app: FastAPI, pipeline_name: str) -> None: """ Create or replace the YAML pipeline run endpoint at /{pipeline_name}/run. 
@@ -410,52 +310,28 @@ def add_pipeline_yaml_api_route(app: FastAPI, pipeline_name: str, source_code: s """ pipeline_instance = registry.get(pipeline_name) if pipeline_instance is None: - msg = f"Pipeline '{pipeline_name}' not found after registration" - raise HTTPException(status_code=500, detail=msg) + msg = f"Pipeline '{pipeline_name}' not found" + raise PipelineNotFoundError(msg) # Ensure the registered object is a Haystack Pipeline, not a wrapper if not isinstance(pipeline_instance, (Pipeline, AsyncPipeline)): msg = f"Pipeline '{pipeline_name}' is not a Haystack Pipeline instance" - raise HTTPException(status_code=500, detail=msg) + raise PipelineYamlError(msg) pipeline: Union[Pipeline, AsyncPipeline] = pipeline_instance - - # Compute IO resolution to map flat request fields to pipeline.run nested inputs - resolved_io = get_inputs_outputs_from_yaml(source_code) - declared_inputs = resolved_io["inputs"] - declared_outputs = resolved_io["outputs"] - metadata = registry.get_metadata(pipeline_name) or {} + PipelineRunRequest = metadata.get("request_model") PipelineRunResponse = metadata.get("response_model") if PipelineRunRequest is None or PipelineRunResponse is None: msg = f"Missing request/response models for YAML pipeline '{pipeline_name}'" - raise HTTPException(status_code=500, detail=msg) + raise PipelineYamlError(msg) @handle_pipeline_exceptions() - async def pipeline_run(run_req: PipelineRunRequest) -> PipelineRunResponse: # type: ignore - # Map flat declared inputs to the nested structure expected by Haystack Pipeline.run - payload: dict[str, dict[str, Any]] = {} - req_dict = run_req.model_dump() # type: ignore[attr-defined] - for input_name, in_resolution in declared_inputs.items(): - value = req_dict.get(input_name) - if value is None: - continue - component_inputs = payload.setdefault(in_resolution.component, {}) - component_inputs[in_resolution.name] = value - - # Execute the pipeline - result = await run_in_threadpool(pipeline.run, data=payload) - - # Map pipeline outputs back to declared outputs (flat) - final_output: dict[str, Any] = {} - for output_name, out_resolution in declared_outputs.items(): - component_result = (result or {}).get(out_resolution.component, {}) - raw_value = component_result.get(out_resolution.name) - final_output[output_name] = convert_component_output(raw_value) - - return PipelineRunResponse(**final_output) + async def pipeline_run(run_req: PipelineRunRequest) -> PipelineRunResponse: # type:ignore[valid-type] + result = await run_in_threadpool(pipeline.run, data=run_req.model_dump()) # type: ignore[attr-defined] + return PipelineRunResponse(result=result) # Clear existing YAML run route if it exists (old or new path) for route in list(app.routes): @@ -542,7 +418,7 @@ def deploy_pipeline_yaml( ) if app: - add_pipeline_yaml_api_route(app, pipeline_name, source_code) + add_pipeline_yaml_api_route(app, pipeline_name) return {"name": pipeline_name} diff --git a/tests/conftest.py b/tests/conftest.py index dac047a..319415d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -54,12 +54,12 @@ def cleanup_pipelines(test_settings): @pytest.fixture -def deploy_pipeline(): - def _deploy_pipeline(client: TestClient, pipeline_name: str, pipeline_source_code: str): - deploy_response = client.post("/deploy", json={"name": pipeline_name, "source_code": pipeline_source_code}) +def deploy_yaml_pipeline(): + def _deploy_yaml_pipeline(client: TestClient, pipeline_name: str, pipeline_source_code: str): + deploy_response = client.post("/deploy-yaml", json={"name": 
pipeline_name, "source_code": pipeline_source_code}) return deploy_response - return _deploy_pipeline + return _deploy_yaml_pipeline @pytest.fixture diff --git a/tests/test_convert_component_output.py b/tests/test_convert_component_output.py deleted file mode 100644 index 3edcb28..0000000 --- a/tests/test_convert_component_output.py +++ /dev/null @@ -1,44 +0,0 @@ -from openai.types.completion_usage import CompletionTokensDetails, PromptTokensDetails - -from hayhooks.server.pipelines.models import convert_component_output - - -def test_convert_component_output_with_nested_models(): - sample_response = [ - { - "model": "gpt-4o-mini-2024-07-18", - "index": 0, - "finish_reason": "stop", - "usage": { - "completion_tokens": 52, - "prompt_tokens": 29, - "total_tokens": 81, - "completion_tokens_details": CompletionTokensDetails( - accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0 - ), - "prompt_tokens_details": PromptTokensDetails(audio_tokens=0, cached_tokens=0), - }, - } - ] - - converted_output = convert_component_output(sample_response) - - assert converted_output == [ - { - "model": "gpt-4o-mini-2024-07-18", - "index": 0, - "finish_reason": "stop", - "usage": { - "completion_tokens": 52, - "prompt_tokens": 29, - "total_tokens": 81, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - }, - "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0}, - }, - } - ] diff --git a/tests/test_deploy_at_startup.py b/tests/test_deploy_at_startup.py index f0e94a1..9d87f40 100644 --- a/tests/test_deploy_at_startup.py +++ b/tests/test_deploy_at_startup.py @@ -21,7 +21,7 @@ def test_files_pipelines_dir(): @pytest.fixture def test_yaml_pipelines_dir(): - return Path("tests/test_files/yaml/working_pipelines") + return Path("tests/test_files/yaml") @pytest.fixture diff --git a/tests/test_deploy_yaml.py b/tests/test_deploy_yaml.py index c66c6a0..4d19d06 100644 --- a/tests/test_deploy_yaml.py +++ b/tests/test_deploy_yaml.py @@ -42,13 +42,14 @@ def test_deploy_pipeline_with_inputs_outputs(): assert metadata["response_model"].model_json_schema() == { "properties": { - "replies": { - "type": "array", - "items": {"type": "string"}, - "title": "Replies", + "result": { + "additionalProperties": True, + "description": "Pipeline result", + "type": "object", + "title": "Result", }, }, - "required": ["replies"], + "required": ["result"], "type": "object", "title": "Inputs_outputs_pipelineRunResponse", } diff --git a/tests/test_files/yaml/broken_rag_pipeline.yml b/tests/test_files/yaml/broken/broken_rag_pipeline.yml similarity index 91% rename from tests/test_files/yaml/broken_rag_pipeline.yml rename to tests/test_files/yaml/broken/broken_rag_pipeline.yml index ca55569..c127740 100644 --- a/tests/test_files/yaml/broken_rag_pipeline.yml +++ b/tests/test_files/yaml/broken/broken_rag_pipeline.yml @@ -24,3 +24,9 @@ connections: sender: prompt_builder.prompt max_runs_per_component: 100 metadata: {} + +inputs: + question: llm.prompt + +outputs: + answer: llm.response diff --git a/tests/test_files/yaml/sample_pipeline.yml b/tests/test_files/yaml/sample_pipeline.yml deleted file mode 100644 index 6d26bf3..0000000 --- a/tests/test_files/yaml/sample_pipeline.yml +++ /dev/null @@ -1,19 +0,0 @@ -components: - first_addition: - init_parameters: - add: 2 - type: haystack.testing.sample_components.add_value.AddFixedValue - double: - init_parameters: {} - type: 
haystack.testing.sample_components.double.Double -connections: -- receiver: double.value - sender: first_addition.result - -metadata: {} - -inputs: - value: first_addition.value - -outputs: - result: double.value diff --git a/tests/test_files/yaml/working_pipelines/basic_rag_pipeline.yml b/tests/test_files/yaml/working_pipelines/basic_rag_pipeline.yml deleted file mode 100644 index 7118499..0000000 --- a/tests/test_files/yaml/working_pipelines/basic_rag_pipeline.yml +++ /dev/null @@ -1,72 +0,0 @@ -components: - llm: - init_parameters: - api_base_url: null - api_key: - env_vars: - - OPENAI_API_KEY - strict: true - type: env_var - generation_kwargs: {} - model: gpt-4o-mini - organization: null - streaming_callback: null - system_prompt: null - type: haystack.components.generators.openai.OpenAIGenerator - prompt_builder: - init_parameters: - required_variables: "*" - template: "\nGiven the following information, answer the question.\n\nContext:\n\ - {% for document in documents %}\n {{ document.content }}\n{% endfor %}\n\ - \nQuestion: {{question}}\nAnswer:\n" - variables: null - type: haystack.components.builders.prompt_builder.PromptBuilder - retriever: - init_parameters: - document_store: - init_parameters: - bm25_algorithm: BM25L - bm25_parameters: {} - bm25_tokenization_regex: (?u)\b\w\w+\b - embedding_similarity_function: dot_product - index: d8b1f58f-20e9-4a57-a84d-a44fc651de4e - type: haystack.document_stores.in_memory.document_store.InMemoryDocumentStore - filter_policy: replace - filters: null - return_embedding: false - scale_score: false - top_k: 10 - type: haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever - text_embedder: - init_parameters: - batch_size: 32 - config_kwargs: null - device: - device: mps - type: single - model: sentence-transformers/all-MiniLM-L6-v2 - model_kwargs: null - normalize_embeddings: false - precision: float32 - prefix: '' - progress_bar: true - suffix: '' - token: - env_vars: - - HF_API_TOKEN - - HF_TOKEN - strict: false - type: env_var - tokenizer_kwargs: null - truncate_dim: null - trust_remote_code: false - type: haystack.components.embedders.sentence_transformers_text_embedder.SentenceTransformersTextEmbedder -connections: -- receiver: retriever.query_embedding - sender: text_embedder.embedding -- receiver: prompt_builder.documents - sender: retriever.documents -- receiver: llm.prompt - sender: prompt_builder.prompt -max_runs_per_component: 100 -metadata: {} diff --git a/tests/test_files/yaml/working_pipelines/chat_with_website.yml b/tests/test_files/yaml/working_pipelines/chat_with_website.yml deleted file mode 100644 index ba41f11..0000000 --- a/tests/test_files/yaml/working_pipelines/chat_with_website.yml +++ /dev/null @@ -1,51 +0,0 @@ -components: - converter: - type: haystack.components.converters.html.HTMLToDocument - init_parameters: - extraction_kwargs: null - - fetcher: - init_parameters: - raise_on_failure: true - retry_attempts: 2 - timeout: 3 - user_agents: - - haystack/LinkContentFetcher/2.0.0b8 - type: haystack.components.fetchers.link_content.LinkContentFetcher - - llm: - init_parameters: - api_base_url: null - api_key: - env_vars: - - OPENAI_API_KEY - strict: true - type: env_var - generation_kwargs: {} - model: gpt-4o-mini - streaming_callback: null - system_prompt: null - type: haystack.components.generators.openai.OpenAIGenerator - - prompt: - init_parameters: - template: | - "According to the contents of this website: - {% for document in documents %} - {{document.content}} - {% endfor %} - 
Answer the given question: {{query}} - Answer: - " - required_variables: "*" - type: haystack.components.builders.prompt_builder.PromptBuilder - -connections: - - receiver: converter.sources - sender: fetcher.streams - - receiver: prompt.documents - sender: converter.documents - - receiver: llm.prompt - sender: prompt.prompt - -metadata: {} diff --git a/tests/test_files/yaml/working_pipelines/minimal_retriever.yml b/tests/test_files/yaml/working_pipelines/minimal_retriever.yml deleted file mode 100644 index ad7fb3e..0000000 --- a/tests/test_files/yaml/working_pipelines/minimal_retriever.yml +++ /dev/null @@ -1,79 +0,0 @@ -components: - document_embedder: - init_parameters: - batch_size: 32 - config_kwargs: null - device: - device: cpu - type: single - model: sentence-transformers/paraphrase-MiniLM-L3-v2 - model_kwargs: null - normalize_embeddings: false - precision: float32 - prefix: "" - progress_bar: true - suffix: "" - token: - env_vars: - - HF_API_TOKEN - - HF_TOKEN - strict: false - type: env_var - tokenizer_kwargs: null - truncate_dim: null - trust_remote_code: false - type: haystack.components.embedders.sentence_transformers_text_embedder.SentenceTransformersTextEmbedder - document_retriever: - init_parameters: - document_store: - init_parameters: - api_key: null - embedding_dim: 384 - force_disable_check_same_thread: false - grpc_port: 6334 - hnsw_config: null - host: null - https: null - index: Document - init_from: null - location: null - metadata: {} - on_disk: false - on_disk_payload: null - optimizers_config: null - path: null - payload_fields_to_index: null - port: 6333 - prefer_grpc: false - prefix: null - progress_bar: true - quantization_config: null - recreate_index: false - replication_factor: null - return_embedding: false - scroll_size: 10000 - shard_number: null - similarity: cosine - sparse_idf: false - timeout: null - url: http://localhost:6333 - use_sparse_embeddings: false - wait_result_from_api: true - wal_config: null - write_batch_size: 100 - write_consistency_factor: null - type: haystack_integrations.document_stores.qdrant.document_store.QdrantDocumentStore - filter_policy: replace - filters: null - group_by: null - group_size: null - return_embedding: false - scale_score: false - score_threshold: null - top_k: 3 - type: haystack_integrations.components.retrievers.qdrant.retriever.QdrantEmbeddingRetriever -connections: - - receiver: document_retriever.query_embedding - sender: document_embedder.embedding -max_runs_per_component: 100 -metadata: {} diff --git a/tests/test_files/yaml/working_pipelines/pipeline_qdrant.yml b/tests/test_files/yaml/working_pipelines/pipeline_qdrant.yml deleted file mode 100644 index 247348d..0000000 --- a/tests/test_files/yaml/working_pipelines/pipeline_qdrant.yml +++ /dev/null @@ -1,169 +0,0 @@ -components: - embedder: - init_parameters: - batch_size: 32 - config_kwargs: null - device: - device: cpu - type: single - model: sentence-transformers/all-MiniLM-L6-v2 - model_kwargs: null - normalize_embeddings: false - precision: float32 - prefix: '' - progress_bar: true - suffix: '' - token: - env_vars: - - HF_API_TOKEN - - HF_TOKEN - strict: false - type: env_var - tokenizer_kwargs: null - truncate_dim: null - trust_remote_code: false - type: haystack.components.embedders.sentence_transformers_text_embedder.SentenceTransformersTextEmbedder - list_to_str_adapter: - init_parameters: - custom_filters: {} - output_type: str - template: '{{ replies[0] }}' - unsafe: false - type: haystack.components.converters.output_adapter.OutputAdapter - 
llm: - init_parameters: - api_base_url: http://localhost:8000/v1 - api_key: - env_vars: - - OPENAI_API_KEY - strict: true - type: env_var - generation_kwargs: {} - model: mistralai/Mistral-Nemo-Instruct-2407 - organization: null - streaming_callback: null - type: haystack.components.generators.chat.openai.OpenAIChatGenerator - memory_joiner: - init_parameters: - type_: list[haystack.dataclasses.chat_message.ChatMessage] - type: haystack.components.joiners.branch.BranchJoiner - memory_retriever: - init_parameters: - last_k: 10 - message_store: - init_parameters: {} - type: haystack_experimental.chat_message_stores.in_memory.InMemoryChatMessageStore - type: haystack_experimental.components.retrievers.chat_message_retriever.ChatMessageRetriever - memory_writer: - init_parameters: - message_store: - init_parameters: {} - type: haystack_experimental.chat_message_stores.in_memory.InMemoryChatMessageStore - type: haystack_experimental.components.writers.chat_message_writer.ChatMessageWriter - prompt_builder: - init_parameters: - required_variables: &id001 !!python/tuple - - query - - documents - - memories - template: null - variables: *id001 - type: haystack.components.builders.chat_prompt_builder.ChatPromptBuilder - query_rephrase_llm: - init_parameters: - api_base_url: http://localhost:8000/v1 - api_key: - env_vars: - - OPENAI_API_KEY - strict: true - type: env_var - generation_kwargs: {} - model: mistralai/Mistral-Nemo-Instruct-2407 - organization: null - streaming_callback: null - system_prompt: null - type: haystack.components.generators.openai.OpenAIGenerator - query_rephrase_prompt_builder: - init_parameters: - required_variables: "*" - template: "\nRewrite the question for semantic search while keeping its meaning\ - \ and key terms intact.\nIf the conversation history is empty, DO NOT change\ - \ the query.\nDo not translate the question.\nUse conversation history only\ - \ if necessary, and avoid extending the query with your own knowledge.\nIf\ - \ no changes are needed, output the current question as is.\n\nConversation\ - \ history:\n{% for memory in memories %}\n {{ memory.content }}\n{% endfor\ - \ %}\n\nUser Query: {{query}}\nRewritten Query:\n" - variables: null - type: haystack.components.builders.prompt_builder.PromptBuilder - retriever: - init_parameters: - document_store: - init_parameters: - api_key: null - embedding_dim: 768 - force_disable_check_same_thread: false - grpc_port: 6334 - hnsw_config: null - host: null - https: null - index: Document - init_from: null - location: null - metadata: {} - on_disk: false - on_disk_payload: null - optimizers_config: null - path: null - payload_fields_to_index: null - port: 6333 - prefer_grpc: false - prefix: null - progress_bar: false - quantization_config: null - recreate_index: false - replication_factor: null - return_embedding: false - scroll_size: 10000 - shard_number: null - similarity: cosine - sparse_idf: false - timeout: null - url: http://localhost:6333 - use_sparse_embeddings: false - wait_result_from_api: true - wal_config: null - write_batch_size: 100 - write_consistency_factor: null - type: haystack_integrations.document_stores.qdrant.document_store.QdrantDocumentStore - filter_policy: replace - filters: null - group_by: null - group_size: null - return_embedding: false - scale_score: false - score_threshold: null - top_k: 3 - type: haystack_integrations.components.retrievers.qdrant.retriever.QdrantEmbeddingRetriever -connections: -- receiver: query_rephrase_llm.prompt - sender: query_rephrase_prompt_builder.prompt -- 
receiver: list_to_str_adapter.replies - sender: query_rephrase_llm.replies -- receiver: embedder.text - sender: list_to_str_adapter.output -- receiver: retriever.query_embedding - sender: embedder.embedding -- receiver: prompt_builder.documents - sender: retriever.documents -- receiver: llm.messages - sender: prompt_builder.prompt -- receiver: memory_joiner.value - sender: llm.replies -- receiver: query_rephrase_prompt_builder.memories - sender: memory_retriever.messages -- receiver: prompt_builder.memories - sender: memory_retriever.messages -- receiver: memory_writer.messages - sender: memory_joiner.value -max_runs_per_component: 100 -metadata: {} \ No newline at end of file diff --git a/tests/test_files/yaml/working_pipelines/pipeline_qdrant_2.yml b/tests/test_files/yaml/working_pipelines/pipeline_qdrant_2.yml deleted file mode 100644 index 9ae3ff3..0000000 --- a/tests/test_files/yaml/working_pipelines/pipeline_qdrant_2.yml +++ /dev/null @@ -1,46 +0,0 @@ -components: - document_embedder: - init_parameters: - batch_size: 32 - config_kwargs: null - device: - device: cpu - type: single - model: sentence-transformers/paraphrase-MiniLM-L3-v2 - model_kwargs: null - normalize_embeddings: false - precision: float32 - prefix: '' - progress_bar: true - suffix: '' - token: - env_vars: - - HF_API_TOKEN - - HF_TOKEN - strict: false - type: env_var - tokenizer_kwargs: null - truncate_dim: null - trust_remote_code: false - type: haystack.components.embedders.sentence_transformers_text_embedder.SentenceTransformersTextEmbedder - document_retriever: - init_parameters: - document_store: - init_parameters: - bm25_algorithm: BM25L - bm25_parameters: {} - bm25_tokenization_regex: (?u)\b\w\w+\b - embedding_similarity_function: dot_product - index: b39f1fea-7c83-4fdc-a9e0-928e3d5e4ae7 - type: haystack.document_stores.in_memory.document_store.InMemoryDocumentStore - filter_policy: replace - filters: null - return_embedding: false - scale_score: false - top_k: 3 - type: haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever -connections: -- receiver: document_retriever.query_embedding - sender: document_embedder.embedding -max_runs_per_component: 100 -metadata: {} diff --git a/tests/test_files/yaml/working_pipelines/st_retriever.yml b/tests/test_files/yaml/working_pipelines/st_retriever.yml deleted file mode 100644 index 9ae3ff3..0000000 --- a/tests/test_files/yaml/working_pipelines/st_retriever.yml +++ /dev/null @@ -1,46 +0,0 @@ -components: - document_embedder: - init_parameters: - batch_size: 32 - config_kwargs: null - device: - device: cpu - type: single - model: sentence-transformers/paraphrase-MiniLM-L3-v2 - model_kwargs: null - normalize_embeddings: false - precision: float32 - prefix: '' - progress_bar: true - suffix: '' - token: - env_vars: - - HF_API_TOKEN - - HF_TOKEN - strict: false - type: env_var - tokenizer_kwargs: null - truncate_dim: null - trust_remote_code: false - type: haystack.components.embedders.sentence_transformers_text_embedder.SentenceTransformersTextEmbedder - document_retriever: - init_parameters: - document_store: - init_parameters: - bm25_algorithm: BM25L - bm25_parameters: {} - bm25_tokenization_regex: (?u)\b\w\w+\b - embedding_similarity_function: dot_product - index: b39f1fea-7c83-4fdc-a9e0-928e3d5e4ae7 - type: haystack.document_stores.in_memory.document_store.InMemoryDocumentStore - filter_policy: replace - filters: null - return_embedding: false - scale_score: false - top_k: 3 - type: 
haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever -connections: -- receiver: document_retriever.query_embedding - sender: document_embedder.embedding -max_runs_per_component: 100 -metadata: {} diff --git a/tests/test_files/yaml/working_pipelines/test_pipeline_01.yml b/tests/test_files/yaml/working_pipelines/test_pipeline_01.yml deleted file mode 100644 index 0cb7384..0000000 --- a/tests/test_files/yaml/working_pipelines/test_pipeline_01.yml +++ /dev/null @@ -1,13 +0,0 @@ -components: - first_addition: - init_parameters: - add: 2 - type: haystack.testing.sample_components.add_value.AddFixedValue - double: - init_parameters: {} - type: haystack.testing.sample_components.double.Double -connections: -- receiver: double.value - sender: first_addition.result -max_loops_allowed: 100 -metadata: {} diff --git a/tests/test_files/yaml/working_pipelines/test_pipeline_02.yml b/tests/test_files/yaml/working_pipelines/test_pipeline_02.yml deleted file mode 100644 index a8e7a33..0000000 --- a/tests/test_files/yaml/working_pipelines/test_pipeline_02.yml +++ /dev/null @@ -1,14 +0,0 @@ -components: - hello: - init_parameters: {} - type: hayhooks.testing.components.Hello - fstring: - init_parameters: - template: "This is the greeting: {greeting}!" - variables: ["greeting"] - type: haystack.testing.sample_components.fstring.FString -connections: -- receiver: fstring.greeting - sender: hello.output -max_loops_allowed: 100 -metadata: {} diff --git a/tests/test_handle_callable_type.py b/tests/test_handle_callable_type.py deleted file mode 100644 index 1b68b24..0000000 --- a/tests/test_handle_callable_type.py +++ /dev/null @@ -1,53 +0,0 @@ -from collections.abc import Callable as CallableABC -from typing import Any, Callable, Optional, Union - -import haystack -import pytest - -from hayhooks.server.pipelines.models import get_request_model -from hayhooks.server.utils.create_valid_type import is_callable_type - - -@pytest.mark.parametrize( - "t, expected", - [ - (Callable, True), - (CallableABC, True), - (Callable[[int], str], True), - (Callable[..., Any], True), - (int, False), - (str, False), - (Any, False), - (Union[int, str], False), - (Optional[Callable[[haystack.dataclasses.streaming_chunk.StreamingChunk], type(None)]], True), - ], -) -def test_is_callable_type(t, expected): - assert is_callable_type(t) == expected - - -def test_skip_callables_when_creating_pipeline_models(): - pipeline_name = "test_pipeline" - pipeline_inputs = { - "generator": { - "system_prompt": {"type": Optional[str], "is_mandatory": False, "default_value": None}, - "streaming_callback": { - "type": Optional[Callable[[haystack.dataclasses.streaming_chunk.StreamingChunk], type(None)]], - "is_mandatory": False, - "default_value": None, - }, - "generation_kwargs": { - "type": Optional[dict[str, Any]], - "is_mandatory": False, - "default_value": None, - }, - } - } - - request_model = get_request_model(pipeline_name, pipeline_inputs) - - # This line used to throw an error because the Callable type was not handled correctly - # by the handle_unsupported_types function - assert request_model.model_json_schema() is not None - assert request_model.__name__ == "Test_pipelineRunRequest" - assert "streaming_callback" not in request_model.model_json_schema()["$defs"]["ComponentParams"]["properties"] diff --git a/tests/test_handle_unsupported_types.py b/tests/test_handle_unsupported_types.py deleted file mode 100644 index 439e712..0000000 --- a/tests/test_handle_unsupported_types.py +++ /dev/null @@ -1,42 +0,0 @@ 
-from typing import Optional - -from hayhooks.server.utils.create_valid_type import handle_unsupported_types - - -def test_handle_simple_type(): - result = handle_unsupported_types(int, {}) - assert result is int - - -def test_handle_generic_type(): - result = handle_unsupported_types(list[int], {}) - assert result == list[int] - - -def test_handle_recursive_type(): - class Node: - def __init__(self, value: int, next: Optional["Node"] = None): # noqa: A002 - self.value = value - self.next = next - - result = handle_unsupported_types(Node, {}) - assert result == Node - - -def test_handle_circular_reference(): - class A: - def __init__(self, b: "B"): - self.b = b - - class B: - def __init__(self, a: "A"): - self.a = a - - result = handle_unsupported_types(A, {}) - assert result == A # Adjust assertion based on expected behavior - - -def test_handle_nested_generics(): - nested_type = dict[str, list[Optional[int]]] - result = handle_unsupported_types(nested_type, {}) - assert result == nested_type diff --git a/tests/test_it_deploy.py b/tests/test_it_deploy.py index 3b2797a..aa56e2b 100644 --- a/tests/test_it_deploy.py +++ b/tests/test_it_deploy.py @@ -12,13 +12,13 @@ def clear_registry(): # Load pipeline definitions from test_files -test_files = Path(__file__).parent / "test_files/yaml" / "working_pipelines" +test_files = Path(__file__).parent / "test_files/yaml" pipeline_data = [{"name": file.stem, "source_code": file.read_text()} for file in test_files.glob("*.yml")] @pytest.mark.parametrize("pipeline_data", pipeline_data) -def test_deploy_pipeline_def(client, deploy_pipeline, status_pipeline, pipeline_data: dict): - deploy_response = deploy_pipeline(client, pipeline_data["name"], pipeline_data["source_code"]) +def test_deploy_yaml_pipeline(client, deploy_yaml_pipeline, status_pipeline, pipeline_data: dict): + deploy_response = deploy_yaml_pipeline(client, pipeline_data["name"], pipeline_data["source_code"]) assert deploy_response.status_code == 200 status_response = status_pipeline(client, pipeline_data["name"]) @@ -28,11 +28,11 @@ def test_deploy_pipeline_def(client, deploy_pipeline, status_pipeline, pipeline_ assert docs_response.status_code == 200 -def test_undeploy_pipeline_def(client, deploy_pipeline, undeploy_pipeline, status_pipeline): - pipeline_file = Path(__file__).parent / "test_files/yaml" / "working_pipelines/test_pipeline_01.yml" +def test_undeploy_yaml_pipeline(client, deploy_yaml_pipeline, undeploy_pipeline, status_pipeline): + pipeline_file = Path(__file__).parent / "test_files/yaml" / "inputs_outputs_pipeline.yml" pipeline_data = {"name": pipeline_file.stem, "source_code": pipeline_file.read_text()} - deploy_response = deploy_pipeline(client, pipeline_data["name"], pipeline_data["source_code"]) + deploy_response = deploy_yaml_pipeline(client, pipeline_data["name"], pipeline_data["source_code"]) assert deploy_response.status_code == 200 undeploy_response = undeploy_pipeline(client, pipeline_data["name"]) @@ -42,11 +42,11 @@ def test_undeploy_pipeline_def(client, deploy_pipeline, undeploy_pipeline, statu assert status_response.status_code == 404 -def test_undeploy_non_existent_pipeline(client, undeploy_pipeline): +def test_undeploy_non_existent_yaml_pipeline(client, undeploy_pipeline): undeploy_response = undeploy_pipeline(client, "non_existent_pipeline") assert undeploy_response.status_code == 404 -def test_undeploy_no_pipelines(client, undeploy_pipeline): +def test_undeploy_no_yaml_pipelines(client, undeploy_pipeline): undeploy_response = undeploy_pipeline(client, 
"non_existent_pipeline") assert undeploy_response.status_code == 404 diff --git a/tests/test_it_deploy_yaml_route.py b/tests/test_it_deploy_yaml_route.py index 5e0d74a..03e9785 100644 --- a/tests/test_it_deploy_yaml_route.py +++ b/tests/test_it_deploy_yaml_route.py @@ -5,11 +5,11 @@ SAMPLE_CALC_PIPELINE_PATH = Path(__file__).parent / "test_files" / "yaml" / "sample_calc_pipeline.yml" -def test_deploy_yaml_route_and_run_ok(client): +def test_deploy_yaml_route_and_run_ok(client, deploy_yaml_pipeline): yaml_source = SAMPLE_CALC_PIPELINE_PATH.read_text().strip() # Deploy via the new route - response = client.post("/deploy-yaml", json={"name": "calc", "source_code": yaml_source, "overwrite": True}) + response = deploy_yaml_pipeline(client, "calc", yaml_source) assert response.status_code == 200 assert response.json() == DeployResponse(name="calc", success=True, endpoint="/calc/run").model_dump() @@ -27,7 +27,7 @@ def test_deploy_yaml_route_and_run_ok(client): assert run_response.status_code == 200 # (3 + 2) * 2 = 10 - assert run_response.json() == {"result": 10} + assert run_response.json() == {"result": {"double": {"value": 10}}} def test_deploy_yaml_saves_file(client, test_settings): diff --git a/tests/test_it_handling_deploy_exceptions.py b/tests/test_it_handling_deploy_exceptions.py index 67ad32a..6d666a6 100644 --- a/tests/test_it_handling_deploy_exceptions.py +++ b/tests/test_it_handling_deploy_exceptions.py @@ -1,12 +1,10 @@ from pathlib import Path -def test_gracefully_handle_deploy_exception(client, deploy_pipeline): +def test_gracefully_handle_deploy_exception(client, deploy_yaml_pipeline): pipeline_name = "broken_rag_pipeline" - pipeline_def = (Path(__file__).parent / "test_files/yaml" / "broken_rag_pipeline.yml").read_text() + pipeline_source_code = (Path(__file__).parent / "test_files/yaml/broken/broken_rag_pipeline.yml").read_text() - deploy_response = deploy_pipeline(client, pipeline_name, pipeline_def) - # NOTE: The deprecated deploy method returns 409 for any ValueError (incorrectly) - # We can simply check that the status code is not 200. 
- assert deploy_response.status_code != 200 + deploy_response = deploy_yaml_pipeline(client, pipeline_name, pipeline_source_code) + assert deploy_response.status_code == 500 assert "Couldn't deserialize component 'llm'" in deploy_response.json()["detail"] diff --git a/tests/test_it_status.py b/tests/test_it_status.py index e4f3f4d..53c9f62 100644 --- a/tests/test_it_status.py +++ b/tests/test_it_status.py @@ -16,11 +16,11 @@ def test_status_all_pipelines(client, status_pipeline): assert "pipelines" in status_response.json() -def test_status_single_pipeline(client, deploy_pipeline, status_pipeline): - pipeline_file = Path(__file__).parent / "test_files/yaml" / "working_pipelines/test_pipeline_01.yml" +def test_status_single_pipeline(client, deploy_yaml_pipeline, status_pipeline): + pipeline_file = Path(__file__).parent / "test_files/yaml" / "inputs_outputs_pipeline.yml" pipeline_data = {"name": pipeline_file.stem, "source_code": pipeline_file.read_text()} - deploy_response = deploy_pipeline(client, pipeline_data["name"], pipeline_data["source_code"]) + deploy_response = deploy_yaml_pipeline(client, pipeline_data["name"], pipeline_data["source_code"]) assert deploy_response.status_code == 200 status_response = status_pipeline(client, pipeline_data["name"]) diff --git a/tests/test_registry.py b/tests/test_registry.py index 46d0ac5..0856d3d 100644 --- a/tests/test_registry.py +++ b/tests/test_registry.py @@ -15,7 +15,7 @@ def pipeline_registry(): @pytest.fixture def sample_pipeline_yaml(): - return (Path(__file__).parent / "test_files/yaml" / "working_pipelines" / "basic_rag_pipeline.yml").read_text() + return (Path(__file__).parent / "test_files/yaml" / "inputs_outputs_pipeline.yml").read_text() @pytest.fixture diff --git a/tests/test_undeploy.py b/tests/test_undeploy.py index 276f1be..b4d493e 100644 --- a/tests/test_undeploy.py +++ b/tests/test_undeploy.py @@ -12,29 +12,32 @@ } -def test_undeploy_standard_pipeline(client: TestClient, deploy_pipeline, undeploy_pipeline): - deploy_response = deploy_pipeline( +def test_undeploy_yaml_pipeline(client: TestClient, deploy_yaml_pipeline, undeploy_pipeline): + pipeline_file = Path(__file__).parent / "test_files/yaml" / "inputs_outputs_pipeline.yml" + pipeline_data = {"name": pipeline_file.stem, "source_code": pipeline_file.read_text()} + + deploy_response = deploy_yaml_pipeline( client, - pipeline_name="test_undeploy_pipeline", - pipeline_source_code=SAMPLE_PIPELINE_FILES["chat_with_website.yml"], + pipeline_name=pipeline_data["name"], + pipeline_source_code=pipeline_data["source_code"], ) assert deploy_response.status_code == 200 - assert deploy_response.json()["name"] == "test_undeploy_pipeline" + assert deploy_response.json()["name"] == pipeline_data["name"] # Verify pipeline exists in registry - assert "test_undeploy_pipeline" in registry.get_names() + assert pipeline_data["name"] in registry.get_names() # Undeploy the pipeline - undeploy_response = undeploy_pipeline(client, pipeline_name="test_undeploy_pipeline") + undeploy_response = undeploy_pipeline(client, pipeline_name=pipeline_data["name"]) assert undeploy_response.status_code == 200 assert undeploy_response.json()["success"] is True - assert undeploy_response.json()["name"] == "test_undeploy_pipeline" + assert undeploy_response.json()["name"] == pipeline_data["name"] # Verify pipeline no longer exists in registry - assert "test_undeploy_pipeline" not in registry.get_names() + assert pipeline_data["name"] not in registry.get_names() # Verify pipeline endpoint no longer exists - response = 
client.post("/test_undeploy_pipeline", json={}) + response = client.post(f"/{pipeline_data['name']}/run", json={}) assert response.status_code == 404 From 48e7428542b8e8c9ba9ed7545a47b586a4b3e8d7 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Fri, 12 Sep 2025 10:58:54 +0200 Subject: [PATCH 16/42] Remove old CLI deploy command --- src/hayhooks/cli/pipeline.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/hayhooks/cli/pipeline.py b/src/hayhooks/cli/pipeline.py index f1dd03d..e184210 100644 --- a/src/hayhooks/cli/pipeline.py +++ b/src/hayhooks/cli/pipeline.py @@ -38,25 +38,6 @@ def _deploy_with_progress(ctx: typer.Context, name: str, endpoint: str, payload: show_error_and_abort(f"Pipeline '[bold]{name}[/bold]' already exists! ⚠️") -@pipeline.command() -def deploy( - ctx: typer.Context, - name: Annotated[Optional[str], typer.Option("--name", "-n", help="The name of the pipeline to deploy.")], - pipeline_file: Path = typer.Argument( # noqa: B008 - help="The path to the pipeline file to deploy." - ), -) -> None: - """Deploy a pipeline to the Hayhooks server.""" - if not pipeline_file.exists(): - show_error_and_abort("Pipeline file does not exist.", str(pipeline_file)) - - if name is None: - name = pipeline_file.stem - - payload = {"name": name, "source_code": pipeline_file.read_text()} - _deploy_with_progress(ctx=ctx, name=name, endpoint="deploy", payload=payload) - - @pipeline.command(name="deploy-yaml") def deploy_yaml( # noqa: PLR0913 ctx: typer.Context, From 487cb63d945cbc7e00c81e9fe4db99839731d01d Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Fri, 12 Sep 2025 11:18:45 +0200 Subject: [PATCH 17/42] Add CLI alias: deploy -> deploy-files --- src/hayhooks/cli/pipeline.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/hayhooks/cli/pipeline.py b/src/hayhooks/cli/pipeline.py index e184210..a651fd6 100644 --- a/src/hayhooks/cli/pipeline.py +++ b/src/hayhooks/cli/pipeline.py @@ -119,6 +119,10 @@ def deploy_files( _deploy_with_progress(ctx=ctx, name=name, endpoint="deploy_files", payload=payload) +# Register alias: `deploy` -> `deploy-files` +pipeline.command(name="deploy")(deploy_files) + + @pipeline.command() def undeploy( ctx: typer.Context, From b1af4d05fa92325a880ebdcc8b721c84bf1f4492 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Fri, 12 Sep 2025 15:35:06 +0200 Subject: [PATCH 18/42] Update README --- README.md | 80 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 56 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 5471829..d48782f 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ With Hayhooks, you can: - [Async Run API Method](#run_api_async) - [PipelineWrapper development with `overwrite` option](#pipelinewrapper-development-with-overwrite-option) - [Additional Dependencies](#additional-dependencies) +- [Deploy a YAML Pipeline](#deploy-a-yaml-pipeline) - [Deploy an Agent](#deploy-an-agent) - [Support file uploads](#support-file-uploads) - [Run pipelines from the CLI](#run-pipelines-from-the-cli) @@ -61,15 +62,13 @@ With Hayhooks, you can: - [Run Hayhooks Programmatically](#run-hayhooks-programmatically) - [Sharing code between pipeline wrappers](#sharing-code-between-pipeline-wrappers) - [Deployment Guidelines](#deployment-guidelines) -- [Legacy Features](#legacy-features) - - [Deploy Pipeline Using YAML](#deploy-a-pipeline-using-only-its-yaml-definition) - [License](#license) ## Quick start with Docker Compose To quickly get started with Hayhooks, we provide a ready-to-use 
Docker Compose 🐳 setup with pre-configured integration with [open-webui](https://openwebui.com/).
 
-It's available [here](https://github.com/deepset-ai/hayhooks-open-webui-docker-compose).
+It's available in the [Hayhooks + Open WebUI Docker Compose repository](https://github.com/deepset-ai/hayhooks-open-webui-docker-compose).
 
 ## Quick start
 
@@ -162,8 +161,9 @@ CLI commands are basically wrappers around the HTTP API of the server. The full
 hayhooks run # Start the server
 hayhooks status # Check the status of the server and show deployed pipelines
 
-hayhooks pipeline deploy-files # Deploy a pipeline using PipelineWrapper
-hayhooks pipeline deploy # Deploy a pipeline from a YAML file
+hayhooks pipeline deploy-yaml # Deploy a pipeline from a YAML file (preferred)
+hayhooks pipeline deploy-files # Deploy a pipeline using PipelineWrapper files
+hayhooks pipeline deploy # Alias for deploy-files
 hayhooks pipeline undeploy # Undeploy a pipeline
 hayhooks pipeline run # Run a pipeline
 ```
 
@@ -195,7 +195,7 @@ The pipeline wrapper provides a flexible foundation for deploying Haystack pipel
 - Define custom execution logic with configurable inputs and outputs
 - Optionally expose OpenAI-compatible chat endpoints with streaming support for integration with interfaces like [open-webui](https://openwebui.com/)
 
-The `pipeline_wrapper.py` file must contain an implementation of the `BasePipelineWrapper` class (see [here](src/hayhooks/server/utils/base_pipeline_wrapper.py) for more details).
+The `pipeline_wrapper.py` file must contain an implementation of the `BasePipelineWrapper` class (see [BasePipelineWrapper source](src/hayhooks/server/utils/base_pipeline_wrapper.py) for more details).
 
 A minimal `PipelineWrapper` looks like this:
 
@@ -274,6 +274,8 @@ hayhooks pipeline deploy-files -n chat_with_website examples/pipeline_wrappers/c
 
 This will deploy the pipeline with the name `chat_with_website`. Any error encountered during development will be printed to the console and shown in the server logs.
 
+Alternatively, you can deploy via HTTP: `POST /deploy_files` (CLI alias: `hayhooks pipeline deploy`).
+
 #### PipelineWrapper development with `overwrite` option
 
 During development, you can use the `--overwrite` flag to redeploy your pipeline without restarting the Hayhooks server:
 
@@ -318,6 +320,54 @@ Then, assuming you've installed the Hayhooks package in a virtual environment, y
 pip install trafilatura
 ```
 
+## Deploy a YAML Pipeline
+
+You can deploy a Haystack pipeline directly from its YAML definition using the preferred `/deploy-yaml` endpoint. This mode builds request/response schemas from the YAML-declared `inputs` and `outputs`.
+
+Note: You can also deploy YAML pipelines from the CLI with `hayhooks pipeline deploy-yaml`. Wrapper-based deployments continue to use `/deploy_files` or the CLI alias `hayhooks pipeline deploy`.
+
+Tip: You can obtain a pipeline's YAML from an existing `Pipeline` instance using `pipeline.dumps()`. See the [Haystack serialization docs](https://docs.haystack.deepset.ai/docs/serialization) for details.
+
+Requirements:
+
+- The YAML must declare both `inputs` and `outputs` fields so the API request/response schemas can be generated.
+- `inputs`/`outputs` entries map friendly names to pipeline component fields (e.g. `fetcher.urls`, `prompt.query`).
+
+Minimal example:
+
+```yaml
+# ... pipeline definition ...
+ +inputs: + urls: + - fetcher.urls + query: + - prompt.query +outputs: + replies: llm.replies +``` + +CLI: + +```shell +hayhooks pipeline deploy-yaml -n inputs_outputs_pipeline pipelines/inputs_outputs_pipeline.yml +``` + +Alternatively, you can deploy via HTTP: `POST /deploy-yaml`. + +If successful, the server exposes a run endpoint at `/{name}/run` with a request/response schema derived from the YAML IO. For example: + +```shell +curl -X POST \ + http://HAYHOOKS_HOST:HAYHOOKS_PORT/inputs_outputs_pipeline/run \ + -H 'Content-Type: application/json' \ + -d '{"urls": ["https://haystack.deepset.ai"], "query": "What is Haystack?"}' +``` + +Limitations: + +- YAML-deployed pipelines do not support OpenAI-compatible chat completion endpoints, so they cannot be used with Open WebUI. If you need chat completion/streaming, use a `PipelineWrapper` and implement `run_chat_completion` or `run_chat_completion_async` (see the OpenAI compatibility section below). + ## Deploy an Agent Deploying a [Haystack Agent](https://docs.haystack.deepset.ai/docs/agents) is very similar to deploying a pipeline. @@ -971,24 +1021,6 @@ We have some dedicated documentation for deployment: We also have some additional deployment guidelines, see [deployment_guidelines.md](docs/deployment_guidelines.md). -### Legacy Features - -#### Deploy a pipeline using only its YAML definition - -**⚠️ This way of deployment is not maintained anymore and will be deprecated in the future**. - -We're still supporting the Hayhooks _former_ way to deploy a pipeline. - -The former command `hayhooks deploy` is now changed to `hayhooks pipeline deploy` and can be used to deploy a pipeline only from a YAML definition file. - -For example: - -```shell -hayhooks pipeline deploy -n chat_with_website examples/pipeline_wrappers/chat_with_website/chat_with_website.yml -``` - -This will deploy the pipeline with the name `chat_with_website` from the YAML definition file `examples/pipeline_wrappers/chat_with_website/chat_with_website.yml`. You then can check the generated docs at `http://HAYHOOKS_HOST:HAYHOOKS_PORT/docs` or `http://HAYHOOKS_HOST:HAYHOOKS_PORT/redoc`, looking at the `POST /chat_with_website` endpoint. - ### License This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details. 
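For reference, the deploy-and-run flow documented in this patch can also be driven programmatically. Below is a minimal sketch, not part of the patch itself; it assumes the default `localhost:1416` host/port, and reuses the request-body fields (`name`, `source_code`, `overwrite`) that the `deploy-yaml` CLI payload uses later in this series:

```python
from pathlib import Path

import requests

BASE_URL = "http://localhost:1416"  # assumed default HAYHOOKS_HOST:HAYHOOKS_PORT

# Deploy a YAML pipeline through POST /deploy-yaml
source = Path("pipelines/inputs_outputs_pipeline.yml").read_text()
deploy = requests.post(
    f"{BASE_URL}/deploy-yaml",
    json={"name": "inputs_outputs_pipeline", "source_code": source, "overwrite": True},
)
deploy.raise_for_status()

# Call the generated run endpoint; its schema is derived from the YAML-declared IO
run = requests.post(
    f"{BASE_URL}/inputs_outputs_pipeline/run",
    json={"urls": ["https://haystack.deepset.ai"], "query": "What is Haystack?"},
)
print(run.json())
```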
From 6ca670e8ffa2eeb89cd130599cb7120101571598 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Mon, 15 Sep 2025 11:49:42 +0200 Subject: [PATCH 19/42] Use AsyncPipeline when loading YAML pipelines (to avoid using run_in_threadpool when running it) --- src/hayhooks/server/utils/deploy_utils.py | 14 ++++++++------ tests/test_deploy_yaml.py | 21 +++++++++++++++++++++ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index c732161..0d1cbad 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -314,11 +314,11 @@ def add_pipeline_yaml_api_route(app: FastAPI, pipeline_name: str) -> None: raise PipelineNotFoundError(msg) # Ensure the registered object is a Haystack Pipeline, not a wrapper - if not isinstance(pipeline_instance, (Pipeline, AsyncPipeline)): - msg = f"Pipeline '{pipeline_name}' is not a Haystack Pipeline instance" + if not isinstance(pipeline_instance, AsyncPipeline): + msg = f"Pipeline '{pipeline_name}' is not a Haystack AsyncPipeline instance" raise PipelineYamlError(msg) - pipeline: Union[Pipeline, AsyncPipeline] = pipeline_instance + pipeline: AsyncPipeline = pipeline_instance metadata = registry.get_metadata(pipeline_name) or {} PipelineRunRequest = metadata.get("request_model") @@ -330,7 +330,7 @@ def add_pipeline_yaml_api_route(app: FastAPI, pipeline_name: str) -> None: @handle_pipeline_exceptions() async def pipeline_run(run_req: PipelineRunRequest) -> PipelineRunResponse: # type:ignore[valid-type] - result = await run_in_threadpool(pipeline.run, data=run_req.model_dump()) # type: ignore[attr-defined] + result = await pipeline.run_async(data=run_req.model_dump()) # type: ignore[attr-defined] return PipelineRunResponse(result=result) # Clear existing YAML run route if it exists (old or new path) @@ -475,10 +475,12 @@ def add_yaml_pipeline_to_registry( clog.debug(f"Adding YAML pipeline to registry with metadata: {metadata}") # Store the instantiated pipeline together with its metadata + # NOTE: We want to create an AsyncPipeline here so we can avoid using + # run_in_threadpool when running the pipeline. 
try: - from haystack import Pipeline + from haystack import AsyncPipeline - pipeline = Pipeline.loads(source_code) + pipeline = AsyncPipeline.loads(source_code) except Exception as e: msg = f"Unable to parse Haystack Pipeline {pipeline_name}: {e!s}" raise ValueError(msg) from e diff --git a/tests/test_deploy_yaml.py b/tests/test_deploy_yaml.py index 4d19d06..1d099ff 100644 --- a/tests/test_deploy_yaml.py +++ b/tests/test_deploy_yaml.py @@ -1,9 +1,19 @@ from pathlib import Path +import pytest +from haystack import AsyncPipeline + from hayhooks.server.pipelines.registry import registry from hayhooks.server.utils.deploy_utils import add_yaml_pipeline_to_registry +@pytest.fixture(autouse=True) +def cleanup_test_pipelines(): + yield + for pipeline_name in registry.get_names(): + registry.remove(pipeline_name) + + def test_deploy_pipeline_with_inputs_outputs(): pipeline_file = Path(__file__).parent / "test_files/yaml/inputs_outputs_pipeline.yml" pipeline_data = { @@ -53,3 +63,14 @@ def test_deploy_pipeline_with_inputs_outputs(): "type": "object", "title": "Inputs_outputs_pipelineRunResponse", } + + +def test_yaml_pipeline_is_async_pipeline(): + pipeline_file = Path(__file__).parent / "test_files/yaml/inputs_outputs_pipeline.yml" + pipeline_name = pipeline_file.stem + source_code = pipeline_file.read_text() + + add_yaml_pipeline_to_registry(pipeline_name=pipeline_name, source_code=source_code) + + pipeline_instance = registry.get(pipeline_name) + assert isinstance(pipeline_instance, AsyncPipeline) From c94d0ef4abcae5b036aed67f471a8548dfc9a792 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Mon, 15 Sep 2025 11:50:21 +0200 Subject: [PATCH 20/42] Fix lint --- src/hayhooks/server/utils/deploy_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index 0d1cbad..64bfb3e 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -13,7 +13,7 @@ from fastapi import FastAPI, Form, HTTPException from fastapi.concurrency import run_in_threadpool from fastapi.routing import APIRoute -from haystack import AsyncPipeline, Pipeline +from haystack import AsyncPipeline from pydantic import BaseModel from hayhooks.server.exceptions import ( From 4446dd8087eaf8349809321567ebe9c8b5b39839 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Mon, 15 Sep 2025 12:19:20 +0200 Subject: [PATCH 21/42] Add a section for loading pipelines or agents at startup --- README.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/README.md b/README.md index d48782f..8091733 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ With Hayhooks, you can: - [Additional Dependencies](#additional-dependencies) - [Deploy a YAML Pipeline](#deploy-a-yaml-pipeline) - [Deploy an Agent](#deploy-an-agent) +- [Load pipelines or agents at startup](#load-pipelines-or-agents-at-startup) - [Support file uploads](#support-file-uploads) - [Run pipelines from the CLI](#run-pipelines-from-the-cli) - [Run a pipeline from the CLI JSON-compatible parameters](#run-a-pipeline-from-the-cli-json-compatible-parameters) @@ -408,6 +409,41 @@ As you can see, the `run_chat_completion_async` method is the one that will be u The `async_streaming_generator` function is a utility function that [will handle the streaming of the agent's responses](#async_streaming_generator). 
+## Load pipelines or agents at startup + +Hayhooks can automatically deploy pipelines or agents on startup by scanning a pipelines directory. + +- Set `HAYHOOKS_PIPELINES_DIR` (defaults to `./pipelines`). +- On startup, Hayhooks will: + - Deploy every YAML file at the directory root (`*.yml`/`*.yaml`) using the file name as the pipeline name. + - Deploy every immediate subfolder as a wrapper-based pipeline/agent if it contains a `pipeline_wrapper.py`. + +Example layout: + +```text +my-project/ +β”œβ”€β”€ .env +└── pipelines/ + β”œβ”€β”€ inputs_outputs_pipeline.yml # YAML-only pipeline -> POST /inputs_outputs_pipeline/run + β”œβ”€β”€ chat_with_website/ # Wrapper-based pipeline -> POST /chat_with_website/run (+ chat endpoints if implemented) + β”‚ β”œβ”€β”€ pipeline_wrapper.py + β”‚ └── chat_with_website.yml + └── agent_streaming/ + └── pipeline_wrapper.py +``` + +Configure via environment or `.env`: + +```shell +# .env +HAYHOOKS_PIPELINES_DIR=./pipelines +``` + +Notes: + +- YAML-deployed pipelines require `inputs` and `outputs` in the YAML and do not expose OpenAI-compatible chat endpoints. For chat/streaming, use a `PipelineWrapper` and implement `run_chat_completion`/`run_chat_completion_async`. +- If your wrappers import shared code, set `HAYHOOKS_ADDITIONAL_PYTHON_PATH` (see β€œSharing code between pipeline wrappers”). + ## Support file uploads Hayhooks can easily handle uploaded files in your pipeline wrapper `run_api` method by adding `files: Optional[List[UploadFile]] = None` as an argument. From d9420c266884e14189b3e1f142bb4941cf937bf2 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Mon, 15 Sep 2025 16:55:07 +0200 Subject: [PATCH 22/42] Enable YAML pipelines as MCP tools --- src/hayhooks/server/utils/mcp_utils.py | 29 +++++++++++------- tests/test_it_mcp_server.py | 41 +++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 12 deletions(-) diff --git a/src/hayhooks/server/utils/mcp_utils.py b/src/hayhooks/server/utils/mcp_utils.py index 928e25a..e4cd83a 100644 --- a/src/hayhooks/server/utils/mcp_utils.py +++ b/src/hayhooks/server/utils/mcp_utils.py @@ -1,4 +1,5 @@ import asyncio +import json import traceback from collections.abc import AsyncIterator from contextlib import asynccontextmanager @@ -7,6 +8,7 @@ from typing import Union from fastapi.concurrency import run_in_threadpool +from haystack import AsyncPipeline from haystack.lazy_imports import LazyImport from starlette.applications import Starlette from starlette.responses import JSONResponse @@ -147,20 +149,25 @@ async def run_pipeline_as_tool(name: str, arguments: dict) -> list["TextContent" msg = f"Pipeline '{name}' not found" raise ValueError(msg) - # Only BasePipelineWrapper instances support run_api/run_api_async methods - if not isinstance(pipeline, BasePipelineWrapper): - msg = f"Pipeline '{name}' is not a BasePipelineWrapper and cannot be used as an MCP tool" - raise ValueError(msg) + if isinstance(pipeline, BasePipelineWrapper): + if pipeline._is_run_api_async_implemented: + result = await pipeline.run_api_async(**arguments) + else: + result = await run_in_threadpool(pipeline.run_api, **arguments) - # Use the same async/sync pattern as in deploy_utils.py - if pipeline._is_run_api_async_implemented: - result = await pipeline.run_api_async(**arguments) - else: - result = await run_in_threadpool(pipeline.run_api, **arguments) + log.trace(f"Pipeline '{name}' returned result: {result}") + return [TextContent(text=result, type="text")] - log.trace(f"Pipeline '{name}' returned result: {result}") + if 
isinstance(pipeline, AsyncPipeline): + result = await pipeline.run_async(data=arguments) + log.trace(f"YAML Pipeline '{name}' returned result: {result}") + return [TextContent(text=json.dumps(result), type="text")] - return [TextContent(text=result, type="text")] + msg = ( + f"Pipeline '{name}' is not a supported type for MCP tools. " + "Expected a BasePipelineWrapper or AsyncPipeline instance." + ) + raise ValueError(msg) async def notify_client(server: "Server") -> None: diff --git a/tests/test_it_mcp_server.py b/tests/test_it_mcp_server.py index fb35c22..39a361e 100644 --- a/tests/test_it_mcp_server.py +++ b/tests/test_it_mcp_server.py @@ -5,7 +5,7 @@ import pytest from hayhooks.server.pipelines import registry -from hayhooks.server.utils.deploy_utils import add_pipeline_wrapper_to_registry +from hayhooks.server.utils.deploy_utils import add_pipeline_wrapper_to_registry, add_yaml_pipeline_to_registry from hayhooks.server.utils.mcp_utils import CoreTools, create_mcp_server MCP_AVAILABLE = importlib.util.find_spec("mcp") is not None @@ -159,6 +159,45 @@ async def test_call_pipeline_as_tool_with_invalid_pipeline_name(mcp_server_insta assert "Pipeline 'invalid_pipeline_name' not found" in text_response +@pytest.fixture +def deploy_yaml_calc_pipeline(): + pipeline_name = "calc" + yaml_path = Path("tests/test_files/yaml/sample_calc_pipeline.yml") + add_yaml_pipeline_to_registry(pipeline_name=pipeline_name, source_code=yaml_path.read_text()) + return pipeline_name + + +@pytest.mark.asyncio +async def test_list_tools_with_yaml_pipeline_deployed(mcp_server_instance, deploy_yaml_calc_pipeline): + async with client_session(mcp_server_instance) as client: + list_tools_result = await client.list_tools() + + # Core tools + 1 YAML pipeline tool + assert len(list_tools_result.tools) == len(CoreTools) + 1 + + # Find YAML pipeline tool and verify basic schema + pipeline_tool = next((t for t in list_tools_result.tools if t.name == deploy_yaml_calc_pipeline), None) + assert pipeline_tool is not None + assert pipeline_tool.inputSchema["type"] == "object" + assert "value" in pipeline_tool.inputSchema["properties"] + + +@pytest.mark.asyncio +async def test_call_yaml_pipeline_as_tool(mcp_server_instance, deploy_yaml_calc_pipeline): + async with client_session(mcp_server_instance) as client: + result = await client.call_tool(deploy_yaml_calc_pipeline, {"value": 3}) + + assert isinstance(result, CallToolResult) + assert result.isError is False + + # YAML pipelines return JSON text content; parse and assert + payload = result.content[0].text + import json + + parsed = json.loads(payload) + assert parsed == {"double": {"value": 10}} + + @pytest.mark.asyncio async def test_ensure_send_tool_list_changed_notification_after_deploy_or_undeploy(mcp_server_instance): with patch("hayhooks.server.utils.mcp_utils.notify_client") as mock_notify_client: From 1593afd1989577e2b50041a3ea2de58d97adacec Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 16 Sep 2025 09:41:48 +0200 Subject: [PATCH 23/42] Update README --- README.md | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8091733..97b8847 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,7 @@ With Hayhooks, you can: - [MCP support](#mcp-support) - [MCP Server](#mcp-server) - [Create a PipelineWrapper for exposing a Haystack pipeline as a MCP Tool](#create-a-pipelinewrapper-for-exposing-a-haystack-pipeline-as-a-mcp-tool) + - [Expose a YAML pipeline as a MCP 
Tool](#expose-a-yaml-pipeline-as-a-mcp-tool) - [Using Hayhooks MCP Server with Claude Desktop](#using-hayhooks-mcp-server-with-claude-desktop) - [Using Hayhooks Core MCP Tools in IDEs like Cursor](#using-hayhooks-core-mcp-tools-in-ides-like-cursor) - [Development and deployment of Haystack pipelines directly from Cursor](#development-and-deployment-of-haystack-pipelines-directly-from-cursor) @@ -351,7 +352,7 @@ outputs: CLI: ```shell -hayhooks pipeline deploy-yaml -n inputs_outputs_pipeline pipelines/inputs_outputs_pipeline.yml +hayhooks pipeline deploy-yaml -n inputs_outputs_pipeline --description "My pipeline" pipelines/inputs_outputs_pipeline.yml ``` Alternatively, you can deploy via HTTP: `POST /deploy-yaml`. @@ -369,6 +370,14 @@ Limitations: - YAML-deployed pipelines do not support OpenAI-compatible chat completion endpoints, so they cannot be used with Open WebUI. If you need chat completion/streaming, use a `PipelineWrapper` and implement `run_chat_completion` or `run_chat_completion_async` (see the OpenAI compatibility section below). +Available CLI options for `hayhooks pipeline deploy-yaml`: + +- `--name, -n`: override the pipeline name (default: YAML file stem) +- `--description`: optional human-readable description (used in MCP tool listing) +- `--overwrite, -o`: overwrite if the pipeline already exists +- `--skip-mcp`: skip exposing this pipeline as an MCP Tool +- `--save-file/--no-save-file`: save the YAML under `pipelines/{name}.yml` on the server (default: `--save-file`) + ## Deploy an Agent Deploying a [Haystack Agent](https://docs.haystack.deepset.ai/docs/agents) is very similar to deploying a pipeline. @@ -531,6 +540,33 @@ hayhooks mcp run This will start the Hayhooks MCP Server on `HAYHOOKS_MCP_HOST:HAYHOOKS_MCP_PORT`. +### Expose a YAML pipeline as a MCP Tool + +Hayhooks can expose YAML-deployed pipelines as MCP Tools. When you deploy a pipeline via `/deploy-yaml` (or the CLI `hayhooks pipeline deploy-yaml`), Hayhooks: + +- Builds flat request/response models from YAML-declared `inputs` and `outputs`. +- Registers the pipeline as an `AsyncPipeline` and adds it to the registry with metadata required for MCP Tools. +- Lists it in MCP `list_tools()` with: + - `name`: the pipeline name (YAML file stem or provided `--name`) + - `description`: the optional description you pass during deployment (defaults to the pipeline name) + - `inputSchema`: JSON schema derived from YAML `inputs` + +Calling a YAML pipeline via MCP `call_tool` executes the pipeline asynchronously and returns the pipeline result as a JSON string in `TextContent`. + +Example (Streamable HTTP via MCP client): + +```python +tools = await client.list_tools() +# Find YAML tool by name, e.g., "calc" (the pipeline name) +result = await client.call_tool("calc", {"value": 3}) +assert result.content[0].text == '{"double": {"value": 10}}' +``` + +Notes and limitations: + +- YAML pipelines must declare `inputs` and `outputs`. +- YAML pipelines are run-only via MCP and return JSON text; if you need OpenAI-compatible chat endpoints or streaming, use a `PipelineWrapper` and implement `run_chat_completion`/`run_chat_completion_async`. 
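+
+For completeness, a minimal connection sketch for the example above, based on the MCP Python SDK's Streamable HTTP client (the `streamablehttp_client` import, the `/mcp` path, and the `1417` port are assumptions, adjust them to your deployment):
+
+```python
+import asyncio
+
+from mcp import ClientSession
+from mcp.client.streamable_http import streamablehttp_client
+
+
+async def main() -> None:
+    # Assumed URL: Hayhooks MCP server on HAYHOOKS_MCP_HOST:HAYHOOKS_MCP_PORT
+    async with streamablehttp_client("http://localhost:1417/mcp") as (read, write, _):
+        async with ClientSession(read, write) as session:
+            await session.initialize()
+            tools = await session.list_tools()
+            print([t.name for t in tools.tools])
+            # Call the YAML pipeline tool by its pipeline name
+            result = await session.call_tool("calc", {"value": 3})
+            print(result.content[0].text)
+
+
+asyncio.run(main())
+```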
+ ### Create a PipelineWrapper for exposing a Haystack pipeline as a MCP Tool A [MCP Tool](https://modelcontextprotocol.io/docs/concepts/tools) requires the following properties: From 12e210bef69aa30e5a5367e31a8c351c54154fa3 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 16 Sep 2025 15:51:40 +0200 Subject: [PATCH 24/42] Update README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 97b8847..b4404ce 100644 --- a/README.md +++ b/README.md @@ -163,8 +163,8 @@ CLI commands are basically wrappers around the HTTP API of the server. The full hayhooks run # Start the server hayhooks status # Check the status of the server and show deployed pipelines -hayhooks pipeline deploy-yaml # Deploy a pipeline from a YAML file (preferred) -hayhooks pipeline deploy-files # Deploy a pipeline using PipelineWrapper files +hayhooks pipeline deploy-files # Deploy a pipeline using PipelineWrapper files (preferred) +hayhooks pipeline deploy-yaml # Deploy a pipeline from a YAML file hayhooks pipeline deploy # Alias for deploy-files hayhooks pipeline undeploy # Undeploy a pipeline hayhooks pipeline run # Run a pipeline From bd0a0603af4b7f15ff4a41d28471792b87d36d79 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 16 Sep 2025 15:53:12 +0200 Subject: [PATCH 25/42] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b4404ce..99041fe 100644 --- a/README.md +++ b/README.md @@ -332,7 +332,7 @@ Tip: You can obtain a pipeline's YAML from an existing `Pipeline` instance using Requirements: -- The YAML must declare both `inputs` and `outputs` fields so the API request/response schemas can be generated. +- The YAML must declare both `inputs` and `outputs` fields so the API request/response schemas can be generated. If you have generated the YAML from a `Pipeline` using `pipeline.dumps()`, you will need to add the `inputs` and `outputs` fields _manually_. - `inputs`/`outputs` entries map friendly names to pipeline component fields (e.g. `fetcher.urls`, `prompt.query`). 
Minimal example: From 04f961e118a4cf3aead9f2691855c9df0c65c6eb Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 16 Sep 2025 15:56:18 +0200 Subject: [PATCH 26/42] Refactor: add skip_mcp to payload --- src/hayhooks/cli/pipeline.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/hayhooks/cli/pipeline.py b/src/hayhooks/cli/pipeline.py index a651fd6..d736039 100644 --- a/src/hayhooks/cli/pipeline.py +++ b/src/hayhooks/cli/pipeline.py @@ -74,14 +74,12 @@ def deploy_yaml( # noqa: PLR0913 "source_code": pipeline_file.read_text(), "overwrite": overwrite, "save_file": save_file, + "skip_mcp": skip_mcp, } if description is not None: payload["description"] = description - # Always include skip_mcp flag (defaults to False) - payload["skip_mcp"] = skip_mcp - _deploy_with_progress(ctx=ctx, name=name, endpoint="deploy-yaml", payload=payload) From c26281b9c17319a3d53a5775c0694e2b4738dfef Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 16 Sep 2025 16:01:35 +0200 Subject: [PATCH 27/42] Remove unneeded import --- src/hayhooks/server/utils/deploy_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index 64bfb3e..58b18d6 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -478,8 +478,6 @@ def add_yaml_pipeline_to_registry( # NOTE: We want to create an AsyncPipeline here so we can avoid using # run_in_threadpool when running the pipeline. try: - from haystack import AsyncPipeline - pipeline = AsyncPipeline.loads(source_code) except Exception as e: msg = f"Unable to parse Haystack Pipeline {pipeline_name}: {e!s}" From 8df9812915073e30ae9e7667002415ee7ed7abd6 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 16 Sep 2025 16:17:32 +0200 Subject: [PATCH 28/42] Restore disabled tests --- tests/test_deploy_at_startup.py | 4 +--- tests/test_files/mixed/basic_rag_pipeline.yml | 9 +++++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/test_deploy_at_startup.py b/tests/test_deploy_at_startup.py index 9d87f40..5ba436d 100644 --- a/tests/test_deploy_at_startup.py +++ b/tests/test_deploy_at_startup.py @@ -84,16 +84,14 @@ def test_app_loads_pipeline_from_files_directory(test_client_files, test_files_p assert "chat_with_website" in pipelines -@pytest.mark.skip(reason="To be reviewed when old YAML deployment is removed") def test_app_loads_pipeline_from_yaml_directory(test_client_yaml, test_yaml_pipelines_dir): response = test_client_yaml.get("/status") assert response.status_code == 200 pipelines = response.json()["pipelines"] - assert len(pipelines) == len(list(test_yaml_pipelines_dir.rglob("*"))) + assert len(pipelines) == 2 -@pytest.mark.skip(reason="To be reviewed when old YAML deployment is removed") def test_app_loads_pipeline_from_mixed_directory(test_client_mixed, test_mixed_pipelines_dir): response = test_client_mixed.get("/status") assert response.status_code == 200 diff --git a/tests/test_files/mixed/basic_rag_pipeline.yml b/tests/test_files/mixed/basic_rag_pipeline.yml index 7118499..072d587 100644 --- a/tests/test_files/mixed/basic_rag_pipeline.yml +++ b/tests/test_files/mixed/basic_rag_pipeline.yml @@ -70,3 +70,12 @@ connections: sender: prompt_builder.prompt max_runs_per_component: 100 metadata: {} + + +inputs: + question: + - llm.prompt + +outputs: + answer: + - llm.response From a2e7106288549fadfe92b0f7ce6d372935dad5ee Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 16 Sep 
2025 16:25:29 +0200 Subject: [PATCH 29/42] Re-added required_variables on prompt_builder component --- tests/test_deploy_yaml.py | 3 +-- tests/test_files/yaml/inputs_outputs_pipeline.yml | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_deploy_yaml.py b/tests/test_deploy_yaml.py index 1d099ff..00a5ea0 100644 --- a/tests/test_deploy_yaml.py +++ b/tests/test_deploy_yaml.py @@ -41,11 +41,10 @@ def test_deploy_pipeline_with_inputs_outputs(): "items": {"type": "string"}, }, "query": { - "default": None, "title": "Query", }, }, - "required": ["urls"], + "required": ["urls", "query"], "title": "Inputs_outputs_pipelineRunRequest", "type": "object", } diff --git a/tests/test_files/yaml/inputs_outputs_pipeline.yml b/tests/test_files/yaml/inputs_outputs_pipeline.yml index cb94864..a4cab19 100644 --- a/tests/test_files/yaml/inputs_outputs_pipeline.yml +++ b/tests/test_files/yaml/inputs_outputs_pipeline.yml @@ -29,6 +29,7 @@ components: prompt: init_parameters: + required_variables: "*" template: | "According to the contents of this website: {% for document in documents %} From 8d891afc65b421c2cbf56fe73da3ee5ff44c6ae5 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 16 Sep 2025 16:44:45 +0200 Subject: [PATCH 30/42] Update docstrings --- src/hayhooks/server/pipelines/models.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/hayhooks/server/pipelines/models.py b/src/hayhooks/server/pipelines/models.py index 9c05cdf..75b0cc3 100644 --- a/src/hayhooks/server/pipelines/models.py +++ b/src/hayhooks/server/pipelines/models.py @@ -42,7 +42,7 @@ def get_response_model_from_resolved_io( declared_outputs: Mapping of declared output name to OutputResolution. Returns: - A Pydantic model with top-level fields matching declared output names. + A Pydantic model with a single required field 'result' containing the pipeline result. """ fields: dict[str, Any] = {} @@ -57,14 +57,15 @@ def get_response_model_from_resolved_io( def create_request_model_from_callable(func: Callable, model_name: str, docstring: Docstring) -> type[BaseModel]: """ - Create a dynamic Pydantic model based on callable's signature. + Create a dynamic Pydantic model based on the callable's signature. Args: - func: The callable (function/method) to analyze - model_name: Name for the generated model + func: The callable (function or method) to analyze. + model_name: Name to use for the generated model. + docstring: Parsed docstring for the callable used to populate parameter descriptions. Returns: - Pydantic model class for request + Pydantic model class for the request. """ params = inspect.signature(func).parameters @@ -82,14 +83,15 @@ def create_request_model_from_callable(func: Callable, model_name: str, docstrin def create_response_model_from_callable(func: Callable, model_name: str, docstring: Docstring) -> type[BaseModel]: """ - Create a dynamic Pydantic model based on callable's return type. + Create a dynamic Pydantic model based on the callable's return type. Args: - func: The callable (function/method) to analyze - model_name: Name for the generated model + func: The callable (function or method) to analyze. + model_name: Name to use for the generated model. + docstring: Parsed docstring for the callable used to populate the return description. Returns: - Pydantic model class for response + Pydantic model class for the response with a single required field 'result'. 
""" return_type = inspect.signature(func).return_annotation From 9880094d7f92407272a7b095f737b8b4946c4f60 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 16 Sep 2025 21:37:43 +0200 Subject: [PATCH 31/42] Update docstrings --- src/hayhooks/server/pipelines/models.py | 20 ++-- src/hayhooks/server/utils/deploy_utils.py | 109 +++++++++++++++++++--- 2 files changed, 106 insertions(+), 23 deletions(-) diff --git a/src/hayhooks/server/pipelines/models.py b/src/hayhooks/server/pipelines/models.py index 75b0cc3..9c05cdf 100644 --- a/src/hayhooks/server/pipelines/models.py +++ b/src/hayhooks/server/pipelines/models.py @@ -42,7 +42,7 @@ def get_response_model_from_resolved_io( declared_outputs: Mapping of declared output name to OutputResolution. Returns: - A Pydantic model with a single required field 'result' containing the pipeline result. + A Pydantic model with top-level fields matching declared output names. """ fields: dict[str, Any] = {} @@ -57,15 +57,14 @@ def get_response_model_from_resolved_io( def create_request_model_from_callable(func: Callable, model_name: str, docstring: Docstring) -> type[BaseModel]: """ - Create a dynamic Pydantic model based on the callable's signature. + Create a dynamic Pydantic model based on callable's signature. Args: - func: The callable (function or method) to analyze. - model_name: Name to use for the generated model. - docstring: Parsed docstring for the callable used to populate parameter descriptions. + func: The callable (function/method) to analyze + model_name: Name for the generated model Returns: - Pydantic model class for the request. + Pydantic model class for request """ params = inspect.signature(func).parameters @@ -83,15 +82,14 @@ def create_request_model_from_callable(func: Callable, model_name: str, docstrin def create_response_model_from_callable(func: Callable, model_name: str, docstring: Docstring) -> type[BaseModel]: """ - Create a dynamic Pydantic model based on the callable's return type. + Create a dynamic Pydantic model based on callable's return type. Args: - func: The callable (function or method) to analyze. - model_name: Name to use for the generated model. - docstring: Parsed docstring for the callable used to populate the return description. + func: The callable (function/method) to analyze + model_name: Name for the generated model Returns: - Pydantic model class for the response with a single required field 'result'. + Pydantic model class for response """ return_type = inspect.signature(func).return_annotation diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index 58b18d6..809da3d 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -128,7 +128,8 @@ def load_pipeline_module(pipeline_name: str, dir_path: Union[Path, str]) -> Modu The loaded module Raises: - ValueError: If the module cannot be loaded + PipelineWrapperError: If required files or symbols are missing + PipelineModuleLoadError: If the module cannot be loaded """ log.trace(f"Loading pipeline module from {dir_path}") log.trace(f"Is folder present: {Path(dir_path).exists()}") @@ -171,7 +172,13 @@ def load_pipeline_module(pipeline_name: str, dir_path: Union[Path, str]) -> Modu def handle_pipeline_exceptions() -> Callable: - """Decorator to handle pipeline execution exceptions.""" + """ + Decorator factory that wraps endpoint handlers and converts unexpected exceptions + into HTTP 500 responses, optionally including tracebacks based on settings. 
+ + Returns: + A decorator that can be applied to async endpoint handlers. + """ def decorator(func): @wraps(func) # Preserve the original function's metadata @@ -213,6 +220,9 @@ def create_run_endpoint_handler( request_model: The request model response_model: The response model requires_files: Whether the pipeline requires file uploads + + Returns: + A FastAPI endpoint function that executes the pipeline and returns the response model. """ @handle_pipeline_exceptions() @@ -237,6 +247,19 @@ async def run_endpoint_without_files(run_req: request_model) -> response_model: def add_pipeline_wrapper_api_route(app: FastAPI, pipeline_name: str, pipeline_wrapper: BasePipelineWrapper) -> None: + """ + Create or replace the wrapper-based pipeline run endpoint at /{pipeline_name}/run. + + Args: + app: FastAPI application instance. + pipeline_name: Name of the pipeline. + pipeline_wrapper: Initialized pipeline wrapper instance to use as handler target. + + Side Effects: + - Removes any existing route at /{pipeline_name}/run + - Rebuilds and invalidates the OpenAPI schema + - Updates registry metadata with request/response models and file requirement flag + """ clog = log.bind(pipeline_name=pipeline_name) # Determine which run_api method to use (prefer async if available) @@ -307,6 +330,14 @@ def add_pipeline_yaml_api_route(app: FastAPI, pipeline_name: str) -> None: Builds the flat request/response models from declared YAML inputs/outputs and wires a handler that maps the flat body into the nested structure required by Haystack Pipeline.run. + + Args: + app: FastAPI application instance. + pipeline_name: Name of the YAML pipeline. + + Raises: + PipelineNotFoundError: If the pipeline is not registered in the registry. + PipelineYamlError: If the registered object is not an AsyncPipeline or metadata is missing. """ pipeline_instance = registry.get(pipeline_name) if pipeline_instance is None: @@ -371,6 +402,15 @@ def deploy_pipeline_files( app: Optional FastAPI application instance. If provided, the API route will be added. save_files: Whether to save the pipeline files to disk overwrite: Whether to overwrite an existing pipeline + + Returns: + A dictionary containing the deployed pipeline name, e.g. {"name": pipeline_name}. + + Raises: + PipelineAlreadyExistsError: If the pipeline exists and overwrite is False. + PipelineFilesError: If saving files fails. + PipelineModuleLoadError: If loading the pipeline module fails. + PipelineWrapperError: If wrapper creation or setup fails. """ pipeline_wrapper = add_pipeline_wrapper_to_registry(pipeline_name, files, save_files, overwrite) @@ -394,13 +434,22 @@ def deploy_pipeline_yaml( declared inputs/outputs, and set up the API route at /{pipeline_name}/run. Args: - app: Optional FastAPI application instance. If provided, the API route will be added. pipeline_name: Name of the pipeline source_code: YAML pipeline source code overwrite: Whether to overwrite an existing pipeline options: Optional dict with additional deployment options. Supported keys: + - save_file: Optional[bool] - whether to persist the YAML to disk (default: True) - description: Optional[str] - skip_mcp: Optional[bool] + app: Optional FastAPI application instance. If provided, the API route will be added. + + Returns: + A dictionary containing the deployed pipeline name, e.g. {"name": pipeline_name}. + + Raises: + PipelineAlreadyExistsError: If the pipeline exists and overwrite is False. + ValueError: If the YAML cannot be parsed into an AsyncPipeline. 
+ PipelineYamlError: If route creation fails due to invalid registry state. """ # Optionally save YAML to disk as pipelines/{name}.yml (default True) @@ -434,8 +483,16 @@ def add_yaml_pipeline_to_registry( Add a YAML pipeline to the registry. Args: - pipeline_name: Name of the pipeline to deploy - source_code: Source code of the pipeline + pipeline_name: Name of the pipeline to deploy. + source_code: YAML source code of the pipeline. + overwrite: Whether to overwrite an existing pipeline with the same name. + description: Optional description to store in registry metadata. + skip_mcp: Whether to disable MCP integration for this pipeline. + + Raises: + PipelineAlreadyExistsError: If the pipeline exists and overwrite is False. + ValueError: If the YAML cannot be parsed into an AsyncPipeline. + Exception: If inputs/outputs cannot be resolved to build request/response models. """ log.debug(f"Checking if YAML pipeline '{pipeline_name}' already exists: {registry.get(pipeline_name)}") @@ -491,16 +548,22 @@ def add_pipeline_wrapper_to_registry( pipeline_name: str, files: dict[str, str], save_files: bool = True, overwrite: bool = False ) -> BasePipelineWrapper: """ - Add a pipeline to the registry. + Add a wrapper-based pipeline to the registry. Args: - - Args: - pipeline_name: Name of the pipeline to deploy - files: Dictionary mapping filenames to their contents + pipeline_name: Name of the pipeline to deploy. + files: Mapping of relative filenames to their contents. + save_files: Whether to save files under settings.pipelines_dir; if False, uses a temp dir. + overwrite: Whether to overwrite an existing pipeline of the same name. Returns: - dict: Dictionary containing the deployed pipeline name + The initialized and registered PipelineWrapper instance. + + Raises: + PipelineAlreadyExistsError: If the pipeline exists and overwrite is False. + PipelineFilesError: If saving files fails. + PipelineModuleLoadError: If loading the pipeline module fails. + PipelineWrapperError: If wrapper instantiation or setup fails, or required methods are missing. """ log.debug(f"Checking if pipeline '{pipeline_name}' already exists: {registry.get(pipeline_name)}") @@ -581,6 +644,18 @@ def add_pipeline_wrapper_to_registry( def create_pipeline_wrapper_instance(pipeline_module: ModuleType) -> BasePipelineWrapper: + """ + Instantiate a `PipelineWrapper` from a loaded module and verify supported methods. + + Args: + pipeline_module: The loaded module exposing a `PipelineWrapper` class. + + Returns: + An initialized PipelineWrapper instance with capability flags set. + + Raises: + PipelineWrapperError: If instantiation or setup fails, or if no supported run methods are implemented. + """ try: pipeline_wrapper = pipeline_module.PipelineWrapper() except Exception as e: @@ -631,7 +706,14 @@ def create_pipeline_wrapper_instance(pipeline_module: ModuleType) -> BasePipelin def _set_method_implementation_flag(pipeline_wrapper: BasePipelineWrapper, attr_name: str, method_name: str) -> None: - """Helper to check if a method is implemented on the wrapper compared to the base.""" + """ + Helper to check if a method is implemented on the wrapper compared to the base. + + Args: + pipeline_wrapper: The wrapper instance to annotate. + attr_name: The attribute name to set on the wrapper (e.g., "_is_run_api_implemented"). + method_name: The method name to check (e.g., "run_api"). 
+ """ wrapper_method = getattr(pipeline_wrapper, method_name, None) base_method = getattr(BasePipelineWrapper, method_name, None) if wrapper_method and base_method: @@ -687,6 +769,9 @@ def undeploy_pipeline(pipeline_name: str, app: Optional[FastAPI] = None) -> None Args: pipeline_name: Name of the pipeline to undeploy. app: Optional FastAPI application instance. If provided, API routes will be removed. + + Raises: + HTTPException: If the pipeline is not found in the registry (404). """ # Check if pipeline exists in registry if pipeline_name not in registry.get_names(): From f1f892ef38d192586a35e0ffa4a557969584c099 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 16 Sep 2025 21:41:29 +0200 Subject: [PATCH 32/42] Update docstrings --- src/hayhooks/server/utils/deploy_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index 809da3d..38909d1 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -173,11 +173,10 @@ def load_pipeline_module(pipeline_name: str, dir_path: Union[Path, str]) -> Modu def handle_pipeline_exceptions() -> Callable: """ - Decorator factory that wraps endpoint handlers and converts unexpected exceptions - into HTTP 500 responses, optionally including tracebacks based on settings. + Decorator factory that wraps pipeline run methods and processes unexpected exceptions. Returns: - A decorator that can be applied to async endpoint handlers. + A decorator that can be applied to async pipeline run methods. """ def decorator(func): From 0097b860c9235a65c0d3bcebda2ac69cff1c8f40 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Wed, 17 Sep 2025 09:44:26 +0200 Subject: [PATCH 33/42] Add warning when using removed hayhooks pipeline deploy command --- src/hayhooks/cli/pipeline.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/hayhooks/cli/pipeline.py b/src/hayhooks/cli/pipeline.py index d736039..7415d61 100644 --- a/src/hayhooks/cli/pipeline.py +++ b/src/hayhooks/cli/pipeline.py @@ -117,8 +117,15 @@ def deploy_files( _deploy_with_progress(ctx=ctx, name=name, endpoint="deploy_files", payload=payload) -# Register alias: `deploy` -> `deploy-files` -pipeline.command(name="deploy")(deploy_files) +@pipeline.command(name="deploy", context_settings={"ignore_unknown_options": True, "allow_extra_args": True}) +def deploy(_ctx: typer.Context) -> None: + """Removed command; use 'deploy-yaml' or 'deploy-files' instead.""" + show_warning_panel( + "[bold yellow]`hayhooks pipeline deploy` has been removed.[/bold yellow]\n" + "Use: \n" + "`hayhooks pipeline deploy-yaml ` for YAML pipelines or\n" + "`hayhooks pipeline deploy-files ` for PipelineWrapper-based deployments." + ) @pipeline.command() From 32791e1752fabce684565726fd8d5c53380f1138 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Wed, 17 Sep 2025 09:47:38 +0200 Subject: [PATCH 34/42] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 99041fe..dff42fb 100644 --- a/README.md +++ b/README.md @@ -324,7 +324,7 @@ pip install trafilatura ## Deploy a YAML Pipeline -You can deploy a Haystack pipeline directly from its YAML definition using the preferred `/deploy-yaml` endpoint. This mode builds request/response schemas from the YAML-declared `inputs` and `outputs`. 
+You can deploy a Haystack pipeline directly from its YAML definition using the `/deploy-yaml` endpoint. This mode builds request/response schemas from the YAML-declared `inputs` and `outputs`. Note: You can also deploy YAML pipelines from the CLI with `hayhooks pipeline deploy-yaml`. Wrapper-based deployments continue to use `/deploy_files` or the CLI alias `hayhooks pipeline deploy`. From 6f8e3eb446b85c78af9771da895b124bef090e61 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Wed, 17 Sep 2025 09:47:49 +0200 Subject: [PATCH 35/42] Add warning when using hayhooks pipeline deploy command --- src/hayhooks/cli/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hayhooks/cli/pipeline.py b/src/hayhooks/cli/pipeline.py index 7415d61..3868d2d 100644 --- a/src/hayhooks/cli/pipeline.py +++ b/src/hayhooks/cli/pipeline.py @@ -123,7 +123,7 @@ def deploy(_ctx: typer.Context) -> None: show_warning_panel( "[bold yellow]`hayhooks pipeline deploy` has been removed.[/bold yellow]\n" "Use: \n" - "`hayhooks pipeline deploy-yaml ` for YAML pipelines or\n" + "`hayhooks pipeline deploy-yaml ` for YAML-based deployments or\n" "`hayhooks pipeline deploy-files ` for PipelineWrapper-based deployments." ) From 0fc170afbb42836d12caef639162caaac47f4f52 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Wed, 17 Sep 2025 09:51:20 +0200 Subject: [PATCH 36/42] Consistent method naming + add note about type:ignore --- src/hayhooks/server/utils/deploy_utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index 38909d1..0c054fd 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -323,13 +323,18 @@ def add_pipeline_wrapper_api_route(app: FastAPI, pipeline_name: str, pipeline_wr app.setup() -def add_pipeline_yaml_api_route(app: FastAPI, pipeline_name: str) -> None: +def add_yaml_pipeline_api_route(app: FastAPI, pipeline_name: str) -> None: """ Create or replace the YAML pipeline run endpoint at /{pipeline_name}/run. Builds the flat request/response models from declared YAML inputs/outputs and wires a handler that maps the flat body into the nested structure required by Haystack Pipeline.run. + Note: + There's no way in FastAPI to define the type of the request body other than annotating + the endpoint handler. We have to **ignore types several times in this method** to make FastAPI happy while + silencing static type checkers (that would have good reasons to trigger!). + Args: app: FastAPI application instance. pipeline_name: Name of the YAML pipeline. 
@@ -466,7 +471,7 @@ def deploy_pipeline_yaml( ) if app: - add_pipeline_yaml_api_route(app, pipeline_name) + add_yaml_pipeline_api_route(app, pipeline_name) return {"name": pipeline_name} From 99fe7867f9a1626bc0f13813f39e9043d25e9f2f Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Wed, 17 Sep 2025 09:53:25 +0200 Subject: [PATCH 37/42] Update description / comments --- src/hayhooks/cli/pipeline.py | 2 +- src/hayhooks/server/routers/deploy.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hayhooks/cli/pipeline.py b/src/hayhooks/cli/pipeline.py index 3868d2d..481d13b 100644 --- a/src/hayhooks/cli/pipeline.py +++ b/src/hayhooks/cli/pipeline.py @@ -62,7 +62,7 @@ def deploy_yaml( # noqa: PLR0913 ), ] = True, ) -> None: - """Deploy a YAML pipeline using the preferred /deploy-yaml endpoint.""" + """Deploy a YAML pipeline using the /deploy-yaml endpoint.""" if not pipeline_file.exists(): show_error_and_abort("Pipeline file does not exist.", str(pipeline_file)) diff --git a/src/hayhooks/server/routers/deploy.py b/src/hayhooks/server/routers/deploy.py index 6fe53f4..f62c567 100644 --- a/src/hayhooks/server/routers/deploy.py +++ b/src/hayhooks/server/routers/deploy.py @@ -97,7 +97,7 @@ class YamlDeployRequest(BaseModel): tags=["config"], operation_id="yaml_pipeline_deploy", response_model=DeployResponse, - summary="Deploy a pipeline from a YAML definition (preferred)", + summary="Deploy a pipeline from a YAML definition", description=( "Deploys a Haystack pipeline from a YAML string. Builds request/response schemas from declared " "inputs/outputs. Returns 409 if the pipeline already exists and overwrite is false." From 46d3f2376efd94f4fd704bb07b19914322e89321 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Wed, 17 Sep 2025 10:11:16 +0200 Subject: [PATCH 38/42] Update README --- README.md | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index dff42fb..288932b 100644 --- a/README.md +++ b/README.md @@ -553,13 +553,32 @@ Hayhooks can expose YAML-deployed pipelines as MCP Tools. When you deploy a pipe Calling a YAML pipeline via MCP `call_tool` executes the pipeline asynchronously and returns the pipeline result as a JSON string in `TextContent`. 
+Sample YAML for a simple `sum` pipeline using only the `haystack.testing.sample_components.sum.Sum` component: + +```yaml +components: + sum: + init_parameters: {} + type: haystack.testing.sample_components.sum.Sum + +connections: [] + +metadata: {} + +inputs: + values: sum.values + +outputs: + total: sum.total +``` + Example (Streamable HTTP via MCP client): ```python tools = await client.list_tools() -# Find YAML tool by name, e.g., "calc" (the pipeline name) -result = await client.call_tool("calc", {"value": 3}) -assert result.content[0].text == '{"double": {"value": 10}}' +# Find YAML tool by name, e.g., "sum" (the pipeline name) +result = await client.call_tool("sum", {"values": [1, 2, 3]}) +assert result.content[0].text == '{"total": 6}' ``` Notes and limitations: From ee2eb15b6f66eaa02f6bd72ca7c002e07d0c1ff7 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Wed, 17 Sep 2025 10:33:24 +0200 Subject: [PATCH 39/42] Update error messages --- src/hayhooks/server/utils/deploy_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index 0c054fd..b258d04 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -72,7 +72,7 @@ def save_pipeline_files(pipeline_name: str, files: dict[str, str], pipelines_dir return saved_files except Exception as e: - msg = f"Failed to save pipeline files: {e!s}" + msg = f"Failed to save pipeline files for '{pipeline_name}': {e!s}" raise PipelineFilesError(msg) from e @@ -99,7 +99,7 @@ def save_yaml_pipeline_file(pipeline_name: str, source_code: str, pipelines_dir: file_path.write_text(source_code) return str(file_path) except Exception as e: - msg = f"Failed to save YAML pipeline file: {e!s}" + msg = f"Failed to save YAML pipeline file for '{pipeline_name}': {e!s}" raise PipelineFilesError(msg) from e @@ -523,7 +523,7 @@ def add_yaml_pipeline_to_registry( request_model = get_request_model_from_resolved_io(pipeline_name, pipeline_inputs) response_model = get_response_model_from_resolved_io(pipeline_name, pipeline_outputs) except Exception as e: - clog.error(f"Failed creating request/response models for YAML pipeline: {e!s}") + clog.error(f"Failed creating request/response models for YAML pipeline '{pipeline_name}': {e!s}") raise metadata = { From 62630b4355a5b8bbbf17f1a768bc7a61dcbd5f68 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Wed, 17 Sep 2025 10:37:08 +0200 Subject: [PATCH 40/42] Add note about using AsyncPipeline only ; Removed comment --- src/hayhooks/server/utils/deploy_utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index b258d04..c094641 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -348,7 +348,6 @@ def add_yaml_pipeline_api_route(app: FastAPI, pipeline_name: str) -> None: msg = f"Pipeline '{pipeline_name}' not found" raise PipelineNotFoundError(msg) - # Ensure the registered object is a Haystack Pipeline, not a wrapper if not isinstance(pipeline_instance, AsyncPipeline): msg = f"Pipeline '{pipeline_name}' is not a Haystack AsyncPipeline instance" raise PipelineYamlError(msg) @@ -486,6 +485,12 @@ def add_yaml_pipeline_to_registry( """ Add a YAML pipeline to the registry. + Note: + We are always creating an AsyncPipeline instance from YAML source code. 
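+
+A rough sketch of the equivalent behavior, reusing the same `AsyncPipeline.loads` / `run_async` calls introduced earlier in this series (the nested `data` layout shown here addresses inputs per component):
+
+```python
+import asyncio
+from pathlib import Path
+
+from haystack import AsyncPipeline
+
+
+async def main() -> None:
+    # Load the YAML definition as an AsyncPipeline, as Hayhooks does on deploy
+    source = Path("pipelines/inputs_outputs_pipeline.yml").read_text()
+    pipeline = AsyncPipeline.loads(source)
+
+    # The generated /{name}/run handler awaits run_async on the current event loop
+    result = await pipeline.run_async(
+        data={"fetcher": {"urls": ["https://haystack.deepset.ai"]}, "prompt": {"query": "What is Haystack?"}}
+    )
+    print(result)
+
+
+asyncio.run(main())
+```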
+ This is because we are in an async context, so we should avoid running sync methods + using e.g. `run_in_threadpool`. With AsyncPipeline, we can await `run_async` directly, + so we make use of the current event loop. + Args: pipeline_name: Name of the pipeline to deploy. source_code: YAML source code of the pipeline. From e73d3ae14f041ec0adb5e3babab13a39932e6259 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Wed, 17 Sep 2025 11:54:30 +0200 Subject: [PATCH 41/42] Update README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 288932b..cf93e06 100644 --- a/README.md +++ b/README.md @@ -366,6 +366,8 @@ curl -X POST \ -d '{"urls": ["https://haystack.deepset.ai"], "query": "What is Haystack?"}' ``` +Note: when deploying a YAML pipeline, Hayhooks will create an `AsyncPipeline` instance from the YAML source code. This is because we are in an async context, so we should avoid running sync methods using e.g. `run_in_threadpool`. With AsyncPipeline, we can await `run_async` directly, so we make use of the current event loop. + Limitations: - YAML-deployed pipelines do not support OpenAI-compatible chat completion endpoints, so they cannot be used with Open WebUI. If you need chat completion/streaming, use a `PipelineWrapper` and implement `run_chat_completion` or `run_chat_completion_async` (see the OpenAI compatibility section below). From 86cfc71fa2b883625391f03924a5b4b766a3bf2b Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Wed, 17 Sep 2025 13:50:36 +0200 Subject: [PATCH 42/42] Remove hayhooks pipeline deploy references --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index cf93e06..b25c8c0 100644 --- a/README.md +++ b/README.md @@ -165,7 +165,6 @@ hayhooks status # Check the status of the server and show deployed pipelines hayhooks pipeline deploy-files # Deploy a pipeline using PipelineWrapper files (preferred) hayhooks pipeline deploy-yaml # Deploy a pipeline from a YAML file -hayhooks pipeline deploy # Alias for deploy-files hayhooks pipeline undeploy # Undeploy a pipeline hayhooks pipeline run # Run a pipeline ``` @@ -276,7 +275,7 @@ hayhooks pipeline deploy-files -n chat_with_website examples/pipeline_wrappers/c This will deploy the pipeline with the name `chat_with_website`. Any error encountered during development will be printed to the console and show in the server logs. -Alternatively, you can deploy via HTTP: `POST /deploy_files` (CLI alias: `hayhooks pipeline deploy`). +Alternatively, you can deploy via HTTP: `POST /deploy_files`. #### PipelineWrapper development with `overwrite` option @@ -326,7 +325,7 @@ pip install trafilatura You can deploy a Haystack pipeline directly from its YAML definition using the `/deploy-yaml` endpoint. This mode builds request/response schemas from the YAML-declared `inputs` and `outputs`. -Note: You can also deploy YAML pipelines from the CLI with `hayhooks pipeline deploy-yaml`. Wrapper-based deployments continue to use `/deploy_files` or the CLI alias `hayhooks pipeline deploy`. +Note: You can also deploy YAML pipelines from the CLI with `hayhooks pipeline deploy-yaml`. Wrapper-based deployments continue to use `/deploy_files`. Tip: You can obtain a pipeline's YAML from an existing `Pipeline` instance using `pipeline.dumps()`. See the [Haystack serialization docs](https://docs.haystack.deepset.ai/docs/serialization) for details.
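+
+A quick sketch of that tip, serializing a minimal pipeline with `pipeline.dumps()` (remember to append the `inputs`/`outputs` sections by hand before deploying):
+
+```python
+from haystack import Pipeline
+from haystack.components.builders.prompt_builder import PromptBuilder
+
+pipeline = Pipeline()
+pipeline.add_component("prompt", PromptBuilder(template="Answer the question: {{query}}"))
+
+# dumps() returns the YAML definition as a string, without inputs/outputs
+yaml_source = pipeline.dumps()
+print(yaml_source)
+```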