-
Notifications
You must be signed in to change notification settings - Fork 23
feat: Add workflow support to MCP server #178
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
5266e26
c6e8bf5
ab4cdb4
b4b12f7
870a864
ef286c4
6fea82d
1b0d4da
9b5ed83
f48b673
4d1633c
0095946
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,9 @@ | |
| create_glossary_category_assets, | ||
| create_glossary_assets, | ||
| create_glossary_term_assets, | ||
| get_workflow_package_names, | ||
| get_workflows, | ||
| get_workflow_runs, | ||
| create_data_domain_assets, | ||
| create_data_product_assets, | ||
| create_dq_rules, | ||
|
|
@@ -908,6 +911,253 @@ def create_glossary_categories(categories) -> List[Dict[str, Any]]: | |
| return create_glossary_category_assets(categories) | ||
|
|
||
|
|
||
| @mcp.tool() | ||
| def get_workflow_package_names_tool() -> List[str]: | ||
| """ | ||
| Get the values of all available workflow packages. | ||
|
|
||
| This tool returns a list of all workflow package names that can be used | ||
| with other workflow tools. | ||
|
|
||
| Returns: | ||
| List[str]: List of workflow package names (e.g., ['atlan-snowflake', 'atlan-bigquery', ...]) | ||
|
|
||
| Examples: | ||
| # Get all workflow package names | ||
| packages = get_workflow_package_names_tool() | ||
| """ | ||
| return get_workflow_package_names() | ||
|
|
||
|
|
||
| @mcp.tool() | ||
| def get_workflows_tool( | ||
| id: str | None = None, | ||
| workflow_package_name: str | None = None, | ||
| max_results: int = 10, | ||
| ) -> Dict[str, Any]: | ||
| """ | ||
| Retrieve workflows by ID or by package type. | ||
|
|
||
| This tool supports two main use cases: | ||
| 1. Get a specific workflow by ID (returns full workflow details including steps and spec) | ||
| 2. Get multiple workflows by package type (returns metadata only, no full details) | ||
|
|
||
| IMPORTANT: When getting by ID, this tool returns full workflow instructions with all steps and transformations. | ||
| When getting by type, this tool returns only basic metadata to avoid large responses. | ||
|
|
||
| Note: This tool only returns workflows (templates), not workflow_runs. | ||
| To get workflow_run information, use get_workflow_runs_tool. | ||
|
|
||
| When to use this tool: | ||
| - Use when you need to get a specific workflow by ID with full details (steps, spec, transformations) | ||
| - Use when you need to list multiple workflows by package type (metadata only) | ||
| - Use when you need to understand the full workflow execution flow (by ID only) | ||
| - Do NOT use if you need workflow_run (execution) information | ||
|
|
||
| Args: | ||
| id (str, optional): The unique identifier (ID) of the workflow (e.g., 'atlan-snowflake-miner-1714638976'). | ||
| If provided, returns a single workflow with full details (workflow_steps and workflow_spec). | ||
| Either id or workflow_package_name must be provided, but not both. | ||
| workflow_package_name (str, optional): The workflow package name (e.g., 'atlan-snowflake', 'atlan-bigquery', 'SNOWFLAKE'). | ||
| Can be the full package name like 'atlan-snowflake' or the enum name like 'SNOWFLAKE'. | ||
| If provided, returns list of workflows with metadata only (no workflow_steps or workflow_spec). | ||
| Either id or workflow_package_name must be provided, but not both. | ||
| max_results (int, optional): Maximum number of workflows to return when getting by type. Defaults to 10. | ||
| Only used when workflow_package_name is provided. | ||
|
|
||
| Returns: | ||
| Dict[str, Any]: Dictionary containing: | ||
| - workflows: List of workflow dictionaries. When getting by ID, contains single item. | ||
| Each workflow contains: | ||
| - template_name: Name of the template | ||
| - package_name, package_version: Package information | ||
| - source_system, source_category, workflow_type: Source information | ||
| - certified, verified: Certification status | ||
| - creator_email, creator_username: Creator information | ||
| - modifier_email, modifier_username: Last modifier information | ||
| - creation_timestamp: When template was created | ||
| - package_author, package_description, package_repository: Package metadata | ||
| - workflow_steps: Workflow steps (only when getting by ID) | ||
| - workflow_spec: Full workflow specification (only when getting by ID) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need to send the spec? What use cases are you targetting with this tool?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| - total: Total count of workflows (1 when getting by ID, actual count when getting by type) | ||
| - error: None if no error occurred, otherwise the error message | ||
|
|
||
| Examples: | ||
| # Get a specific workflow by ID (full details with steps and spec) | ||
| result = get_workflows_tool(id="atlan-snowflake-miner-1714638976") | ||
| workflow = result.get("workflows", [])[0] # Single workflow in list | ||
| # workflow contains complete workflow definition with workflow_steps and workflow_spec | ||
|
|
||
| # Get Snowflake workflows by type (metadata only) | ||
| result = get_workflows_tool(workflow_package_name="atlan-snowflake") | ||
| workflows = result.get("workflows", []) # List of workflows | ||
|
|
||
| # Get BigQuery workflows with custom limit | ||
| result = get_workflows_tool(workflow_package_name="atlan-bigquery", max_results=50) | ||
|
|
||
| # Get workflows using enum name | ||
| result = get_workflows_tool(workflow_package_name="SNOWFLAKE") | ||
| """ | ||
| return get_workflows( | ||
| id=id, | ||
| workflow_package_name=workflow_package_name, | ||
| max_results=max_results, | ||
| ) | ||
|
|
||
|
|
||
| @mcp.tool() | ||
| def get_workflow_runs_tool( | ||
| workflow_name: str | None = None, | ||
| workflow_phase: str | None = None, | ||
| status: str | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like status and workflow_phase seem to do the same thing |
||
| | None = None, # This is technically a list but we get the input as a string so we parse it into a list | ||
| started_at: str | None = None, | ||
| finished_at: str | None = None, | ||
| from_: int = 0, | ||
| size: int = 100, | ||
| ) -> Dict[str, Any]: | ||
| """ | ||
| Retrieve workflow_runs with flexible filtering options. | ||
|
|
||
| This tool supports two main use cases: | ||
| 1. Filter by specific workflow name and phase (single workflow) | ||
| 2. Filter across all workflows by status list and optional time ranges | ||
|
|
||
| Each workflow_run contains execution details, timing, resource usage, and status information. | ||
|
|
||
| IMPORTANT: This tool returns only metadata and execution status. It does NOT return full workflow | ||
| instructions, steps, or transformations. To get complete workflow details including all steps and | ||
| transformations, use get_workflows_tool with an id parameter instead. | ||
|
|
||
| When to use this tool: | ||
| - Use when you need to find all workflow_runs of a specific workflow by execution phase | ||
| → Provide workflow_name AND workflow_phase (do NOT use status) | ||
| - Use when you need to find workflow_runs across multiple workflows filtered by status and time | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can it get workflow runs across multiple workflows? Since it takes in workflow_name only |
||
| → Provide status (and optionally started_at/finished_at), do NOT provide workflow_name or workflow_phase | ||
| - Use when monitoring workflow execution patterns or analyzing failures | ||
| - Use when you need execution metadata (status, timing, resource usage) for multiple workflow_runs | ||
| - Do NOT use if you need the complete workflow specification (use get_workflows_tool with id instead) | ||
|
|
||
| Args: | ||
| workflow_name (str, optional): Name of the workflow (template) as displayed in the UI | ||
| (e.g., 'atlan-snowflake-miner-1714638976'). If provided, filters runs for that specific workflow. | ||
| This refers to the workflow name, not individual workflow_run IDs. | ||
| workflow_phase (str, optional): Phase of the workflow_run. Common values: 'Succeeded', 'Running', 'Failed', 'Error', 'Pending'. | ||
| Case-insensitive matching is supported. | ||
| REQUIRED when workflow_name is provided. DO NOT use status when workflow_name is provided. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Statement is contradictory
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Verbiage fixed. The instructions should tell the LLM to use This is because the Pyatlan SDK accepts an array of statuses when querying without a workflow name, but only accepts a single status when a workflow name is specified. This SDK limitation impacted how we consolidated both functions into a single MCP tool. |
||
| status (List[str], optional): Array/list of workflow_run phases to filter by. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But the function definition suggest a string
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's confusing, when I had it ask for a list from the MCP tool, the input would always be a string. I searched existing tools in the repo on how they handled this scenario, and what they did was use the |
||
| IMPORTANT: Must be an array/list type, NOT a JSON string. | ||
| Correct format: ["Succeeded", "Failed"] (array/list) | ||
| Incorrect format: '["Succeeded", "Failed"]' (string - will be rejected by schema validation) | ||
| Common values: | ||
| - 'Succeeded': Successfully completed workflow_runs | ||
| - 'Failed': Workflow_runs that failed | ||
| - 'Running': Currently executing workflow_runs | ||
| - 'Error': Workflow_runs that encountered errors | ||
| - 'Pending': Workflow_runs waiting to start | ||
| Case-insensitive matching is supported. | ||
| REQUIRED when workflow_name is NOT provided. DO NOT use status when workflow_name is provided - use workflow_phase instead. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Check statement |
||
| Example: ["Succeeded", "Failed"] # Array/list, not a string | ||
|
|
||
| Parameter Usage Rules: | ||
| - If workflow_name is provided: MUST use workflow_phase (single phase string). Do NOT use status. | ||
| - If workflow_name is NOT provided: MUST use status (list of phases). Do NOT use workflow_phase. | ||
| - These are mutually exclusive usage patterns - do not mix them. | ||
| started_at (str, optional): Lower bound on 'status.startedAt' timestamp. Accepts: | ||
| - Relative time format: 'now-2h', 'now-24h', 'now-7d', 'now-30d' | ||
| - ISO 8601 format: '2024-01-01T00:00:00Z' | ||
| Filters workflow_runs that started at or after this time. | ||
| Only used when workflow_name is not provided. | ||
| finished_at (str, optional): Lower bound on 'status.finishedAt' timestamp. Accepts: | ||
| - Relative time format: 'now-1h', 'now-12h' | ||
| - ISO 8601 format: '2024-01-01T00:00:00Z' | ||
| Filters workflow_runs that finished at or after this time. | ||
| Only used when workflow_name is not provided. | ||
| from_ (int, optional): Starting index of the search results for pagination. Defaults to 0. | ||
| size (int, optional): Maximum number of search results to return. Defaults to 100. | ||
|
|
||
| Returns: | ||
| Dict[str, Any]: Dictionary containing: | ||
| - runs: List of workflow_run dictionaries. Each workflow_run contains: | ||
| Run Metadata: | ||
| - run_id: Unique identifier for this workflow_run | ||
| - run_phase: Execution phase (Succeeded, Running, Failed, etc.) | ||
| - run_started_at: When the workflow_run started (ISO timestamp) | ||
| - run_finished_at: When the workflow_run finished (ISO timestamp, None if still running) | ||
| - run_estimated_duration: Estimated execution duration | ||
| - run_progress: Progress indicator (e.g., "1/3") | ||
| - run_cpu_usage: CPU resource usage duration | ||
| - run_memory_usage: Memory resource usage duration | ||
|
|
||
| Workflow Metadata: | ||
| - workflow_id: Reference to the workflow (template) used | ||
| - workflow_package_name: Package identifier | ||
| - workflow_cron_schedule: Cron schedule if scheduled | ||
| - workflow_cron_timezone: Timezone for cron schedule | ||
| - workflow_creator_id, workflow_creator_email, workflow_creator_username: Creator information | ||
|
christopher-tin-atlan marked this conversation as resolved.
|
||
| - workflow_modifier_id, workflow_modifier_email, workflow_modifier_username: Last modifier information | ||
| - workflow_creation_timestamp: When the workflow was created | ||
| - workflow_archiving_status: Archiving status | ||
|
christopher-tin-atlan marked this conversation as resolved.
|
||
| - total: Total count of workflow_runs matching the criteria (may be larger than the returned list if paginated) | ||
| - error: None if no error occurred, otherwise the error message | ||
|
|
||
| Examples: | ||
| # CORRECT: Get succeeded workflow_runs for a specific workflow (use workflow_phase, NOT status) | ||
|
christopher-tin-atlan marked this conversation as resolved.
|
||
| result = get_workflow_runs_tool(workflow_name="atlan-snowflake-miner-1714638976", workflow_phase="Succeeded") | ||
|
|
||
| # CORRECT: Get running workflow_runs for a specific workflow (use workflow_phase, NOT status) | ||
| result = get_workflow_runs_tool(workflow_name="atlan-snowflake-miner-1714638976", workflow_phase="Running") | ||
|
|
||
| # CORRECT: Get failed workflow_runs with pagination (use workflow_phase, NOT status) | ||
| result = get_workflow_runs_tool(workflow_name="atlan-snowflake-miner-1714638976", workflow_phase="Failed", from_=0, size=50) | ||
|
|
||
| # CORRECT: Get succeeded workflow_runs from the last 2 hours (across all workflows - use status, NOT workflow_phase) | ||
| result = get_workflow_runs_tool(status=["Succeeded"], started_at="now-2h") | ||
|
|
||
| # CORRECT: Get failed workflow_runs from the last 24 hours (across all workflows - use status, NOT workflow_phase) | ||
| result = get_workflow_runs_tool(status=["Failed"], started_at="now-24h") | ||
|
|
||
| # CORRECT: Get multiple statuses with both time filters (across all workflows - use status, NOT workflow_phase) | ||
| result = get_workflow_runs_tool( | ||
| status=["Succeeded", "Failed"], | ||
| started_at="now-7d", | ||
| finished_at="now-1h" | ||
| ) | ||
|
|
||
| # CORRECT: Get running workflow_runs (across all workflows - use status, NOT workflow_phase) | ||
| result = get_workflow_runs_tool(status=["Running"]) | ||
|
|
||
| # Analyze workflow_run durations | ||
| runs = result.get("runs", []) | ||
| for run in runs: | ||
| if run.get("run_finished_at") and run.get("run_started_at"): | ||
| print(f"Workflow run {run['run_id']} took {run.get('run_estimated_duration')}") | ||
|
|
||
| # Analyze failure rates | ||
| failed_runs = [r for r in result.get("runs", []) if r.get("run_phase") == "Failed"] | ||
| print(f"Found {len(failed_runs)} failed workflow_runs in the time range") | ||
| """ | ||
| try: | ||
| # Parse list parameter if it comes as a JSON string (common with MCP clients) | ||
| status = parse_list_parameter(status) | ||
| except (json.JSONDecodeError, ValueError) as e: | ||
| return { | ||
| "runs": [], | ||
| "total": 0, | ||
| "error": f"Parameter parsing error for status: {str(e)}", | ||
| } | ||
|
|
||
| return get_workflow_runs( | ||
| workflow_name=workflow_name, | ||
| workflow_phase=workflow_phase, | ||
| status=status, | ||
| started_at=started_at, | ||
| finished_at=finished_at, | ||
| from_=from_, | ||
| size=size, | ||
| ) | ||
|
|
||
|
|
||
| @mcp.tool() | ||
| def create_domains(domains) -> List[Dict[str, Any]]: | ||
| """ | ||
|
|
||

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we should not send all this information
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed
I left the creator information and package name, as it does give useful information for our end users on who last created/modified the package internally which they can use for troubleshooting.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Noted I saw your request on removing PII data in later comments. I have removed created/modified user information.