Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
250 changes: 250 additions & 0 deletions modelcontextprotocol/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
create_glossary_category_assets,
create_glossary_assets,
create_glossary_term_assets,
get_workflow_package_names,
get_workflows,
get_workflow_runs,
create_data_domain_assets,
create_data_product_assets,
create_dq_rules,
Expand Down Expand Up @@ -908,6 +911,253 @@ def create_glossary_categories(categories) -> List[Dict[str, Any]]:
return create_glossary_category_assets(categories)


@mcp.tool()
def get_workflow_package_names_tool() -> List[str]:
"""
Get the values of all available workflow packages.

This tool returns a list of all workflow package names that can be used
with other workflow tools.

Returns:
List[str]: List of workflow package names (e.g., ['atlan-snowflake', 'atlan-bigquery', ...])

Examples:
# Get all workflow package names
packages = get_workflow_package_names_tool()
"""
return get_workflow_package_names()


@mcp.tool()
def get_workflows_tool(
id: str | None = None,
workflow_package_name: str | None = None,
max_results: int = 10,
) -> Dict[str, Any]:
"""
Retrieve workflows by ID or by package type.

This tool supports two main use cases:
1. Get a specific workflow by ID (returns full workflow details including steps and spec)
2. Get multiple workflows by package type (returns metadata only, no full details)

IMPORTANT: When getting by ID, this tool returns full workflow instructions with all steps and transformations.
When getting by type, this tool returns only basic metadata to avoid large responses.

Note: This tool only returns workflows (templates), not workflow_runs.
To get workflow_run information, use get_workflow_runs_tool.

When to use this tool:
- Use when you need to get a specific workflow by ID with full details (steps, spec, transformations)
- Use when you need to list multiple workflows by package type (metadata only)
- Use when you need to understand the full workflow execution flow (by ID only)
- Do NOT use if you need workflow_run (execution) information

Args:
id (str, optional): The unique identifier (ID) of the workflow (e.g., 'atlan-snowflake-miner-1714638976').
If provided, returns a single workflow with full details (workflow_steps and workflow_spec).
Either id or workflow_package_name must be provided, but not both.
workflow_package_name (str, optional): The workflow package name (e.g., 'atlan-snowflake', 'atlan-bigquery', 'SNOWFLAKE').
Can be the full package name like 'atlan-snowflake' or the enum name like 'SNOWFLAKE'.
If provided, returns list of workflows with metadata only (no workflow_steps or workflow_spec).
Either id or workflow_package_name must be provided, but not both.
max_results (int, optional): Maximum number of workflows to return when getting by type. Defaults to 10.
Only used when workflow_package_name is provided.

Returns:
Dict[str, Any]: Dictionary containing:
- workflows: List of workflow dictionaries. When getting by ID, contains single item.
Each workflow contains:
- template_name: Name of the template
- package_name, package_version: Package information
- source_system, source_category, workflow_type: Source information
- certified, verified: Certification status
- creator_email, creator_username: Creator information
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should not send all this information

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed

  • package_version
  • source_system
  • source_category
  • workflow_type
  • certified
  • verified

I left the creator information and package name, as it does give useful information for our end users on who last created/modified the package internally which they can use for troubleshooting.

image

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Noted I saw your request on removing PII data in later comments. I have removed created/modified user information.

- modifier_email, modifier_username: Last modifier information
- creation_timestamp: When template was created
- package_author, package_description, package_repository: Package metadata
- workflow_steps: Workflow steps (only when getting by ID)
- workflow_spec: Full workflow specification (only when getting by ID)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need to send the spec? What use cases are you targetting with this tool?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. workflow_specs has been removed.

Keeping workflow_steps as it is useful to identify patterns of failures which can help the LLM make recommendations on possible issues. However, it does expose our internal pipeline patterns, which is visible in the UI, so I'm assuming that is not an issue.

image

- total: Total count of workflows (1 when getting by ID, actual count when getting by type)
- error: None if no error occurred, otherwise the error message

Examples:
# Get a specific workflow by ID (full details with steps and spec)
result = get_workflows_tool(id="atlan-snowflake-miner-1714638976")
workflow = result.get("workflows", [])[0] # Single workflow in list
# workflow contains complete workflow definition with workflow_steps and workflow_spec

# Get Snowflake workflows by type (metadata only)
result = get_workflows_tool(workflow_package_name="atlan-snowflake")
workflows = result.get("workflows", []) # List of workflows

# Get BigQuery workflows with custom limit
result = get_workflows_tool(workflow_package_name="atlan-bigquery", max_results=50)

# Get workflows using enum name
result = get_workflows_tool(workflow_package_name="SNOWFLAKE")
"""
return get_workflows(
id=id,
workflow_package_name=workflow_package_name,
max_results=max_results,
)


@mcp.tool()
def get_workflow_runs_tool(
workflow_name: str | None = None,
workflow_phase: str | None = None,
status: str
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like status and workflow_phase seem to do the same thing

| None = None, # This is technically a list but we get the input as a string so we parse it into a list
started_at: str | None = None,
finished_at: str | None = None,
from_: int = 0,
size: int = 100,
) -> Dict[str, Any]:
"""
Retrieve workflow_runs with flexible filtering options.

This tool supports two main use cases:
1. Filter by specific workflow name and phase (single workflow)
2. Filter across all workflows by status list and optional time ranges

Each workflow_run contains execution details, timing, resource usage, and status information.

IMPORTANT: This tool returns only metadata and execution status. It does NOT return full workflow
instructions, steps, or transformations. To get complete workflow details including all steps and
transformations, use get_workflows_tool with an id parameter instead.

When to use this tool:
- Use when you need to find all workflow_runs of a specific workflow by execution phase
→ Provide workflow_name AND workflow_phase (do NOT use status)
- Use when you need to find workflow_runs across multiple workflows filtered by status and time
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can it get workflow runs across multiple workflows? Since it takes in workflow_name only

→ Provide status (and optionally started_at/finished_at), do NOT provide workflow_name or workflow_phase
- Use when monitoring workflow execution patterns or analyzing failures
- Use when you need execution metadata (status, timing, resource usage) for multiple workflow_runs
- Do NOT use if you need the complete workflow specification (use get_workflows_tool with id instead)

Args:
workflow_name (str, optional): Name of the workflow (template) as displayed in the UI
(e.g., 'atlan-snowflake-miner-1714638976'). If provided, filters runs for that specific workflow.
This refers to the workflow name, not individual workflow_run IDs.
workflow_phase (str, optional): Phase of the workflow_run. Common values: 'Succeeded', 'Running', 'Failed', 'Error', 'Pending'.
Case-insensitive matching is supported.
REQUIRED when workflow_name is provided. DO NOT use status when workflow_name is provided.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Statement is contradictory

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Verbiage fixed.

workflow_phase (str, optional): Phase of the workflow_run. Common values: 'Succeeded', 'Running', 'Failed', 'Error', 'Pending'.
            Case-insensitive matching is supported.
            REQUIRED when workflow_name is provided. USE the status parameter when workflow_name is NOT provided.

The instructions should tell the LLM to use workflow_phase when workflow_name is provided. If no workflow name is provided, use status instead.

This is because the Pyatlan SDK accepts an array of statuses when querying without a workflow name, but only accepts a single status when a workflow name is specified. This SDK limitation impacted how we consolidated both functions into a single MCP tool.

status (List[str], optional): Array/list of workflow_run phases to filter by.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But the function definition suggest a string

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's confusing, when I had it ask for a list from the MCP tool, the input would always be a string. I searched existing tools in the repo on how they handled this scenario, and what they did was use the parse_list_parameter to perform the conversion. From what I've researched, it's a painpoint of MCP (or maybe just FastMCP).

IMPORTANT: Must be an array/list type, NOT a JSON string.
Correct format: ["Succeeded", "Failed"] (array/list)
Incorrect format: '["Succeeded", "Failed"]' (string - will be rejected by schema validation)
Common values:
- 'Succeeded': Successfully completed workflow_runs
- 'Failed': Workflow_runs that failed
- 'Running': Currently executing workflow_runs
- 'Error': Workflow_runs that encountered errors
- 'Pending': Workflow_runs waiting to start
Case-insensitive matching is supported.
REQUIRED when workflow_name is NOT provided. DO NOT use status when workflow_name is provided - use workflow_phase instead.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check statement

Example: ["Succeeded", "Failed"] # Array/list, not a string

Parameter Usage Rules:
- If workflow_name is provided: MUST use workflow_phase (single phase string). Do NOT use status.
- If workflow_name is NOT provided: MUST use status (list of phases). Do NOT use workflow_phase.
- These are mutually exclusive usage patterns - do not mix them.
started_at (str, optional): Lower bound on 'status.startedAt' timestamp. Accepts:
- Relative time format: 'now-2h', 'now-24h', 'now-7d', 'now-30d'
- ISO 8601 format: '2024-01-01T00:00:00Z'
Filters workflow_runs that started at or after this time.
Only used when workflow_name is not provided.
finished_at (str, optional): Lower bound on 'status.finishedAt' timestamp. Accepts:
- Relative time format: 'now-1h', 'now-12h'
- ISO 8601 format: '2024-01-01T00:00:00Z'
Filters workflow_runs that finished at or after this time.
Only used when workflow_name is not provided.
from_ (int, optional): Starting index of the search results for pagination. Defaults to 0.
size (int, optional): Maximum number of search results to return. Defaults to 100.

Returns:
Dict[str, Any]: Dictionary containing:
- runs: List of workflow_run dictionaries. Each workflow_run contains:
Run Metadata:
- run_id: Unique identifier for this workflow_run
- run_phase: Execution phase (Succeeded, Running, Failed, etc.)
- run_started_at: When the workflow_run started (ISO timestamp)
- run_finished_at: When the workflow_run finished (ISO timestamp, None if still running)
- run_estimated_duration: Estimated execution duration
- run_progress: Progress indicator (e.g., "1/3")
- run_cpu_usage: CPU resource usage duration
- run_memory_usage: Memory resource usage duration

Workflow Metadata:
- workflow_id: Reference to the workflow (template) used
- workflow_package_name: Package identifier
- workflow_cron_schedule: Cron schedule if scheduled
- workflow_cron_timezone: Timezone for cron schedule
- workflow_creator_id, workflow_creator_email, workflow_creator_username: Creator information
Comment thread
christopher-tin-atlan marked this conversation as resolved.
- workflow_modifier_id, workflow_modifier_email, workflow_modifier_username: Last modifier information
- workflow_creation_timestamp: When the workflow was created
- workflow_archiving_status: Archiving status
Comment thread
christopher-tin-atlan marked this conversation as resolved.
- total: Total count of workflow_runs matching the criteria (may be larger than the returned list if paginated)
- error: None if no error occurred, otherwise the error message

Examples:
# CORRECT: Get succeeded workflow_runs for a specific workflow (use workflow_phase, NOT status)
Comment thread
christopher-tin-atlan marked this conversation as resolved.
result = get_workflow_runs_tool(workflow_name="atlan-snowflake-miner-1714638976", workflow_phase="Succeeded")

# CORRECT: Get running workflow_runs for a specific workflow (use workflow_phase, NOT status)
result = get_workflow_runs_tool(workflow_name="atlan-snowflake-miner-1714638976", workflow_phase="Running")

# CORRECT: Get failed workflow_runs with pagination (use workflow_phase, NOT status)
result = get_workflow_runs_tool(workflow_name="atlan-snowflake-miner-1714638976", workflow_phase="Failed", from_=0, size=50)

# CORRECT: Get succeeded workflow_runs from the last 2 hours (across all workflows - use status, NOT workflow_phase)
result = get_workflow_runs_tool(status=["Succeeded"], started_at="now-2h")

# CORRECT: Get failed workflow_runs from the last 24 hours (across all workflows - use status, NOT workflow_phase)
result = get_workflow_runs_tool(status=["Failed"], started_at="now-24h")

# CORRECT: Get multiple statuses with both time filters (across all workflows - use status, NOT workflow_phase)
result = get_workflow_runs_tool(
status=["Succeeded", "Failed"],
started_at="now-7d",
finished_at="now-1h"
)

# CORRECT: Get running workflow_runs (across all workflows - use status, NOT workflow_phase)
result = get_workflow_runs_tool(status=["Running"])

# Analyze workflow_run durations
runs = result.get("runs", [])
for run in runs:
if run.get("run_finished_at") and run.get("run_started_at"):
print(f"Workflow run {run['run_id']} took {run.get('run_estimated_duration')}")

# Analyze failure rates
failed_runs = [r for r in result.get("runs", []) if r.get("run_phase") == "Failed"]
print(f"Found {len(failed_runs)} failed workflow_runs in the time range")
"""
try:
# Parse list parameter if it comes as a JSON string (common with MCP clients)
status = parse_list_parameter(status)
except (json.JSONDecodeError, ValueError) as e:
return {
"runs": [],
"total": 0,
"error": f"Parameter parsing error for status: {str(e)}",
}

return get_workflow_runs(
workflow_name=workflow_name,
workflow_phase=workflow_phase,
status=status,
started_at=started_at,
finished_at=finished_at,
from_=from_,
size=size,
)


@mcp.tool()
def create_domains(domains) -> List[Dict[str, Any]]:
"""
Expand Down
8 changes: 8 additions & 0 deletions modelcontextprotocol/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@
DQRuleType,
DQRuleSpecification,
)
from .workflows import (
get_workflow_package_names,
get_workflows,
get_workflow_runs,
)

__all__ = [
"search_assets",
Expand All @@ -31,6 +36,9 @@
"create_glossary_category_assets",
"create_glossary_assets",
"create_glossary_term_assets",
"get_workflow_package_names",
"get_workflows",
"get_workflow_runs",
"create_data_domain_assets",
"create_data_product_assets",
"CertificateStatus",
Expand Down
Loading