Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -921,7 +921,9 @@ Guidelines:
- Use natural, conversational tone throughout

COMMAND DISCOVERY:
- When you want to execute JupyterLab commands, ALWAYS use the 'discover_commands' tool first to find available commands and their metadata.
- When you want to execute JupyterLab commands, ALWAYS use the 'discover_commands' tool first to find available commands and their metadata, with the optional query parameter.
- The query should typically be a single word, e.g., 'terminal', 'notebook', 'cell', 'file', 'edit', 'view', 'run', etc, to find relevant commands.
- If searching with a query does not yield the desired command, try again with a different query or use an empty query to list all commands.
- This ensures you have complete information about command IDs, descriptions, and required arguments before attempting to execute them. Only after discovering the available commands should you use the 'execute_command' tool with the correct command ID and arguments.

TOOL SELECTION GUIDELINES:
Expand Down
85 changes: 45 additions & 40 deletions src/tools/commands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,51 +20,56 @@ export function createDiscoverCommandsTool(commands: CommandRegistry): ITool {
.nullable()
.describe('Optional search query to filter commands')
}),
execute: async () => {
try {
const commandList: Array<{
id: string;
label?: string;
caption?: string;
description?: string;
args?: any;
}> = [];
execute: async (input: { query?: string | null }) => {
const { query } = input;
const commandList: Array<{
id: string;
label?: string;
caption?: string;
description?: string;
args?: any;
}> = [];

// Get all command IDs
const commandIds = commands.listCommands();
// Get all command IDs
const commandIds = commands.listCommands();

for (const id of commandIds) {
try {
// Get command metadata using various CommandRegistry methods
const description = await commands.describedBy(id);
const label = commands.label(id);
const caption = commands.caption(id);
const usage = commands.usage(id);
for (const id of commandIds) {
// Get command metadata using various CommandRegistry methods
const description = await commands.describedBy(id);
const label = commands.label(id);
const caption = commands.caption(id);
const usage = commands.usage(id);

const command = {
id,
label: label || undefined,
caption: caption || undefined,
description: usage || undefined,
args: description?.args || undefined
};

commandList.push({
id,
label: label || undefined,
caption: caption || undefined,
description: usage || undefined,
args: description?.args || undefined
});
} catch (error) {
// Some commands might not have descriptions, skip them
commandList.push({ id });
// Filter by query if provided
if (query) {
const searchTerm = query.toLowerCase();
const matchesQuery =
id.toLowerCase().includes(searchTerm) ||
label?.toLowerCase().includes(searchTerm) ||
caption?.toLowerCase().includes(searchTerm) ||
usage?.toLowerCase().includes(searchTerm);

if (matchesQuery) {
commandList.push(command);
}
} else {
commandList.push(command);
}

return {
success: true,
commandCount: commandList.length,
commands: commandList
};
} catch (error) {
return {
success: false,
error: `Failed to discover commands: ${error instanceof Error ? error.message : String(error)}`
};
}

return {
success: true,
commandCount: commandList.length,
commands: commandList
};
}
});
}
Expand All @@ -87,7 +92,7 @@ export function createExecuteCommandTool(
.optional()
.describe('Optional arguments to pass to the command')
}),
needsApproval: async (_context, { commandId }) => {
needsApproval: async (context, { commandId }) => {
// Use configurable list of commands requiring approval
const commandsRequiringApproval =
settingsModel.config.commandsRequiringApproval;
Expand Down
127 changes: 127 additions & 0 deletions ui-tests/tests/commands-tool.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Copyright (c) Jupyter Development Team.
* Distributed under the terms of the Modified BSD License.
*/

import { expect, galata, test } from '@jupyterlab/galata';
import { DEFAULT_SETTINGS_MODEL_SETTINGS, openChatPanel } from './test-utils';

/** Timeout, in milliseconds, passed to every assertion that waits on the chat. */
const EXPECT_TIMEOUT = 120 * 1000;

// Values mocked for the JupyterLab notification settings.
const notificationSettings = {
  checkForUpdates: false,
  fetchNews: 'false',
  doNotDisturbMode: true
};

// Values mocked for the AI settings model: the shared test defaults, with
// tools switched on and a system prompt that nudges the model to call the
// tool with specific parameters.
const aiSettingsModel = {
  ...DEFAULT_SETTINGS_MODEL_SETTINGS['@jupyterlite/ai:settings-model'],
  toolsEnabled: true,
  systemPrompt:
    'When asked to discover commands, call the discover_commands tool with the exact query parameter provided in the user message. Always use the query parameter exactly as specified.'
};

// Layer both overrides on top of the Galata default settings for every test
// in this file.
test.use({
  mockSettings: {
    ...galata.DEFAULT_SETTINGS,
    '@jupyterlab/apputils-extension:notification': notificationSettings,
    '@jupyterlite/ai:settings-model': aiSettingsModel
  }
});

/** Matches the `commandCount` field in the JSON shown by an expanded tool call. */
const COMMAND_COUNT_PATTERN = /"commandCount":\s*(\d+)/;

/**
 * Sends `prompt` through the chat panel, waits for the single
 * `discover_commands` tool call, expands it, and returns the `commandCount`
 * value parsed from the text of the tool call element.
 *
 * @param page - The page fixture, typed after the first parameter of
 *   `openChatPanel` so no extra import is needed.
 * @param prompt - The user message typed into the chat input.
 * @returns The number parsed from `"commandCount": <n>` in the tool call text.
 * @throws Error if the tool call text does not contain a `commandCount`.
 */
async function discoverCommandCount(
  page: Parameters<typeof openChatPanel>[0],
  prompt: string
): Promise<number> {
  const panel = await openChatPanel(page);
  const input = panel
    .locator('.jp-chat-input-container')
    .getByRole('combobox');
  const sendButton = panel.locator(
    '.jp-chat-input-container .jp-chat-send-button'
  );

  await input.pressSequentially(prompt);
  await sendButton.click();

  // Wait for AI response
  await expect(
    panel.locator('.jp-chat-message-header:has-text("Jupyternaut")')
  ).toHaveCount(1, { timeout: EXPECT_TIMEOUT });

  // Wait for tool call to appear
  const toolCall = panel.locator('.jp-ai-tool-call');
  await expect(toolCall).toHaveCount(1, { timeout: EXPECT_TIMEOUT });

  // Verify the tool was called
  await expect(toolCall).toContainText('discover_commands', {
    timeout: EXPECT_TIMEOUT
  });

  // Click to expand the tool call
  await toolCall.click();

  // Reading the text right after the click races with the result being
  // rendered; wait until the count is actually present before parsing it.
  await expect(toolCall).toContainText(COMMAND_COUNT_PATTERN, {
    timeout: EXPECT_TIMEOUT
  });

  // Parse the commandCount from the JSON response
  const toolResultText = (await toolCall.textContent()) ?? '';
  const digits = COMMAND_COUNT_PATTERN.exec(toolResultText)?.[1];
  if (digits === undefined) {
    throw new Error(
      `No commandCount found in tool call output: ${toolResultText}`
    );
  }
  return parseInt(digits, 10);
}

test.describe('#commandsTool', () => {
  test('should filter commands using query parameter', async ({ page }) => {
    test.setTimeout(120 * 1000);

    // Very specific prompt to ensure the query parameter is used
    const PROMPT =
      'Use the discover_commands tool with query parameter set to "notebook" to find notebook-related commands';

    const count = await discoverCommandCount(page, PROMPT);

    // The filtered results should have significantly fewer than 300 commands
    // (JupyterLab typically has 300+ total commands, but only a subset contain "notebook")
    expect(count).toBeLessThan(300);
    expect(count).toBeGreaterThan(0);
  });

  test('should return all commands without query parameter', async ({
    page
  }) => {
    test.setTimeout(120 * 1000);

    // Prompt without specifying a query parameter
    const PROMPT =
      'Use the discover_commands tool without any query parameter to list all available commands';

    const count = await discoverCommandCount(page, PROMPT);

    // Should have many commands (typically 400+)
    expect(count).toBeGreaterThan(400);
  });
});
Loading