From 67e6a3a66fdf29dd4a620d7e188d60475b794be6 Mon Sep 17 00:00:00 2001 From: Weston Platter Date: Wed, 31 Dec 2025 11:15:41 -0700 Subject: [PATCH 1/4] docs: add Cursor mcp instructions --- README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/README.md b/README.md index 59887d6..568e912 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,27 @@ docker buildx build -f mcp-local/Dockerfile -t arm-mcp mcp-local Choose the configuration that matches your MCP client: +#### Cursor + +Add to `.mcp.json` in your project: + +```json +{ + "mcpServers": { + "arm-mcp": { + "command": "docker", + "args": [ + "run", + "--rm", + "-i", + "-v", "/path/to/your/workspace:/workspace", + "arm-mcp" + ] + } + } +} +``` + #### GitHub Copilot (VS Code) Add to `.vscode/mcp.json` in your project, or globally at `~/Library/Application Support/Code/User/mcp.json` (macOS): From e61a13e7c465b5c78ff3d9121e1c83609199190a Mon Sep 17 00:00:00 2001 From: Weston Platter Date: Wed, 31 Dec 2025 11:17:11 -0700 Subject: [PATCH 2/4] refactor(pep 8): move imports to the top; 2 lines between functions --- embedding-generation/generate-chunks.py | 29 +++++++++---------- .../local_vectorstore_creation.py | 4 +++ 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/embedding-generation/generate-chunks.py b/embedding-generation/generate-chunks.py index 7f777fe..6bd2845 100644 --- a/embedding-generation/generate-chunks.py +++ b/embedding-generation/generate-chunks.py @@ -12,12 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import requests -from requests.adapters import HTTPAdapter -from urllib3.util.retry import Retry -from bs4 import BeautifulSoup import argparse -import sys, os +import sys +import os import re import uuid import yaml @@ -25,6 +22,14 @@ import datetime import json +import boto3 +from botocore.exceptions import NoCredentialsError, ClientError +from bs4 import BeautifulSoup +import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + + # Create a session with retry logic for resilient HTTP requests def create_retry_session(retries=5, backoff_factor=1, status_forcelist=(500, 502, 503, 504)): """Create a requests session with automatic retry on failures.""" @@ -45,10 +50,6 @@ def create_retry_session(retries=5, backoff_factor=1, status_forcelist=(500, 502 # Global session for all HTTP requests http_session = create_retry_session() -# Boto3 for S3 operations -import boto3 -from botocore.exceptions import NoCredentialsError, ClientError - def ensure_intrinsic_chunks_from_s3(local_folder='intrinsic_chunks', s3_bucket='arm-github-copilot-extension', @@ -57,7 +58,6 @@ def ensure_intrinsic_chunks_from_s3(local_folder='intrinsic_chunks', Ensure the local 'intrinsic_chunks' folder exists and is populated with files from S3. If the folder does not exist, create it and download all files from the S3 prefix. """ - import os if not os.path.exists(local_folder): os.makedirs(local_folder, exist_ok=True) print(f"Created local folder: {local_folder}") @@ -86,10 +86,8 @@ def ensure_intrinsic_chunks_from_s3(local_folder='intrinsic_chunks', To fix: 1. Prevent multiple learning paths from being used (compare URLs to existing chunks OR delete overlaps) 2. Learning Path titles must come from index page...send through function along with Graviton. - ''' - yaml_dir = 'yaml_data' details_file = 'info/chunk_details.csv' @@ -98,7 +96,6 @@ def ensure_intrinsic_chunks_from_s3(local_folder='intrinsic_chunks', # Global var to prevent duplication entries from cross platform learning paths cross_platform_lps_dont_duplicate = [] - # Increase the file size limit, which defaults to '131,072' csv.field_size_limit(10**9) #1,000,000,000 (1 billion), smaller than 64-bit space but avoids 'python overflowerror' @@ -196,7 +193,6 @@ def createTextSnippet(main_row): return - def createIntrinsicsDatabaseChunks(): def htmlToMarkdown(html_string): # Step 0: Remove '

Operation

' as it isn't needed @@ -315,7 +311,6 @@ def htmlToMarkdown(html_string): ''' - def processLearningPath(url,type): github_raw_link = "https://raw.githubusercontent.com/ArmDeveloperEcosystem/arm-learning-paths/refs/heads/production/content" site_link = "https://learn.arm.com" @@ -462,6 +457,7 @@ def readInCSV(csv_file): return csv_dict, csv_length + def getMarkdownGitHubURLsFromPage(url): GH_urls = [] SITE_urls = [] @@ -523,6 +519,7 @@ def obtainMarkdownContentFromGitHubMDFile(gh_url): return md_content + def obtainTextSnippets__Markdown(content, min_words=300, max_words=500, min_final_words=200): """Split content into chunks based on headers and word count constraints.""" @@ -620,6 +617,7 @@ def createChunk(text_snippet,WEBSITE_url,keywords,title): return chunk + def printChunks(chunks): for chunk_dict in chunks: print('='*100) @@ -690,7 +688,6 @@ def recordChunk(): print(f"{file_name} === {chunk.title}") - def main(): diff --git a/embedding-generation/local_vectorstore_creation.py b/embedding-generation/local_vectorstore_creation.py index 45e6ec5..08f5899 100644 --- a/embedding-generation/local_vectorstore_creation.py +++ b/embedding-generation/local_vectorstore_creation.py @@ -24,6 +24,7 @@ from sentence_transformers import SentenceTransformer from usearch.index import Index + def load_local_yaml_files() -> List[Dict]: """Load locally stored YAML files and return their contents as a list of dictionaries.""" print("Loading local YAML files") @@ -63,6 +64,7 @@ def load_local_yaml_files() -> List[Dict]: print(f"Successfully loaded {len(yaml_contents)} YAML files") return yaml_contents + def create_embeddings(contents: List[str], model_name: str = 'all-MiniLM-L6-v2') -> np.ndarray: """Create embeddings for the given contents using SentenceTransformers.""" print(f"Creating embeddings using model: {model_name}") @@ -71,6 +73,7 @@ def create_embeddings(contents: List[str], model_name: str = 'all-MiniLM-L6-v2') print(f"Created embeddings with shape: {embeddings.shape}") return embeddings + def create_usearch_index(embeddings: np.ndarray, metadata: List[Dict]) -> Tuple[Index, List[Dict]]: """Create a USearch index with the given embeddings and metadata.""" print("Creating USearch index") @@ -100,6 +103,7 @@ def create_usearch_index(embeddings: np.ndarray, metadata: List[Dict]) -> Tuple[ print(f"Added {len(index)} vectors to the index") return index, metadata + def main(): print("Starting the USearch datastore creation process") From 18c135e30ec26c294692f4b4d36b0557faa3fa07 Mon Sep 17 00:00:00 2001 From: Weston Platter Date: Wed, 31 Dec 2025 11:25:00 -0700 Subject: [PATCH 3/4] docs: clarify the mcp is for Claude Code --- README.md | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 568e912..e7aa74c 100644 --- a/README.md +++ b/README.md @@ -34,31 +34,32 @@ docker buildx build --platform linux/arm64,linux/amd64 -f mcp-local/Dockerfile - For a single-platform build (faster): ```bash -docker buildx build -f mcp-local/Dockerfile -t arm-mcp mcp-local +# NOTE - building locally involves generating the Knowledge Base semantic embeddings and can take up to 20 minutes +docker buildx build -f mcp-local/Dockerfile -t arm-mcp mcp-local --load ``` ### 2. Configure Your MCP Client Choose the configuration that matches your MCP client: -#### Cursor +#### Claude Code Add to `.mcp.json` in your project: ```json { - "mcpServers": { - "arm-mcp": { - "command": "docker", - "args": [ - "run", - "--rm", - "-i", - "-v", "/path/to/your/workspace:/workspace", - "arm-mcp" - ] - } + "mcpServers": { + "arm-mcp": { + "command": "docker", + "args": [ + "run", + "--rm", + "-i", + "-v", "/path/to/your/workspace:/workspace", + "arm-mcp" + ] } + } } ``` From 6b4e45f4881a7a8b7582d30a22c011d8473ecdb5 Mon Sep 17 00:00:00 2001 From: Weston Platter Date: Wed, 31 Dec 2025 11:46:37 -0700 Subject: [PATCH 4/4] refactor: ignore the invocation_reasons.yaml --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..40f9a2f --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +invocation_reasons.yaml +