diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..40f9a2f --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +invocation_reasons.yaml + diff --git a/README.md b/README.md index 59887d6..e7aa74c 100644 --- a/README.md +++ b/README.md @@ -34,13 +34,35 @@ docker buildx build --platform linux/arm64,linux/amd64 -f mcp-local/Dockerfile - For a single-platform build (faster): ```bash -docker buildx build -f mcp-local/Dockerfile -t arm-mcp mcp-local +# NOTE - building locally involves generating the Knowledge Base semantic embeddings and can take up to 20 minutes +docker buildx build -f mcp-local/Dockerfile -t arm-mcp mcp-local --load ``` ### 2. Configure Your MCP Client Choose the configuration that matches your MCP client: +#### Claude Code + +Add to `.mcp.json` in your project: + +```json +{ + "mcpServers": { + "arm-mcp": { + "command": "docker", + "args": [ + "run", + "--rm", + "-i", + "-v", "/path/to/your/workspace:/workspace", + "arm-mcp" + ] + } + } +} +``` + #### GitHub Copilot (VS Code) Add to `.vscode/mcp.json` in your project, or globally at `~/Library/Application Support/Code/User/mcp.json` (macOS): diff --git a/embedding-generation/generate-chunks.py b/embedding-generation/generate-chunks.py index 7f777fe..6bd2845 100644 --- a/embedding-generation/generate-chunks.py +++ b/embedding-generation/generate-chunks.py @@ -12,12 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import requests -from requests.adapters import HTTPAdapter -from urllib3.util.retry import Retry -from bs4 import BeautifulSoup import argparse -import sys, os +import sys +import os import re import uuid import yaml @@ -25,6 +22,14 @@ import datetime import json +import boto3 +from botocore.exceptions import NoCredentialsError, ClientError +from bs4 import BeautifulSoup +import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + + # Create a session with retry logic for resilient HTTP requests def create_retry_session(retries=5, backoff_factor=1, status_forcelist=(500, 502, 503, 504)): """Create a requests session with automatic retry on failures.""" @@ -45,10 +50,6 @@ def create_retry_session(retries=5, backoff_factor=1, status_forcelist=(500, 502 # Global session for all HTTP requests http_session = create_retry_session() -# Boto3 for S3 operations -import boto3 -from botocore.exceptions import NoCredentialsError, ClientError - def ensure_intrinsic_chunks_from_s3(local_folder='intrinsic_chunks', s3_bucket='arm-github-copilot-extension', @@ -57,7 +58,6 @@ def ensure_intrinsic_chunks_from_s3(local_folder='intrinsic_chunks', Ensure the local 'intrinsic_chunks' folder exists and is populated with files from S3. If the folder does not exist, create it and download all files from the S3 prefix. """ - import os if not os.path.exists(local_folder): os.makedirs(local_folder, exist_ok=True) print(f"Created local folder: {local_folder}") @@ -86,10 +86,8 @@ def ensure_intrinsic_chunks_from_s3(local_folder='intrinsic_chunks', To fix: 1. Prevent multiple learning paths from being used (compare URLs to existing chunks OR delete overlaps) 2. Learning Path titles must come from index page...send through function along with Graviton. - ''' - yaml_dir = 'yaml_data' details_file = 'info/chunk_details.csv' @@ -98,7 +96,6 @@ def ensure_intrinsic_chunks_from_s3(local_folder='intrinsic_chunks', # Global var to prevent duplication entries from cross platform learning paths cross_platform_lps_dont_duplicate = [] - # Increase the file size limit, which defaults to '131,072' csv.field_size_limit(10**9) #1,000,000,000 (1 billion), smaller than 64-bit space but avoids 'python overflowerror' @@ -196,7 +193,6 @@ def createTextSnippet(main_row): return - def createIntrinsicsDatabaseChunks(): def htmlToMarkdown(html_string): # Step 0: Remove '