diff --git a/.github/workflows/deploy-docs-prod.yaml b/.github/workflows/deploy-docs-prod.yaml index 3e89648607..78461b0113 100644 --- a/.github/workflows/deploy-docs-prod.yaml +++ b/.github/workflows/deploy-docs-prod.yaml @@ -12,6 +12,10 @@ on: jobs: deploy: + if: | + github.event_name == 'push' || + (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || + github.event_name == 'workflow_dispatch' runs-on: ubuntu-latest steps: diff --git a/.github/workflows/deploy-docs-staging.yaml b/.github/workflows/deploy-docs-staging.yaml index f95ad0061c..cf4fb5772e 100644 --- a/.github/workflows/deploy-docs-staging.yaml +++ b/.github/workflows/deploy-docs-staging.yaml @@ -12,6 +12,10 @@ on: jobs: deploy: + if: | + github.event_name == 'push' || + (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || + github.event_name == 'workflow_dispatch' runs-on: ubuntu-latest steps: diff --git a/.github/workflows/lighthouse-check.yaml b/.github/workflows/lighthouse-check.yaml new file mode 100644 index 0000000000..3d8c9730ae --- /dev/null +++ b/.github/workflows/lighthouse-check.yaml @@ -0,0 +1,536 @@ +name: Lighthouse check + +on: + pull_request: + types: [opened, synchronize, ready_for_review] + +permissions: + issues: write + pull-requests: write + +env: + # To change the default depth level: + # 0 — Top-level navigation only (e.g. /index.html, /guide/guides.html, /developer/validmind-library.html, etc.) + # 1 — All first-level subdirectories (e.g. /guide/*.html) + # 2 — All second-level subdirectories (e.g. /guide/attestation/*.html) + # Note: While the crawler technically supports deeper levels, expect the workflow to take >2-12 hours to complete + DEFAULT_DEPTH: '0' + +jobs: + lighthouse: + runs-on: ubuntu-latest + if: github.event.pull_request.draft == false + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Get commit SHA + id: get_sha + run: | + echo "COMMIT_SHA=$(git rev-parse HEAD)" >> $GITHUB_ENV + echo "COMMIT_SHA_SHORT=$(git rev-parse --short HEAD)" >> $GITHUB_ENV + + - name: Set PR preview URL + id: set_url + run: | + echo "PREVIEW_URL=https://docs-demo.vm.validmind.ai/pr_previews/${{ github.head_ref }}" >> $GITHUB_ENV + echo "DEPTH=${{ env.DEFAULT_DEPTH }}" >> $GITHUB_ENV + + - name: Check for PR preview URL and sitemap + id: check_preview + run: | + # Function to check if URL exists + check_url() { + curl --head --silent --fail "$1" > /dev/null + return $? + } + + # Wait for preview URL to be available (up to 60 minutes) + echo "Waiting for preview site to become available ..." + for i in {1..60}; do + if check_url "$PREVIEW_URL/index.html"; then + echo "Info: Preview site is now available" + break + fi + + if [ $i -eq 60 ]; then + echo "Error: Preview URL did not become available after 60 minutes at $PREVIEW_URL/index.html" + exit 1 + fi + + echo "Attempt $i/60: Preview site not ready yet, waiting 1 minute..." + sleep 60 + done + + # Now check for sitemap + if ! check_url "$PREVIEW_URL/sitemap.xml"; then + echo "Error: Sitemap does not exist at $PREVIEW_URL/sitemap.xml" + exit 1 + fi + + # Check if installation page is accessible with auth + echo "Debug: Checking installation page with URL-based auth..." + auth_url="https://${{ secrets.INSTALLATION_USER }}:${{ secrets.INSTALLATION_PW }}@docs-demo.vm.validmind.ai/pr_previews/${{ github.head_ref }}/installation/index.html" + if ! 
curl -v --head --silent --fail --anyauth "$auth_url" 2>&1; then
+            echo "Error: Installation page is not accessible with authentication at $auth_url"
+            exit 1
+          fi
+          echo "Info: Successfully accessed password-protected installation page"
+
+          echo "preview_exists=true" >> $GITHUB_OUTPUT
+
+      - name: Install Lighthouse CI
+        if: steps.check_preview.outputs.preview_exists == 'true'
+        run: npm install -g @lhci/cli
+
+      - name: Install required Python packages
+        if: steps.check_preview.outputs.preview_exists == 'true'
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests beautifulsoup4
+
+      - name: Generate URLs to check
+        if: steps.check_preview.outputs.preview_exists == 'true'
+        id: generate_urls
+        run: |
+          BASE_URL="$PREVIEW_URL"
+
+          # Create a Python script to crawl the site
+          cat > crawl.py << 'EOF'
+          import requests
+          from bs4 import BeautifulSoup
+          import sys
+          from urllib.parse import urljoin, urlparse
+          import json
+          import xml.etree.ElementTree as ET
+          import base64
+          import os
+
+          # Define root pages to check
+          ROOT_PAGES = [
+              "index.html",
+              "get-started/get-started.html",
+              "guide/guides.html",
+              "developer/validmind-library.html",
+              "support/support.html",
+              "releases/all-releases.html",
+              "training/training.html"
+          ]
+
+          def get_auth_headers(url):
+              # Only use auth for installation pages
+              if 'installation/' in url:
+                  # Create auth headers from environment variables
+                  auth_string = base64.b64encode(f"{os.environ['INSTALLATION_USER']}:{os.environ['INSTALLATION_PW']}".encode()).decode()
+                  return {"Authorization": f"Basic {auth_string}"}
+              return {}
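+
+          # A quick illustration of the helper above (the credentials and the
+          # resulting token are made up; base64("user:pw") == "dXNlcjpwdw=="):
+          #   get_auth_headers("installation/index.html")
+          #   -> {"Authorization": "Basic dXNlcjpwdw=="}
+          #   get_auth_headers("guide/guides.html")
+          #   -> {}   # no auth needed outside installation/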
+
+          def get_url_depth(url):
+              # Parse the URL to get just the path
+              path = urlparse(url).path
+              # Remove .html extension for depth calculation
+              path = path.replace('.html', '')
+              # Remove any leading/trailing slashes
+              path = path.strip('/')
+
+              # Split into segments and count non-empty ones
+              segments = [x for x in path.split('/') if x]
+
+              # For PR preview URLs, we need to skip the first 5 segments:
+              # /pr_previews/username/branch/name/
+              if 'pr_previews' in path:
+                  # Skip the first 5 segments (pr_previews/username/branch/name/)
+                  segments = segments[5:]
+
+              # Debug the depth calculation
+              # print(f"URL depth calculation - Path: {path}, Segments: {segments}, Depth: {len(segments)}", file=sys.stderr)
+
+              return len(segments)
+
+          def get_urls_from_sitemap(sitemap_url, max_depth):
+              try:
+                  print(f"Fetching sitemap from {sitemap_url}", file=sys.stderr)
+                  # Don't use auth for sitemap
+                  response = requests.get(sitemap_url)
+                  print(f"Sitemap response status: {response.status_code}", file=sys.stderr)
+                  if response.status_code == 200:
+                      print(f"Sitemap content: {response.text[:500]}...", file=sys.stderr)
+                      root = ET.fromstring(response.content)
+                      # Get all URLs from sitemap
+                      all_urls = set()
+
+                      for url in root.findall('.//{http://www.sitemaps.org/schemas/sitemap/0.9}url'):
+                          loc = url.find('{http://www.sitemaps.org/schemas/sitemap/0.9}loc')
+                          if loc is not None:
+                              full_url = loc.text
+                              parsed_url = urlparse(full_url)
+
+                              # Extract the path part after the base URL
+                              path = parsed_url.path
+                              # Remove leading slash if present
+                              path = path.lstrip('/')
+
+                              # Only include .html files
+                              if path.endswith('.html'):
+                                  # Check depth
+                                  if get_url_depth(path) <= max_depth:
+                                      # Remove any segments that match the PR preview path
+                                      segments = path.split('/')
+                                      # Keep only the segments after the PR preview path
+                                      pr_preview_index = -1
+                                      for i, segment in enumerate(segments):
+                                          if segment == 'pr_previews':
+                                              pr_preview_index = i
+                                              break
+                                      if pr_preview_index >= 0:
+                                          segments = segments[pr_preview_index + 4:]  # Skip pr_previews/username/branch/name
+                                          path = '/'.join(segments)
+                                      all_urls.add(path)
+                                      print(f"Found URL in sitemap: {path}", file=sys.stderr)
+
+                      print(f"Found {len(all_urls)} URLs in sitemap:", file=sys.stderr)
+                      for url in sorted(all_urls):
+                          print(f"  {url}", file=sys.stderr)
+                      return sorted(list(all_urls))
+                  else:
+                      print(f"Failed to fetch sitemap: {response.status_code}", file=sys.stderr)
+              except Exception as e:
+                  print(f"Error processing sitemap {sitemap_url}: {str(e)}", file=sys.stderr)
+              return []
+
+          def get_links(url, max_depth, visited=None):
+              if visited is None:
+                  visited = set()
+
+              current_depth = get_url_depth(url)
+              print(f"Checking URL {url} at depth {current_depth}", file=sys.stderr)
+
+              if current_depth > max_depth or url in visited:
+                  print(f"Skipping {url} - depth {current_depth} > {max_depth} or already visited", file=sys.stderr)
+                  return set()
+
+              visited.add(url)
+              links = set()
+
+              try:
+                  print(f"Fetching {url}", file=sys.stderr)
+                  headers = get_auth_headers(url)
+                  response = requests.get(url, headers=headers)
+                  print(f"Response status: {response.status_code}", file=sys.stderr)
+                  if response.status_code == 200:
+                      soup = BeautifulSoup(response.text, 'html.parser')
+                      print(f"Found {len(soup.find_all('a', href=True))} links on page", file=sys.stderr)
+
+                      for a in soup.find_all('a', href=True):
+                          href = a['href']
+                          print(f"Processing link: {href}", file=sys.stderr)
+
+                          # Skip external links and anchors
+                          if href.startswith('#') or href.startswith('http'):
+                              print(f"Skipping external/anchor link: {href}", file=sys.stderr)
+                              continue
+
+                          # Convert relative URLs to absolute
+                          full_url = urljoin(url, href)
+                          print(f"Converted to full URL: {full_url}", file=sys.stderr)
+
+                          # Only include URLs from the same base domain
+                          if urlparse(full_url).netloc == urlparse(url).netloc:
+                              # Extract just the path part
+                              path = urlparse(full_url).path
+                              # Remove leading slash if present
+                              path = path.lstrip('/')
+
+                              # Only include .html files
+                              if path.endswith('.html'):
+                                  print(f"Found HTML link: {path}", file=sys.stderr)
+                                  links.add(path)
+                                  # Only recursively get links if we haven't hit max depth
+                                  if get_url_depth(path) < max_depth:
+                                      print(f"Recursively checking {path} at depth {get_url_depth(path)}", file=sys.stderr)
+                                      links.update(get_links(full_url, max_depth, visited))
+                                  else:
+                                      print(f"Skipping recursive check for {path} - at max depth", file=sys.stderr)
+                          else:
+                              print(f"Skipping external domain link: {href}", file=sys.stderr)
+              except Exception as e:
+                  print(f"Error processing {url}: {str(e)}", file=sys.stderr)
+
+              return links
+
+          # Get command line arguments
+          base_url = sys.argv[1]
+          max_depth = int(sys.argv[2])
+
+          print(f"Base URL: {base_url}", file=sys.stderr)
+          print(f"Max depth: {max_depth}", file=sys.stderr)
+
+          # Get all URLs
+          all_urls = set()
+
+          if max_depth == 0:
+              # For depth 0, only check ROOT_PAGES
+              print("Depth is 0, only checking ROOT_PAGES", file=sys.stderr)
+              for root in ROOT_PAGES:
+                  all_urls.add(root)
+                  print(f"Added root page: {root}", file=sys.stderr)
+          else:
+              # For depth > 0, use sitemap
+              print(f"Depth is {max_depth}, using sitemap", file=sys.stderr)
+              sitemap_url = f"{base_url}/sitemap.xml"
+              sitemap_urls = get_urls_from_sitemap(sitemap_url, max_depth)
+              print(f"Found {len(sitemap_urls)} URLs in sitemap", file=sys.stderr)
+              all_urls.update(sitemap_urls)
+
+          # Print URLs to stdout, ensuring proper URL construction
+          print(f"Total URLs found: {len(all_urls)}", file=sys.stderr)
for url in sorted(all_urls): + # Remove any leading slashes from the URL to avoid double slashes + url = url.lstrip('/') + # Construct the full URL by joining base_url and url with a single slash + full_url = f"{base_url.rstrip('/')}/{url}" + print(full_url) + print(f"Added URL: {full_url}", file=sys.stderr) + EOF + + # Run the crawler + python crawl.py "$BASE_URL" "$DEPTH" > lhci-urls.txt + + echo "Lighthouse will check the following URLs:" + cat lhci-urls.txt + echo -e "\nTotal number of URLs: $(wc -l < lhci-urls.txt)" + + # Verify we have URLs + if [ ! -s lhci-urls.txt ]; then + echo "Error: No URLs were generated. Check the debug output above." + exit 1 + fi + + - name: Create Lighthouse config + if: steps.check_preview.outputs.preview_exists == 'true' + run: | + cat > .lighthouserc.js << 'EOF' + const fs = require('fs'); + const urls = fs.readFileSync('lhci-urls.txt', 'utf-8').split('\n').filter(Boolean); + + // Add auth to installation URLs using the same format as the URL check step + const urlsWithAuth = urls.map(url => { + if (url.includes('/installation/')) { + return `https://${process.env.INSTALLATION_USER}:${process.env.INSTALLATION_PW}@${new URL(url).host}${new URL(url).pathname}`; + } + return url; + }); + + module.exports = { + ci: { + collect: { + url: urlsWithAuth, + numberOfRuns: 1, + settings: { + formFactor: 'desktop', + screenEmulation: { + mobile: false, + width: 1350, + height: 940, + deviceScaleFactor: 1, + disabled: false, + }, + throttling: { + rttMs: 40, + throughputKbps: 10240, + cpuSlowdownMultiplier: 1, + requestLatencyMs: 0, + downloadThroughputKbps: 0, + uploadThroughputKbps: 0, + }, + }, + }, + assert: { + assertions: { + 'categories:accessibility': ['error', { minScore: 0.9 }], + }, + }, + upload: { + target: 'temporary-public-storage', + }, + }, + }; + EOF + + - name: Run Lighthouse audit + if: steps.check_preview.outputs.preview_exists == 'true' + uses: treosh/lighthouse-ci-action@v11 + id: lighthouse + continue-on-error: true + env: + INSTALLATION_USER: ${{ secrets.INSTALLATION_USER }} + INSTALLATION_PW: ${{ secrets.INSTALLATION_PW }} + with: + configPath: .lighthouserc.js + uploadArtifacts: true + temporaryPublicStorage: true + + - name: Check Lighthouse audit result + if: steps.check_preview.outputs.preview_exists == 'true' + run: | + # Check if the manifest exists and is valid JSON + if [ -z "${{ steps.lighthouse.outputs.manifest }}" ]; then + echo "Error: Lighthouse audit failed - no manifest output" + exit 1 + fi + + # Try to parse the manifest as JSON + if ! echo '${{ steps.lighthouse.outputs.manifest }}' | jq . > /dev/null 2>&1; then + echo "Error: Lighthouse audit failed - invalid manifest format" + exit 1 + fi + + # Check if any URLs were successfully audited + if ! 
echo '${{ steps.lighthouse.outputs.manifest }}' | jq -e 'length > 0' > /dev/null 2>&1; then
+            echo "Error: Lighthouse audit failed - no URLs were successfully audited"
+            exit 1
+          fi
+
+      - name: Post Lighthouse results comment
+        if: steps.check_preview.outputs.preview_exists == 'true'
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const runId = context.runId;
+            const baseUrl = process.env.PREVIEW_URL;
+            const commitSha = process.env.COMMIT_SHA;
+            const commitShaShort = process.env.COMMIT_SHA_SHORT;
+
+            // Get artifacts for this run
+            const { data: artifacts } = await github.rest.actions.listWorkflowRunArtifacts({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              run_id: runId,
+            });
+
+            // Lighthouse artifact
+            const lighthouseArtifact = artifacts.artifacts.find(a => a.name === 'lighthouse-report');
+            const lighthouseArtifactUrl = lighthouseArtifact
+              ? `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}/artifacts/${lighthouseArtifact.id}`
+              : null;
+
+            // Lighthouse
+            const manifest = '${{ steps.lighthouse.outputs.manifest }}';
+            let manifestJson;
+            try {
+              manifestJson = JSON.parse(manifest);
+              if (!Array.isArray(manifestJson) || manifestJson.length === 0) {
+                throw new Error('Invalid manifest format or empty results');
+              }
+            } catch (error) {
+              console.error('Error parsing Lighthouse manifest:', error);
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body: `## Lighthouse check results\n\n⚠️ WARN: Failed to parse Lighthouse results. Please check the [workflow run](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}) for details.`
+              });
+              return;
+            }
+
+            // Delete old Lighthouse comments
+            const { data: comments } = await github.rest.issues.listComments({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+            });
+
+            // Delete any previous comments from this workflow
+            for (const comment of comments) {
+              if (comment.user.login === 'github-actions[bot]' &&
+                  comment.body.includes('## Lighthouse check results')) {
+                try {
+                  console.log(`Deleting Lighthouse comment ${comment.id}`);
+                  await github.rest.issues.deleteComment({
+                    owner: context.repo.owner,
+                    repo: context.repo.repo,
+                    comment_id: comment.id,
+                  });
+                  console.log(`Successfully deleted Lighthouse comment ${comment.id}`);
+                } catch (error) {
+                  console.error(`Failed to delete Lighthouse comment ${comment.id}:`, error);
+                }
+              }
+            }
+
+            // Calculate average accessibility score
+            const scores = manifestJson.map(run => run.summary.accessibility);
+            const avgScore = scores.reduce((a, b) => a + b, 0) / scores.length;
+            const lighthouseScore = avgScore.toFixed(2);
+
+            const lighthouseReportUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}`;
+            let lighthouseComment = '';
+            if (parseFloat(lighthouseScore) >= 0.9) {
+              lighthouseComment = `✓ INFO: Average accessibility score is **${lighthouseScore}** (required: ≥0.9) — [View the workflow run](${lighthouseReportUrl})`;
+            } else {
+              lighthouseComment = `⚠️ WARN: Average accessibility score is **${lighthouseScore}** (required: ≥0.9) — [Check the workflow run](${lighthouseReportUrl})`;
+            }
+
+            const stripAuth = url => {
+              try {
+                const u = new URL(url);
+                u.username = '';
+                u.password = '';
+                return u.toString();
+              } catch {
+                return url;
+              }
+            };
+
+            // Helper to get the public report URL from htmlPath
+            const getReportUrl = (run) => {
+              if (run.report
&& Array.isArray(run.report)) { + // Find the public .report.html URL + const htmlReport = run.report.find(r => r.endsWith('.report.html') && r.startsWith('http')); + if (htmlReport) return htmlReport; + // Fallback: first report if available + if (run.report.length > 0) return run.report[0]; + } + // Fallback: just show the workflow run if nothing else + return lighthouseReportUrl; + }; + + // Parse the links output from the Lighthouse step + const links = (() => { + try { + return JSON.parse(`${{ steps.lighthouse.outputs.links }}`); + } catch { + return {}; + } + })(); + + const scoresTable = manifestJson + .map(run => { + const formatScore = (score) => score === null ? 'N/A' : score.toFixed(2); + const displayPath = stripAuth(run.url).replace(baseUrl, ''); + // Use the public report URL from the links output, fallback to workflow run if missing + const reportUrl = links[run.url] || lighthouseReportUrl; + return `| [${displayPath}](${reportUrl}) | ${formatScore(run.summary.accessibility)} | ${formatScore(run.summary.performance)} | ${formatScore(run.summary['best-practices'])} | ${formatScore(run.summary.seo)} |`; + }) + .join('\n'); + + let comment = `## Lighthouse check results\n\n`; + comment += `${lighthouseComment}\n\n`; + comment += `
<details>\n<summary>Show Lighthouse scores</summary>\n\n`;
+            comment += `Folder depth level checked: **${process.env.DEPTH}**\n\n`;
+            comment += `Commit SHA: [${commitShaShort}](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/commit/${commitSha})\n\n`;
+            comment += `Modify the workflow to check a different depth:\n`;
+            comment += `- 0: Top-level navigation only — /index.html, /guide/guides.html, ...\n`;
+            comment += `- 1: All first-level subdirectories — /guide/\*.html, /developer/\*.html, ...\n`;
+            comment += `- 2: All second-level subdirectories — /guide/attestation/\*.html, ...\n\n`;
+            comment += `| Page | Accessibility | Performance | Best Practices | SEO |\n`;
+            comment += `|------|---------------|-------------|----------------|-----|\n`;
+            comment += `${scoresTable}\n\n`;
+            comment += `</details>
\n\n`; + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: comment + }); \ No newline at end of file diff --git a/.github/workflows/merge-main-into-staging.yaml b/.github/workflows/merge-main-into-staging.yaml index c9a712ca40..4a6f6afb96 100644 --- a/.github/workflows/merge-main-into-staging.yaml +++ b/.github/workflows/merge-main-into-staging.yaml @@ -39,11 +39,16 @@ jobs: title: 'Merge main into staging' body: 'Automatically merge main into staging branch.' - - name: Merge pull request + - name: Merge pull request (with retries) if: ${{ steps.pr-number.outputs.pull-request-number != '' }} - run: gh pr merge --merge --auto "${{ steps.pr-number.outputs.pull-request-number }}" env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + for i in {1..5}; do + gh pr merge --merge --auto "${{ steps.pr-number.outputs.pull-request-number }}" && break + echo "Merge failed, retrying in 10 seconds ..." + sleep 10 + done - name: Delete pull request branch if: ${{ success() && steps.pr-number.outputs.pull-request-number != '' }} diff --git a/.github/workflows/vale-lint.yaml b/.github/workflows/vale-lint.yaml new file mode 100644 index 0000000000..8a037dfc97 --- /dev/null +++ b/.github/workflows/vale-lint.yaml @@ -0,0 +1,140 @@ +name: Vale linter + +on: + pull_request: + types: [opened, synchronize, ready_for_review] + +permissions: + issues: write + pull-requests: write + +jobs: + vale: + runs-on: ubuntu-latest + if: github.event.pull_request.draft == false + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Install Vale + run: | + wget https://github.com/errata-ai/vale/releases/download/v2.28.0/vale_2.28.0_Linux_64-bit.tar.gz + tar -xvzf vale_2.28.0_Linux_64-bit.tar.gz + sudo mv vale /usr/local/bin/ + + - name: Run Vale + id: vale + continue-on-error: true + run: | + vale --output=JSON site/ > vale-report.json || { + echo "Vale linting found issues" + exit 1 + } + + - name: Format Vale report for PR comment + run: jq . vale-report.json > vale-report-pretty.json + + - name: Upload Vale report as artifact + uses: actions/upload-artifact@v4 + with: + name: vale-report + path: vale-report.json + retention-days: 1 + + - name: Post Vale results comment + uses: actions/github-script@v6 + with: + script: | + const runId = context.runId; + // Get artifacts for this run + const { data: artifacts } = await github.rest.actions.listWorkflowRunArtifacts({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: runId, + }); + const valeArtifact = artifacts.artifacts.find(a => a.name === 'vale-report'); + const valeArtifactUrl = valeArtifact + ? 
`https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}/artifacts/${valeArtifact.id}` + : null; + + // Delete old Vale comments + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + }); + + for (const comment of comments) { + if (comment.user.login === 'github-actions[bot]' && comment.body.includes('## Vale source linter')) { + try { + console.log(`Deleting Vale comment ${comment.id}`); + await github.rest.issues.deleteComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: comment.id, + }); + console.log(`Successfully deleted Vale comment ${comment.id}`); + } catch (error) { + console.error(`Failed to delete Vale comment ${comment.id}:`, error); + } + } + } + + const fs = require('fs'); + const report = JSON.parse(fs.readFileSync('vale-report.json', 'utf8')); + + let comment = '## Vale source linter\n\n'; + + if (Object.keys(report).length === 0) { + comment += `✓ INFO: No writing issues were found ([report](${valeArtifactUrl}))\n\n`; + } else { + // Build the summary output + let summaryOutput = ''; + let totalIssues = 0; + let fileCount = 0; + + for (const [file, issues] of Object.entries(report)) { + totalIssues += issues.length; + fileCount++; + + // Include first 30 files to keep comment size manageable + if (fileCount <= 30) { + summaryOutput += `### ${file}\n`; + // Only include first 10 issues per file + const issuesToShow = issues.slice(0, 10); + for (const issue of issuesToShow) { + summaryOutput += `- Line ${issue.Line}: ${issue.Message} (${issue.Severity})\n`; + } + if (issues.length > 10) { + summaryOutput += `- ... and ${issues.length - 10} more issues\n`; + } + summaryOutput += '\n'; + } + } + + const workflowRunUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}`; + comment += `⚠️ WARN: Found **${totalIssues}** issues across **${fileCount}** files — [Check the workflow run](${workflowRunUrl}) or [download the full report](${valeArtifactUrl})\n\n`; + + // Add the summary output in a collapsed section + comment += `
<details>\n<summary>Show first 30 files with issues</summary>\n\n`;
+            comment += summaryOutput;
+            if (fileCount > 30) {
+              comment += `\n... and ${fileCount - 30} more files with issues\n`;
+            }
+            comment += `\n</details>
\n\n`; + } + + // Ensure comment doesn't exceed GitHub's limit + const MAX_COMMENT_LENGTH = 60000; // Leave some buffer + if (comment.length > MAX_COMMENT_LENGTH) { + comment = comment.substring(0, MAX_COMMENT_LENGTH) + + `\n\n... (comment truncated due to length) - See full report in artifacts`; + } + + github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: comment + }); \ No newline at end of file diff --git a/.github/workflows/validate-docs-site.yaml b/.github/workflows/validate-docs-site.yaml index 2bdd873180..3c7052e4c9 100644 --- a/.github/workflows/validate-docs-site.yaml +++ b/.github/workflows/validate-docs-site.yaml @@ -46,7 +46,6 @@ jobs: cat render_errors.log; exit 1; } - # See if site/notebooks/ has updates # Checks the current PR branch against the target branch - name: Filter changed files @@ -102,7 +101,7 @@ jobs: echo "No warnings or errors detected during Quarto render" fi - # Demo bucket is in us-east-1 + # Demo bucket is in us-east-1 - name: Configure AWS credentials run: aws configure set aws_access_key_id ${{ secrets.AWS_ACCESS_KEY_ID }} && aws configure set aws_secret_access_key ${{ secrets.AWS_SECRET_ACCESS_KEY }} && aws configure set default.region us-east-1 @@ -113,10 +112,31 @@ jobs: uses: actions/github-script@v6 with: script: | - const url = `https://docs-demo.vm.validmind.ai/pr_previews/${{ github.head_ref }}/index.html`; + const previewUrl = `https://docs-demo.vm.validmind.ai/pr_previews/${{ github.head_ref }}/index.html`; + + // Delete old preview comments + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + }); + + for (const comment of comments) { + if (comment.user.login === 'github-actions[bot]' && comment.body.includes('## Validate docs site')) { + await github.rest.issues.deleteComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: comment.id, + }); + } + } + + let comment = `## Validate docs site\n\n`; + comment += `✓ INFO: A live preview of the docs site is available — [Open the preview](${previewUrl})\n\n`; + github.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, issue_number: context.issue.number, - body: `A PR preview is available: [Preview URL](${url})` + body: comment }); diff --git a/.vale.ini b/.vale.ini new file mode 100644 index 0000000000..2e53c3e747 --- /dev/null +++ b/.vale.ini @@ -0,0 +1,13 @@ +StylesPath = .vale/styles +MinAlertLevel = suggestion + +Skips = _*, */_*, plugin/* + +Packages = Google + +[*.md] +BasedOnStyles = Vale, Google + +[*.qmd] +BasedOnStyles = Vale, Google + diff --git a/.vale/styles/Google/AMPM.yml b/.vale/styles/Google/AMPM.yml new file mode 100644 index 0000000000..37b49edf87 --- /dev/null +++ b/.vale/styles/Google/AMPM.yml @@ -0,0 +1,9 @@ +extends: existence +message: "Use 'AM' or 'PM' (preceded by a space)." +link: "https://developers.google.com/style/word-list" +level: error +nonword: true +tokens: + - '\d{1,2}[AP]M\b' + - '\d{1,2} ?[ap]m\b' + - '\d{1,2} ?[aApP]\.[mM]\.' diff --git a/.vale/styles/Google/Acronyms.yml b/.vale/styles/Google/Acronyms.yml new file mode 100644 index 0000000000..f41af0189b --- /dev/null +++ b/.vale/styles/Google/Acronyms.yml @@ -0,0 +1,64 @@ +extends: conditional +message: "Spell out '%s', if it's unfamiliar to the audience." 
+link: 'https://developers.google.com/style/abbreviations'
+level: suggestion
+ignorecase: false
+# Ensures that the existence of 'first' implies the existence of 'second'.
+first: '\b([A-Z]{3,5})\b'
+second: '(?:\b[A-Z][a-z]+ )+\(([A-Z]{3,5})\)'
+# ... with the exception of these:
+exceptions:
+  - API
+  - ASP
+  - CLI
+  - CPU
+  - CSS
+  - CSV
+  - DEBUG
+  - DOM
+  - DPI
+  - FAQ
+  - GCC
+  - GDB
+  - GET
+  - GPU
+  - GTK
+  - GUI
+  - HTML
+  - HTTP
+  - HTTPS
+  - IDE
+  - JAR
+  - JSON
+  - JSX
+  - LESS
+  - LLDB
+  - NET
+  - NOTE
+  - NVDA
+  - OSS
+  - PATH
+  - PDF
+  - PHP
+  - POST
+  - RAM
+  - REPL
+  - RSA
+  - SCM
+  - SCSS
+  - SDK
+  - SQL
+  - SSH
+  - SSL
+  - SVG
+  - TBD
+  - TCP
+  - TODO
+  - URI
+  - URL
+  - USB
+  - UTF
+  - XML
+  - XSS
+  - YAML
+  - ZIP
diff --git a/.vale/styles/Google/Colons.yml b/.vale/styles/Google/Colons.yml
new file mode 100644
index 0000000000..4a027c307d
--- /dev/null
+++ b/.vale/styles/Google/Colons.yml
@@ -0,0 +1,8 @@
+extends: existence
+message: "'%s' should be in lowercase."
+link: 'https://developers.google.com/style/colons'
+nonword: true
+level: warning
+scope: sentence
+tokens:
+  - '(?<=:) *[A-Z]'
diff --git a/.vale/styles/Google/vocab.txt b/.vale/styles/Google/vocab.txt
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/README.md b/README.md
index c840837606..22b9fa7f9d 100644
--- a/README.md
+++ b/README.md
@@ -227,3 +227,48 @@ Configure in `config.json`, generated with the Docker image:
   "JUPYTERHUB_URL": "https://your-custom-jupyter.validmind.ai"
 }
 ```
+
+## Configuring Lighthouse checks
+
+Lighthouse is an open-source tool that audits web pages for accessibility, performance, best practices, and SEO. We automatically run Lighthouse against PR preview sites to make our documentation better and more accessible for everyone.
+
+By default, Lighthouse checks only the top-level pages in our site navigation, such as `/index.html`, `/guide/guides.html`, `/developer/validmind-library.html`, and so forth. You can configure this behavior in the workflow:
+
+```yaml
+env:
+  # To change the default depth level:
+  # 0 — Top-level navigation only (e.g. /index.html, /guide/guides.html, /developer/validmind-library.html, etc.)
+  # 1 — All first-level subdirectories (e.g. /guide/*.html)
+  # 2 — All second-level subdirectories (e.g. /guide/attestation/*.html)
+  # Note: While the crawler technically supports deeper levels, expect the workflow to take >2-12 hours to complete
+  DEFAULT_DEPTH: '0'
+```
+
+**Tips:**
+
+- On the first run, the workflow waits for a preview site to become available. For subsequent runs, it checks the currently available site, which may be behind HEAD. The PR comment shows which commit SHA was checked — rerun the check if needed.
+- Use folder depths greater than zero only on working branches when you need a thorough site audit. Deeper checks take 2-12 hours to complete and significantly slow down the CI/CD pipeline. Do not merge depth changes to `main`.
+
+## Vale linter
+
+We use the Vale linter to enforce consistent writing style and catch common language issues in our documentation source. Vale runs automatically on pull requests but can also be run locally when addressing source issues.
+
+### Running Vale locally
+
+```sh
+brew install vale
+vale site/
+```
+
+**Tip:** Locally, you can use Vale to check specific content areas you are working on, such as `site/guides/`.
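+
+For example, to lint a single file, or to surface only warnings and errors while you iterate, you can use standard Vale CLI flags (adjust the paths to whatever you are editing):
+
+```sh
+# Lint one source file instead of the whole site/ tree
+vale site/guide/guides.qmd
+
+# Hide suggestions and show only warnings and errors
+vale --minAlertLevel=warning site/guide/
+```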
+
+### Configuring Vale
+
+- The linter is configured via the `.vale.ini` file in the root of the repository. This file specifies which styles to use and which files or directories to skip.
+- The built-in `Vale` style and the community `Google` style package are installed automatically in the CI workflow.
+- The workflow is set up to ignore files and folders starting with an underscore (`_`) and the `site/plugin` directory.
+
+### FUTURE: Customizing rules
+
+- To add or remove styles, edit the `BasedOnStyles` lines in your `.vale.ini`.
+- To skip additional files or folders, update the `Skips` setting in `.vale.ini` or adjust the workflow globs.
diff --git a/site/Makefile b/site/Makefile
index e4a0c54b93..49087d371e 100644
--- a/site/Makefile
+++ b/site/Makefile
@@ -6,14 +6,14 @@ SRC_DIR := $(SRC_ROOT)/validmind-library
 DEST_DIR_NB := notebooks
 DEST_DIR_PYTHON := validmind
 DEST_DIR_TESTS := tests
-GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD)
+GIT_BRANCH := $(or $(GITHUB_HEAD_REF),$(GITHUB_REF_NAME),$(shell git rev-parse --abbrev-ref HEAD))
 INSTALLATION_BRANCH := main
 LIBRARY_BRANCH ?= $(or $(BRANCH),main)
 SRC_ROOT := _source
 SRC_DIR := $(SRC_ROOT)/validmind-library
 
 # Define .PHONY target for help section
-.PHONY: help clean clone copy-installation delete-demo-branch deploy-demo-branch deploy-prod deploy-staging docker-build docker-serve docker-site docker-site-lite docs-site execute get-source notebooks python-docs release-notes test-descriptions yearly-releases
+.PHONY: help clean clone copy-installation delete-demo-branch deploy-demo-branch deploy-prod deploy-staging docker-build docker-serve docker-site docker-site-lite docs-site execute generate-sitemap get-source notebooks python-docs release-notes test-descriptions yearly-releases
 
 # Help section
 help:
@@ -32,6 +32,7 @@ help:
 	@echo "  docker-site        Get source, render site with Docker profile, execute notebooks"
 	@echo "  docker-site-lite   Get source and render site with Docker profile (skips notebook execution)"
 	@echo "  docs-site          Get all source files and render the production docs site with Quarto"
+	@echo "  generate-sitemap   Generate a sitemap for the static HTML site"
 	@echo "  execute            Execute a Jupyter Notebook or notebook directory"
 	@echo "  get-source         Get all source files (clean, clone, copy-installation, notebooks, python-docs, test-descriptions)"
 	@echo "  notebooks          Copy Jupyter notebooks into notebooks/"
@@ -164,6 +165,74 @@ execute:
 		quarto render --profile "$$PROFILE" "$$FILE_PATH"; \
 		rm $$env_file '
+# Generate sitemap for the site
+generate-sitemap:
+	@echo "Generating sitemaps ..."
+	@find _site -name "*.html" -not -path "*/internal/*" -not -path "*/site_libs/*" -not -path "*/sitemap.html/*" -not -path "*/training.html/*" | while read -r file; do \
+		url_path=$$(echo "$$file" | sed 's|^_site/||'); \
+		if [ "$$(uname)" = "Darwin" ]; then \
+			lastmod=$$(stat -f "%Sm" -t "%Y-%m-%dT%H:%M:%SZ" "$$file"); \
+		else \
+			lastmod=$$(stat -c "%y" "$$file" | sed 's/ /T/;s/$$/Z/'); \
+		fi; \
+		printf "  <url>\n    <loc>https://docs-demo.vm.validmind.ai/pr_previews/$(GIT_BRANCH)/%s</loc>\n    <lastmod>%s</lastmod>\n    <changefreq>weekly</changefreq>\n    <priority>0.8</priority>\n  </url>\n" "$$url_path" "$$lastmod" >> sitemap.xml.tmp; \
+		printf "%s\n" "$$url_path" >> sitemap.urls.tmp; \
+	done
+	@printf '<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n' > _site/sitemap.xml
+	@cat sitemap.xml.tmp >> _site/sitemap.xml
+	@printf '</urlset>\n' >> _site/sitemap.xml
+	@rm sitemap.xml.tmp
+	@printf '<!DOCTYPE html>\n<html>\n<head>\n  <title>ValidMind Documentation & Training Sitemap</title>\n</head>\n<body>\n' > _site/sitemap.html
+	@printf '<details>\n  <summary>Root Pages</summary>\n' >> _site/sitemap.html
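
A quick way to sanity-check the generated sitemap locally after rendering the site (this sketch assumes `xmllint` from libxml2 is on your PATH; any XML validator works):

```sh
cd site
make generate-sitemap
xmllint --noout _site/sitemap.xml   # fails loudly if the XML is malformed
grep -c "<loc>" _site/sitemap.xml   # rough count of pages included
```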