diff --git a/.gitignore b/.gitignore index 60dea55..cb9f4f1 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,11 @@ coverage/ # Pipeline data backend/data/ +# Script-generated files +scripts/ashby_slugs_curated.txt +scripts/ashby_slugs_curated.csv +scripts/ashby_slugs_curated_verified.csv + # Temp *.tmp *.temp diff --git a/backend/package-lock.json b/backend/package-lock.json index c3f0ecf..bb6c9e5 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -10,6 +10,7 @@ "dependencies": { "@anthropic-ai/sdk": "^0.39.0", "@fastify/cors": "^10.0.0", + "@fastify/static": "^9.0.0", "airtable": "^0.12.2", "cheerio": "^1.0.0", "csv-parse": "^5.6.0", @@ -494,6 +495,22 @@ "node": ">=18" } }, + "node_modules/@fastify/accept-negotiator": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/@fastify/accept-negotiator/-/accept-negotiator-2.0.1.tgz", + "integrity": "sha512-/c/TW2bO/v9JeEgoD/g1G5GxGeCF1Hafdf79WPmUlgYiBXummY0oX3VVq4yFkKKVBKDNlaDUYoab7g38RpPqCQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT" + }, "node_modules/@fastify/ajv-compiler": { "version": "4.0.5", "resolved": "https://registry.npmjs.org/@fastify/ajv-compiler/-/ajv-compiler-4.0.5.tgz", @@ -625,6 +642,83 @@ "ipaddr.js": "^2.1.0" } }, + "node_modules/@fastify/send": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/@fastify/send/-/send-4.1.0.tgz", + "integrity": "sha512-TMYeQLCBSy2TOFmV95hQWkiTYgC/SEx7vMdV+wnZVX4tt8VBLKzmH8vV9OzJehV0+XBfg+WxPMt5wp+JBUKsVw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "@lukeed/ms": "^2.0.2", + "escape-html": "~1.0.3", + "fast-decode-uri-component": "^1.0.1", + "http-errors": "^2.0.0", + "mime": "^3" + } + }, + "node_modules/@fastify/static": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/@fastify/static/-/static-9.0.0.tgz", + "integrity": "sha512-r64H8Woe/vfilg5RTy7lwWlE8ZZcTrc3kebYFMEUBrMqlydhQyoiExQXdYAy2REVpST/G35+stAM8WYp1WGmMA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "@fastify/accept-negotiator": "^2.0.0", + "@fastify/send": "^4.0.0", + "content-disposition": "^1.0.1", + "fastify-plugin": "^5.0.0", + "fastq": "^1.17.1", + "glob": "^13.0.0" + } + }, + "node_modules/@isaacs/balanced-match": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/@isaacs/balanced-match/-/balanced-match-4.0.1.tgz", + "integrity": "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==", + "license": "MIT", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/@isaacs/brace-expansion": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/@isaacs/brace-expansion/-/brace-expansion-5.0.1.tgz", + "integrity": "sha512-WMz71T1JS624nWj2n2fnYAuPovhv7EUhk69R6i9dsVyzxt5eM3bjwvgk9L+APE1TRscGysAVMANkB0jh0LQZrQ==", + "license": "MIT", + "dependencies": { + "@isaacs/balanced-match": "^4.0.1" + }, + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/@lukeed/ms": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/@lukeed/ms/-/ms-2.0.2.tgz", + "integrity": "sha512-9I2Zn6+NJLfaGoz9jN3lpwDgAYvfGeNYdbAIjJOqzs4Tpc+VU3Jqq4IofSUBKajiDS8k9fZIg18/z13mpk1bsA==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/@pinojs/redact": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/@pinojs/redact/-/redact-0.4.0.tgz", @@ -839,6 +933,19 @@ "node": ">= 0.8" } }, + "node_modules/content-disposition": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-1.0.1.tgz", + "integrity": "sha512-oIXISMynqSqm241k6kcQ5UwttDILMK4BiurCfGEREw6+X9jkkpEe5T9FZaApyLGGOnFuyMWZpdolTXMtvEJ08Q==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/cookie": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/cookie/-/cookie-1.1.1.tgz", @@ -895,6 +1002,15 @@ "node": ">=0.4.0" } }, + "node_modules/depd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/dequal": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", @@ -1097,6 +1213,12 @@ "@esbuild/win32-x64": "0.27.2" } }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==", + "license": "MIT" + }, "node_modules/event-target-shim": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", @@ -1348,6 +1470,23 @@ "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" } }, + "node_modules/glob": { + "version": "13.0.2", + "resolved": "https://registry.npmjs.org/glob/-/glob-13.0.2.tgz", + "integrity": "sha512-035InabNu/c1lW0tzPhAgapKctblppqsKKG9ZaNzbr+gXwWMjXoiyGSyB9sArzrjG7jY+zntRq5ZSUYemrnWVQ==", + "license": "BlueOak-1.0.0", + "dependencies": { + "minimatch": "^10.1.2", + "minipass": "^7.1.2", + "path-scurry": "^2.0.0" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/gopd": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", @@ -1430,6 +1569,26 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, + "node_modules/http-errors": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", + "integrity": "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==", + "license": "MIT", + "dependencies": { + "depd": "~2.0.0", + "inherits": "~2.0.4", + "setprototypeof": "~1.2.0", + "statuses": "~2.0.2", + "toidentifier": "~1.0.1" + }, + "engines": { + "node": ">= 0.8" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/humanize-ms": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", @@ -1451,6 +1610,12 @@ "node": ">=0.10.0" } }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC" + }, "node_modules/ipaddr.js": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.3.0.tgz", @@ -1528,6 +1693,15 @@ "integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==", "license": "MIT" }, + "node_modules/lru-cache": { + "version": "11.2.6", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.2.6.tgz", + "integrity": "sha512-ESL2CrkS/2wTPfuend7Zhkzo2u0daGJ/A2VucJOgQ/C48S/zB8MMeMHSGKYpXhIjbPxfuezITkaBH1wqv00DDQ==", + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, "node_modules/math-intrinsics": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", @@ -1537,6 +1711,18 @@ "node": ">= 0.4" } }, + "node_modules/mime": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-3.0.0.tgz", + "integrity": "sha512-jSCU7/VB1loIWBZe14aEYHU/+1UMEHoaO7qxCOVJOw9GgH72VAWppxNcjU+x9a2k3GSIBXNKxXQFqRvvZ7vr3A==", + "license": "MIT", + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/mime-db": { "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", @@ -1558,6 +1744,30 @@ "node": ">= 0.6" } }, + "node_modules/minimatch": { + "version": "10.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.1.2.tgz", + "integrity": "sha512-fu656aJ0n2kcXwsnwnv9g24tkU5uSmOlTjd6WyyaKm2Z+h1qmY6bAjrcaIxF/BslFqbZ8UBtbJi7KgQOZD2PTw==", + "license": "BlueOak-1.0.0", + "dependencies": { + "@isaacs/brace-expansion": "^5.0.1" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/minipass": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", + "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", + "license": "ISC", + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, "node_modules/mnemonist": { "version": "0.40.0", "resolved": "https://registry.npmjs.org/mnemonist/-/mnemonist-0.40.0.tgz", @@ -1689,6 +1899,22 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, + "node_modules/path-scurry": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-2.0.1.tgz", + "integrity": "sha512-oWyT4gICAu+kaA7QWk/jvCHWarMKNs6pXOGWKDTr7cw4IGcUbW+PeTfbaQiLGheFRpjo6O9J0PmyMfQPjH71oA==", + "license": "BlueOak-1.0.0", + "dependencies": { + "lru-cache": "^11.0.0", + "minipass": "^7.1.2" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/pino": { "version": "10.3.0", "resolved": "https://registry.npmjs.org/pino/-/pino-10.3.0.tgz", @@ -1869,6 +2095,12 @@ "integrity": "sha512-oeM1lpU/UvhTxw+g3cIfxXHyJRc/uidd3yK1P242gzHds0udQBYzs3y8j4gCCW+ZJ7ad0yctld8RYO+bdurlvw==", "license": "MIT" }, + "node_modules/setprototypeof": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", + "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", + "license": "ISC" + }, "node_modules/sonic-boom": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/sonic-boom/-/sonic-boom-4.2.0.tgz", @@ -1887,6 +2119,15 @@ "node": ">= 10.x" } }, + "node_modules/statuses": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz", + "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/thread-stream": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/thread-stream/-/thread-stream-4.0.0.tgz", @@ -1908,6 +2149,15 @@ "node": ">=12" } }, + "node_modules/toidentifier": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", + "license": "MIT", + "engines": { + "node": ">=0.6" + } + }, "node_modules/tr46": { "version": "0.0.3", "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", diff --git a/backend/src/constants/ashby-slugs.json b/backend/src/constants/ashby-slugs.json new file mode 100644 index 0000000..f1363cf --- /dev/null +++ b/backend/src/constants/ashby-slugs.json @@ -0,0 +1,253 @@ +[ + "airtable", + "alan", + "altura", + "away", + "deliveroo", + "duolingo", + "flock-safety", + "hackerone", + "notion", + "opendoor", + "oyster", + "posthog", + "ramp", + "sequoia", + "sony", + "vanta", + "cursor", + "deel", + "harvey", + "modern-treasury", + "openai", + "reddit", + "shopify", + "snowflake", + "apify", + "ashby", + "buffer", + "factory", + "hcompany", + "jerry.ai", + "lightning", + "linear", + "lottie", + "lovable", + "notable", + "scribd", + "searchable", + "silver", + "tapcheck", + "blueberrypediatrics", + "cambly", + "checkly", + "cleric", + "continua", + "dryft", + "duck-duck-go", + "equals", + "firetiger", + "homevision", + "imprint", + "kombo", + "legionhealth", + "livekit", + "matterworks", + "meticulous", + "modal", + "norm-ai", + "office-hours", + "ontic", + "orb", + "parabola-io", + "pear", + "pear-vc", + "permitflow", + "sentilink", + "sfcompute", + "steel", + "tiplink", + "titan", + "turnstile", + "verge-genomics", + "virtahealth", + "vitalize", + "wirescreen", + "15five", + "6sense", + "aave", + "ada", + "adept", + "affinity", + "affirm", + "agora", + "ai21-labs", + "airbyte", + "alchemy", + "alloy", + "alma", + "amplitude", + "anaplan", + "anduril", + "angellist", + "anthropic", + "anyscale", + "apollo", + "applied-intuition", + "asana", + "assembly-ai", + "attio", + "aurora", + "baseten", + "benchling", + "braze", + "brex", + "canva", + "carta", + "chainalysis", + "character-ai", + "chime", + "circle", + "clerk", + "clickhouse", + "clockwise", + "coda", + "codeium", + "cohere", + "coinbase", + "contentful", + "courier", + "databricks", + "datadog", + "dbt-labs", + "deepgram", + "discord", + "docker", + "doordash", + "drata", + "figma", + "fireblocks", + "fivetran", + "fly", + "framer", + "front", + "gong", + "grafana-labs", + "grammarly", + "gusto", + "hashicorp", + "hightouch", + "hubspot", + "huggingface", + "instacart", + "intercom", + "ironclad", + "iterable", + "jasper", + "kraken", + "labelbox", + "launchdarkly", + "lemonade", + "loom", + "lyra-health", + "marqeta", + "materialize", + "mercury", + "metabase", + "miro", + "mistral", + "modal-labs", + "monday", + "navan", + "neon", + "newrelic", + "novu", + "nuro", + "opensea", + "oura", + "outreach", + "paddle", + "pandadoc", + "paxos", + "pendo", + "perplexity", + "personio", + "pinecone", + "plaid", + "planetscale", + "postman", + "prefect", + "productboard", + "pulley", + "pulumi", + "qdrant", + "railway", + "raycast", + "readme", + "remote", + "render", + "replicate", + "replit", + "resend", + "retool", + "rippling", + "root-insurance", + "runway", + "samsara", + "sanity", + "sardine", + "scale", + "secureframe", + "sentry", + "shield-ai", + "singlestore", + "skydio", + "slack", + "smartcar", + "snorkel", + "snyk", + "sourcegraph", + "spotify", + "spring-health", + "stability-ai", + "stainless", + "statsig", + "storyblok", + "stripe", + "stytch", + "substack", + "supabase", + "synthesia", + "tailscale", + "temporal", + "tenstorrent", + "thirdweb", + "timescale", + "toast", + "together", + "together-ai", + "trm-labs", + "twilio", + "uniswap", + "upstart", + "vapi", + "vercel", + "verkada", + "voiceflow", + "wandb", + "warp", + "watershed", + "weaviate", + "webflow", + "weights-biases", + "whimsical", + "whoop", + "writer", + "xata", + "yugabyte", + "zed", + "zip", + "zipline", + "zora", + "zuora" +] diff --git a/backend/src/services/theirstack-fetcher.ts b/backend/src/services/theirstack-fetcher.ts index 0215e88..dcebf16 100644 --- a/backend/src/services/theirstack-fetcher.ts +++ b/backend/src/services/theirstack-fetcher.ts @@ -1,4 +1,7 @@ import crypto from 'node:crypto'; +import { readFileSync } from 'node:fs'; +import { fileURLToPath } from 'node:url'; +import { dirname, join } from 'node:path'; import { config } from '../config.js'; import { log } from '../logger.js'; import { writeStep } from '../store.js'; @@ -9,41 +12,14 @@ const ASHBY_PUBLIC_API_BASE = 'https://api.ashbyhq.com/posting-api/job-board'; /** * Curated list of companies that use Ashby as their ATS. * slug = jobs.ashbyhq.com/ + * + * This list is sourced from backend/src/constants/ashby-slugs.json + * to maintain a single canonical source shared with the Python verification script. */ -const ASHBY_COMPANY_SLUGS: string[] = [ - 'airtable', 'alan', 'altura', 'away', 'deliveroo', 'duolingo', 'flock-safety', 'hackerone', - 'notion', 'opendoor', 'oyster', 'posthog', 'ramp', 'sequoia', 'sony', 'vanta', 'cursor', - 'deel', 'harvey', 'modern-treasury', 'openai', 'reddit', 'shopify', 'snowflake', 'apify', - 'ashby', 'buffer', 'factory', 'hcompany', 'jerry.ai', 'lightning', 'linear', 'lottie', - 'lovable', 'notable', 'scribd', 'searchable', 'silver', 'tapcheck', 'blueberrypediatrics', - 'cambly', 'checkly', 'cleric', 'continua', 'dryft', 'duck-duck-go', 'equals', 'firetiger', - 'homevision', 'imprint', 'kombo', 'legionhealth', 'livekit', 'matterworks', 'meticulous', - 'modal', 'norm-ai', 'office-hours', 'ontic', 'orb', 'parabola-io', 'pear', 'pear-vc', - 'permitflow', 'sentilink', 'sfcompute', 'steel', 'tiplink', 'titan', 'turnstile', - 'verge-genomics', 'virtahealth', 'vitalize', 'wirescreen', '15five', '6sense', 'aave', - 'ada', 'adept', 'affinity', 'affirm', 'agora', 'ai21-labs', 'airbyte', 'alchemy', 'alloy', - 'alma', 'amplitude', 'anaplan', 'anduril', 'angellist', 'anthropic', 'anyscale', 'apollo', - 'applied-intuition', 'asana', 'assembly-ai', 'attio', 'aurora', 'baseten', 'benchling', - 'braze', 'brex', 'canva', 'carta', 'chainalysis', 'character-ai', 'chime', 'circle', - 'clerk', 'clickhouse', 'clockwise', 'coda', 'codeium', 'cohere', 'coinbase', 'contentful', - 'courier', 'databricks', 'datadog', 'dbt-labs', 'deepgram', 'discord', 'docker', 'doordash', - 'drata', 'figma', 'fireblocks', 'fivetran', 'fly', 'framer', 'front', 'gong', 'grafana-labs', - 'grammarly', 'gusto', 'hashicorp', 'hightouch', 'hubspot', 'huggingface', 'instacart', - 'intercom', 'ironclad', 'iterable', 'jasper', 'kraken', 'labelbox', 'launchdarkly', - 'lemonade', 'loom', 'lyra-health', 'marqeta', 'materialize', 'mercury', 'metabase', 'miro', - 'mistral', 'modal-labs', 'monday', 'navan', 'neon', 'newrelic', 'novu', 'nuro', 'opensea', - 'oura', 'outreach', 'paddle', 'pandadoc', 'paxos', 'pendo', 'perplexity', 'personio', - 'pinecone', 'plaid', 'planetscale', 'postman', 'prefect', 'productboard', 'pulley', 'pulumi', - 'qdrant', 'railway', 'raycast', 'readme', 'remote', 'render', 'replicate', 'replit', - 'resend', 'retool', 'rippling', 'root-insurance', 'runway', 'samsara', 'sanity', 'sardine', - 'scale', 'secureframe', 'sentry', 'shield-ai', 'singlestore', 'skydio', 'slack', 'smartcar', - 'snorkel', 'snyk', 'sourcegraph', 'spotify', 'spring-health', 'stability-ai', 'stainless', - 'statsig', 'storyblok', 'stripe', 'stytch', 'substack', 'supabase', 'synthesia', 'tailscale', - 'temporal', 'tenstorrent', 'thirdweb', 'timescale', 'toast', 'together', 'together-ai', - 'trm-labs', 'twilio', 'uniswap', 'upstart', 'vapi', 'vercel', 'verkada', 'voiceflow', - 'wandb', 'warp', 'watershed', 'weaviate', 'webflow', 'weights-biases', 'whimsical', 'whoop', - 'writer', 'xata', 'yugabyte', 'zed', 'zip', 'zipline', 'zora', 'zuora' -]; +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); +const ashbySlugsPath = join(__dirname, '../constants/ashby-slugs.json'); +const ASHBY_COMPANY_SLUGS: readonly string[] = JSON.parse(readFileSync(ashbySlugsPath, 'utf-8')); interface AshbyLocation { location?: string; diff --git a/scripts/ashby_slugs_verified.py b/scripts/ashby_slugs_verified.py index d1c98c6..74f4268 100755 --- a/scripts/ashby_slugs_verified.py +++ b/scripts/ashby_slugs_verified.py @@ -14,17 +14,15 @@ import urllib.error from pathlib import Path -# Compact curated seed list; can be extended over time. -SLUGS = [ - 'airtable','alan','altura','away','deliveroo','duolingo','flock-safety','hackerone','notion','opendoor', - 'oyster','posthog','ramp','sequoia','sony','vanta','cursor','deel','harvey','modern-treasury','openai', - 'reddit','shopify','snowflake','apify','ashby','buffer','factory','hcompany','jerry.ai','lightning','linear', - 'lottie','lovable','notable','scribd','searchable','silver','tapcheck','blueberrypediatrics','cambly','checkly', - 'cleric','continua','dryft','duck-duck-go','equals','firetiger','homevision','imprint','kombo','legionhealth', - 'livekit','matterworks','meticulous','modal','norm-ai','office-hours','ontic','orb','parabola-io','pear','pear-vc', - 'permitflow','sentilink','sfcompute','steel','tiplink','titan','turnstile','verge-genomics','virtahealth','vitalize', - 'wirescreen','anthropic','benchling','clerk','cohere','dbt-labs','perplexity','replit','runway','watershed' -] + +def load_canonical_slugs() -> list[str]: + """Load the canonical slug list from the shared JSON file. + + This ensures the Python script and TypeScript backend stay in sync. + """ + canonical_path = Path(__file__).resolve().parent.parent / 'backend' / 'src' / 'constants' / 'ashby-slugs.json' + with canonical_path.open('r') as f: + return json.load(f) def dedupe(slugs: list[str]) -> list[str]: @@ -56,7 +54,7 @@ def main() -> int: parser.add_argument('--verify', action='store_true') args = parser.parse_args() - slugs = dedupe(SLUGS) + slugs = dedupe(load_canonical_slugs()) out_dir = Path(__file__).resolve().parent txt_path = out_dir / 'ashby_slugs_curated.txt' csv_path = out_dir / 'ashby_slugs_curated.csv'