From 87d87ee0caac0f9b5f117a26cd1b5fd124cc8126 Mon Sep 17 00:00:00 2001 From: Siwei Ma Date: Mon, 16 Mar 2026 20:59:44 -0700 Subject: [PATCH 1/3] feat: add linkedin job search adapter --- README.md | 1 + README.zh-CN.md | 1 + src/clis/linkedin/search.ts | 410 ++++++++++++++++++++++++++++++++++++ 3 files changed, 412 insertions(+) create mode 100644 src/clis/linkedin/search.ts diff --git a/README.md b/README.md index d48e647..4e54d1c 100644 --- a/README.md +++ b/README.md @@ -142,6 +142,7 @@ npm install -g @jackwener/opencli@latest | **boss** | `search` `detail` | 🔐 Browser | | **coupang** | `search` `add-to-cart` | 🔐 Browser | | **youtube** | `search` | 🔐 Browser | +| **linkedin** | `search` | 🔐 Browser | | **yahoo-finance** | `quote` | 🔐 Browser | | **reuters** | `search` | 🔐 Browser | | **smzdm** | `search` | 🔐 Browser | diff --git a/README.zh-CN.md b/README.zh-CN.md index 524242d..6cebe67 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -141,6 +141,7 @@ npm install -g @jackwener/opencli@latest | **boss** | `search` `detail` | 🔐 浏览器 | | **coupang** | `search` `add-to-cart` | 🔐 浏览器 | | **youtube** | `search` | 🔐 浏览器 | +| **linkedin** | `search` | 🔐 浏览器 | | **yahoo-finance** | `quote` | 🔐 浏览器 | | **reuters** | `search` | 🔐 浏览器 | | **smzdm** | `search` | 🔐 浏览器 | diff --git a/src/clis/linkedin/search.ts b/src/clis/linkedin/search.ts new file mode 100644 index 0000000..e82a821 --- /dev/null +++ b/src/clis/linkedin/search.ts @@ -0,0 +1,410 @@ +import { cli, Strategy } from '../../registry.js'; + +const EXPERIENCE_LEVELS: Record = { + internship: '1', + entry: '2', + 'entry-level': '2', + associate: '3', + mid: '4', + senior: '4', + 'mid-senior': '4', + 'mid-senior-level': '4', + director: '5', + executive: '6', +}; + +const JOB_TYPES: Record = { + 'full-time': 'F', + fulltime: 'F', + full: 'F', + 'part-time': 'P', + parttime: 'P', + part: 'P', + contract: 'C', + temporary: 'T', + temp: 'T', + volunteer: 'V', + internship: 'I', + other: 'O', +}; + +const DATE_POSTED: Record = { + any: 'on', + month: 'r2592000', + 'past-month': 'r2592000', + week: 'r604800', + 'past-week': 'r604800', + day: 'r86400', + '24h': 'r86400', + 'past-24h': 'r86400', +}; + +const REMOTE_TYPES: Record = { + onsite: '1', + 'on-site': '1', + hybrid: '3', + remote: '2', +}; + +function parseCsvArg(value: unknown): string[] { + return String(value ?? '') + .split(',') + .map(item => item.trim()) + .filter(Boolean); +} + +function mapFilterValues(input: unknown, mapping: Record, label: string): string[] { + const values = parseCsvArg(input); + const resolved = values.map(value => { + const key = value.toLowerCase(); + const mapped = mapping[key]; + if (!mapped) throw new Error(`Unsupported ${label}: ${value}`); + return mapped; + }); + return [...new Set(resolved)]; +} + +async function resolveCompanyIds(page: any, input: unknown): Promise { + const rawValues = parseCsvArg(input); + const ids = new Set(); + const names: string[] = []; + + for (const value of rawValues) { + if (/^\d+$/.test(value)) ids.add(value); + else names.push(value); + } + + if (!names.length) return [...ids]; + + const resolved = await page.evaluate(`(async () => { + const targets = ${JSON.stringify(names)}; + const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms)); + const normalize = (value) => (value || '').toLowerCase().replace(/\\s+/g, ' ').trim(); + + const openAllFilters = async () => { + const button = [...document.querySelectorAll('button')] + .find(b => ((b.innerText || '').trim().replace(/\\s+/g, ' ')) === 'All filters'); + if (button) { + button.click(); + await sleep(300); + } + }; + + const companyMap = () => { + const result = {}; + for (const input of document.querySelectorAll('input[name="company-filter-value"]')) { + const value = input.value; + const text = (input.parentElement?.innerText || input.closest('label')?.innerText || '').replace(/\\s+/g, ' ').trim(); + const label = text.replace(/\\s*Filter by.*$/i, '').trim(); + if (label) result[normalize(label)] = value; + } + return result; + }; + + const matchCompany = (map, name) => { + const normalized = normalize(name); + if (map[normalized]) return map[normalized]; + const key = Object.keys(map).find(entry => entry === normalized || entry.includes(normalized) || normalized.includes(entry)); + return key ? map[key] : null; + }; + + await openAllFilters(); + const results = {}; + let map = companyMap(); + + for (const name of targets) { + let found = matchCompany(map, name); + if (!found) { + const input = [...document.querySelectorAll('input')].find(node => node.getAttribute('aria-label') === 'Add a company'); + if (input) { + input.focus(); + input.value = name; + input.dispatchEvent(new Event('input', { bubbles: true })); + input.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', bubbles: true })); + await sleep(1200); + map = companyMap(); + found = matchCompany(map, name); + input.value = ''; + input.dispatchEvent(new Event('input', { bubbles: true })); + await sleep(100); + } + } + results[name] = found || null; + } + + return results; + })()`); + + const unresolved: string[] = []; + for (const name of names) { + const id = resolved?.[name]; + if (id) ids.add(id); + else unresolved.push(name); + } + + if (unresolved.length) { + throw new Error(`Could not resolve LinkedIn company filter: ${unresolved.join(', ')}`); + } + + return [...ids]; +} + +function normalizeWhitespace(value: unknown): string { + return String(value ?? '').replace(/\s+/g, ' ').trim(); +} + +function decodeLinkedinRedirect(url: string): string { + if (!url) return ''; + try { + const parsed = new URL(url); + if (parsed.pathname === '/redir/redirect/') { + return parsed.searchParams.get('url') || url; + } + } catch {} + return url; +} + +async function enrichJobDetails(page: any, jobs: Array>): Promise>> { + const enriched: Array> = []; + + for (const job of jobs) { + if (!job.url) { + enriched.push({ ...job, description: '', apply_url: '' }); + continue; + } + + try { + await page.goto(job.url); + await page.wait({ text: 'About the job', timeout: 8 }); + await page.evaluate(`(() => { + const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim().toLowerCase(); + const aboutSection = [...document.querySelectorAll('div, section, article')] + .find((element) => normalize(element.querySelector('h1, h2, h3, h4')?.textContent || '') === 'about the job'); + const expandButton = [...(aboutSection?.querySelectorAll('button, a[role="button"]') || [])] + .find((element) => /more/.test(normalize(element.textContent || '')) || /more/.test(normalize(element.getAttribute('aria-label') || ''))); + if (expandButton) expandButton.click(); + })()`); + await page.wait(1); + + const detail = await page.evaluate(`(() => { + const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim(); + const candidates = [...document.querySelectorAll('div, section, article')] + .map((element) => { + const heading = normalize(element.querySelector('h1, h2, h3, h4')?.textContent || ''); + const text = normalize(element.innerText || ''); + return { heading, text }; + }) + .filter((item) => item.text && item.heading.toLowerCase() === 'about the job' && item.text.length > 'About the job'.length) + .sort((a, b) => a.text.length - b.text.length); + + const description = candidates[0]?.text.replace(/^About the job\\s*/i, '') || ''; + const applyLink = [...document.querySelectorAll('a[href]')] + .map((anchor) => ({ + href: anchor.href || '', + text: normalize(anchor.textContent || ''), + aria: normalize(anchor.getAttribute('aria-label') || ''), + })) + .find((anchor) => /apply/i.test(anchor.text) || /apply/i.test(anchor.aria)); + + return { + description, + applyUrl: applyLink?.href || '', + }; + })()`); + + enriched.push({ + ...job, + description: normalizeWhitespace(detail?.description), + apply_url: decodeLinkedinRedirect(String(detail?.applyUrl ?? '')), + }); + } catch { + enriched.push({ ...job, description: '', apply_url: '' }); + } + } + + return enriched; +} + +cli({ + site: 'linkedin', + name: 'search', + description: 'Search LinkedIn jobs', + domain: 'www.linkedin.com', + strategy: Strategy.HEADER, + browser: true, + args: [ + { name: 'query', type: 'string', required: true, help: 'Job search keywords' }, + { name: 'location', type: 'string', required: false, help: 'Location text such as San Francisco Bay Area' }, + { name: 'limit', type: 'int', default: 10, help: 'Number of jobs to return (max 100)' }, + { name: 'start', type: 'int', default: 0, help: 'Result offset for pagination' }, + { name: 'details', type: 'bool', default: false, help: 'Include full job description and apply URL (slower)' }, + { name: 'company', type: 'string', required: false, help: 'Comma-separated company names or LinkedIn company IDs' }, + { name: 'experience_level', type: 'string', required: false, help: 'Comma-separated: internship, entry, associate, mid-senior, director, executive' }, + { name: 'job_type', type: 'string', required: false, help: 'Comma-separated: full-time, part-time, contract, temporary, volunteer, internship, other' }, + { name: 'date_posted', type: 'string', required: false, help: 'One of: any, month, week, 24h' }, + { name: 'remote', type: 'string', required: false, help: 'Comma-separated: on-site, hybrid, remote' }, + ], + columns: ['rank', 'title', 'company', 'location', 'listed', 'salary', 'url'], + func: async (page, kwargs) => { + const limit = Math.max(1, Math.min(kwargs.limit ?? 10, 100)); + const start = Math.max(0, kwargs.start ?? 0); + const includeDetails = Boolean(kwargs.details); + const location = (kwargs.location ?? '').trim(); + const keywords = String(kwargs.query ?? '').trim(); + const experienceLevels = mapFilterValues(kwargs.experience_level, EXPERIENCE_LEVELS, 'experience_level'); + const jobTypes = mapFilterValues(kwargs.job_type, JOB_TYPES, 'job_type'); + const remoteTypes = mapFilterValues(kwargs.remote, REMOTE_TYPES, 'remote'); + const datePostedValues = kwargs.date_posted + ? mapFilterValues(kwargs.date_posted, DATE_POSTED, 'date_posted') + : []; + + if (!keywords) throw new Error('query is required'); + + const searchParams = new URLSearchParams({ keywords }); + if (location) searchParams.set('location', location); + + await page.goto(`https://www.linkedin.com/jobs/search/?${searchParams.toString()}`); + await page.wait(5); + const companyIds = await resolveCompanyIds(page, kwargs.company); + + const data = await page.evaluate(`(async () => { + const input = ${JSON.stringify({ + keywords, + location, + limit, + start, + companyIds, + experienceLevels, + jobTypes, + datePostedValues, + remoteTypes, + })}; + const maxBatchSize = 25; + const jsession = document.cookie + .split(';') + .map(part => part.trim()) + .find(part => part.startsWith('JSESSIONID=')) + ?.slice('JSESSIONID='.length); + + if (!jsession) { + return { error: 'LinkedIn JSESSIONID cookie not found. Please sign in to LinkedIn in the browser.' }; + } + + const csrf = jsession.replace(/^"|"$/g, ''); + const headers = { + 'csrf-token': csrf, + 'x-restli-protocol-version': '2.0.0', + }; + + const buildSearchQuery = () => { + const parts = [ + 'origin:' + (( + input.companyIds.length || + input.experienceLevels.length || + input.jobTypes.length || + input.datePostedValues.length || + input.remoteTypes.length + ) ? 'JOB_SEARCH_PAGE_JOB_FILTER' : 'JOB_SEARCH_PAGE_OTHER_ENTRY'), + 'keywords:' + input.keywords, + ]; + if (input.location) { + parts.push('locationUnion:(seoLocation:(location:' + input.location + '))'); + } + const filters = []; + if (input.companyIds.length) filters.push('company:List(' + input.companyIds.join(',') + ')'); + if (input.experienceLevels.length) filters.push('experience:List(' + input.experienceLevels.join(',') + ')'); + if (input.jobTypes.length) filters.push('jobType:List(' + input.jobTypes.join(',') + ')'); + if (input.datePostedValues.length) filters.push('timePostedRange:List(' + input.datePostedValues.join(',') + ')'); + if (input.remoteTypes.length) filters.push('workplaceType:List(' + input.remoteTypes.join(',') + ')'); + if (filters.length) parts.push('selectedFilters:(' + filters.join(',') + ')'); + parts.push('spellCorrectionEnabled:true'); + return '(' + parts.join(',') + ')'; + }; + + const buildUrl = (offset, count) => { + const params = new URLSearchParams({ + decorationId: 'com.linkedin.voyager.dash.deco.jobs.search.JobSearchCardsCollection-220', + count: String(count), + q: 'jobSearch', + }); + const query = encodeURIComponent(buildSearchQuery()) + .replace(/%3A/gi, ':') + .replace(/%2C/gi, ',') + .replace(/%28/gi, '(') + .replace(/%29/gi, ')'); + return '/voyager/api/voyagerJobsDashJobCards?' + + params.toString() + + '&query=' + query + + '&start=' + offset; + }; + + const extractListed = (card) => { + const listed = (card.footerItems || []).find(item => item?.type === 'LISTED_DATE' && item?.timeAt); + return listed?.timeAt ? new Date(listed.timeAt).toISOString().slice(0, 10) : ''; + }; + + const extractJobId = (card) => { + const sources = [ + card.jobPostingUrn, + card.jobPosting?.entityUrn, + card.entityUrn, + ].filter(Boolean); + for (const source of sources) { + const match = String(source).match(/(\\d+)/); + if (match) return match[1]; + } + return ''; + }; + + const collected = []; + let offset = input.start; + + while (collected.length < input.limit) { + const count = Math.min(maxBatchSize, input.limit - collected.length); + const res = await fetch(buildUrl(offset, count), { + credentials: 'include', + headers, + }); + + if (!res.ok) { + const text = await res.text(); + return { error: 'LinkedIn API error: HTTP ' + res.status + ' ' + text.slice(0, 200) }; + } + + const payload = await res.json(); + const elements = Array.isArray(payload?.elements) ? payload.elements : []; + if (elements.length === 0) break; + + for (const element of elements) { + const card = element?.jobCardUnion?.jobPostingCard; + if (!card) continue; + const jobId = extractJobId(card); + collected.push({ + title: card.jobPostingTitle || card.title?.text || '', + company: card.primaryDescription?.text || '', + location: card.secondaryDescription?.text || '', + listed: extractListed(card), + salary: card.tertiaryDescription?.text || '', + url: jobId ? 'https://www.linkedin.com/jobs/view/' + jobId : '', + }); + } + + if (elements.length < count) break; + offset += elements.length; + } + + return collected.slice(0, input.limit).map((item, index) => ({ + rank: input.start + index + 1, + ...item, + })); + })()`); + + if (!Array.isArray(data)) { + throw new Error(data?.error || 'LinkedIn search returned an unexpected response'); + } + + if (!includeDetails) return data; + + return enrichJobDetails(page, data); + }, +}); From d4c464ee56ab6b0098739f06ca464f47200c41ea Mon Sep 17 00:00:00 2001 From: jackwener Date: Tue, 17 Mar 2026 12:44:48 +0800 Subject: [PATCH 2/3] fix(linkedin): fix parseCsvArg undefined bug, regex escapes in page.evaluate, replace hardcoded wait, add IPage type --- src/clis/linkedin/search.ts | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/clis/linkedin/search.ts b/src/clis/linkedin/search.ts index e82a821..9cbc082 100644 --- a/src/clis/linkedin/search.ts +++ b/src/clis/linkedin/search.ts @@ -1,4 +1,5 @@ import { cli, Strategy } from '../../registry.js'; +import type { IPage } from '../../types.js'; const EXPERIENCE_LEVELS: Record = { internship: '1', @@ -47,7 +48,8 @@ const REMOTE_TYPES: Record = { }; function parseCsvArg(value: unknown): string[] { - return String(value ?? '') + if (value === undefined || value === null || value === '') return []; + return String(value) .split(',') .map(item => item.trim()) .filter(Boolean); @@ -64,7 +66,7 @@ function mapFilterValues(input: unknown, mapping: Record, label: return [...new Set(resolved)]; } -async function resolveCompanyIds(page: any, input: unknown): Promise { +async function resolveCompanyIds(page: IPage, input: unknown): Promise { const rawValues = parseCsvArg(input); const ids = new Set(); const names: string[] = []; @@ -79,11 +81,11 @@ async function resolveCompanyIds(page: any, input: unknown): Promise { const resolved = await page.evaluate(`(async () => { const targets = ${JSON.stringify(names)}; const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms)); - const normalize = (value) => (value || '').toLowerCase().replace(/\\s+/g, ' ').trim(); + const normalize = (value) => (value || '').toLowerCase().replace(/\s+/g, ' ').trim(); const openAllFilters = async () => { const button = [...document.querySelectorAll('button')] - .find(b => ((b.innerText || '').trim().replace(/\\s+/g, ' ')) === 'All filters'); + .find(b => ((b.innerText || '').trim().replace(/\s+/g, ' ')) === 'All filters'); if (button) { button.click(); await sleep(300); @@ -94,8 +96,8 @@ async function resolveCompanyIds(page: any, input: unknown): Promise { const result = {}; for (const input of document.querySelectorAll('input[name="company-filter-value"]')) { const value = input.value; - const text = (input.parentElement?.innerText || input.closest('label')?.innerText || '').replace(/\\s+/g, ' ').trim(); - const label = text.replace(/\\s*Filter by.*$/i, '').trim(); + const text = (input.parentElement?.innerText || input.closest('label')?.innerText || '').replace(/\s+/g, ' ').trim(); + const label = text.replace(/\s*Filter by.*$/i, '').trim(); if (label) result[normalize(label)] = value; } return result; @@ -164,7 +166,7 @@ function decodeLinkedinRedirect(url: string): string { return url; } -async function enrichJobDetails(page: any, jobs: Array>): Promise>> { +async function enrichJobDetails(page: IPage, jobs: Array>): Promise>> { const enriched: Array> = []; for (const job of jobs) { @@ -177,7 +179,7 @@ async function enrichJobDetails(page: any, jobs: Array>): Pr await page.goto(job.url); await page.wait({ text: 'About the job', timeout: 8 }); await page.evaluate(`(() => { - const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim().toLowerCase(); + const normalize = (value) => (value || '').replace(/\s+/g, ' ').trim().toLowerCase(); const aboutSection = [...document.querySelectorAll('div, section, article')] .find((element) => normalize(element.querySelector('h1, h2, h3, h4')?.textContent || '') === 'about the job'); const expandButton = [...(aboutSection?.querySelectorAll('button, a[role="button"]') || [])] @@ -187,7 +189,7 @@ async function enrichJobDetails(page: any, jobs: Array>): Pr await page.wait(1); const detail = await page.evaluate(`(() => { - const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim(); + const normalize = (value) => (value || '').replace(/\s+/g, ' ').trim(); const candidates = [...document.querySelectorAll('div, section, article')] .map((element) => { const heading = normalize(element.querySelector('h1, h2, h3, h4')?.textContent || ''); @@ -197,7 +199,7 @@ async function enrichJobDetails(page: any, jobs: Array>): Pr .filter((item) => item.text && item.heading.toLowerCase() === 'about the job' && item.text.length > 'About the job'.length) .sort((a, b) => a.text.length - b.text.length); - const description = candidates[0]?.text.replace(/^About the job\\s*/i, '') || ''; + const description = candidates[0]?.text.replace(/^About the job\s*/i, '') || ''; const applyLink = [...document.querySelectorAll('a[href]')] .map((anchor) => ({ href: anchor.href || '', @@ -254,9 +256,7 @@ cli({ const experienceLevels = mapFilterValues(kwargs.experience_level, EXPERIENCE_LEVELS, 'experience_level'); const jobTypes = mapFilterValues(kwargs.job_type, JOB_TYPES, 'job_type'); const remoteTypes = mapFilterValues(kwargs.remote, REMOTE_TYPES, 'remote'); - const datePostedValues = kwargs.date_posted - ? mapFilterValues(kwargs.date_posted, DATE_POSTED, 'date_posted') - : []; + const datePostedValues = mapFilterValues(kwargs.date_posted, DATE_POSTED, 'date_posted'); if (!keywords) throw new Error('query is required'); @@ -264,7 +264,7 @@ cli({ if (location) searchParams.set('location', location); await page.goto(`https://www.linkedin.com/jobs/search/?${searchParams.toString()}`); - await page.wait(5); + await page.wait({ text: 'Jobs', timeout: 10 }); const companyIds = await resolveCompanyIds(page, kwargs.company); const data = await page.evaluate(`(async () => { @@ -350,7 +350,7 @@ cli({ card.entityUrn, ].filter(Boolean); for (const source of sources) { - const match = String(source).match(/(\\d+)/); + const match = String(source).match(/(\d+)/); if (match) return match[1]; } return ''; From 908f5ccd40ddc6c194408b9e0d6a3018ddb3c858 Mon Sep 17 00:00:00 2001 From: jackwener Date: Tue, 17 Mar 2026 12:54:41 +0800 Subject: [PATCH 3/3] refactor(linkedin): extract evaluate logic, add progress logging, improve code structure - Extract Voyager query/URL building into typed standalone functions - Split fetchJobCards into its own function with per-batch evaluate - Add SearchInput interface for type safety - Add progress logging to enrichJobDetails (stderr) - Add section comments for code organization - Deduplicate normalize helpers in evaluate strings --- src/clis/linkedin/search.ts | 434 ++++++++++++++++++------------------ 1 file changed, 220 insertions(+), 214 deletions(-) diff --git a/src/clis/linkedin/search.ts b/src/clis/linkedin/search.ts index 9cbc082..a3b4f85 100644 --- a/src/clis/linkedin/search.ts +++ b/src/clis/linkedin/search.ts @@ -1,6 +1,8 @@ import { cli, Strategy } from '../../registry.js'; import type { IPage } from '../../types.js'; +// ── Filter value mappings ────────────────────────────────────────────── + const EXPERIENCE_LEVELS: Record = { internship: '1', entry: '2', @@ -47,6 +49,8 @@ const REMOTE_TYPES: Record = { remote: '2', }; +// ── Helpers ──────────────────────────────────────────────────────────── + function parseCsvArg(value: unknown): string[] { if (value === undefined || value === null || value === '') return []; return String(value) @@ -66,6 +70,77 @@ function mapFilterValues(input: unknown, mapping: Record, label: return [...new Set(resolved)]; } +function normalizeWhitespace(value: unknown): string { + return String(value ?? '').replace(/\s+/g, ' ').trim(); +} + +function decodeLinkedinRedirect(url: string): string { + if (!url) return ''; + try { + const parsed = new URL(url); + if (parsed.pathname === '/redir/redirect/') { + return parsed.searchParams.get('url') || url; + } + } catch {} + return url; +} + +// ── Voyager query builder (runs in Node, NOT inside page.evaluate) ──── + +interface SearchInput { + keywords: string; + location: string; + limit: number; + start: number; + companyIds: string[]; + experienceLevels: string[]; + jobTypes: string[]; + datePostedValues: string[]; + remoteTypes: string[]; +} + +function buildVoyagerSearchQuery(input: SearchInput): string { + const hasFilters = + input.companyIds.length || + input.experienceLevels.length || + input.jobTypes.length || + input.datePostedValues.length || + input.remoteTypes.length; + + const parts = [ + 'origin:' + (hasFilters ? 'JOB_SEARCH_PAGE_JOB_FILTER' : 'JOB_SEARCH_PAGE_OTHER_ENTRY'), + 'keywords:' + input.keywords, + ]; + if (input.location) { + parts.push('locationUnion:(seoLocation:(location:' + input.location + '))'); + } + const filters: string[] = []; + if (input.companyIds.length) filters.push('company:List(' + input.companyIds.join(',') + ')'); + if (input.experienceLevels.length) filters.push('experience:List(' + input.experienceLevels.join(',') + ')'); + if (input.jobTypes.length) filters.push('jobType:List(' + input.jobTypes.join(',') + ')'); + if (input.datePostedValues.length) filters.push('timePostedRange:List(' + input.datePostedValues.join(',') + ')'); + if (input.remoteTypes.length) filters.push('workplaceType:List(' + input.remoteTypes.join(',') + ')'); + if (filters.length) parts.push('selectedFilters:(' + filters.join(',') + ')'); + parts.push('spellCorrectionEnabled:true'); + return '(' + parts.join(',') + ')'; +} + +function buildVoyagerUrl(input: SearchInput, offset: number, count: number): string { + const params = new URLSearchParams({ + decorationId: 'com.linkedin.voyager.dash.deco.jobs.search.JobSearchCardsCollection-220', + count: String(count), + q: 'jobSearch', + }); + const query = encodeURIComponent(buildVoyagerSearchQuery(input)) + .replace(/%3A/gi, ':') + .replace(/%2C/gi, ',') + .replace(/%28/gi, '(') + .replace(/%29/gi, ')'); + return '/voyager/api/voyagerJobsDashJobCards?' + params.toString() + '&query=' + query + '&start=' + offset; +} + +// ── Company ID resolution (requires DOM interaction) ────────────────── + async function resolveCompanyIds(page: IPage, input: unknown): Promise { const rawValues = parseCsvArg(input); const ids = new Set(); @@ -81,59 +156,53 @@ async function resolveCompanyIds(page: IPage, input: unknown): Promise const resolved = await page.evaluate(`(async () => { const targets = ${JSON.stringify(names)}; const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms)); - const normalize = (value) => (value || '').toLowerCase().replace(/\s+/g, ' ').trim(); - - const openAllFilters = async () => { - const button = [...document.querySelectorAll('button')] - .find(b => ((b.innerText || '').trim().replace(/\s+/g, ' ')) === 'All filters'); - if (button) { - button.click(); - await sleep(300); - } - }; - - const companyMap = () => { - const result = {}; - for (const input of document.querySelectorAll('input[name="company-filter-value"]')) { - const value = input.value; - const text = (input.parentElement?.innerText || input.closest('label')?.innerText || '').replace(/\s+/g, ' ').trim(); - const label = text.replace(/\s*Filter by.*$/i, '').trim(); - if (label) result[normalize(label)] = value; + const normalize = (v) => (v || '').toLowerCase().replace(/\\s+/g, ' ').trim(); + + // Open "All filters" panel to expose company filter inputs + const allBtn = [...document.querySelectorAll('button')] + .find(b => ((b.innerText || '').trim().replace(/\\s+/g, ' ')) === 'All filters'); + if (allBtn) { allBtn.click(); await sleep(300); } + + const getCompanyMap = () => { + const map = {}; + for (const el of document.querySelectorAll('input[name="company-filter-value"]')) { + const text = (el.parentElement?.innerText || el.closest('label')?.innerText || '') + .replace(/\\s+/g, ' ').trim().replace(/\\s*Filter by.*$/i, '').trim(); + if (text) map[normalize(text)] = el.value; } - return result; + return map; }; - const matchCompany = (map, name) => { - const normalized = normalize(name); - if (map[normalized]) return map[normalized]; - const key = Object.keys(map).find(entry => entry === normalized || entry.includes(normalized) || normalized.includes(entry)); - return key ? map[key] : null; + const match = (map, name) => { + const n = normalize(name); + if (map[n]) return map[n]; + const k = Object.keys(map).find(e => e === n || e.includes(n) || n.includes(e)); + return k ? map[k] : null; }; - await openAllFilters(); const results = {}; - let map = companyMap(); + let map = getCompanyMap(); for (const name of targets) { - let found = matchCompany(map, name); + let found = match(map, name); if (!found) { - const input = [...document.querySelectorAll('input')].find(node => node.getAttribute('aria-label') === 'Add a company'); - if (input) { - input.focus(); - input.value = name; - input.dispatchEvent(new Event('input', { bubbles: true })); - input.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', bubbles: true })); + const inp = [...document.querySelectorAll('input')] + .find(el => el.getAttribute('aria-label') === 'Add a company'); + if (inp) { + inp.focus(); + inp.value = name; + inp.dispatchEvent(new Event('input', { bubbles: true })); + inp.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', bubbles: true })); await sleep(1200); - map = companyMap(); - found = matchCompany(map, name); - input.value = ''; - input.dispatchEvent(new Event('input', { bubbles: true })); + map = getCompanyMap(); + found = match(map, name); + inp.value = ''; + inp.dispatchEvent(new Event('input', { bubbles: true })); await sleep(100); } } results[name] = found || null; } - return results; })()`); @@ -151,25 +220,90 @@ async function resolveCompanyIds(page: IPage, input: unknown): Promise return [...ids]; } -function normalizeWhitespace(value: unknown): string { - return String(value ?? '').replace(/\s+/g, ' ').trim(); -} +// ── Voyager API fetch (runs inside page context for cookie access) ──── -function decodeLinkedinRedirect(url: string): string { - if (!url) return ''; - try { - const parsed = new URL(url); - if (parsed.pathname === '/redir/redirect/') { - return parsed.searchParams.get('url') || url; +async function fetchJobCards( + page: IPage, + input: SearchInput, +): Promise>> { + const MAX_BATCH = 25; + const allJobs: Array> = []; + let offset = input.start; + + while (allJobs.length < input.limit) { + const count = Math.min(MAX_BATCH, input.limit - allJobs.length); + const apiPath = buildVoyagerUrl(input, offset, count); + + const batch = await page.evaluate(`(async () => { + const jsession = document.cookie.split(';').map(p => p.trim()) + .find(p => p.startsWith('JSESSIONID='))?.slice('JSESSIONID='.length); + if (!jsession) return { error: 'LinkedIn JSESSIONID cookie not found. Please sign in to LinkedIn in the browser.' }; + + const csrf = jsession.replace(/^"|"$/g, ''); + const res = await fetch(${JSON.stringify(apiPath)}, { + credentials: 'include', + headers: { 'csrf-token': csrf, 'x-restli-protocol-version': '2.0.0' }, + }); + if (!res.ok) { + const text = await res.text(); + return { error: 'LinkedIn API error: HTTP ' + res.status + ' ' + text.slice(0, 200) }; + } + return res.json(); + })()`); + + if (!batch || batch.error) { + throw new Error(batch?.error || 'LinkedIn search returned an unexpected response'); } - } catch {} - return url; + + const elements: any[] = Array.isArray(batch?.elements) ? batch.elements : []; + if (elements.length === 0) break; + + for (const element of elements) { + const card = element?.jobCardUnion?.jobPostingCard; + if (!card) continue; + + // Extract job ID from URN fields + const jobId = [card.jobPostingUrn, card.jobPosting?.entityUrn, card.entityUrn] + .filter(Boolean) + .map(s => String(s).match(/(\d+)/)?.[1]) + .find(Boolean) ?? ''; + + // Extract listed date + const listedItem = (card.footerItems || []).find((i: any) => i?.type === 'LISTED_DATE' && i?.timeAt); + const listed = listedItem?.timeAt ? new Date(listedItem.timeAt).toISOString().slice(0, 10) : ''; + + allJobs.push({ + title: card.jobPostingTitle || card.title?.text || '', + company: card.primaryDescription?.text || '', + location: card.secondaryDescription?.text || '', + listed, + salary: card.tertiaryDescription?.text || '', + url: jobId ? 'https://www.linkedin.com/jobs/view/' + jobId : '', + }); + } + + if (elements.length < count) break; + offset += elements.length; + } + + return allJobs.slice(0, input.limit).map((item, index) => ({ + rank: input.start + index + 1, + ...item, + })); } -async function enrichJobDetails(page: IPage, jobs: Array>): Promise>> { +// ── Job detail enrichment (--details flag) ──────────────────────────── + +async function enrichJobDetails( + page: IPage, + jobs: Array>, +): Promise>> { const enriched: Array> = []; - for (const job of jobs) { + for (let i = 0; i < jobs.length; i++) { + const job = jobs[i]; + console.error(`[opencli:linkedin] Fetching details ${i + 1}/${jobs.length}: ${job.title}`); + if (!job.url) { enriched.push({ ...job, description: '', apply_url: '' }); continue; @@ -178,40 +312,37 @@ async function enrichJobDetails(page: IPage, jobs: Array>): try { await page.goto(job.url); await page.wait({ text: 'About the job', timeout: 8 }); + + // Expand "Show more" button if present await page.evaluate(`(() => { - const normalize = (value) => (value || '').replace(/\s+/g, ' ').trim().toLowerCase(); - const aboutSection = [...document.querySelectorAll('div, section, article')] - .find((element) => normalize(element.querySelector('h1, h2, h3, h4')?.textContent || '') === 'about the job'); - const expandButton = [...(aboutSection?.querySelectorAll('button, a[role="button"]') || [])] - .find((element) => /more/.test(normalize(element.textContent || '')) || /more/.test(normalize(element.getAttribute('aria-label') || ''))); - if (expandButton) expandButton.click(); + const norm = (v) => (v || '').replace(/\\s+/g, ' ').trim().toLowerCase(); + const section = [...document.querySelectorAll('div, section, article')] + .find(el => norm(el.querySelector('h1,h2,h3,h4')?.textContent || '') === 'about the job'); + const btn = [...(section?.querySelectorAll('button, a[role="button"]') || [])] + .find(el => /more/.test(norm(el.textContent || '')) || /more/.test(norm(el.getAttribute('aria-label') || ''))); + if (btn) btn.click(); })()`); await page.wait(1); + // Extract description and apply URL const detail = await page.evaluate(`(() => { - const normalize = (value) => (value || '').replace(/\s+/g, ' ').trim(); + const norm = (v) => (v || '').replace(/\\s+/g, ' ').trim(); + // Find the most specific (shortest) container with "About the job" heading + // Shortest = most specific DOM node, avoiding outer wrappers that include unrelated text const candidates = [...document.querySelectorAll('div, section, article')] - .map((element) => { - const heading = normalize(element.querySelector('h1, h2, h3, h4')?.textContent || ''); - const text = normalize(element.innerText || ''); - return { heading, text }; - }) - .filter((item) => item.text && item.heading.toLowerCase() === 'about the job' && item.text.length > 'About the job'.length) + .map(el => ({ + heading: norm(el.querySelector('h1,h2,h3,h4')?.textContent || ''), + text: norm(el.innerText || ''), + })) + .filter(c => c.text && c.heading.toLowerCase() === 'about the job' && c.text.length > 'About the job'.length) .sort((a, b) => a.text.length - b.text.length); - const description = candidates[0]?.text.replace(/^About the job\s*/i, '') || ''; + const description = candidates[0]?.text.replace(/^About the job\\s*/i, '') || ''; const applyLink = [...document.querySelectorAll('a[href]')] - .map((anchor) => ({ - href: anchor.href || '', - text: normalize(anchor.textContent || ''), - aria: normalize(anchor.getAttribute('aria-label') || ''), - })) - .find((anchor) => /apply/i.test(anchor.text) || /apply/i.test(anchor.aria)); + .map(a => ({ href: a.href || '', text: norm(a.textContent || ''), aria: norm(a.getAttribute('aria-label') || '') })) + .find(a => /apply/i.test(a.text) || /apply/i.test(a.aria)); - return { - description, - applyUrl: applyLink?.href || '', - }; + return { description, applyUrl: applyLink?.href || '' }; })()`); enriched.push({ @@ -227,6 +358,8 @@ async function enrichJobDetails(page: IPage, jobs: Array>): return enriched; } +// ── CLI registration ────────────────────────────────────────────────── + cli({ site: 'linkedin', name: 'search', @@ -253,10 +386,6 @@ cli({ const includeDetails = Boolean(kwargs.details); const location = (kwargs.location ?? '').trim(); const keywords = String(kwargs.query ?? '').trim(); - const experienceLevels = mapFilterValues(kwargs.experience_level, EXPERIENCE_LEVELS, 'experience_level'); - const jobTypes = mapFilterValues(kwargs.job_type, JOB_TYPES, 'job_type'); - const remoteTypes = mapFilterValues(kwargs.remote, REMOTE_TYPES, 'remote'); - const datePostedValues = mapFilterValues(kwargs.date_posted, DATE_POSTED, 'date_posted'); if (!keywords) throw new Error('query is required'); @@ -267,144 +396,21 @@ cli({ await page.wait({ text: 'Jobs', timeout: 10 }); const companyIds = await resolveCompanyIds(page, kwargs.company); - const data = await page.evaluate(`(async () => { - const input = ${JSON.stringify({ - keywords, - location, - limit, - start, - companyIds, - experienceLevels, - jobTypes, - datePostedValues, - remoteTypes, - })}; - const maxBatchSize = 25; - const jsession = document.cookie - .split(';') - .map(part => part.trim()) - .find(part => part.startsWith('JSESSIONID=')) - ?.slice('JSESSIONID='.length); - - if (!jsession) { - return { error: 'LinkedIn JSESSIONID cookie not found. Please sign in to LinkedIn in the browser.' }; - } - - const csrf = jsession.replace(/^"|"$/g, ''); - const headers = { - 'csrf-token': csrf, - 'x-restli-protocol-version': '2.0.0', - }; - - const buildSearchQuery = () => { - const parts = [ - 'origin:' + (( - input.companyIds.length || - input.experienceLevels.length || - input.jobTypes.length || - input.datePostedValues.length || - input.remoteTypes.length - ) ? 'JOB_SEARCH_PAGE_JOB_FILTER' : 'JOB_SEARCH_PAGE_OTHER_ENTRY'), - 'keywords:' + input.keywords, - ]; - if (input.location) { - parts.push('locationUnion:(seoLocation:(location:' + input.location + '))'); - } - const filters = []; - if (input.companyIds.length) filters.push('company:List(' + input.companyIds.join(',') + ')'); - if (input.experienceLevels.length) filters.push('experience:List(' + input.experienceLevels.join(',') + ')'); - if (input.jobTypes.length) filters.push('jobType:List(' + input.jobTypes.join(',') + ')'); - if (input.datePostedValues.length) filters.push('timePostedRange:List(' + input.datePostedValues.join(',') + ')'); - if (input.remoteTypes.length) filters.push('workplaceType:List(' + input.remoteTypes.join(',') + ')'); - if (filters.length) parts.push('selectedFilters:(' + filters.join(',') + ')'); - parts.push('spellCorrectionEnabled:true'); - return '(' + parts.join(',') + ')'; - }; - - const buildUrl = (offset, count) => { - const params = new URLSearchParams({ - decorationId: 'com.linkedin.voyager.dash.deco.jobs.search.JobSearchCardsCollection-220', - count: String(count), - q: 'jobSearch', - }); - const query = encodeURIComponent(buildSearchQuery()) - .replace(/%3A/gi, ':') - .replace(/%2C/gi, ',') - .replace(/%28/gi, '(') - .replace(/%29/gi, ')'); - return '/voyager/api/voyagerJobsDashJobCards?' + - params.toString() + - '&query=' + query + - '&start=' + offset; - }; - - const extractListed = (card) => { - const listed = (card.footerItems || []).find(item => item?.type === 'LISTED_DATE' && item?.timeAt); - return listed?.timeAt ? new Date(listed.timeAt).toISOString().slice(0, 10) : ''; - }; - - const extractJobId = (card) => { - const sources = [ - card.jobPostingUrn, - card.jobPosting?.entityUrn, - card.entityUrn, - ].filter(Boolean); - for (const source of sources) { - const match = String(source).match(/(\d+)/); - if (match) return match[1]; - } - return ''; - }; - - const collected = []; - let offset = input.start; - - while (collected.length < input.limit) { - const count = Math.min(maxBatchSize, input.limit - collected.length); - const res = await fetch(buildUrl(offset, count), { - credentials: 'include', - headers, - }); - - if (!res.ok) { - const text = await res.text(); - return { error: 'LinkedIn API error: HTTP ' + res.status + ' ' + text.slice(0, 200) }; - } - - const payload = await res.json(); - const elements = Array.isArray(payload?.elements) ? payload.elements : []; - if (elements.length === 0) break; - - for (const element of elements) { - const card = element?.jobCardUnion?.jobPostingCard; - if (!card) continue; - const jobId = extractJobId(card); - collected.push({ - title: card.jobPostingTitle || card.title?.text || '', - company: card.primaryDescription?.text || '', - location: card.secondaryDescription?.text || '', - listed: extractListed(card), - salary: card.tertiaryDescription?.text || '', - url: jobId ? 'https://www.linkedin.com/jobs/view/' + jobId : '', - }); - } - - if (elements.length < count) break; - offset += elements.length; - } - - return collected.slice(0, input.limit).map((item, index) => ({ - rank: input.start + index + 1, - ...item, - })); - })()`); + const input: SearchInput = { + keywords, + location, + limit, + start, + companyIds, + experienceLevels: mapFilterValues(kwargs.experience_level, EXPERIENCE_LEVELS, 'experience_level'), + jobTypes: mapFilterValues(kwargs.job_type, JOB_TYPES, 'job_type'), + datePostedValues: mapFilterValues(kwargs.date_posted, DATE_POSTED, 'date_posted'), + remoteTypes: mapFilterValues(kwargs.remote, REMOTE_TYPES, 'remote'), + }; - if (!Array.isArray(data)) { - throw new Error(data?.error || 'LinkedIn search returned an unexpected response'); - } + const data = await fetchJobCards(page, input); if (!includeDetails) return data; - return enrichJobDetails(page, data); }, });