search.js
import { cli, Strategy } from '@jackwener/opencli/registry';
import { ArgumentError, CommandExecutionError } from '@jackwener/opencli/errors';
// ── Filter value mappings ──────────────────────────────────────────────
// Each table maps a lower-case user-facing alias to the code that is placed
// in the `selectedFilters` clause of the Voyager search query.
const EXPERIENCE_LEVELS = {
    internship: '1',
    entry: '2',
    'entry-level': '2',
    associate: '3',
    mid: '4',
    senior: '4',
    'mid-senior': '4',
    'mid-senior-level': '4',
    director: '5',
    executive: '6',
};
const JOB_TYPES = {
    'full-time': 'F',
    fulltime: 'F',
    full: 'F',
    'part-time': 'P',
    parttime: 'P',
    part: 'P',
    contract: 'C',
    temporary: 'T',
    temp: 'T',
    volunteer: 'V',
    internship: 'I',
    other: 'O',
};
const DATE_POSTED = {
    any: 'on',
    month: 'r2592000',
    'past-month': 'r2592000',
    week: 'r604800',
    'past-week': 'r604800',
    day: 'r86400',
    '24h': 'r86400',
    'past-24h': 'r86400',
};
const REMOTE_TYPES = {
    onsite: '1',
    'on-site': '1',
    hybrid: '3',
    remote: '2',
};
// ── Helpers ────────────────────────────────────────────────────────────
/**
 * Split a comma-separated argument into trimmed, non-empty items.
 *
 * @param {*} value - Raw argument; undefined, null and '' yield an empty list.
 * @returns {string[]} Items in input order.
 */
function parseCsvArg(value) {
    if (value === undefined || value === null || value === '')
        return [];
    return String(value)
        .split(',')
        .map(item => item.trim())
        .filter(Boolean);
}
/**
 * Translate a comma-separated list of aliases into de-duplicated filter codes.
 *
 * @param {*} input - Raw comma-separated argument (may be undefined).
 * @param {Object<string, string>} mapping - Alias → code table; keys are lower-case.
 * @param {string} label - Argument name used in the error message.
 * @returns {string[]} Unique codes in first-seen order.
 * @throws {ArgumentError} When an item is not a key of `mapping`.
 */
function mapFilterValues(input, mapping, label) {
    const values = parseCsvArg(input);
    const resolved = values.map(value => {
        const key = value.toLowerCase();
        // Own-property lookup only: a bare `mapping[key]` would also resolve
        // inherited names such as "constructor" and let them pass validation.
        const mapped = Object.prototype.hasOwnProperty.call(mapping, key) ? mapping[key] : undefined;
        if (!mapped)
            throw new ArgumentError(`Unsupported ${label}: ${value}`);
        return mapped;
    });
    return [...new Set(resolved)];
}
/**
 * Collapse every run of whitespace to a single space and trim the ends.
 *
 * @param {*} value - Any value; null/undefined become ''.
 * @returns {string}
 */
function normalizeWhitespace(value) {
    return String(value ?? '').replace(/\s+/g, ' ').trim();
}
/**
 * Unwrap a `/redir/redirect/` link to the target held in its `url` parameter.
 *
 * @param {string} url - Candidate URL.
 * @returns {string} The `url` query parameter when the path is
 *   `/redir/redirect/` and the parameter is non-empty; otherwise the input
 *   unchanged ('' for falsy input).
 */
function decodeLinkedinRedirect(url) {
    if (!url)
        return '';
    try {
        const parsed = new URL(url);
        if (parsed.pathname === '/redir/redirect/') {
            return parsed.searchParams.get('url') || url;
        }
    }
    catch {
        // Not an absolute URL; fall through and return it untouched.
    }
    return url;
}
/**
 * Percent-encode a free-text value for embedding inside the parenthesised
 * search query.
 *
 * `encodeURIComponent` leaves `! ' ( ) *` untouched; parentheses and quotes
 * would be read as query structure, so they are escaped here as well.
 *
 * @param {*} value - Value to encode (stringified first).
 * @returns {string}
 */
function encodeRestliValue(value) {
    return encodeURIComponent(String(value))
        .replace(/[!'()*]/g, ch => '%' + ch.charCodeAt(0).toString(16).toUpperCase());
}
/**
 * Assemble the parenthesised search expression sent as the `query` parameter.
 *
 * Values are inserted verbatim; callers that build a URL must encode
 * `keywords`, `location` and `companyIds` beforehand.
 *
 * @param {object} input
 * @param {string} input.keywords
 * @param {string} [input.location]
 * @param {string[]} input.companyIds
 * @param {string[]} input.experienceLevels
 * @param {string[]} input.jobTypes
 * @param {string[]} input.datePostedValues
 * @param {string[]} input.remoteTypes
 * @returns {string} e.g. `(origin:...,keywords:...,spellCorrectionEnabled:true)`
 */
function buildVoyagerSearchQuery(input) {
    const hasFilters = input.companyIds.length ||
        input.experienceLevels.length ||
        input.jobTypes.length ||
        input.datePostedValues.length ||
        input.remoteTypes.length;
    const parts = [
        'origin:' + (hasFilters ? 'JOB_SEARCH_PAGE_JOB_FILTER' : 'JOB_SEARCH_PAGE_OTHER_ENTRY'),
        'keywords:' + input.keywords,
    ];
    if (input.location) {
        parts.push('locationUnion:(seoLocation:(location:' + input.location + '))');
    }
    const filters = [];
    if (input.companyIds.length)
        filters.push('company:List(' + input.companyIds.join(',') + ')');
    if (input.experienceLevels.length)
        filters.push('experience:List(' + input.experienceLevels.join(',') + ')');
    if (input.jobTypes.length)
        filters.push('jobType:List(' + input.jobTypes.join(',') + ')');
    if (input.datePostedValues.length)
        filters.push('timePostedRange:List(' + input.datePostedValues.join(',') + ')');
    if (input.remoteTypes.length)
        filters.push('workplaceType:List(' + input.remoteTypes.join(',') + ')');
    if (filters.length)
        parts.push('selectedFilters:(' + filters.join(',') + ')');
    parts.push('spellCorrectionEnabled:true');
    return '(' + parts.join(',') + ')';
}
/**
 * Build the relative job-cards API path for one page of results.
 *
 * Only the user-supplied values are percent-encoded; the structural
 * `( ) , :` characters of the query are left raw. Encoding the whole query
 * and then restoring `:` and `,` (the previous approach) also un-escaped
 * colons and commas typed by the user, e.g. in "San Francisco, CA".
 *
 * @param {object} input - Search input; see buildVoyagerSearchQuery.
 * @param {number} offset - Value of the `start` parameter.
 * @param {number} count - Value of the `count` parameter.
 * @returns {string} Path plus query string.
 */
function buildVoyagerUrl(input, offset, count) {
    const params = new URLSearchParams({
        decorationId: 'com.linkedin.voyager.dash.deco.jobs.search.JobSearchCardsCollection-220',
        count: String(count),
        q: 'jobSearch',
    });
    // The remaining filter lists come from the fixed tables above and hold
    // only letters and digits, so they need no encoding.
    const query = buildVoyagerSearchQuery({
        ...input,
        keywords: encodeRestliValue(input.keywords),
        location: input.location ? encodeRestliValue(input.location) : '',
        companyIds: input.companyIds.map(id => encodeRestliValue(id)),
    });
    return '/voyager/api/voyagerJobsDashJobCards?' + params.toString() + '&query=' + query + '&start=' + offset;
}
// ── Company ID resolution (requires DOM interaction) ──────────────────
/**
 * Resolve a comma-separated mix of company names and numeric IDs to IDs.
 *
 * All-digit items are taken as IDs. Names are looked up inside the page by
 * reading `input[name="company-filter-value"]` elements and, when a name is
 * not listed yet, typing it into the "Add a company" input and re-reading.
 *
 * @param {object} page - Browser page handle exposing `evaluate(script)`.
 * @param {*} input - Raw `company` argument (may be undefined).
 * @returns {Promise<string[]>} Unique IDs.
 * @throws {ArgumentError} When at least one name could not be resolved.
 */
async function resolveCompanyIds(page, input) {
    const rawValues = parseCsvArg(input);
    const ids = new Set();
    const names = [];
    for (const value of rawValues) {
        if (/^\d+$/.test(value))
            ids.add(value);
        else
            names.push(value);
    }
    if (!names.length)
        return [...ids];
    // The script is a template literal, so regex backslashes are doubled.
    const resolved = await page.evaluate(`(async () => {
      const targets = ${JSON.stringify(names)};
      const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms));
      const normalize = (v) => (v || '').toLowerCase().replace(/\\s+/g, ' ').trim();

      // Open "All filters" panel to expose company filter inputs
      const allBtn = [...document.querySelectorAll('button')]
        .find(b => ((b.innerText || '').trim().replace(/\\s+/g, ' ')) === 'All filters');
      if (allBtn) { allBtn.click(); await sleep(300); }

      const getCompanyMap = () => {
        const map = {};
        for (const el of document.querySelectorAll('input[name="company-filter-value"]')) {
          const text = (el.parentElement?.innerText || el.closest('label')?.innerText || '')
            .replace(/\\s+/g, ' ').trim().replace(/\\s*Filter by.*$/i, '').trim();
          if (text) map[normalize(text)] = el.value;
        }
        return map;
      };

      const match = (map, name) => {
        const n = normalize(name);
        if (map[n]) return map[n];
        const k = Object.keys(map).find(e => e === n || e.includes(n) || n.includes(e));
        return k ? map[k] : null;
      };

      const results = {};
      let map = getCompanyMap();

      for (const name of targets) {
        let found = match(map, name);
        if (!found) {
          const inp = [...document.querySelectorAll('input')]
            .find(el => el.getAttribute('aria-label') === 'Add a company');
          if (inp) {
            inp.focus();
            inp.value = name;
            inp.dispatchEvent(new Event('input', { bubbles: true }));
            inp.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', bubbles: true }));
            await sleep(1200);
            map = getCompanyMap();
            found = match(map, name);
            inp.value = '';
            inp.dispatchEvent(new Event('input', { bubbles: true }));
            await sleep(100);
          }
        }
        results[name] = found || null;
      }
      return results;
    })()`);
    const unresolved = [];
    for (const name of names) {
        const id = resolved?.[name];
        if (id)
            ids.add(id);
        else
            unresolved.push(name);
    }
    if (unresolved.length) {
        throw new ArgumentError(`Could not resolve LinkedIn company filter: ${unresolved.join(', ')}`);
    }
    return [...ids];
}
// ── Voyager API fetch (runs inside page context for cookie access) ────
/**
 * Fetch job cards in batches of up to 25 until `input.limit` rows are
 * collected or the API runs out of results.
 *
 * Each request is issued from inside the page; the JSESSIONID cookie value,
 * with surrounding double quotes stripped, is sent as the `csrf-token` header.
 *
 * @param {object} page - Browser page handle exposing `evaluate(script)`.
 * @param {object} input - Search input; also reads `input.start` and `input.limit`.
 * @returns {Promise<object[]>} Rows with `rank`, `title`, `company`,
 *   `location`, `listed`, `salary` and `url`.
 * @throws {CommandExecutionError} When the cookie is missing, the HTTP
 *   response is not OK, or the evaluated script returns a falsy value.
 */
async function fetchJobCards(page, input) {
    const MAX_BATCH = 25;
    const allJobs = [];
    let offset = input.start;
    while (allJobs.length < input.limit) {
        const count = Math.min(MAX_BATCH, input.limit - allJobs.length);
        const apiPath = buildVoyagerUrl(input, offset, count);
        const batch = await page.evaluate(`(async () => {
        const jsession = document.cookie.split(';').map(p => p.trim())
          .find(p => p.startsWith('JSESSIONID='))?.slice('JSESSIONID='.length);
        if (!jsession) return { error: 'LinkedIn JSESSIONID cookie not found. Please sign in to LinkedIn in the browser.' };

        const csrf = jsession.replace(/^"|"$/g, '');
        const res = await fetch(${JSON.stringify(apiPath)}, {
          credentials: 'include',
          headers: { 'csrf-token': csrf, 'x-restli-protocol-version': '2.0.0' },
        });
        if (!res.ok) {
          const text = await res.text();
          return { error: 'LinkedIn API error: HTTP ' + res.status + ' ' + text.slice(0, 200) };
        }
        return res.json();
      })()`);
        if (!batch || batch.error) {
            throw new CommandExecutionError(batch?.error || 'LinkedIn search returned an unexpected response');
        }
        const elements = Array.isArray(batch?.elements) ? batch.elements : [];
        if (elements.length === 0)
            break;
        for (const element of elements) {
            const card = element?.jobCardUnion?.jobPostingCard;
            if (!card)
                continue;
            // Extract job ID from URN fields
            const jobId = [card.jobPostingUrn, card.jobPosting?.entityUrn, card.entityUrn]
                .filter(Boolean)
                .map(s => String(s).match(/(\d+)/)?.[1])
                .find(Boolean) ?? '';
            // Extract listed date
            const listedItem = (card.footerItems || []).find((i) => i?.type === 'LISTED_DATE' && i?.timeAt);
            const listed = listedItem?.timeAt ? new Date(listedItem.timeAt).toISOString().slice(0, 10) : '';
            allJobs.push({
                title: card.jobPostingTitle || card.title?.text || '',
                company: card.primaryDescription?.text || '',
                location: card.secondaryDescription?.text || '',
                listed,
                salary: card.tertiaryDescription?.text || '',
                url: jobId ? 'https://www.linkedin.com/jobs/view/' + jobId : '',
            });
        }
        // A short page means there is nothing further to request.
        if (elements.length < count)
            break;
        offset += elements.length;
    }
    // The last batch may overshoot; trim, then number rows from `start`.
    return allJobs.slice(0, input.limit).map((item, index) => ({
        rank: input.start + index + 1,
        ...item,
    }));
}
// ── Job detail enrichment (--details flag) ────────────────────────────
/**
 * Visit each job URL and add `description` and `apply_url` to the row.
 *
 * Enrichment is best-effort: a job without a URL, or one whose page fails to
 * load or parse, keeps its basic fields and gets empty strings for both.
 * Progress and failures are written to stderr.
 *
 * @param {object} page - Browser page handle exposing `goto`, `wait` and `evaluate`.
 * @param {object[]} jobs - Rows as returned by fetchJobCards.
 * @returns {Promise<object[]>} One enriched row per input row, same order.
 */
async function enrichJobDetails(page, jobs) {
    const enriched = [];
    for (let i = 0; i < jobs.length; i++) {
        const job = jobs[i];
        console.error(`[opencli:linkedin] Fetching details ${i + 1}/${jobs.length}: ${job.title}`);
        if (!job.url) {
            enriched.push({ ...job, description: '', apply_url: '' });
            continue;
        }
        try {
            await page.goto(job.url);
            // NOTE(review): timeout unit is presumably seconds — confirm against the page API.
            await page.wait({ text: 'About the job', timeout: 8 });
            // Expand "Show more" button if present
            await page.evaluate(`(() => {
          const norm = (v) => (v || '').replace(/\\s+/g, ' ').trim().toLowerCase();
          const section = [...document.querySelectorAll('div, section, article')]
            .find(el => norm(el.querySelector('h1,h2,h3,h4')?.textContent || '') === 'about the job');
          const btn = [...(section?.querySelectorAll('button, a[role="button"]') || [])]
            .find(el => /more/.test(norm(el.textContent || '')) || /more/.test(norm(el.getAttribute('aria-label') || '')));
          if (btn) btn.click();
        })()`);
            await page.wait(1);
            // Extract description and apply URL
            const detail = await page.evaluate(`(() => {
          const norm = (v) => (v || '').replace(/\\s+/g, ' ').trim();
          // Find the most specific (shortest) container with "About the job" heading
          // Shortest = most specific DOM node, avoiding outer wrappers that include unrelated text
          const candidates = [...document.querySelectorAll('div, section, article')]
            .map(el => ({
              heading: norm(el.querySelector('h1,h2,h3,h4')?.textContent || ''),
              text: norm(el.innerText || ''),
            }))
            .filter(c => c.text && c.heading.toLowerCase() === 'about the job' && c.text.length > 'About the job'.length)
            .sort((a, b) => a.text.length - b.text.length);

          const description = candidates[0]?.text.replace(/^About the job\\s*/i, '') || '';
          const applyLink = [...document.querySelectorAll('a[href]')]
            .map(a => ({ href: a.href || '', text: norm(a.textContent || ''), aria: norm(a.getAttribute('aria-label') || '') }))
            .find(a => /apply/i.test(a.text) || /apply/i.test(a.aria));

          return { description, applyUrl: applyLink?.href || '' };
        })()`);
            enriched.push({
                ...job,
                description: normalizeWhitespace(detail?.description),
                apply_url: decodeLinkedinRedirect(String(detail?.applyUrl ?? '')),
            });
        }
        catch (err) {
            // Keep the basic row, but report why the details are missing
            // instead of dropping the failure silently.
            console.error(`[opencli:linkedin] Failed to fetch details for ${job.url}: ${err?.message ?? err}`);
            enriched.push({ ...job, description: '', apply_url: '' });
        }
    }
    return enriched;
}
// ── CLI registration ──────────────────────────────────────────────────
cli({
    site: 'linkedin',
    name: 'search',
    description: 'Search LinkedIn jobs',
    domain: 'www.linkedin.com',
    strategy: Strategy.HEADER,
    browser: true,
    args: [
        { name: 'query', type: 'string', required: true, positional: true, help: 'Job search keywords' },
        { name: 'location', type: 'string', required: false, help: 'Location text such as San Francisco Bay Area' },
        { name: 'limit', type: 'int', default: 10, help: 'Number of jobs to return (max 100)' },
        { name: 'start', type: 'int', default: 0, help: 'Result offset for pagination' },
        { name: 'details', type: 'bool', default: false, help: 'Include full job description and apply URL (slower)' },
        { name: 'company', type: 'string', required: false, help: 'Comma-separated company names or LinkedIn company IDs' },
        { name: 'experience-level', type: 'string', required: false, help: 'Comma-separated: internship, entry, associate, mid-senior, director, executive' },
        { name: 'job-type', type: 'string', required: false, help: 'Comma-separated: full-time, part-time, contract, temporary, volunteer, internship, other' },
        { name: 'date-posted', type: 'string', required: false, help: 'One of: any, month, week, 24h' },
        { name: 'remote', type: 'string', required: false, help: 'Comma-separated: on-site, hybrid, remote' },
    ],
    columns: ['rank', 'title', 'company', 'location', 'listed', 'salary', 'url'],
    func: async (page, kwargs) => {
        // Clamp limit to 1..100 and start to >= 0.
        const limit = Math.max(1, Math.min(kwargs.limit ?? 10, 100));
        const start = Math.max(0, kwargs.start ?? 0);
        const includeDetails = Boolean(kwargs.details);
        // Coerce like `query` so a non-string value cannot break `.trim()`.
        const location = String(kwargs.location ?? '').trim();
        const keywords = String(kwargs.query ?? '').trim();
        if (!keywords)
            throw new ArgumentError('query is required');
        // Validate the table-driven filters before any browser work so that a
        // bad argument fails immediately.
        const experienceLevels = mapFilterValues(kwargs['experience-level'], EXPERIENCE_LEVELS, 'experience_level');
        const jobTypes = mapFilterValues(kwargs['job-type'], JOB_TYPES, 'job_type');
        const datePostedValues = mapFilterValues(kwargs['date-posted'], DATE_POSTED, 'date_posted');
        const remoteTypes = mapFilterValues(kwargs.remote, REMOTE_TYPES, 'remote');
        // Load the search page first: company lookup reads its DOM and the
        // API fetch reads its cookies.
        const searchParams = new URLSearchParams({ keywords });
        if (location)
            searchParams.set('location', location);
        await page.goto(`https://www.linkedin.com/jobs/search/?${searchParams.toString()}`);
        await page.wait({ text: 'Jobs', timeout: 10 });
        const companyIds = await resolveCompanyIds(page, kwargs.company);
        const input = {
            keywords,
            location,
            limit,
            start,
            companyIds,
            experienceLevels,
            jobTypes,
            datePostedValues,
            remoteTypes,
        };
        const data = await fetchJobCards(page, input);
        if (!includeDetails)
            return data;
        return enrichJobDetails(page, data);
    },
});