// clis/linkedin/search.js
  1  import { cli, Strategy } from '@jackwener/opencli/registry';
  2  import { ArgumentError, CommandExecutionError } from '@jackwener/opencli/errors';
  3  // ── Filter value mappings ──────────────────────────────────────────────
  4  const EXPERIENCE_LEVELS = {
  5      internship: '1',
  6      entry: '2',
  7      'entry-level': '2',
  8      associate: '3',
  9      mid: '4',
 10      senior: '4',
 11      'mid-senior': '4',
 12      'mid-senior-level': '4',
 13      director: '5',
 14      executive: '6',
 15  };
 16  const JOB_TYPES = {
 17      'full-time': 'F',
 18      fulltime: 'F',
 19      full: 'F',
 20      'part-time': 'P',
 21      parttime: 'P',
 22      part: 'P',
 23      contract: 'C',
 24      temporary: 'T',
 25      temp: 'T',
 26      volunteer: 'V',
 27      internship: 'I',
 28      other: 'O',
 29  };
 30  const DATE_POSTED = {
 31      any: 'on',
 32      month: 'r2592000',
 33      'past-month': 'r2592000',
 34      week: 'r604800',
 35      'past-week': 'r604800',
 36      day: 'r86400',
 37      '24h': 'r86400',
 38      'past-24h': 'r86400',
 39  };
 40  const REMOTE_TYPES = {
 41      onsite: '1',
 42      'on-site': '1',
 43      hybrid: '3',
 44      remote: '2',
 45  };
 46  // ── Helpers ────────────────────────────────────────────────────────────
 47  function parseCsvArg(value) {
 48      if (value === undefined || value === null || value === '')
 49          return [];
 50      return String(value)
 51          .split(',')
 52          .map(item => item.trim())
 53          .filter(Boolean);
 54  }
 55  function mapFilterValues(input, mapping, label) {
 56      const values = parseCsvArg(input);
 57      const resolved = values.map(value => {
 58          const key = value.toLowerCase();
 59          const mapped = mapping[key];
 60          if (!mapped)
 61              throw new ArgumentError(`Unsupported ${label}: ${value}`);
 62          return mapped;
 63      });
 64      return [...new Set(resolved)];
 65  }
 66  function normalizeWhitespace(value) {
 67      return String(value ?? '').replace(/\s+/g, ' ').trim();
 68  }
 69  function decodeLinkedinRedirect(url) {
 70      if (!url)
 71          return '';
 72      try {
 73          const parsed = new URL(url);
 74          if (parsed.pathname === '/redir/redirect/') {
 75              return parsed.searchParams.get('url') || url;
 76          }
 77      }
 78      catch { }
 79      return url;
 80  }
 81  function buildVoyagerSearchQuery(input) {
 82      const hasFilters = input.companyIds.length ||
 83          input.experienceLevels.length ||
 84          input.jobTypes.length ||
 85          input.datePostedValues.length ||
 86          input.remoteTypes.length;
 87      const parts = [
 88          'origin:' + (hasFilters ? 'JOB_SEARCH_PAGE_JOB_FILTER' : 'JOB_SEARCH_PAGE_OTHER_ENTRY'),
 89          'keywords:' + input.keywords,
 90      ];
 91      if (input.location) {
 92          parts.push('locationUnion:(seoLocation:(location:' + input.location + '))');
 93      }
 94      const filters = [];
 95      if (input.companyIds.length)
 96          filters.push('company:List(' + input.companyIds.join(',') + ')');
 97      if (input.experienceLevels.length)
 98          filters.push('experience:List(' + input.experienceLevels.join(',') + ')');
 99      if (input.jobTypes.length)
100          filters.push('jobType:List(' + input.jobTypes.join(',') + ')');
101      if (input.datePostedValues.length)
102          filters.push('timePostedRange:List(' + input.datePostedValues.join(',') + ')');
103      if (input.remoteTypes.length)
104          filters.push('workplaceType:List(' + input.remoteTypes.join(',') + ')');
105      if (filters.length)
106          parts.push('selectedFilters:(' + filters.join(',') + ')');
107      parts.push('spellCorrectionEnabled:true');
108      return '(' + parts.join(',') + ')';
109  }
/**
 * Build the relative Voyager job-cards URL for one page of results.
 *
 * The query expression is percent-encoded, then `:`, `,`, `(` and `)` are
 * restored to their literal form so the expression's structure stays
 * readable in the URL; every other reserved character stays encoded.
 *
 * @param {object} input - Search input accepted by `buildVoyagerSearchQuery`.
 * @param {number} offset - Value for the `start` parameter.
 * @param {number} count - Value for the `count` parameter.
 * @returns {string} Path plus query string, beginning with `/voyager/api/`.
 */
function buildVoyagerUrl(input, offset, count) {
    const params = new URLSearchParams();
    params.set('decorationId', 'com.linkedin.voyager.dash.deco.jobs.search.JobSearchCardsCollection-220');
    params.set('count', String(count));
    params.set('q', 'jobSearch');

    // Escapes that are turned back into the structural character they stand for.
    const structural = { '%3A': ':', '%2C': ',', '%28': '(', '%29': ')' };
    const query = encodeURIComponent(buildVoyagerSearchQuery(input))
        .replace(/%(?:3A|2C|28|29)/gi, (escape) => structural[escape.toUpperCase()]);

    return `/voyager/api/voyagerJobsDashJobCards?${params.toString()}&query=${query}&start=${offset}`;
}
123  // ── Company ID resolution (requires DOM interaction) ──────────────────
/**
 * Resolve the comma-separated `--company` argument to company IDs.
 *
 * Purely numeric items are taken as IDs directly. Every other item is treated
 * as a company name and looked up inside the page: the script clicks the
 * "All filters" button if one is found, reads the `company-filter-value`
 * inputs, and for names that are still unmatched types them into the
 * "Add a company" input and re-reads the options. Name matching is
 * case-insensitive and also accepts substring matches in either direction.
 *
 * @param {object} page - Browser page exposing `evaluate(script)`.
 * @param {*} input - Raw `--company` value (see `parseCsvArg`).
 * @returns {Promise<string[]>} Unique IDs: numeric inputs first, then the IDs
 *   of resolved names in input order.
 * @throws {ArgumentError} When at least one name could not be resolved.
 */
async function resolveCompanyIds(page, input) {
    const requested = parseCsvArg(input);
    const isNumericId = (value) => /^\d+$/.test(value);
    const ids = new Set(requested.filter(isNumericId));
    const names = requested.filter((value) => !isNumericId(value));
    if (names.length === 0) {
        // Nothing to look up, so the page is never touched.
        return [...ids];
    }
    // The script returns an object mapping each requested name to its ID or null.
    const resolved = await page.evaluate(`(async () => {
    const targets = ${JSON.stringify(names)};
    const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms));
    const normalize = (v) => (v || '').toLowerCase().replace(/\\s+/g, ' ').trim();

    // Open "All filters" panel to expose company filter inputs
    const allBtn = [...document.querySelectorAll('button')]
      .find(b => ((b.innerText || '').trim().replace(/\\s+/g, ' ')) === 'All filters');
    if (allBtn) { allBtn.click(); await sleep(300); }

    const getCompanyMap = () => {
      const map = {};
      for (const el of document.querySelectorAll('input[name="company-filter-value"]')) {
        const text = (el.parentElement?.innerText || el.closest('label')?.innerText || '')
          .replace(/\\s+/g, ' ').trim().replace(/\\s*Filter by.*$/i, '').trim();
        if (text) map[normalize(text)] = el.value;
      }
      return map;
    };

    const match = (map, name) => {
      const n = normalize(name);
      if (map[n]) return map[n];
      const k = Object.keys(map).find(e => e === n || e.includes(n) || n.includes(e));
      return k ? map[k] : null;
    };

    const results = {};
    let map = getCompanyMap();

    for (const name of targets) {
      let found = match(map, name);
      if (!found) {
        const inp = [...document.querySelectorAll('input')]
          .find(el => el.getAttribute('aria-label') === 'Add a company');
        if (inp) {
          inp.focus();
          inp.value = name;
          inp.dispatchEvent(new Event('input', { bubbles: true }));
          inp.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', bubbles: true }));
          await sleep(1200);
          map = getCompanyMap();
          found = match(map, name);
          inp.value = '';
          inp.dispatchEvent(new Event('input', { bubbles: true }));
          await sleep(100);
        }
      }
      results[name] = found || null;
    }
    return results;
  })()`);
    const unresolved = [];
    for (const name of names) {
        const id = resolved?.[name];
        if (!id) {
            unresolved.push(name);
            continue;
        }
        ids.add(id);
    }
    if (unresolved.length > 0) {
        throw new ArgumentError(`Could not resolve LinkedIn company filter: ${unresolved.join(', ')}`);
    }
    return [...ids];
}
201  // ── Voyager API fetch (runs inside page context for cookie access) ────
/**
 * Fetch job cards from the Voyager API, paging until `input.limit` jobs are
 * collected or the API runs out of results.
 *
 * Each request is issued from inside the page via `page.evaluate`, where the
 * CSRF header is taken from the `JSESSIONID` cookie with its surrounding
 * double quotes removed. Requests ask for at most 25 items at a time.
 *
 * @param {object} page - Browser page exposing `evaluate(script)`.
 * @param {object} input - Search input; `start` and `limit` control paging,
 *   the rest is forwarded to `buildVoyagerUrl`.
 * @returns {Promise<object[]>} Up to `input.limit` rows with `rank`, `title`,
 *   `company`, `location`, `listed` (YYYY-MM-DD or `''`), `salary` and `url`.
 *   `rank` is `input.start` plus the 1-based position in the returned list.
 * @throws {CommandExecutionError} When the cookie is missing, the HTTP
 *   response is not OK, or the page returns nothing.
 */
async function fetchJobCards(page, input) {
    const MAX_BATCH = 25;

    // Convert one `jobPostingCard` into a result row (without its rank).
    const toJob = (card) => {
        // The first URN field that contains a run of digits supplies the job ID.
        let jobId = '';
        for (const urn of [card.jobPostingUrn, card.jobPosting?.entityUrn, card.entityUrn]) {
            if (!urn) {
                continue;
            }
            const digits = String(urn).match(/(\d+)/)?.[1];
            if (digits) {
                jobId = digits;
                break;
            }
        }
        // Extract listed date
        const listedItem = (card.footerItems || []).find((i) => i?.type === 'LISTED_DATE' && i?.timeAt);
        const listed = listedItem?.timeAt ? new Date(listedItem.timeAt).toISOString().slice(0, 10) : '';
        return {
            title: card.jobPostingTitle || card.title?.text || '',
            company: card.primaryDescription?.text || '',
            location: card.secondaryDescription?.text || '',
            listed,
            salary: card.tertiaryDescription?.text || '',
            url: jobId ? 'https://www.linkedin.com/jobs/view/' + jobId : '',
        };
    };

    const collected = [];
    let offset = input.start;
    while (collected.length < input.limit) {
        const count = Math.min(MAX_BATCH, input.limit - collected.length);
        const apiPath = buildVoyagerUrl(input, offset, count);
        const batch = await page.evaluate(`(async () => {
      const jsession = document.cookie.split(';').map(p => p.trim())
        .find(p => p.startsWith('JSESSIONID='))?.slice('JSESSIONID='.length);
      if (!jsession) return { error: 'LinkedIn JSESSIONID cookie not found. Please sign in to LinkedIn in the browser.' };

      const csrf = jsession.replace(/^"|"$/g, '');
      const res = await fetch(${JSON.stringify(apiPath)}, {
        credentials: 'include',
        headers: { 'csrf-token': csrf, 'x-restli-protocol-version': '2.0.0' },
      });
      if (!res.ok) {
        const text = await res.text();
        return { error: 'LinkedIn API error: HTTP ' + res.status + ' ' + text.slice(0, 200) };
      }
      return res.json();
    })()`);
        if (!batch || batch.error) {
            throw new CommandExecutionError(batch?.error || 'LinkedIn search returned an unexpected response');
        }
        const elements = Array.isArray(batch?.elements) ? batch.elements : [];
        if (elements.length === 0) {
            break;
        }
        for (const element of elements) {
            const card = element?.jobCardUnion?.jobPostingCard;
            if (card) {
                collected.push(toJob(card));
            }
        }
        // A short page means there is nothing further to request.
        if (elements.length < count) {
            break;
        }
        offset += elements.length;
    }
    return collected.slice(0, input.limit).map((job, position) => ({
        rank: input.start + position + 1,
        ...job,
    }));
}
261  // ── Job detail enrichment (--details flag) ────────────────────────────
/**
 * Visit each job page in turn and add `description` and `apply_url` to it.
 *
 * Enrichment is best-effort: a job without a URL, or one whose page could not
 * be loaded or scraped, is returned with both fields set to `''`. Failures
 * are reported on stderr so an empty description can be told apart from a
 * page that timed out or failed to load. Progress is also written to stderr.
 *
 * @param {object} page - Browser page exposing `goto`, `wait` and `evaluate`.
 * @param {object[]} jobs - Rows produced by the search; `title` and `url` are read.
 * @returns {Promise<object[]>} One enriched copy per input job, same order.
 */
async function enrichJobDetails(page, jobs) {
    const enriched = [];
    for (let i = 0; i < jobs.length; i++) {
        const job = jobs[i];
        console.error(`[opencli:linkedin] Fetching details ${i + 1}/${jobs.length}: ${job.title}`);
        if (!job.url) {
            enriched.push({ ...job, description: '', apply_url: '' });
            continue;
        }
        try {
            await page.goto(job.url);
            await page.wait({ text: 'About the job', timeout: 8 });
            // Expand "Show more" button if present
            await page.evaluate(`(() => {
        const norm = (v) => (v || '').replace(/\\s+/g, ' ').trim().toLowerCase();
        const section = [...document.querySelectorAll('div, section, article')]
          .find(el => norm(el.querySelector('h1,h2,h3,h4')?.textContent || '') === 'about the job');
        const btn = [...(section?.querySelectorAll('button, a[role="button"]') || [])]
          .find(el => /more/.test(norm(el.textContent || '')) || /more/.test(norm(el.getAttribute('aria-label') || '')));
        if (btn) btn.click();
      })()`);
            // Short pause after the click — presumably to let the expanded text render.
            await page.wait(1);
            // Extract description and apply URL
            const detail = await page.evaluate(`(() => {
        const norm = (v) => (v || '').replace(/\\s+/g, ' ').trim();
        // Find the most specific (shortest) container with "About the job" heading
        // Shortest = most specific DOM node, avoiding outer wrappers that include unrelated text
        const candidates = [...document.querySelectorAll('div, section, article')]
          .map(el => ({
            heading: norm(el.querySelector('h1,h2,h3,h4')?.textContent || ''),
            text: norm(el.innerText || ''),
          }))
          .filter(c => c.text && c.heading.toLowerCase() === 'about the job' && c.text.length > 'About the job'.length)
          .sort((a, b) => a.text.length - b.text.length);

        const description = candidates[0]?.text.replace(/^About the job\\s*/i, '') || '';
        const applyLink = [...document.querySelectorAll('a[href]')]
          .map(a => ({ href: a.href || '', text: norm(a.textContent || ''), aria: norm(a.getAttribute('aria-label') || '') }))
          .find(a => /apply/i.test(a.text) || /apply/i.test(a.aria));

        return { description, applyUrl: applyLink?.href || '' };
      })()`);
            enriched.push({
                ...job,
                description: normalizeWhitespace(detail?.description),
                apply_url: decodeLinkedinRedirect(String(detail?.applyUrl ?? '')),
            });
        }
        catch (err) {
            // Keep going with the remaining jobs, but say why this one is empty.
            console.error(`[opencli:linkedin] Could not fetch details for ${job.url}: ${err?.message ?? err}`);
            enriched.push({ ...job, description: '', apply_url: '' });
        }
    }
    return enriched;
}
316  // ── CLI registration ──────────────────────────────────────────────────
// Registers the `linkedin search` command.
// The handler validates every argument first, then opens the job-search page,
// resolves company names, fetches the job cards and optionally enriches them.
cli({
    site: 'linkedin',
    name: 'search',
    description: 'Search LinkedIn jobs',
    domain: 'www.linkedin.com',
    strategy: Strategy.HEADER,
    browser: true,
    args: [
        { name: 'query', type: 'string', required: true, positional: true, help: 'Job search keywords' },
        { name: 'location', type: 'string', required: false, help: 'Location text such as San Francisco Bay Area' },
        { name: 'limit', type: 'int', default: 10, help: 'Number of jobs to return (max 100)' },
        { name: 'start', type: 'int', default: 0, help: 'Result offset for pagination' },
        { name: 'details', type: 'bool', default: false, help: 'Include full job description and apply URL (slower)' },
        { name: 'company', type: 'string', required: false, help: 'Comma-separated company names or LinkedIn company IDs' },
        { name: 'experience-level', type: 'string', required: false, help: 'Comma-separated: internship, entry, associate, mid-senior, director, executive' },
        { name: 'job-type', type: 'string', required: false, help: 'Comma-separated: full-time, part-time, contract, temporary, volunteer, internship, other' },
        { name: 'date-posted', type: 'string', required: false, help: 'One of: any, month, week, 24h' },
        { name: 'remote', type: 'string', required: false, help: 'Comma-separated: on-site, hybrid, remote' },
    ],
    columns: ['rank', 'title', 'company', 'location', 'listed', 'salary', 'url'],
    func: async (page, kwargs) => {
        // Clamp paging: limit to 1..100, start to >= 0.
        const limit = Math.max(1, Math.min(kwargs.limit ?? 10, 100));
        const start = Math.max(0, kwargs.start ?? 0);
        const includeDetails = Boolean(kwargs.details);
        // Coerce like `query` below so a non-string value cannot break `.trim()`.
        const location = String(kwargs.location ?? '').trim();
        const keywords = String(kwargs.query ?? '').trim();
        if (!keywords)
            throw new ArgumentError('query is required');
        // Validate the filter arguments before any browser work, so a typo
        // fails immediately instead of after navigation and company lookup.
        const experienceLevels = mapFilterValues(kwargs['experience-level'], EXPERIENCE_LEVELS, 'experience_level');
        const jobTypes = mapFilterValues(kwargs['job-type'], JOB_TYPES, 'job_type');
        const datePostedValues = mapFilterValues(kwargs['date-posted'], DATE_POSTED, 'date_posted');
        const remoteTypes = mapFilterValues(kwargs.remote, REMOTE_TYPES, 'remote');
        // Load the search page first: company resolution and the API calls
        // both run inside it.
        const searchParams = new URLSearchParams({ keywords });
        if (location)
            searchParams.set('location', location);
        await page.goto(`https://www.linkedin.com/jobs/search/?${searchParams.toString()}`);
        await page.wait({ text: 'Jobs', timeout: 10 });
        const companyIds = await resolveCompanyIds(page, kwargs.company);
        const input = {
            keywords,
            location,
            limit,
            start,
            companyIds,
            experienceLevels,
            jobTypes,
            datePostedValues,
            remoteTypes,
        };
        const data = await fetchJobCards(page, input);
        if (!includeDetails)
            return data;
        return enrichJobDetails(page, data);
    },
});