/ clis / linkedin / timeline.js
timeline.js
  1  import { cli, Strategy } from '@jackwener/opencli/registry';
  2  import { AuthRequiredError, EmptyResultError } from '@jackwener/opencli/errors';
  3  function normalizeWhitespace(value) {
  4      return String(value ?? '').replace(/\s+/g, ' ').trim();
  5  }
  6  function parseMetric(value) {
  7      const raw = normalizeWhitespace(value).toLowerCase();
  8      if (!raw)
  9          return 0;
 10      const compact = raw.replace(/,/g, '');
 11      const match = compact.match(/(\d+(?:\.\d+)?)(k|m)?/i);
 12      if (!match)
 13          return 0;
 14      const base = Number(match[1]);
 15      const suffix = (match[2] || '').toLowerCase();
 16      if (suffix === 'k')
 17          return Math.round(base * 1000);
 18      if (suffix === 'm')
 19          return Math.round(base * 1000000);
 20      return Math.round(base);
 21  }
 22  function buildPostId(post) {
 23      const url = normalizeWhitespace(post.url);
 24      if (url)
 25          return url;
 26      const author = normalizeWhitespace(post.author);
 27      const text = normalizeWhitespace(post.text);
 28      const postedAt = normalizeWhitespace(post.posted_at);
 29      return `${author}::${postedAt}::${text.slice(0, 120)}`;
 30  }
 31  function mergeTimelinePosts(existing, batch) {
 32      const seen = new Set(existing.map(post => post.id));
 33      const merged = [...existing];
 34      for (const rawPost of batch) {
 35          const post = {
 36              id: buildPostId(rawPost),
 37              author: normalizeWhitespace(rawPost.author),
 38              author_url: normalizeWhitespace(rawPost.author_url),
 39              headline: normalizeWhitespace(rawPost.headline),
 40              text: normalizeWhitespace(rawPost.text),
 41              posted_at: normalizeWhitespace(rawPost.posted_at),
 42              reactions: Number(rawPost.reactions) || 0,
 43              comments: Number(rawPost.comments) || 0,
 44              url: normalizeWhitespace(rawPost.url),
 45          };
 46          if (!post.author || !post.text)
 47              continue;
 48          if (seen.has(post.id))
 49              continue;
 50          seen.add(post.id);
 51          merged.push(post);
 52      }
 53      return merged;
 54  }
 55  async function extractVisiblePosts(page) {
 56      return page.evaluate(`(function () {
 57      function normalize(value) {
 58        return String(value || '').replace(/\\s+/g, ' ').trim();
 59      }
 60      function textOf(root, selector) {
 61        var el = root.querySelector(selector);
 62        return el ? el.textContent : '';
 63      }
 64      function hrefOf(root, selector) {
 65        var el = root.querySelector(selector);
 66        return el && el.href ? el.href : '';
 67      }
 68      function attrOf(root, selector, attr) {
 69        var el = root.querySelector(selector);
 70        return el ? el.getAttribute(attr) : '';
 71      }
 72      function cleanTimestamp(value) {
 73        return normalize(String(value || '').replace(/[•.]/g, ' '));
 74      }
 75      function parseMetric(value) {
 76        var raw = normalize(value).toLowerCase();
 77        var match;
 78        var base;
 79        var suffix;
 80        if (!raw) return 0;
 81        match = raw.replace(/,/g, '').match(/(\\d+(?:\\.\\d+)?)(k|m)?/i);
 82        if (!match) return 0;
 83        base = Number(match[1]);
 84        suffix = (match[2] || '').toLowerCase();
 85        if (suffix === 'k') return Math.round(base * 1000);
 86        if (suffix === 'm') return Math.round(base * 1000000);
 87        return Math.round(base);
 88      }
 89      function splitBlocks(text) {
 90        var lines = String(text || '').split('\\n');
 91        var blocks = [];
 92        var current = [];
 93        var i;
 94        var line;
 95        for (i = 0; i < lines.length; i += 1) {
 96          line = normalize(lines[i]);
 97          if (!line) {
 98            if (current.length) {
 99              blocks.push(normalize(current.join(' ')));
100              current = [];
101            }
102            continue;
103          }
104          current.push(line);
105        }
106        if (current.length) blocks.push(normalize(current.join(' ')));
107        return blocks;
108      }
109      function looksLikeTimestamp(value) {
110        var lower = String(value || '').toLowerCase();
111        return /^\\d+\\s*(s|m|h|d|w|mo|yr|min)(\\s*[•.])?$/i.test(lower);
112      }
113      function looksLikeBadge(value) {
114        var lower = String(value || '').toLowerCase();
115        return String(value || '').indexOf('•') === 0
116          || lower === '1st'
117          || lower === '2nd'
118          || lower === '3rd'
119          || lower === 'degree connection';
120      }
121      function looksLikeAction(value) {
122        return /^(follow|send message|connect|visit my website|view my newsletter|subscribe)$/i.test((value || '').toLowerCase());
123      }
124      function looksLikeCta(value) {
125        return /^(book an appointment|view my services|visit my website|view my newsletter|subscribe|learn more|contact us)$/i.test((value || '').toLowerCase());
126      }
127      function looksLikeEngagement(value) {
128        return /(reactions?|comments?|reposts?)/i.test(String(value || ''));
129      }
130      function looksLikeFooterAction(value) {
131        return /^(like|comment|repost|send|reply|load more comments)$/i.test((value || '').toLowerCase());
132      }
133      function findActivityUrn(root) {
134        var elements = [root].concat(Array.from(root.querySelectorAll('*')));
135        var i;
136        var j;
137        var attrs;
138        var value;
139        var match;
140        for (i = 0; i < elements.length; i += 1) {
141          attrs = Array.from(elements[i].attributes || []);
142          for (j = 0; j < attrs.length; j += 1) {
143            value = String(attrs[j].value || '');
144            match = value.match(/urn:li:activity:\\d+/);
145            if (match) return match[0];
146          }
147        }
148        return '';
149      }
150      function parseReactionCount(root, blocks) {
151        var direct = textOf(root, '.social-details-social-counts__reactions-count');
152        var rootText = String(root.innerText || '');
153        var i;
154        var value;
155        value = rootText.match(/and\\s+(\\d[\\d,]*)\\s+others\\s+reacted/i);
156        if (value) return parseMetric(value[1]) + 1;
157        value = rootText.match(/and\\s+(\\d[\\d,]*)\\s+others(?!\\s+comments?)(?!\\s+reposts?)/i);
158        if (value) return parseMetric(value[1]) + 1;
159        value = rootText.match(/(\\d[\\d,]*)\\s+reactions?/i);
160        if (value) return parseMetric(value[0]);
161        if (direct) return parseMetric(direct);
162        for (i = 0; i < blocks.length; i += 1) {
163          value = blocks[i];
164          if (/and\\s+\\d[\\d,]*\\s+others(?!\\s+comments?)(?!\\s+reposts?)/i.test(value)) {
165            return parseMetric(value) + 1;
166          }
167          if (/reactions?/i.test(value)) return parseMetric(value);
168          if (/and\\s+\\d+[\\d,]*\\s+others\\s+reacted/i.test(value)) return parseMetric(value) + 1;
169        }
170        return 0;
171      }
172      function parseCommentCount(blocks) {
173        var i;
174        var text = blocks.join(' ');
175        var match = text.match(/(\\d[\\d,]*)\\s+comments?/i);
176        if (match) return parseMetric(match[0]);
177        for (i = 0; i < blocks.length; i += 1) {
178          if (/comments?/i.test(blocks[i])) return parseMetric(blocks[i]);
179        }
180        return 0;
181      }
182      function selectProfileLink(root, author) {
183        var links = Array.from(root.querySelectorAll('a[href*="/in/"], a[href*="/company/"]'));
184        var normalizedAuthor = normalize(author).toLowerCase();
185        var i;
186        var label;
187        for (i = 0; i < links.length; i += 1) {
188          label = normalize(links[i].textContent || links[i].getAttribute('aria-label')).toLowerCase();
189          if (!links[i].href) continue;
190          if (normalizedAuthor && label.indexOf(normalizedAuthor) >= 0) return links[i];
191        }
192        return links[0] || null;
193      }
194      function selectProfileUrl(root, author) {
195        var link = selectProfileLink(root, author);
196        return link && link.href ? link.href : '';
197      }
198      function parseActorLinkMeta(root, author) {
199        var link = selectProfileLink(root, author);
200        var text = normalize(link ? link.textContent : '');
201        var normalizedAuthor = normalize(author);
202        var match;
203        var rest;
204        var headline = '';
205        var postedAt = '';
206        if (!text || !normalizedAuthor) return { headline: '', postedAt: '' };
207        if (text.indexOf(normalizedAuthor) === 0) {
208          rest = normalize(text.slice(normalizedAuthor.length));
209        } else {
210          rest = text;
211        }
212        rest = normalize(rest.replace(/^[•·]\\s*(1st|2nd|3rd\\+?|3rd|degree connection)/i, ''));
213        match = rest.match(/(\\d+\\s*(?:s|m|h|d|w|mo|yr|min))\\s*[•·]?$/i);
214        if (match) {
215          postedAt = cleanTimestamp(match[1]);
216          headline = normalize(rest.slice(0, rest.length - match[0].length));
217        } else {
218          headline = rest;
219        }
220        headline = normalize(headline.replace(/^(book an appointment|view my services|visit my website|view my newsletter)\\s*/i, ''));
221        return { headline: headline, postedAt: postedAt };
222      }
223      function stripBodyTail(value) {
224        return normalize(String(value || '')
225          .replace(/\\s+\\d[\\d,]*\\s+reactions?[\\s\\S]*$/i, '')
226          .replace(/\\s+\\d[\\d,]*\\s+comments?[\\s\\S]*$/i, '')
227          .replace(/\\s+[A-Z][^\\n]+\\s+and\\s+\\d[\\d,]*\\s+others\\s+reacted[\\s\\S]*$/i, '')
228          .replace(/\\s+Like\\s+Comment\\s+Repost\\s+Send[\\s\\S]*$/i, '')
229          .replace(/\\s+Reaction button state:[\\s\\S]*$/i, '')
230          .replace(/^\\d+\\s*(?:s|m|h|d|w|mo|yr|min)\\s*[•.]?\\s*Follow\\s+/i, '')
231        );
232      }
233      function parseActorMeta(root) {
234        var actorLink = root.querySelector('a[href*="/in/"], a[href*="/company/"]');
235        var actorText = normalize(actorLink ? actorLink.textContent : '');
236        var author = '';
237        var headline = '';
238        var postedAt = '';
239        var match;
240        if (actorText) {
241          match = actorText.match(/^(.+?)\\s+[•·]\\s+(1st|2nd|3rd\\+?|3rd|degree connection)(.*)$/i);
242          if (match) {
243            author = normalize(match[1]);
244            actorText = normalize(match[3]);
245          }
246        }
247        match = actorText.match(/(.+?)\\s+(\\d+\\s*(?:s|m|h|d|w|mo|yr|min))\\s*[•·]?$/i);
248        if (match) {
249          headline = normalize(match[1]);
250          postedAt = cleanTimestamp(match[2]);
251        } else if (actorText) {
252          headline = actorText;
253        }
254        return {
255          author: author,
256          headline: headline,
257          postedAt: postedAt,
258          authorUrl: actorLink && actorLink.href ? actorLink.href : '',
259        };
260      }
261      function extractFromListItem(root) {
262        var blocks = splitBlocks(root.innerText || '');
263        var filtered = [];
264        var i;
265        var value;
266        var author = '';
267        var authorUrl = '';
268        var headline = '';
269        var postedAt = '';
270        var text = '';
271        var bodyStart = -1;
272        var permalink;
273        var url;
274        var reactions;
275        var comments;
276        var endIndex = -1;
277        var urn;
278  
279        if (blocks.length < 5) return null;
280        if (blocks[0] !== 'Feed post') return null;
281  
282        for (i = 1; i < blocks.length; i += 1) {
283          value = blocks[i];
284          if (!value) continue;
285          if (/commented on this|reposted this|liked this|suggested/i.test(value)) continue;
286          filtered.push(value);
287        }
288        if (filtered.length < 4) return null;
289  
290        for (i = 0; i < filtered.length; i += 1) {
291          value = filtered[i];
292          if (!author && !looksLikeBadge(value) && !looksLikeAction(value) && !looksLikeTimestamp(value)) {
293            author = value;
294            continue;
295          }
296          if (author && !headline && !looksLikeBadge(value) && !looksLikeAction(value) && !looksLikeTimestamp(value) && !looksLikeCta(value)) {
297            headline = value;
298            continue;
299          }
300          if (!postedAt && looksLikeTimestamp(value)) {
301            postedAt = cleanTimestamp(value);
302            continue;
303          }
304        }
305  
306        if (!author) return null;
307        authorUrl = selectProfileUrl(root, author);
308        if (!headline || !postedAt) {
309          var actorMeta = parseActorLinkMeta(root, author);
310          if (!headline && actorMeta.headline) headline = actorMeta.headline;
311          if (!postedAt && actorMeta.postedAt) postedAt = actorMeta.postedAt;
312        }
313  
314        for (i = 0; i < filtered.length; i += 1) {
315          value = filtered[i];
316          if (looksLikeAction(value)) {
317            bodyStart = i + 1;
318            break;
319          }
320        }
321        if (bodyStart < 0 && postedAt) {
322          bodyStart = filtered.indexOf(postedAt) + 1;
323        }
324        if (bodyStart < 0) bodyStart = Math.min(filtered.length, headline ? 2 : 1);
325  
326        for (i = bodyStart; i < filtered.length; i += 1) {
327          value = filtered[i];
328          if (looksLikeEngagement(value) || looksLikeFooterAction(value)) {
329            endIndex = i;
330            break;
331          }
332        }
333        if (endIndex < 0) endIndex = filtered.length;
334  
335        text = stripBodyTail(filtered.slice(bodyStart, endIndex).join('\\n\\n'));
336        if (!text) return null;
337  
338        permalink = root.querySelector('a[href*="/feed/update/"], a[href*="/posts/"], a[href*="/pulse/"]');
339        url = permalink ? permalink.href : '';
340        urn = findActivityUrn(root);
341        if (!url && urn) url = 'https://www.linkedin.com/feed/update/' + urn + '/';
342        reactions = parseReactionCount(root, filtered);
343        comments = parseCommentCount(filtered);
344  
345        return {
346          id: url || (author + '::' + postedAt + '::' + text.slice(0, 120)),
347          author: author,
348          author_url: authorUrl,
349          headline: headline,
350          text: text,
351          posted_at: postedAt,
352          reactions: reactions,
353          comments: comments,
354          url: url,
355        };
356      }
357      function commentMetric(root) {
358        var links = Array.from(root.querySelectorAll('button, a'));
359        var i;
360        var label;
361        for (i = 0; i < links.length; i += 1) {
362          label = normalize(links[i].textContent || links[i].getAttribute('aria-label'));
363          if (/comment/i.test(label)) return parseMetric(label);
364        }
365        return 0;
366      }
367  
368      var currentUrl = window.location.href;
369      var path = String(window.location.pathname || '');
370      var loginRequired = path.indexOf('/login') >= 0
371        || path.indexOf('/checkpoint/') >= 0
372        || Boolean(document.querySelector('input[name="session_key"], form.login__form'));
373      var moreButtons = Array.from(document.querySelectorAll('button, a[role="button"]'))
374        .filter(function (el) {
375          return /see more|more/i.test(normalize(el.textContent))
376            || /see more|more/i.test(normalize(el.getAttribute('aria-label')));
377        })
378        .slice(0, 8);
379      var cards = Array.from(document.querySelectorAll('article, .feed-shared-update-v2, .occludable-update, [role="listitem"]'));
380      var seen = new Set();
381      var posts = [];
382      var i;
383      var card;
384      var root;
385      var author;
386      var headline;
387      var text;
388      var postedAt;
389      var permalink;
390      var url;
391      var reactions;
392      var comments;
393  
394      for (i = 0; i < moreButtons.length; i += 1) {
395        try { moreButtons[i].click(); } catch (err) {}
396      }
397  
398      for (i = 0; i < cards.length; i += 1) {
399        card = cards[i];
400        root = card.closest('article, .feed-shared-update-v2, .occludable-update, [role="listitem"]') || card;
401        if (!root || seen.has(root)) continue;
402        seen.add(root);
403  
404        if (String(root.getAttribute('role') || '') === 'listitem') {
405          var extracted = extractFromListItem(root);
406          if (extracted) posts.push(extracted);
407          continue;
408        }
409  
410        author = normalize(
411          textOf(root, '.update-components-actor__title span[dir="ltr"]')
412          || textOf(root, '.update-components-actor__title')
413          || textOf(root, '[data-control-name="actor"] span[dir="ltr"]')
414          || textOf(root, '[data-control-name="actor"]')
415        );
416        headline = normalize(
417          textOf(root, '.update-components-actor__description')
418          || textOf(root, '.update-components-actor__sub-description')
419        );
420        text = normalize(
421          textOf(root, '.update-components-text span[dir="ltr"]')
422          || textOf(root, '.update-components-text')
423          || textOf(root, '.feed-shared-inline-show-more-text span[dir="ltr"]')
424          || textOf(root, '.feed-shared-inline-show-more-text')
425          || textOf(root, '[data-test-id="main-feed-activity-card"] .break-words')
426        );
427        postedAt = normalize(
428          textOf(root, '.update-components-actor__sub-description a')
429          || textOf(root, '.update-components-actor__sub-description span[aria-hidden="true"]')
430          || textOf(root, 'time')
431        );
432        permalink = root.querySelector('a[href*="/feed/update/"], a[href*="/posts/"], a[href*="/pulse/"]');
433        url = permalink ? permalink.href : '';
434        if (url && url.indexOf('/') === 0) url = new URL(url, currentUrl).toString();
435        reactions = parseMetric(
436          textOf(root, '.social-details-social-counts__reactions-count')
437          || attrOf(root, '[aria-label*="reaction"]', 'aria-label')
438          || attrOf(root, '[aria-label*="like"]', 'aria-label')
439        );
440        comments = commentMetric(root);
441  
442        if (!author || !text) continue;
443  
444        posts.push({
445          id: url || (author + '::' + postedAt + '::' + text.slice(0, 120)),
446          author: author,
447          author_url: hrefOf(root, 'a[href*="/in/"], a[href*="/company/"]'),
448          headline: headline,
449          text: text,
450          posted_at: postedAt,
451          reactions: reactions,
452          comments: comments,
453          url: url,
454        });
455      }
456  
457      return { loginRequired: loginRequired, posts: posts };
458    })()`);
459  }
// Registers the "linkedin timeline" command. It opens the home feed in the
// browser session and collects posts over a bounded number of scroll passes.
cli({
    site: 'linkedin',
    name: 'timeline',
    description: 'Read LinkedIn home timeline posts',
    domain: 'www.linkedin.com',
    strategy: Strategy.COOKIE,
    browser: true,
    args: [
        { name: 'limit', type: 'int', default: 20, help: 'Number of posts to return (max 100)' },
    ],
    columns: ['rank', 'author', 'author_url', 'headline', 'text', 'posted_at', 'reactions', 'comments', 'url'],
    /**
     * Collect up to `limit` timeline posts.
     *
     * @param {object} page - Browser page handle (goto, wait, autoScroll, evaluate).
     * @param {{limit?: number}} kwargs - Parsed command arguments.
     * @returns {Promise<object[]>} Posts in feed order, each prefixed with a 1-based rank.
     * @throws {AuthRequiredError} When a login wall was seen and no post was collected.
     * @throws {EmptyResultError} When no post could be read for any other reason.
     */
    func: async (page, kwargs) => {
        const maxPasses = 6;
        // Clamp to 1..100. A value that is not a number falls back to the
        // default, because a NaN limit would make every comparison below false.
        const requested = Number(kwargs.limit ?? 20);
        const limit = Number.isFinite(requested) ? Math.max(1, Math.min(requested, 100)) : 20;
        await page.goto('https://www.linkedin.com/feed/');
        await page.wait(4);
        let posts = [];
        let sawLoginWall = false;
        for (let pass = 0; pass < maxPasses; pass++) {
            const batch = await extractVisiblePosts(page);
            if (batch?.loginRequired)
                sawLoginWall = true;
            posts = mergeTimelinePosts(posts, Array.isArray(batch?.posts) ? batch.posts : []);
            if (posts.length >= limit)
                break;
            // Scrolling a login/checkpoint page is not expected to reveal
            // posts, so stop now and raise the auth error below.
            if (sawLoginWall && posts.length === 0)
                break;
            // Nothing is extracted after the last pass, so do not scroll for it.
            if (pass === maxPasses - 1)
                break;
            await page.autoScroll({ times: 1, delayMs: 1200 });
            await page.wait(1);
        }
        if (sawLoginWall && posts.length === 0) {
            throw new AuthRequiredError('linkedin.com', 'LinkedIn timeline requires an active signed-in browser session');
        }
        if (posts.length === 0) {
            throw new EmptyResultError('linkedin timeline', 'Make sure your LinkedIn home feed is visible in the browser.');
        }
        return posts.slice(0, limit).map((post, index) => ({
            rank: index + 1,
            ...post,
        }));
    },
});
// Bundle of module-private helpers made reachable from outside this file
// (presumably for unit tests, judging by the export name).
export const __test__ = {
    parseMetric: parseMetric,
    buildPostId: buildPostId,
    mergeTimelinePosts: mergeTimelinePosts,
};