timeline.js
import { cli, Strategy } from '@jackwener/opencli/registry';
import { AuthRequiredError, EmptyResultError } from '@jackwener/opencli/errors';

/**
 * Collapse every run of whitespace into a single space and trim both ends.
 *
 * @param {*} value - Any value; null/undefined are treated as an empty string.
 * @returns {string} The normalized text.
 */
function normalizeWhitespace(value) {
    return String(value ?? '').replace(/\s+/g, ' ').trim();
}

/**
 * Parse a human-readable counter ("1,234", "1.2K", "3m", "57 reactions") into an integer.
 * Thousands separators (commas) are dropped, the first number found is used, and an
 * immediately following "k" or "m" suffix multiplies it by 1e3 or 1e6.
 *
 * @param {*} value - Text containing the counter.
 * @returns {number} The rounded count, or 0 when no number is present.
 */
function parseMetric(value) {
    const raw = normalizeWhitespace(value).toLowerCase();
    if (!raw)
        return 0;
    const compact = raw.replace(/,/g, '');
    const match = compact.match(/(\d+(?:\.\d+)?)(k|m)?/i);
    if (!match)
        return 0;
    const base = Number(match[1]);
    const suffix = (match[2] || '').toLowerCase();
    if (suffix === 'k')
        return Math.round(base * 1000);
    if (suffix === 'm')
        return Math.round(base * 1000000);
    return Math.round(base);
}

/**
 * Build the de-duplication key for a post: its URL when present, otherwise a
 * composite of author, timestamp and the first 120 characters of the text.
 *
 * @param {{url?: *, author?: *, text?: *, posted_at?: *}} post - Raw post fields.
 * @returns {string} The identifier used to detect repeated posts.
 */
function buildPostId(post) {
    const url = normalizeWhitespace(post.url);
    if (url)
        return url;
    const author = normalizeWhitespace(post.author);
    const text = normalizeWhitespace(post.text);
    const postedAt = normalizeWhitespace(post.posted_at);
    return `${author}::${postedAt}::${text.slice(0, 120)}`;
}

/**
 * Append the posts of a freshly scraped batch to the already collected ones.
 * Every raw post is normalized; posts without an author or text, and posts whose
 * id was already seen, are skipped. The input arrays are not mutated.
 *
 * @param {Array<object>} existing - Previously merged posts (each carrying an `id`).
 * @param {Array<object>} batch - Raw posts returned by the page script.
 * @returns {Array<object>} A new array: `existing` followed by the new unique posts.
 */
function mergeTimelinePosts(existing, batch) {
    const seen = new Set(existing.map(post => post.id));
    const merged = [...existing];
    for (const rawPost of batch) {
        const post = {
            id: buildPostId(rawPost),
            author: normalizeWhitespace(rawPost.author),
            author_url: normalizeWhitespace(rawPost.author_url),
            headline: normalizeWhitespace(rawPost.headline),
            text: normalizeWhitespace(rawPost.text),
            posted_at: normalizeWhitespace(rawPost.posted_at),
            reactions: Number(rawPost.reactions) || 0,
            comments: Number(rawPost.comments) || 0,
            url: normalizeWhitespace(rawPost.url),
        };
        if (!post.author || !post.text)
            continue;
        if (seen.has(post.id))
            continue;
        seen.add(post.id);
        merged.push(post);
    }
    return merged;
}

/**
 * Evaluate a scraping script inside the page and return what it finds.
 *
 * The script is shipped as a string, so it cannot reference anything from this
 * module; helpers such as parseMetric are therefore duplicated inside it. Because
 * it lives in a template literal, every regex/string backslash is doubled.
 *
 * @param {object} page - Browser page handle exposing `evaluate(script)`.
 * @returns {Promise<{loginRequired: boolean, posts: Array<object>}>} Whether a login
 *   wall was detected, plus the raw posts currently present in the DOM.
 */
async function extractVisiblePosts(page) {
    return page.evaluate(`(function () {
        // ---- generic DOM / text helpers ----
        function normalize(value) {
            return String(value || '').replace(/\\s+/g, ' ').trim();
        }
        function textOf(root, selector) {
            var el = root.querySelector(selector);
            return el ? el.textContent : '';
        }
        function hrefOf(root, selector) {
            var el = root.querySelector(selector);
            return el && el.href ? el.href : '';
        }
        function attrOf(root, selector, attr) {
            var el = root.querySelector(selector);
            return el ? el.getAttribute(attr) : '';
        }
        // Drop bullet/period decoration from a timestamp block such as "2h •".
        function cleanTimestamp(value) {
            return normalize(String(value || '').replace(/[•.]/g, ' '));
        }
        // In-page copy of the module-level parseMetric.
        function parseMetric(value) {
            var raw = normalize(value).toLowerCase();
            var match;
            var base;
            var suffix;
            if (!raw) return 0;
            match = raw.replace(/,/g, '').match(/(\\d+(?:\\.\\d+)?)(k|m)?/i);
            if (!match) return 0;
            base = Number(match[1]);
            suffix = (match[2] || '').toLowerCase();
            if (suffix === 'k') return Math.round(base * 1000);
            if (suffix === 'm') return Math.round(base * 1000000);
            return Math.round(base);
        }
        // Split rendered text into blocks: consecutive non-empty lines are joined,
        // empty lines act as block separators.
        function splitBlocks(text) {
            var lines = String(text || '').split('\\n');
            var blocks = [];
            var current = [];
            var i;
            var line;
            for (i = 0; i < lines.length; i += 1) {
                line = normalize(lines[i]);
                if (!line) {
                    if (current.length) {
                        blocks.push(normalize(current.join(' ')));
                        current = [];
                    }
                    continue;
                }
                current.push(line);
            }
            if (current.length) blocks.push(normalize(current.join(' ')));
            return blocks;
        }

        // ---- block classifiers ----
        function looksLikeTimestamp(value) {
            var lower = String(value || '').toLowerCase();
            return /^\\d+\\s*(s|m|h|d|w|mo|yr|min)(\\s*[•.])?$/i.test(lower);
        }
        function looksLikeBadge(value) {
            var lower = String(value || '').toLowerCase();
            return String(value || '').indexOf('•') === 0
                || lower === '1st'
                || lower === '2nd'
                || lower === '3rd'
                || lower === 'degree connection';
        }
        function looksLikeAction(value) {
            return /^(follow|send message|connect|visit my website|view my newsletter|subscribe)$/i.test((value || '').toLowerCase());
        }
        function looksLikeCta(value) {
            return /^(book an appointment|view my services|visit my website|view my newsletter|subscribe|learn more|contact us)$/i.test((value || '').toLowerCase());
        }
        function looksLikeEngagement(value) {
            return /(reactions?|comments?|reposts?)/i.test(String(value || ''));
        }
        function looksLikeFooterAction(value) {
            return /^(like|comment|repost|send|reply|load more comments)$/i.test((value || '').toLowerCase());
        }

        // Scan every attribute of root and its descendants for an activity URN.
        function findActivityUrn(root) {
            var elements = [root].concat(Array.from(root.querySelectorAll('*')));
            var i;
            var j;
            var attrs;
            var value;
            var match;
            for (i = 0; i < elements.length; i += 1) {
                attrs = Array.from(elements[i].attributes || []);
                for (j = 0; j < attrs.length; j += 1) {
                    value = String(attrs[j].value || '');
                    match = value.match(/urn:li:activity:\\d+/);
                    if (match) return match[0];
                }
            }
            return '';
        }
        // Reaction count heuristics, tried in order: "X and N others reacted" (N + 1),
        // "and N others" (N + 1), "N reactions", the dedicated counter element, and
        // finally a per-block scan.
        function parseReactionCount(root, blocks) {
            var direct = textOf(root, '.social-details-social-counts__reactions-count');
            var rootText = String(root.innerText || '');
            var i;
            var value;
            value = rootText.match(/and\\s+(\\d[\\d,]*)\\s+others\\s+reacted/i);
            if (value) return parseMetric(value[1]) + 1;
            value = rootText.match(/and\\s+(\\d[\\d,]*)\\s+others(?!\\s+comments?)(?!\\s+reposts?)/i);
            if (value) return parseMetric(value[1]) + 1;
            value = rootText.match(/(\\d[\\d,]*)\\s+reactions?/i);
            if (value) return parseMetric(value[0]);
            if (direct) return parseMetric(direct);
            for (i = 0; i < blocks.length; i += 1) {
                value = blocks[i];
                if (/and\\s+\\d[\\d,]*\\s+others(?!\\s+comments?)(?!\\s+reposts?)/i.test(value)) {
                    return parseMetric(value) + 1;
                }
                if (/reactions?/i.test(value)) return parseMetric(value);
                if (/and\\s+\\d+[\\d,]*\\s+others\\s+reacted/i.test(value)) return parseMetric(value) + 1;
            }
            return 0;
        }
        function parseCommentCount(blocks) {
            var i;
            var text = blocks.join(' ');
            var match = text.match(/(\\d[\\d,]*)\\s+comments?/i);
            if (match) return parseMetric(match[0]);
            for (i = 0; i < blocks.length; i += 1) {
                if (/comments?/i.test(blocks[i])) return parseMetric(blocks[i]);
            }
            return 0;
        }
        // Prefer the profile/company link whose label contains the author name;
        // otherwise fall back to the first such link.
        function selectProfileLink(root, author) {
            var links = Array.from(root.querySelectorAll('a[href*="/in/"], a[href*="/company/"]'));
            var normalizedAuthor = normalize(author).toLowerCase();
            var i;
            var label;
            for (i = 0; i < links.length; i += 1) {
                label = normalize(links[i].textContent || links[i].getAttribute('aria-label')).toLowerCase();
                if (!links[i].href) continue;
                if (normalizedAuthor && label.indexOf(normalizedAuthor) >= 0) return links[i];
            }
            return links[0] || null;
        }
        function selectProfileUrl(root, author) {
            var link = selectProfileLink(root, author);
            return link && link.href ? link.href : '';
        }
        // Recover headline and timestamp from the text of the actor link, after
        // removing the author name, the connection badge and a trailing timestamp.
        function parseActorLinkMeta(root, author) {
            var link = selectProfileLink(root, author);
            var text = normalize(link ? link.textContent : '');
            var normalizedAuthor = normalize(author);
            var match;
            var rest;
            var headline = '';
            var postedAt = '';
            if (!text || !normalizedAuthor) return { headline: '', postedAt: '' };
            if (text.indexOf(normalizedAuthor) === 0) {
                rest = normalize(text.slice(normalizedAuthor.length));
            } else {
                rest = text;
            }
            rest = normalize(rest.replace(/^[•·]\\s*(1st|2nd|3rd\\+?|3rd|degree connection)/i, ''));
            match = rest.match(/(\\d+\\s*(?:s|m|h|d|w|mo|yr|min))\\s*[•·]?$/i);
            if (match) {
                postedAt = cleanTimestamp(match[1]);
                headline = normalize(rest.slice(0, rest.length - match[0].length));
            } else {
                headline = rest;
            }
            headline = normalize(headline.replace(/^(book an appointment|view my services|visit my website|view my newsletter)\\s*/i, ''));
            return { headline: headline, postedAt: postedAt };
        }
        // Cut engagement counters and footer chrome that leaked into the body text.
        function stripBodyTail(value) {
            return normalize(String(value || '')
                .replace(/\\s+\\d[\\d,]*\\s+reactions?[\\s\\S]*$/i, '')
                .replace(/\\s+\\d[\\d,]*\\s+comments?[\\s\\S]*$/i, '')
                .replace(/\\s+[A-Z][^\\n]+\\s+and\\s+\\d[\\d,]*\\s+others\\s+reacted[\\s\\S]*$/i, '')
                .replace(/\\s+Like\\s+Comment\\s+Repost\\s+Send[\\s\\S]*$/i, '')
                .replace(/\\s+Reaction button state:[\\s\\S]*$/i, '')
                .replace(/^\\d+\\s*(?:s|m|h|d|w|mo|yr|min)\\s*[•.]?\\s*Follow\\s+/i, '')
            );
        }
        // Text-driven extraction for role="listitem" cards: the rendered text is
        // split into blocks which are then classified positionally.
        function extractFromListItem(root) {
            var blocks = splitBlocks(root.innerText || '');
            var filtered = [];
            var i;
            var value;
            var author = '';
            var authorUrl = '';
            var headline = '';
            var postedAt = '';
            var postedAtIndex = -1;
            var text = '';
            var bodyStart = -1;
            var permalink;
            var url;
            var reactions;
            var comments;
            var endIndex = -1;
            var urn;
            var actorMeta;

            if (blocks.length < 5) return null;
            if (blocks[0] !== 'Feed post') return null;

            // Drop social-context banners ("X liked this", "Suggested", ...).
            for (i = 1; i < blocks.length; i += 1) {
                value = blocks[i];
                if (!value) continue;
                if (/commented on this|reposted this|liked this|suggested/i.test(value)) continue;
                filtered.push(value);
            }
            if (filtered.length < 4) return null;

            for (i = 0; i < filtered.length; i += 1) {
                value = filtered[i];
                if (!author && !looksLikeBadge(value) && !looksLikeAction(value) && !looksLikeTimestamp(value)) {
                    author = value;
                    continue;
                }
                if (author && !headline && !looksLikeBadge(value) && !looksLikeAction(value) && !looksLikeTimestamp(value) && !looksLikeCta(value)) {
                    headline = value;
                    continue;
                }
                if (!postedAt && looksLikeTimestamp(value)) {
                    postedAt = cleanTimestamp(value);
                    // Remember where the raw block sits: the cleaned value may no
                    // longer equal the block (e.g. "2h •" becomes "2h"), so it
                    // cannot be looked up again by value later on.
                    postedAtIndex = i;
                    continue;
                }
            }

            if (!author) return null;
            authorUrl = selectProfileUrl(root, author);
            if (!headline || !postedAt) {
                actorMeta = parseActorLinkMeta(root, author);
                if (!headline && actorMeta.headline) headline = actorMeta.headline;
                if (!postedAt && actorMeta.postedAt) postedAt = actorMeta.postedAt;
            }

            // The body starts after the first action button if there is one ...
            for (i = 0; i < filtered.length; i += 1) {
                value = filtered[i];
                if (looksLikeAction(value)) {
                    bodyStart = i + 1;
                    break;
                }
            }
            // ... otherwise after the timestamp block, but only when that block was
            // really found in the filtered list. A timestamp recovered from the actor
            // link has no position here and must fall through to the default below
            // instead of silently yielding index 0.
            if (bodyStart < 0 && postedAtIndex >= 0) {
                bodyStart = postedAtIndex + 1;
            }
            if (bodyStart < 0) bodyStart = Math.min(filtered.length, headline ? 2 : 1);

            // The body ends at the first engagement counter or footer action.
            for (i = bodyStart; i < filtered.length; i += 1) {
                value = filtered[i];
                if (looksLikeEngagement(value) || looksLikeFooterAction(value)) {
                    endIndex = i;
                    break;
                }
            }
            if (endIndex < 0) endIndex = filtered.length;

            text = stripBodyTail(filtered.slice(bodyStart, endIndex).join('\\n\\n'));
            if (!text) return null;

            permalink = root.querySelector('a[href*="/feed/update/"], a[href*="/posts/"], a[href*="/pulse/"]');
            url = permalink ? permalink.href : '';
            urn = findActivityUrn(root);
            if (!url && urn) url = 'https://www.linkedin.com/feed/update/' + urn + '/';
            reactions = parseReactionCount(root, filtered);
            comments = parseCommentCount(filtered);

            return {
                id: url || (author + '::' + postedAt + '::' + text.slice(0, 120)),
                author: author,
                author_url: authorUrl,
                headline: headline,
                text: text,
                posted_at: postedAt,
                reactions: reactions,
                comments: comments,
                url: url,
            };
        }
        // Comment count taken from the first button/link whose label mentions "comment".
        function commentMetric(root) {
            var links = Array.from(root.querySelectorAll('button, a'));
            var i;
            var label;
            for (i = 0; i < links.length; i += 1) {
                label = normalize(links[i].textContent || links[i].getAttribute('aria-label'));
                if (/comment/i.test(label)) return parseMetric(label);
            }
            return 0;
        }

        // ---- main ----
        var currentUrl = window.location.href;
        var path = String(window.location.pathname || '');
        var loginRequired = path.indexOf('/login') >= 0
            || path.indexOf('/checkpoint/') >= 0
            || Boolean(document.querySelector('input[name="session_key"], form.login__form'));
        // At most 8 buttons whose label mentions "more" are clicked before reading.
        var moreButtons = Array.from(document.querySelectorAll('button, a[role="button"]'))
            .filter(function (el) {
                return /see more|more/i.test(normalize(el.textContent))
                    || /see more|more/i.test(normalize(el.getAttribute('aria-label')));
            })
            .slice(0, 8);
        var cards = Array.from(document.querySelectorAll('article, .feed-shared-update-v2, .occludable-update, [role="listitem"]'));
        var seen = new Set();
        var posts = [];
        var i;
        var card;
        var root;
        var author;
        var headline;
        var text;
        var postedAt;
        var permalink;
        var url;
        var reactions;
        var comments;
        var extracted;

        for (i = 0; i < moreButtons.length; i += 1) {
            // Best effort: a failing click must not abort the extraction.
            try { moreButtons[i].click(); } catch (err) {}
        }

        for (i = 0; i < cards.length; i += 1) {
            card = cards[i];
            root = card.closest('article, .feed-shared-update-v2, .occludable-update, [role="listitem"]') || card;
            if (!root || seen.has(root)) continue;
            seen.add(root);

            // listitem cards are parsed from their rendered text, not via class selectors.
            if (String(root.getAttribute('role') || '') === 'listitem') {
                extracted = extractFromListItem(root);
                if (extracted) posts.push(extracted);
                continue;
            }

            author = normalize(
                textOf(root, '.update-components-actor__title span[dir="ltr"]')
                || textOf(root, '.update-components-actor__title')
                || textOf(root, '[data-control-name="actor"] span[dir="ltr"]')
                || textOf(root, '[data-control-name="actor"]')
            );
            headline = normalize(
                textOf(root, '.update-components-actor__description')
                || textOf(root, '.update-components-actor__sub-description')
            );
            text = normalize(
                textOf(root, '.update-components-text span[dir="ltr"]')
                || textOf(root, '.update-components-text')
                || textOf(root, '.feed-shared-inline-show-more-text span[dir="ltr"]')
                || textOf(root, '.feed-shared-inline-show-more-text')
                || textOf(root, '[data-test-id="main-feed-activity-card"] .break-words')
            );
            postedAt = normalize(
                textOf(root, '.update-components-actor__sub-description a')
                || textOf(root, '.update-components-actor__sub-description span[aria-hidden="true"]')
                || textOf(root, 'time')
            );
            permalink = root.querySelector('a[href*="/feed/update/"], a[href*="/posts/"], a[href*="/pulse/"]');
            url = permalink ? permalink.href : '';
            if (url && url.indexOf('/') === 0) url = new URL(url, currentUrl).toString();
            reactions = parseMetric(
                textOf(root, '.social-details-social-counts__reactions-count')
                || attrOf(root, '[aria-label*="reaction"]', 'aria-label')
                || attrOf(root, '[aria-label*="like"]', 'aria-label')
            );
            comments = commentMetric(root);

            if (!author || !text) continue;

            posts.push({
                id: url || (author + '::' + postedAt + '::' + text.slice(0, 120)),
                author: author,
                author_url: hrefOf(root, 'a[href*="/in/"], a[href*="/company/"]'),
                headline: headline,
                text: text,
                posted_at: postedAt,
                reactions: reactions,
                comments: comments,
                url: url,
            });
        }

        return { loginRequired: loginRequired, posts: posts };
    })()`);
}

cli({
    site: 'linkedin',
    name: 'timeline',
    description: 'Read LinkedIn home timeline posts',
    domain: 'www.linkedin.com',
    strategy: Strategy.COOKIE,
    browser: true,
    args: [
        { name: 'limit', type: 'int', default: 20, help: 'Number of posts to return (max 100)' },
    ],
    columns: ['rank', 'author', 'author_url', 'headline', 'text', 'posted_at', 'reactions', 'comments', 'url'],
    /**
     * Open the feed, then alternate between scraping and scrolling (at most six
     * rounds) until `limit` unique posts are collected.
     *
     * @throws {AuthRequiredError} When a login wall was seen and no post was found.
     * @throws {EmptyResultError} When no post could be extracted at all.
     */
    func: async (page, kwargs) => {
        // Clamp to 1..100. A non-numeric limit falls back to the default instead of
        // turning into NaN, which would skip the loop below entirely.
        const requested = Number(kwargs.limit ?? 20);
        const limit = Math.max(1, Math.min(Number.isFinite(requested) ? Math.trunc(requested) : 20, 100));
        await page.goto('https://www.linkedin.com/feed/');
        await page.wait(4); // NOTE(review): presumably seconds - confirm against the page API
        let posts = [];
        let sawLoginWall = false;
        for (let i = 0; i < 6 && posts.length < limit; i++) {
            const batch = await extractVisiblePosts(page);
            if (batch?.loginRequired)
                sawLoginWall = true;
            posts = mergeTimelinePosts(posts, Array.isArray(batch?.posts) ? batch.posts : []);
            if (posts.length >= limit)
                break;
            await page.autoScroll({ times: 1, delayMs: 1200 });
            await page.wait(1);
        }
        if (sawLoginWall && posts.length === 0) {
            throw new AuthRequiredError('linkedin.com', 'LinkedIn timeline requires an active signed-in browser session');
        }
        if (posts.length === 0) {
            throw new EmptyResultError('linkedin timeline', 'Make sure your LinkedIn home feed is visible in the browser.');
        }
        return posts.slice(0, limit).map((post, index) => ({
            rank: index + 1,
            ...post,
        }));
    },
});

// Internal helpers exposed for unit tests only.
export const __test__ = {
    parseMetric,
    buildPostId,
    mergeTimelinePosts,
};