article.js
import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
import { cli, Strategy } from '@jackwener/opencli/registry';
import { resolveTwitterQueryId } from './shared.js';

// Query ID handed to resolveTwitterQueryId for the TweetResultByRestId operation.
// NOTE(review): presumably a fallback used when live resolution fails — confirm in shared.js.
const TWEET_RESULT_BY_REST_ID_QUERY_ID = '7xflPyRiUxGVbJd4uWmbfg';

// Tweet and article IDs are purely numeric; anything else must never reach a URL or page script.
const NUMERIC_ID_PATTERN = /^\d+$/;

// In-page script: finds the parent tweet ID on an article page, first from any
// anchor whose href contains "/status/", then from the og:url meta tag.
// Evaluates to the numeric ID as a string, or null when nothing matches.
const RESOLVE_PARENT_TWEET_ID_SCRIPT = `
  (function() {
    var links = document.querySelectorAll('a[href*="/status/"]');
    for (var i = 0; i < links.length; i++) {
      var m = links[i].href.match(/\\/status\\/(\\d+)/);
      if (m) return m[1];
    }
    var og = document.querySelector('meta[property="og:url"]');
    if (og && og.content) {
      var m2 = og.content.match(/\\/status\\/(\\d+)/);
      if (m2) return m2[1];
    }
    return null;
  })()
`;

/**
 * Builds the in-page script that calls the TweetResultByRestId GraphQL endpoint
 * and converts the article's draft.js blocks to Markdown.
 *
 * Both values are embedded with JSON.stringify so they always arrive in the page
 * as properly escaped string literals and can never terminate the literal early.
 *
 * The script evaluates to either `[{title, author, content, url}]` on success or
 * `{error, hint?}` on failure.
 *
 * @param {string} tweetId - Numeric tweet ID (already validated by the caller).
 * @param {string} queryId - GraphQL query ID for TweetResultByRestId.
 * @returns {string} Source text of an async arrow function for page.evaluate.
 */
function buildFetchArticleScript(tweetId, queryId) {
  return `
    async () => {
      const tweetId = ${JSON.stringify(tweetId)};
      const ct0 = document.cookie.split(';').map(c=>c.trim()).find(c=>c.startsWith('ct0='))?.split('=')[1];
      if (!ct0) return {error: 'No ct0 cookie — not logged into x.com'};

      const bearer = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA';
      const headers = {
        'Authorization': 'Bearer ' + decodeURIComponent(bearer),
        'X-Csrf-Token': ct0,
        'X-Twitter-Auth-Type': 'OAuth2Session',
        'X-Twitter-Active-User': 'yes'
      };

      const variables = JSON.stringify({
        tweetId: tweetId,
        withCommunity: false,
        includePromotedContent: false,
        withVoice: false,
      });
      const features = JSON.stringify({
        longform_notetweets_consumption_enabled: true,
        responsive_web_twitter_article_tweet_consumption_enabled: true,
        longform_notetweets_rich_text_read_enabled: true,
        longform_notetweets_inline_media_enabled: true,
        articles_preview_enabled: true,
        responsive_web_graphql_exclude_directive_enabled: true,
        verified_phone_label_enabled: false,
      });
      const fieldToggles = JSON.stringify({
        withArticleRichContentState: true,
        withArticlePlainText: true,
      });

      const url = '/i/api/graphql/' + ${JSON.stringify(queryId)} + '/TweetResultByRestId?variables='
        + encodeURIComponent(variables)
        + '&features=' + encodeURIComponent(features)
        + '&fieldToggles=' + encodeURIComponent(fieldToggles);

      const resp = await fetch(url, {headers, credentials: 'include'});
      if (!resp.ok) return {error: 'HTTP ' + resp.status, hint: 'Tweet may not exist or queryId expired'};
      const d = await resp.json();

      const result = d.data?.tweetResult?.result;
      if (!result) return {error: 'Article not found'};

      // Unwrap TweetWithVisibilityResults
      const tw = result.tweet || result;
      const legacy = tw.legacy || {};
      const user = tw.core?.user_results?.result;
      const screenName = user?.legacy?.screen_name || user?.core?.screen_name || 'unknown';

      // Extract article content
      const articleResults = tw.article?.article_results?.result;
      if (!articleResults) {
        // Fallback: return note_tweet text if present
        const noteText = tw.note_tweet?.note_tweet_results?.result?.text;
        if (noteText) {
          return [{
            title: '(Note Tweet)',
            author: screenName,
            content: noteText,
            url: 'https://x.com/' + screenName + '/status/' + tweetId,
          }];
        }
        return {error: 'Tweet ' + tweetId + ' has no article content'};
      }

      const title = articleResults.title || '(Untitled)';
      const contentState = articleResults.content_state || {};
      const blocks = contentState.blocks || [];

      // Convert draft.js blocks to Markdown
      const parts = [];
      let orderedCounter = 0;
      for (const block of blocks) {
        const blockType = block.type || 'unstyled';
        if (blockType === 'atomic') continue;
        const text = block.text || '';
        if (!text) continue;
        if (blockType !== 'ordered-list-item') orderedCounter = 0;

        if (blockType === 'header-one') parts.push('# ' + text);
        else if (blockType === 'header-two') parts.push('## ' + text);
        else if (blockType === 'header-three') parts.push('### ' + text);
        else if (blockType === 'blockquote') parts.push('> ' + text);
        else if (blockType === 'unordered-list-item') parts.push('- ' + text);
        else if (blockType === 'ordered-list-item') {
          orderedCounter++;
          parts.push(orderedCounter + '. ' + text);
        }
        else if (blockType === 'code-block') parts.push('\`\`\`\\n' + text + '\\n\`\`\`');
        else parts.push(text);
      }

      return [{
        title,
        author: screenName,
        content: parts.join('\\n\\n') || legacy.full_text || '',
        url: 'https://x.com/' + screenName + '/status/' + tweetId,
      }];
    }
  `;
}

cli({
  site: 'twitter',
  name: 'article',
  description: 'Fetch a Twitter Article (long-form content) and export as Markdown',
  domain: 'x.com',
  strategy: Strategy.COOKIE,
  browser: true,
  args: [
    { name: 'tweet-id', type: 'string', positional: true, required: true, help: 'Tweet ID or URL containing the article' },
  ],
  columns: ['title', 'author', 'content', 'url'],
  /**
   * Fetches the article attached to a tweet and returns it as Markdown.
   *
   * @param page - Browser page handle; this function uses its goto, wait and evaluate methods.
   * @param kwargs - Parsed CLI arguments; reads `kwargs['tweet-id']` (a tweet ID or URL).
   * @returns A one-element array of `{title, author, content, url}`, or `[]` when
   *   the page script yields a falsy value.
   * @throws {AuthRequiredError} When the page has no `ct0` cookie (not logged in).
   * @throws {CommandExecutionError} When the input is not a numeric ID or recognised
   *   URL, when an article URL cannot be resolved to a tweet, or when the fetch
   *   reports an error.
   */
  func: async (page, kwargs) => {
    // Extract tweet ID from URL if needed.
    // Article URLs (x.com/i/article/{articleId}) use a different ID than
    // tweet status URLs — the GraphQL endpoint needs the parent tweet ID.
    const rawInput = String(kwargs['tweet-id'] ?? '').trim();
    const isArticleUrl = /\/article\/\d+/.test(rawInput);
    const urlMatch = rawInput.match(/\/(?:status|article)\/(\d+)/);
    let tweetId = urlMatch ? urlMatch[1] : rawInput;

    // The ID ends up in navigation URLs and inside a script evaluated in the
    // logged-in page, so reject anything that is not purely numeric up front.
    if (!NUMERIC_ID_PATTERN.test(tweetId)) {
      throw new CommandExecutionError(
        `Invalid tweet ID or URL: ${JSON.stringify(rawInput)}. Expected a numeric tweet ID or a URL containing /status/{id} or /article/{id}.`,
      );
    }

    if (isArticleUrl) {
      // Navigate to the article page and resolve the parent tweet ID from DOM
      await page.goto(`https://x.com/i/article/${tweetId}`);
      await page.wait(3); // NOTE(review): unit presumably seconds — confirm against the page API
      const resolvedId = await page.evaluate(RESOLVE_PARENT_TWEET_ID_SCRIPT);
      if (!resolvedId || typeof resolvedId !== 'string' || !NUMERIC_ID_PATTERN.test(resolvedId)) {
        throw new CommandExecutionError(`Could not resolve article ${tweetId} to a tweet ID. The article page may not contain a linked tweet.`);
      }
      tweetId = resolvedId;
    }

    // Navigate to the tweet page for cookie context
    await page.goto(`https://x.com/i/status/${tweetId}`);
    await page.wait(3);

    const queryId = await resolveTwitterQueryId(page, 'TweetResultByRestId', TWEET_RESULT_BY_REST_ID_QUERY_ID);
    const result = await page.evaluate(buildFetchArticleScript(tweetId, queryId));

    if (result?.error) {
      // A missing ct0 cookie means the browser session is not authenticated.
      if (String(result.error).includes('No ct0 cookie')) {
        throw new AuthRequiredError('x.com', result.error);
      }
      throw new CommandExecutionError(result.error + (result.hint ? ` (${result.hint})` : ''));
    }
    return result || [];
  }
});