/ clis / twitter / article.js
article.js
  1  import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
  2  import { cli, Strategy } from '@jackwener/opencli/registry';
  3  import { resolveTwitterQueryId } from './shared.js';
  4  const TWEET_RESULT_BY_REST_ID_QUERY_ID = '7xflPyRiUxGVbJd4uWmbfg';
  5  cli({
  6      site: 'twitter',
  7      name: 'article',
  8      description: 'Fetch a Twitter Article (long-form content) and export as Markdown',
  9      domain: 'x.com',
 10      strategy: Strategy.COOKIE,
 11      browser: true,
 12      args: [
 13          { name: 'tweet-id', type: 'string', positional: true, required: true, help: 'Tweet ID or URL containing the article' },
 14      ],
 15      columns: ['title', 'author', 'content', 'url'],
 16      func: async (page, kwargs) => {
 17          // Extract tweet ID from URL if needed.
 18          // Article URLs (x.com/i/article/{articleId}) use a different ID than
 19          // tweet status URLs — the GraphQL endpoint needs the parent tweet ID.
 20          let tweetId = kwargs['tweet-id'];
 21          const isArticleUrl = /\/article\/\d+/.test(tweetId);
 22          const urlMatch = tweetId.match(/\/(?:status|article)\/(\d+)/);
 23          if (urlMatch)
 24              tweetId = urlMatch[1];
 25          if (isArticleUrl) {
 26              // Navigate to the article page and resolve the parent tweet ID from DOM
 27              await page.goto(`https://x.com/i/article/${tweetId}`);
 28              await page.wait(3);
 29              const resolvedId = await page.evaluate(`
 30          (function() {
 31            var links = document.querySelectorAll('a[href*="/status/"]');
 32            for (var i = 0; i < links.length; i++) {
 33              var m = links[i].href.match(/\\/status\\/(\\d+)/);
 34              if (m) return m[1];
 35            }
 36            var og = document.querySelector('meta[property="og:url"]');
 37            if (og && og.content) {
 38              var m2 = og.content.match(/\\/status\\/(\\d+)/);
 39              if (m2) return m2[1];
 40            }
 41            return null;
 42          })()
 43        `);
 44              if (!resolvedId || typeof resolvedId !== 'string') {
 45                  throw new CommandExecutionError(`Could not resolve article ${tweetId} to a tweet ID. The article page may not contain a linked tweet.`);
 46              }
 47              tweetId = resolvedId;
 48          }
 49          // Navigate to the tweet page for cookie context
 50          await page.goto(`https://x.com/i/status/${tweetId}`);
 51          await page.wait(3);
 52          const queryId = await resolveTwitterQueryId(page, 'TweetResultByRestId', TWEET_RESULT_BY_REST_ID_QUERY_ID);
 53          const result = await page.evaluate(`
 54        async () => {
 55          const tweetId = "${tweetId}";
 56          const ct0 = document.cookie.split(';').map(c=>c.trim()).find(c=>c.startsWith('ct0='))?.split('=')[1];
 57          if (!ct0) return {error: 'No ct0 cookie — not logged into x.com'};
 58  
 59          const bearer = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA';
 60          const headers = {
 61            'Authorization': 'Bearer ' + decodeURIComponent(bearer),
 62            'X-Csrf-Token': ct0,
 63            'X-Twitter-Auth-Type': 'OAuth2Session',
 64            'X-Twitter-Active-User': 'yes'
 65          };
 66  
 67          const variables = JSON.stringify({
 68            tweetId: tweetId,
 69            withCommunity: false,
 70            includePromotedContent: false,
 71            withVoice: false,
 72          });
 73          const features = JSON.stringify({
 74            longform_notetweets_consumption_enabled: true,
 75            responsive_web_twitter_article_tweet_consumption_enabled: true,
 76            longform_notetweets_rich_text_read_enabled: true,
 77            longform_notetweets_inline_media_enabled: true,
 78            articles_preview_enabled: true,
 79            responsive_web_graphql_exclude_directive_enabled: true,
 80            verified_phone_label_enabled: false,
 81          });
 82          const fieldToggles = JSON.stringify({
 83            withArticleRichContentState: true,
 84            withArticlePlainText: true,
 85          });
 86  
 87          const url = '/i/api/graphql/' + ${JSON.stringify(queryId)} + '/TweetResultByRestId?variables='
 88            + encodeURIComponent(variables)
 89            + '&features=' + encodeURIComponent(features)
 90            + '&fieldToggles=' + encodeURIComponent(fieldToggles);
 91  
 92          const resp = await fetch(url, {headers, credentials: 'include'});
 93          if (!resp.ok) return {error: 'HTTP ' + resp.status, hint: 'Tweet may not exist or queryId expired'};
 94          const d = await resp.json();
 95  
 96          const result = d.data?.tweetResult?.result;
 97          if (!result) return {error: 'Article not found'};
 98  
 99          // Unwrap TweetWithVisibilityResults
100          const tw = result.tweet || result;
101          const legacy = tw.legacy || {};
102          const user = tw.core?.user_results?.result;
103          const screenName = user?.legacy?.screen_name || user?.core?.screen_name || 'unknown';
104  
105          // Extract article content
106          const articleResults = tw.article?.article_results?.result;
107          if (!articleResults) {
108            // Fallback: return note_tweet text if present
109            const noteText = tw.note_tweet?.note_tweet_results?.result?.text;
110            if (noteText) {
111              return [{
112                title: '(Note Tweet)',
113                author: screenName,
114                content: noteText,
115                url: 'https://x.com/' + screenName + '/status/' + tweetId,
116              }];
117            }
118            return {error: 'Tweet ' + tweetId + ' has no article content'};
119          }
120  
121          const title = articleResults.title || '(Untitled)';
122          const contentState = articleResults.content_state || {};
123          const blocks = contentState.blocks || [];
124  
125          // Convert draft.js blocks to Markdown
126          const parts = [];
127          let orderedCounter = 0;
128          for (const block of blocks) {
129            const blockType = block.type || 'unstyled';
130            if (blockType === 'atomic') continue;
131            const text = block.text || '';
132            if (!text) continue;
133            if (blockType !== 'ordered-list-item') orderedCounter = 0;
134  
135            if (blockType === 'header-one')           parts.push('# ' + text);
136            else if (blockType === 'header-two')      parts.push('## ' + text);
137            else if (blockType === 'header-three')    parts.push('### ' + text);
138            else if (blockType === 'blockquote')       parts.push('> ' + text);
139            else if (blockType === 'unordered-list-item') parts.push('- ' + text);
140            else if (blockType === 'ordered-list-item') {
141              orderedCounter++;
142              parts.push(orderedCounter + '. ' + text);
143            }
144            else if (blockType === 'code-block')       parts.push('\`\`\`\\n' + text + '\\n\`\`\`');
145            else                                       parts.push(text);
146          }
147  
148          return [{
149            title,
150            author: screenName,
151            content: parts.join('\\n\\n') || legacy.full_text || '',
152            url: 'https://x.com/' + screenName + '/status/' + tweetId,
153          }];
154        }
155      `);
156          if (result?.error) {
157              if (String(result.error).includes('No ct0 cookie'))
158                  throw new AuthRequiredError('x.com', result.error);
159              throw new CommandExecutionError(result.error + (result.hint ? ` (${result.hint})` : ''));
160          }
161          return result || [];
162      }
163  });