Cradicle Explorer

/ clis / twitter / download.js
download.js
  1  /**
  2   * Twitter/X download — download images and videos from tweets.
  3   *
  4   * Usage:
  5   *   opencli twitter download elonmusk --limit 10 --output ./twitter
  6   *   opencli twitter download --tweet-url https://x.com/xxx/status/123 --output ./twitter
  7   */
  8  import { cli, Strategy } from '@jackwener/opencli/registry';
  9  import { formatCookieHeader } from '@jackwener/opencli/download';
 10  import { downloadMedia } from '@jackwener/opencli/download/media-download';
 11  cli({
 12      site: 'twitter',
 13      name: 'download',
 14      description: '下载 Twitter/X 媒体（图片和视频）',
 15      domain: 'x.com',
 16      strategy: Strategy.COOKIE,
 17      args: [
 18          { name: 'username', positional: true, help: 'Twitter username (downloads from media tab)' },
 19          { name: 'tweet-url', help: 'Single tweet URL to download' },
 20          { name: 'limit', type: 'int', default: 10, help: 'Number of tweets to scan' },
 21          { name: 'output', default: './twitter-downloads', help: 'Output directory' },
 22      ],
 23      columns: ['index', 'type', 'status', 'size'],
 24      func: async (page, kwargs) => {
 25          const username = kwargs.username;
 26          const tweetUrl = kwargs['tweet-url'];
 27          const limit = kwargs.limit;
 28          const output = kwargs.output;
 29          if (!username && !tweetUrl) {
 30              return [{
 31                      index: 0,
 32                      type: '-',
 33                      status: 'failed',
 34                      size: 'Must provide a username or --tweet-url',
 35                  }];
 36          }
 37          // Navigate to the appropriate page
 38          if (tweetUrl) {
 39              await page.goto(tweetUrl);
 40          }
 41          else {
 42              await page.goto(`https://x.com/${username}/media`);
 43          }
 44          await page.wait(3);
 45          // Scroll to load more content
 46          if (!tweetUrl) {
 47              await page.autoScroll({ times: Math.ceil(limit / 5) });
 48          }
 49          // Extract media URLs
 50          const data = await page.evaluate(`
 51        (() => {
 52          const media = [];
 53  
 54          // Find images (high quality)
 55          document.querySelectorAll('img[src*="pbs.twimg.com/media"]').forEach(img => {
 56            let src = img.src || '';
 57            // Get large version
 58            src = src.replace(/&name=\\w+$/, '&name=large');
 59            src = src.replace(/\\?format=/, '?format=');
 60            if (!src.includes('&name=')) {
 61              src = src + '&name=large';
 62            }
 63            media.push({ type: 'image', url: src });
 64          });
 65  
 66          // Find videos
 67          document.querySelectorAll('video').forEach(video => {
 68            const src = video.src || '';
 69            if (src) {
 70              media.push({ type: 'video', url: src, poster: video.poster || '' });
 71            }
 72          });
 73  
 74          // Find video tweets (for yt-dlp)
 75          document.querySelectorAll('[data-testid="videoPlayer"]').forEach(player => {
 76            const tweetLink = player.closest('article')?.querySelector('a[href*="/status/"]');
 77            const href = tweetLink?.getAttribute('href') || '';
 78            if (href) {
 79              const tweetUrl = 'https://x.com' + href;
 80              media.push({ type: 'video-tweet', url: tweetUrl });
 81            }
 82          });
 83  
 84          return media;
 85        })()
 86      `);
 87          if (!data || data.length === 0) {
 88              return [{ index: 0, type: '-', status: 'failed', size: 'No media found' }];
 89          }
 90          // Extract cookies
 91          const browserCookies = await page.getCookies({ domain: 'x.com' });
 92          // Deduplicate media
 93          const seen = new Set();
 94          const uniqueMedia = data.filter((m) => {
 95              if (seen.has(m.url))
 96                  return false;
 97              seen.add(m.url);
 98              return true;
 99          }).slice(0, limit);
100          const subdir = tweetUrl ? 'tweets' : (username || 'media');
101          return downloadMedia(uniqueMedia, {
102              output,
103              subdir,
104              cookies: formatCookieHeader(browserCookies),
105              browserCookies,
106              filenamePrefix: username || 'tweet',
107              ytdlpExtraArgs: ['--merge-output-format', 'mp4'],
108          });
109      },
110  });