// download.js
/**
 * Twitter/X download — download images and videos from tweets.
 *
 * Usage:
 *   opencli twitter download elonmusk --limit 10 --output ./twitter
 *   opencli twitter download --tweet-url https://x.com/xxx/status/123 --output ./twitter
 */
import { cli, Strategy } from '@jackwener/opencli/registry';
import { formatCookieHeader } from '@jackwener/opencli/download';
import { downloadMedia } from '@jackwener/opencli/download/media-download';

/** Origin used to build profile URLs and to resolve relative tweet links. */
const X_ORIGIN = 'https://x.com';

/** Divisor that turns --limit into a number of auto-scroll passes. */
const ITEMS_PER_SCROLL = 5;

/**
 * Script evaluated inside the page. It only collects raw candidates
 * (`{ type, url, poster? }`); URL clean-up, de-duplication and truncation are
 * done afterwards by `normalizeMedia`.
 */
const COLLECT_MEDIA_SCRIPT = `
  (() => {
    const media = [];

    // Images whose src points at the tweet media host
    document.querySelectorAll('img[src*="pbs.twimg.com/media"]').forEach(img => {
      if (img.src) {
        media.push({ type: 'image', url: img.src });
      }
    });

    // Inline <video> elements that expose a src
    document.querySelectorAll('video').forEach(video => {
      if (video.src) {
        media.push({ type: 'video', url: video.src, poster: video.poster || '' });
      }
    });

    // Video players: report the enclosing tweet's status link (for yt-dlp)
    document.querySelectorAll('[data-testid="videoPlayer"]').forEach(player => {
      const tweetLink = player.closest('article')?.querySelector('a[href*="/status/"]');
      const href = tweetLink?.getAttribute('href') || '';
      if (href) {
        media.push({ type: 'video-tweet', url: href });
      }
    });

    return media;
  })()
`;

/**
 * Build the single-row result used to report a failure.
 *
 * @param {string} message - Explanation shown in the `size` column.
 * @returns {Array<{index: number, type: string, status: string, size: string}>}
 */
function failure(message) {
  return [{ index: 0, type: '-', status: 'failed', size: message }];
}

/**
 * Force the `name` query parameter of an image URL to `large`.
 *
 * Going through the URL API (instead of string replacement) keeps the result
 * valid when the source has no query string, when `name` is the first
 * parameter, or when `name` is not the last parameter.
 *
 * @param {string} src - Image URL as read from the page.
 * @returns {string} The rewritten URL, or `src` unchanged if it cannot be parsed.
 */
function toLargeImageUrl(src) {
  try {
    const url = new URL(src);
    url.searchParams.set('name', 'large');
    return url.href;
  } catch {
    return src;
  }
}

/**
 * Resolve a tweet link (relative or already absolute) against the X origin.
 *
 * @param {string} href - Value of the anchor's `href` attribute.
 * @returns {string} Absolute URL.
 */
function toAbsoluteUrl(href) {
  try {
    return new URL(href, X_ORIGIN).href;
  } catch {
    return `${X_ORIGIN}${href}`;
  }
}

/**
 * Clean up, de-duplicate and truncate the raw candidates returned by the page.
 *
 * - image URLs are switched to the `large` variant;
 * - tweet links are made absolute;
 * - `blob:` URLs are dropped: they can only be resolved inside the document
 *   that created them, so they cannot be fetched from outside the page;
 * - entries are de-duplicated by final URL, keeping first-seen order.
 *
 * @param {Array<{type: string, url: string, poster?: string}>} items - Raw candidates.
 * @param {number} limit - Maximum number of entries to keep (positive integer).
 * @returns {Array<{type: string, url: string, poster?: string}>}
 */
function normalizeMedia(items, limit) {
  const seen = new Set();
  const result = [];
  for (const item of items) {
    if (result.length >= limit) {
      break;
    }
    if (!item || typeof item.url !== 'string' || item.url === '') {
      continue;
    }
    let url = item.url;
    if (item.type === 'image') {
      url = toLargeImageUrl(url);
    } else if (item.type === 'video-tweet') {
      url = toAbsoluteUrl(url);
    }
    if (url.startsWith('blob:') || seen.has(url)) {
      continue;
    }
    seen.add(url);
    result.push({ ...item, url });
  }
  return result;
}

cli({
  site: 'twitter',
  name: 'download',
  description: '下载 Twitter/X 媒体(图片和视频)',
  domain: 'x.com',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'username', positional: true, help: 'Twitter username (downloads from media tab)' },
    { name: 'tweet-url', help: 'Single tweet URL to download' },
    { name: 'limit', type: 'int', default: 10, help: 'Number of tweets to scan' },
    { name: 'output', default: './twitter-downloads', help: 'Output directory' },
  ],
  columns: ['index', 'type', 'status', 'size'],
  /**
   * Open either a single tweet or a user's media tab, collect the media found
   * on the page and hand it to `downloadMedia`.
   *
   * @param page - Browser page handle supplied by the CLI runtime.
   * @param kwargs - Parsed arguments (`username`, `tweet-url`, `limit`, `output`).
   * @returns Result rows: a single failure row, or whatever `downloadMedia` returns.
   */
  func: async (page, kwargs) => {
    // Accept "@handle" as well as "handle".
    const username = String(kwargs.username ?? '').trim().replace(/^@/, '');
    const tweetUrl = kwargs['tweet-url'];
    const { output } = kwargs;
    const limit = Number(kwargs.limit ?? 10);

    if (!username && !tweetUrl) {
      return failure('Must provide a username or --tweet-url');
    }
    // A zero, negative or NaN limit would otherwise produce a meaningless
    // scroll count and make a slice-style truncation drop items from the end.
    if (!Number.isInteger(limit) || limit <= 0) {
      return failure('Invalid --limit: must be a positive integer');
    }

    // Navigate to the appropriate page; --tweet-url wins when both are given.
    if (tweetUrl) {
      await page.goto(tweetUrl);
    } else {
      await page.goto(`${X_ORIGIN}/${encodeURIComponent(username)}/media`);
    }
    // NOTE(review): fixed pause before scraping, presumably seconds — confirm.
    await page.wait(3);

    // Scroll the media tab to load more content; a single tweet needs none.
    if (!tweetUrl) {
      await page.autoScroll({ times: Math.ceil(limit / ITEMS_PER_SCROLL) });
    }

    // Extract media candidates from the page
    const data = await page.evaluate(COLLECT_MEDIA_SCRIPT);
    if (!Array.isArray(data) || data.length === 0) {
      return failure('No media found');
    }

    const uniqueMedia = normalizeMedia(data, limit);
    if (uniqueMedia.length === 0) {
      return failure('No media found');
    }

    // Extract cookies so the downloader can reuse the browser session
    const browserCookies = await page.getCookies({ domain: 'x.com' });

    return downloadMedia(uniqueMedia, {
      output,
      subdir: tweetUrl ? 'tweets' : (username || 'media'),
      cookies: formatCookieHeader(browserCookies),
      browserCookies,
      filenamePrefix: username || 'tweet',
      ytdlpExtraArgs: ['--merge-output-format', 'mp4'],
    });
  },
});