post.js
import { AuthRequiredError, EmptyResultError } from '@jackwener/opencli/errors';
import { formatCookieHeader } from '@jackwener/opencli/download';
import { downloadMedia } from '@jackwener/opencli/download/media-download';
import { cli, Strategy } from '@jackwener/opencli/registry';

/**
 * band post — Export full content of a Band post: body, comments, and optional photo download.
 *
 * Navigates directly to the post URL and extracts everything from the DOM.
 * No XHR interception needed — Band renders the full post for logged-in users.
 *
 * Output rows:
 *   type=post    → the post itself (author, date, body text)
 *   type=comment → top-level comment
 *   type=reply   → reply to a comment (nested under its parent)
 *
 * Photo thumbnail URLs carry a ?type=sNNN suffix; stripping it yields full-res.
 */

/**
 * Build the script that is evaluated inside the Band post page.
 *
 * The script resolves to `{ author, date, text, photos, comments }`, where
 * `photos` is a list of absolute URLs with the query string removed and
 * `comments` is a flat, depth-annotated list (`{ depth, author, date, text }`).
 *
 * @param {boolean} includeComments - When false the script returns right after
 *   reading the post itself and `comments` is an empty array.
 * @returns {string} Source text of an async IIFE expression.
 */
function buildExtractScript(includeComments) {
  // Only a literal `true` / `false` is ever spliced into the script text.
  return `
    (async () => {
      const withComments = ${includeComments ? 'true' : 'false'};
      const sleep = ms => new Promise(r => setTimeout(r, ms));
      const norm = s => (s || '').replace(/\\s+/g, ' ').trim();
      // Band embeds <band:mention>, <band:sticker>, etc. in content — strip to plain text.
      const stripTags = s => s.replace(/<\\/?band:[^>]+>/g, '');

      // Wait up to 9 s for the post content to render (poll for the author link,
      // which appears after React hydration fills the post header).
      for (let i = 0; i < 30; i++) {
        if (document.querySelector('._postWrapper a.text')) break;
        await sleep(300);
      }

      const postCard = document.querySelector('._postWrapper');
      const commentSection = postCard?.querySelector('.dPostCommentMainView');

      // Author and date live in the post header, above the comment section.
      // Exclude any matches inside the comment section to avoid picking up comment authors.
      let author = '', date = '';
      for (const el of (postCard?.querySelectorAll('a.text') || [])) {
        if (!commentSection?.contains(el)) { author = norm(el.textContent); break; }
      }
      for (const el of (postCard?.querySelectorAll('time.time') || [])) {
        if (!commentSection?.contains(el)) { date = norm(el.textContent); break; }
      }

      const bodyEl = postCard?.querySelector('.postText._postText');
      const text = bodyEl ? stripTags(norm(bodyEl.innerText || bodyEl.textContent)) : '';

      // Photo thumbnails have a ?type=sNNN query param; strip it for full-res URL.
      // Use location.href as base so protocol-relative or relative URLs resolve correctly.
      const photos = Array.from(postCard?.querySelectorAll('img._imgRecentPhoto, img._imgPhoto') || [])
        .map(img => {
          const src = img.getAttribute('src') || '';
          if (!src) return '';
          try { const u = new URL(src, location.href); return u.origin + u.pathname; }
          catch { return ''; }
        })
        .filter(Boolean);

      if (!withComments) return { author, date, text, photos, comments: [] };

      // Wait up to 6 s for the comment list container to render.
      // Wait for the container itself (not .cComment) so posts with zero comments
      // don't incur a fixed 6s delay waiting for an element that never appears.
      for (let i = 0; i < 20; i++) {
        if (postCard?.querySelector('.sCommentList._heightDetectAreaForComment')) break;
        await sleep(300);
      }

      // Recursively collect comments and their replies.
      // Replies live in .sReplyList > .sCommentList, not in ._replyRegion.
      function extractComments(container, depth) {
        const results = [];
        for (const el of container.querySelectorAll(':scope > .cComment')) {
          results.push({
            depth,
            author: norm(el.querySelector('strong.name')?.textContent),
            date: norm(el.querySelector('time.time')?.textContent),
            text: stripTags(norm(el.querySelector('p.txt._commentContent')?.innerText || '')),
          });
          const replyList = el.querySelector('.sReplyList .sCommentList._heightDetectAreaForComment');
          if (replyList) results.push(...extractComments(replyList, depth + 1));
        }
        return results;
      }

      const commentList = postCard?.querySelector('.sCommentList._heightDetectAreaForComment');
      const comments = commentList ? extractComments(commentList, 0) : [];

      return { author, date, text, photos, comments };
    })()
  `;
}

/**
 * Tell whether a URL points at band.us or one of its subdomains.
 *
 * @param {string} url - Absolute URL.
 * @returns {boolean} False for other hosts and for strings that do not parse as a URL.
 */
function isBandUrl(url) {
  try {
    const { hostname } = new URL(url);
    return hostname === 'band.us' || hostname.endsWith('.band.us');
  } catch {
    return false;
  }
}

/**
 * Derive a file extension from the URL path so downloaded files get a
 * matching extension (e.g. photo_1.jpg).
 *
 * @param {string} url - Absolute URL.
 * @returns {string} Extension without the dot; 'jpg' when the path has none
 *   or the URL does not parse.
 */
function urlExt(url) {
  try {
    return new URL(url).pathname.match(/\.(\w+)$/)?.[1] ?? 'jpg';
  } catch {
    return 'jpg';
  }
}

/**
 * Download the post's photos into `outputDir` via the shared downloadMedia utility.
 *
 * Files are named `photo_<n>.<ext>` where `<n>` is the photo's 1-based position
 * in the post, so the name on disk matches the `[photoN]` label used when photos
 * are listed inline, regardless of which host serves the file.
 *
 * @param {object} page - Browser page handle; only `getCookies` is used here.
 * @param {string[]} photos - Photo URLs in post order.
 * @param {string} outputDir - Target directory.
 * @returns {Promise<void>}
 */
async function downloadPhotos(page, photos, outputDir) {
  // Pass browser cookies so Band's login-protected photo URLs don't fail with 401/403.
  const cookieHeader = formatCookieHeader(await page.getCookies({ url: 'https://www.band.us' }));

  // Assign filenames BEFORE splitting into batches: the index then reflects the
  // photo's position in the post, and names cannot collide across batches.
  const items = photos.map((url, index) => ({
    type: 'image',
    url,
    filename: `photo_${index + 1}.${urlExt(url)}`,
  }));

  // Only send Band cookies to Band-hosted URLs; avoid leaking auth cookies to third-party CDNs.
  const bandItems = items.filter((item) => isBandUrl(item.url));
  const otherItems = items.filter((item) => !isBandUrl(item.url));

  if (bandItems.length > 0) {
    await downloadMedia(bandItems, { output: outputDir, verbose: false, cookies: cookieHeader });
  }
  if (otherItems.length > 0) {
    await downloadMedia(otherItems, { output: outputDir, verbose: false });
  }
}

/**
 * Turn the extracted page data into output rows.
 *
 * @param {object} data - Result of the in-page extraction script.
 * @param {string[]} photos - Photo URLs in post order.
 * @param {boolean} listPhotosInline - When true, photo URLs are appended to the
 *   post text as `[photoN] <url>` lines.
 * @returns {Array<{type: string, author: string, date: string, text: string}>}
 */
function buildRows(data, photos, listPhotosInline) {
  const photoLines = listPhotosInline
    ? photos.map((url, index) => `[photo${index + 1}] ${url}`)
    : [];

  const rows = [
    {
      type: 'post',
      author: data.author ?? '',
      date: data.date ?? '',
      text: [data.text ?? '', ...photoLines].filter(Boolean).join('\n'),
    },
  ];

  // Comment rows — depth=0 → type 'comment', depth≥1 → type 'reply'.
  for (const comment of data.comments ?? []) {
    const body = comment.text ?? '';
    rows.push({
      type: comment.depth === 0 ? 'comment' : 'reply',
      author: comment.author ?? '',
      date: comment.date ?? '',
      // Replies are indented by depth and prefixed with a tree marker.
      text: comment.depth > 0 ? `${' '.repeat(comment.depth)}└ ${body}` : body,
    });
  }
  return rows;
}

cli({
  site: 'band',
  name: 'post',
  description: 'Export full content of a post including comments',
  domain: 'www.band.us',
  strategy: Strategy.COOKIE,
  navigateBefore: false,
  browser: true,
  args: [
    { name: 'band_no', positional: true, required: true, type: 'int', help: 'Band number' },
    { name: 'post_no', positional: true, required: true, type: 'int', help: 'Post number' },
    { name: 'output', type: 'str', default: '', help: 'Directory to save attached photos' },
    { name: 'comments', type: 'bool', default: true, help: 'Include comments (default: true)' },
  ],
  columns: ['type', 'author', 'date', 'text'],
  /**
   * Open the post page, extract post/comments/photos from the DOM, optionally
   * download the photos, and return one row per post/comment/reply.
   *
   * @param {object} page - Browser page handle (goto, getCookies, evaluate are used).
   * @param {object} kwargs - Parsed CLI arguments: band_no, post_no, output, comments.
   * @returns {Promise<Array<{type: string, author: string, date: string, text: string}>>}
   * @throws {AuthRequiredError} When no `band_session` cookie is present.
   * @throws {EmptyResultError} When the page yields no text, comments, or photos.
   */
  func: async (page, kwargs) => {
    const bandNo = Number(kwargs.band_no);
    const postNo = Number(kwargs.post_no);
    const outputDir = kwargs.output;
    // The flag ends up inside evaluated script text, so reduce it to a strict
    // boolean first. The string 'false' stays "off", which is what splicing the
    // raw value into the script evaluated to.
    const withComments = Boolean(kwargs.comments) && kwargs.comments !== 'false';

    await page.goto(`https://www.band.us/band/${bandNo}/post/${postNo}`);

    const cookies = await page.getCookies({ domain: 'band.us' });
    const isLoggedIn = cookies.some((c) => c.name === 'band_session');
    if (!isLoggedIn) {
      throw new AuthRequiredError('band.us', 'Not logged in to Band');
    }

    const data = await page.evaluate(buildExtractScript(withComments));
    if (!data?.text && !data?.comments?.length && !data?.photos?.length) {
      throw new EmptyResultError('band post', 'Post not found or not accessible');
    }

    const photos = data.photos ?? [];

    // Download photos when --output is specified.
    if (outputDir && photos.length > 0) {
      await downloadPhotos(page, photos, outputDir);
    }

    // Post row — append photo URLs inline when not downloading to disk.
    return buildRows(data, photos, !outputDir);
  },
});