read.js
/**
 * Reddit post reader with threaded comment tree.
 *
 * Replaces the original flat read.yaml with recursive comment traversal:
 * - Top-K comments by score at each level
 * - Configurable depth and replies-per-level
 * - Indented output showing conversation threads
 */
import { cli, Strategy } from '@jackwener/opencli/registry';
import { CommandExecutionError } from '@jackwener/opencli/errors';

/**
 * Coerce a numeric CLI option to an integer that is at least `min`.
 *
 * Missing or non-numeric values (undefined, null, NaN, Infinity) yield
 * `fallback` so that a bad option can never inject `NaN` into the page script.
 * Fractional values are truncated toward zero before the lower bound applies.
 *
 * @param {unknown} value - Raw option value as received in kwargs.
 * @param {number} fallback - Value used when `value` is absent or not finite.
 * @param {number} min - Smallest value that may be returned for a valid input.
 * @returns {number} A finite integer.
 */
function boundedInt(value, fallback, min) {
  if (value == null) return fallback;
  const parsed = Math.trunc(Number(value));
  return Number.isFinite(parsed) ? Math.max(min, parsed) : fallback;
}

cli({
  site: 'reddit',
  name: 'read',
  description: 'Read a Reddit post and its comments',
  domain: 'reddit.com',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'post-id', required: true, positional: true, help: 'Post ID (e.g. 1abc123) or full URL' },
    { name: 'sort', default: 'best', help: 'Comment sort: best, top, new, controversial, old, qa' },
    { name: 'limit', type: 'int', default: 25, help: 'Number of top-level comments' },
    { name: 'depth', type: 'int', default: 2, help: 'Max reply depth (1=no replies, 2=one level of replies, etc.)' },
    { name: 'replies', type: 'int', default: 5, help: 'Max replies shown per comment at each level (sorted by score)' },
    { name: 'max-length', type: 'int', default: 2000, help: 'Max characters per comment body (min 100)' },
  ],
  columns: ['type', 'author', 'score', 'text'],
  /**
   * Fetch a post plus its comment tree from inside the Reddit page and
   * flatten it into rows of { type, author, score, text }.
   *
   * Row types: 'POST' for the submission, 'L<depth>' for comments and their
   * "[+N more replies]" markers, and '' for the trailing
   * "[+N more top-level comments]" marker.
   *
   * @param page - Browser page handle; only `goto` and `evaluate` are used.
   * @param kwargs - Parsed CLI arguments keyed by the names declared in `args`.
   * @returns {Promise<Array<object>>} Flattened rows in display order.
   * @throws {CommandExecutionError} When the post id is empty, the request
   *   fails, or the response is not the expected two-listing array.
   */
  func: async (page, kwargs) => {
    // Normalise every option up front so the injected script only ever sees
    // a string id/sort and finite integers.
    const options = {
      postId: String(kwargs['post-id'] ?? '').trim(),
      sort: String(kwargs.sort ?? 'best'),
      limit: boundedInt(kwargs.limit, 25, 1),
      maxDepth: boundedInt(kwargs.depth, 2, 1),
      maxReplies: boundedInt(kwargs.replies, 5, 1),
      maxLength: boundedInt(kwargs['max-length'], 2000, 100),
    };
    if (!options.postId) {
      throw new CommandExecutionError('Post ID or URL is required');
    }

    // Navigate first so the relative fetch below is same-origin with Reddit.
    await page.goto('https://www.reddit.com');

    // NOTE: everything inside this template literal is source text for the
    // page. Backslashes are doubled so the page sees a single one.
    const data = await page.evaluate(`
      (async function() {
        const opts = ${JSON.stringify(options)};

        // Accept either a bare id or any URL containing /comments/<id>
        let postId = opts.postId;
        const urlMatch = postId.match(/comments\\/([a-z0-9]+)/);
        if (urlMatch) postId = urlMatch[1];

        const limit = opts.limit;
        const maxDepth = opts.maxDepth;
        const maxReplies = opts.maxReplies;
        const maxLength = opts.maxLength;

        // Request more from API than top-level limit to get inline replies
        // depth param tells Reddit how deep to inline replies vs "more" stubs
        const apiLimit = Math.max(limit * 3, 100);
        // User-supplied pieces are percent-encoded so they cannot alter the
        // path or smuggle extra query parameters.
        const url = '/comments/' + encodeURIComponent(postId) + '.json'
          + '?sort=' + encodeURIComponent(opts.sort)
          + '&limit=' + apiLimit
          + '&depth=' + (maxDepth + 1)
          + '&raw_json=1';

        let res;
        try {
          res = await fetch(url, { credentials: 'include' });
        } catch (e) {
          // Report network failures through the same { error } channel as
          // every other failure instead of rejecting the evaluate call.
          return { error: 'Request to Reddit failed: ' + (e && e.message ? e.message : String(e)) };
        }
        if (!res.ok) return { error: 'Reddit API returned HTTP ' + res.status };

        let data;
        try { data = await res.json(); } catch (e) { return { error: 'Failed to parse response' }; }
        if (!Array.isArray(data) || data.length < 2) return { error: 'Unexpected response format' };

        const results = [];

        // Post
        const postListing = data[0] && data[0].data && data[0].data.children;
        const post = postListing && postListing[0] && postListing[0].data;
        if (post) {
          let postBody = post.selftext || '';
          if (postBody.length > maxLength) postBody = postBody.slice(0, maxLength) + '\\n... [truncated]';
          results.push({
            type: 'POST',
            author: post.author || '[deleted]',
            score: post.score || 0,
            text: post.title + (postBody ? '\\n\\n' + postBody : '') + (post.url && !post.is_self ? '\\n' + post.url : ''),
          });
        }

        // One indent unit per nesting level
        function indentOf(levels) {
          return ' '.repeat(levels);
        }

        // Sum the reply counts advertised by "more" stubs in a children list
        function countMoreStubs(children) {
          return children.reduce(function (sum, child) {
            if (!child || child.kind !== 'more') return sum;
            return sum + ((child.data && child.data.count) || 0);
          }, 0);
        }

        // Emit a "[+N more replies]" row beneath a comment at the given depth
        function pushMoreMarker(depth, count) {
          if (!(count > 0)) return;
          results.push({
            type: 'L' + (depth + 1),
            author: '',
            score: '',
            text: indentOf(depth + 1) + '[+' + count + ' more replies]',
          });
        }

        // Recursive comment walker
        // depth 0 = top-level comments; maxDepth is exclusive,
        // so --depth 1 means top-level only, --depth 2 means one reply level, etc.
        function walkComment(node, depth) {
          if (!node || node.kind !== 't1') return;
          const d = node.data || {};
          let body = d.body || '';
          if (body.length > maxLength) body = body.slice(0, maxLength) + '...';

          // Indent prefix: apply to every line so multiline bodies stay aligned
          let text = body;
          if (depth > 0) {
            const prefix = indentOf(depth) + '> ';
            text = body.split('\\n').map(function (line) { return prefix + line; }).join('\\n');
          }

          results.push({
            type: 'L' + depth,
            author: d.author || '[deleted]',
            score: d.score || 0,
            text: text,
          });

          // Count all available replies (for accurate "more" count).
          // The chain of checks also covers a falsy d.replies value.
          const children = (d.replies && d.replies.data && d.replies.data.children) || [];
          const replyNodes = children.filter(function (child) { return child && child.kind === 't1'; });
          const moreCount = countMoreStubs(children);

          // At depth cutoff: don't recurse, but show all replies as hidden
          if (depth + 1 >= maxDepth) {
            pushMoreMarker(depth, replyNodes.length + moreCount);
            return;
          }

          // Sort by score descending, take top N
          replyNodes.sort(function (a, b) {
            return ((b.data && b.data.score) || 0) - ((a.data && a.data.score) || 0);
          });
          const shown = replyNodes.slice(0, maxReplies);
          shown.forEach(function (child) { walkComment(child, depth + 1); });

          // Show hidden count (skipped replies + "more" stubs)
          pushMoreMarker(depth, replyNodes.length - shown.length + moreCount);
        }

        // Walk top-level comments; tolerate a malformed second listing
        const topLevel = (data[1] && data[1].data && data[1].data.children) || [];
        const topComments = topLevel.filter(function (child) { return child && child.kind === 't1'; });

        // Top-level are already sorted by Reddit (sort param), take top N
        topComments.slice(0, limit).forEach(function (child) { walkComment(child, 0); });

        // Count remaining
        const hiddenTopLevel = Math.max(0, topComments.length - limit) + countMoreStubs(topLevel);
        if (hiddenTopLevel > 0) {
          results.push({
            type: '',
            author: '',
            score: '',
            text: '[+' + hiddenTopLevel + ' more top-level comments]',
          });
        }

        return results;
      })()
    `);

    if (!data || typeof data !== 'object') {
      throw new CommandExecutionError('Failed to fetch post data');
    }
    if (!Array.isArray(data)) {
      // The page script reports failures as { error: string }
      throw new CommandExecutionError(data.error || 'Unexpected response');
    }
    return data;
  },
});