Cradicle Explorer

read.js
  1  /**
  2   * Reddit post reader with threaded comment tree.
  3   *
  4   * Replaces the original flat read.yaml with recursive comment traversal:
  5   * - Top-K comments by score at each level
  6   * - Configurable depth and replies-per-level
  7   * - Indented output showing conversation threads
  8   */
  9  import { cli, Strategy } from '@jackwener/opencli/registry';
 10  import { CommandExecutionError } from '@jackwener/opencli/errors';
 11  cli({
 12      site: 'reddit',
 13      name: 'read',
 14      description: 'Read a Reddit post and its comments',
 15      domain: 'reddit.com',
 16      strategy: Strategy.COOKIE,
 17      args: [
 18          { name: 'post-id', required: true, positional: true, help: 'Post ID (e.g. 1abc123) or full URL' },
 19          { name: 'sort', default: 'best', help: 'Comment sort: best, top, new, controversial, old, qa' },
 20          { name: 'limit', type: 'int', default: 25, help: 'Number of top-level comments' },
 21          { name: 'depth', type: 'int', default: 2, help: 'Max reply depth (1=no replies, 2=one level of replies, etc.)' },
 22          { name: 'replies', type: 'int', default: 5, help: 'Max replies shown per comment at each level (sorted by score)' },
 23          { name: 'max-length', type: 'int', default: 2000, help: 'Max characters per comment body (min 100)' },
 24      ],
 25      columns: ['type', 'author', 'score', 'text'],
 26      func: async (page, kwargs) => {
 27          const sort = kwargs.sort ?? 'best';
 28          const limit = Math.max(1, kwargs.limit ?? 25);
 29          const maxDepth = Math.max(1, kwargs.depth ?? 2);
 30          const maxReplies = Math.max(1, kwargs.replies ?? 5);
 31          const maxLength = Math.max(100, kwargs['max-length'] ?? 2000);
 32          await page.goto('https://www.reddit.com');
 33          const data = await page.evaluate(`
 34        (async function() {
 35          var postId = ${JSON.stringify(kwargs['post-id'])};
 36          var urlMatch = postId.match(/comments\\/([a-z0-9]+)/);
 37          if (urlMatch) postId = urlMatch[1];
 38  
 39          var sort = ${JSON.stringify(sort)};
 40          var limit = ${limit};
 41          var maxDepth = ${maxDepth};
 42          var maxReplies = ${maxReplies};
 43          var maxLength = ${maxLength};
 44  
 45          // Request more from API than top-level limit to get inline replies
 46          // depth param tells Reddit how deep to inline replies vs "more" stubs
 47          var apiLimit = Math.max(limit * 3, 100);
 48          var res = await fetch(
 49            '/comments/' + postId + '.json?sort=' + sort + '&limit=' + apiLimit + '&depth=' + (maxDepth + 1) + '&raw_json=1',
 50            { credentials: 'include' }
 51          );
 52          if (!res.ok) return { error: 'Reddit API returned HTTP ' + res.status };
 53  
 54          var data;
 55          try { data = await res.json(); } catch(e) { return { error: 'Failed to parse response' }; }
 56          if (!Array.isArray(data) || data.length < 2) return { error: 'Unexpected response format' };
 57  
 58          var results = [];
 59  
 60          // Post
 61          var post = data[0] && data[0].data && data[0].data.children && data[0].data.children[0] && data[0].data.children[0].data;
 62          if (post) {
 63            var body = post.selftext || '';
 64            if (body.length > maxLength) body = body.slice(0, maxLength) + '\\n... [truncated]';
 65            results.push({
 66              type: 'POST',
 67              author: post.author || '[deleted]',
 68              score: post.score || 0,
 69              text: post.title + (body ? '\\n\\n' + body : '') + (post.url && !post.is_self ? '\\n' + post.url : ''),
 70            });
 71          }
 72  
 73          // Recursive comment walker
 74          // depth 0 = top-level comments; maxDepth is exclusive,
 75          // so --depth 1 means top-level only, --depth 2 means one reply level, etc.
 76          function walkComment(node, depth) {
 77            if (!node || node.kind !== 't1') return;
 78            var d = node.data;
 79            var body = d.body || '';
 80            if (body.length > maxLength) body = body.slice(0, maxLength) + '...';
 81  
 82            // Indent prefix: apply to every line so multiline bodies stay aligned
 83            var indent = '';
 84            for (var i = 0; i < depth; i++) indent += '  ';
 85            var prefix = depth === 0 ? '' : indent + '> ';
 86            var indentedBody = depth === 0
 87              ? body
 88              : body.split('\\n').map(function(line) { return prefix + line; }).join('\\n');
 89  
 90            results.push({
 91              type: depth === 0 ? 'L0' : 'L' + depth,
 92              author: d.author || '[deleted]',
 93              score: d.score || 0,
 94              text: indentedBody,
 95            });
 96  
 97            // Count all available replies (for accurate "more" count)
 98            var t1Children = [];
 99            var moreCount = 0;
100            if (d.replies && d.replies.data && d.replies.data.children) {
101              var children = d.replies.data.children;
102              for (var i = 0; i < children.length; i++) {
103                if (children[i].kind === 't1') {
104                  t1Children.push(children[i]);
105                } else if (children[i].kind === 'more') {
106                  moreCount += children[i].data.count || 0;
107                }
108              }
109            }
110  
111            // At depth cutoff: don't recurse, but show all replies as hidden
112            if (depth + 1 >= maxDepth) {
113              var totalHidden = t1Children.length + moreCount;
114              if (totalHidden > 0) {
115                var cutoffIndent = '';
116                for (var j = 0; j <= depth; j++) cutoffIndent += '  ';
117                results.push({
118                  type: 'L' + (depth + 1),
119                  author: '',
120                  score: '',
121                  text: cutoffIndent + '[+' + totalHidden + ' more replies]',
122                });
123              }
124              return;
125            }
126  
127            // Sort by score descending, take top N
128            t1Children.sort(function(a, b) { return (b.data.score || 0) - (a.data.score || 0); });
129            var toProcess = Math.min(t1Children.length, maxReplies);
130            for (var i = 0; i < toProcess; i++) {
131              walkComment(t1Children[i], depth + 1);
132            }
133  
134            // Show hidden count (skipped replies + "more" stubs)
135            var hidden = t1Children.length - toProcess + moreCount;
136            if (hidden > 0) {
137              var moreIndent = '';
138              for (var j = 0; j <= depth; j++) moreIndent += '  ';
139              results.push({
140                type: 'L' + (depth + 1),
141                author: '',
142                score: '',
143                text: moreIndent + '[+' + hidden + ' more replies]',
144              });
145            }
146          }
147  
148          // Walk top-level comments
149          var topLevel = data[1].data.children || [];
150          var t1TopLevel = [];
151          for (var i = 0; i < topLevel.length; i++) {
152            if (topLevel[i].kind === 't1') t1TopLevel.push(topLevel[i]);
153          }
154  
155          // Top-level are already sorted by Reddit (sort param), take top N
156          for (var i = 0; i < Math.min(t1TopLevel.length, limit); i++) {
157            walkComment(t1TopLevel[i], 0);
158          }
159  
160          // Count remaining
161          var moreTopLevel = topLevel.filter(function(c) { return c.kind === 'more'; })
162            .reduce(function(sum, c) { return sum + (c.data.count || 0); }, 0);
163          var hiddenTopLevel = Math.max(0, t1TopLevel.length - limit) + moreTopLevel;
164          if (hiddenTopLevel > 0) {
165            results.push({
166              type: '',
167              author: '',
168              score: '',
169              text: '[+' + hiddenTopLevel + ' more top-level comments]',
170            });
171          }
172  
173          return results;
174        })()
175      `);
176          if (!data || typeof data !== 'object')
177              throw new CommandExecutionError('Failed to fetch post data');
178          if (!Array.isArray(data) && data.error)
179              throw new CommandExecutionError(data.error);
180          if (!Array.isArray(data))
181              throw new CommandExecutionError('Unexpected response');
182          return data;
183      },
184  });