/ scripts / analyze-claude-md.js
analyze-claude-md.js
  1  #!/usr/bin/env node
  2  
  3  /**
  4   * CLAUDE.md Analysis Script (Non-Destructive)
  5   *
  6   * Analyzes CLAUDE.md for duplication and optimization opportunities WITHOUT modifying it.
  7   * Creates a detailed report of findings for manual review.
  8   *
  9   * Features:
 10   * - Identifies duplicate or near-duplicate paragraphs
 11   * - Finds repeated bullet points and list items
 12   * - Detects overlapping sections
 13   * - Calculates potential token savings
 14   * - Generates actionable recommendations
 15   * - DOES NOT modify the original file
 16   */
 17  
 18  import fs from 'fs';
 19  import path from 'path';
 20  import { fileURLToPath } from 'url';
 21  import dotenv from 'dotenv';
 22  
 23  const __filename = fileURLToPath(import.meta.url);
 24  const __dirname = path.dirname(__filename);
 25  
 26  // Load environment variables
 27  dotenv.config({ path: path.join(__dirname, '..', '.env') });
 28  
 29  // Configuration
 30  const CLAUDE_MD_PATH = path.join(__dirname, '..', 'CLAUDE.md');
 31  const REPORT_DIR = path.join(__dirname, '..', '.claude-analysis');
 32  
 33  // Ensure report directory exists
 34  if (!fs.existsSync(REPORT_DIR)) {
 35    fs.mkdirSync(REPORT_DIR, { recursive: true });
 36  }
 37  
 38  /**
 39   * Split content into paragraphs for analysis
 40   */
 41  function splitIntoParagraphs(content) {
 42    return content
 43      .split('\n\n')
 44      .map((p, i) => ({ index: i, text: p.trim(), lines: p.split('\n').length }))
 45      .filter(p => p.text.length > 20); // Ignore very short paragraphs
 46  }
 47  
 48  /**
 49   * Calculate similarity between two strings (Jaccard similarity)
 50   */
 51  function calculateSimilarity(str1, str2) {
 52    const set1 = new Set(str1.toLowerCase().split(/\s+/));
 53    const set2 = new Set(str2.toLowerCase().split(/\s+/));
 54  
 55    const intersection = new Set([...set1].filter(x => set2.has(x)));
 56    const union = new Set([...set1, ...set2]);
 57  
 58    return intersection.size / union.size;
 59  }
 60  
 61  /**
 62   * Find duplicate or similar paragraphs
 63   */
 64  function findDuplicates(paragraphs, threshold = 0.8) {
 65    const duplicates = [];
 66  
 67    for (let i = 0; i < paragraphs.length; i++) {
 68      for (let j = i + 1; j < paragraphs.length; j++) {
 69        const similarity = calculateSimilarity(paragraphs[i].text, paragraphs[j].text);
 70  
 71        if (similarity >= threshold) {
 72          duplicates.push({
 73            paragraph1: i,
 74            paragraph2: j,
 75            similarity: (similarity * 100).toFixed(1),
 76            text1: `${paragraphs[i].text.substring(0, 100)}...`,
 77            text2: `${paragraphs[j].text.substring(0, 100)}...`,
 78            potential_savings: Math.min(paragraphs[i].text.length, paragraphs[j].text.length),
 79          });
 80        }
 81      }
 82    }
 83  
 84    return duplicates.sort((a, b) => b.similarity - a.similarity);
 85  }
 86  
 87  /**
 88   * Find repeated bullet points across the document
 89   */
 90  function findRepeatedBullets(content) {
 91    const bullets = content.match(/^[-*]\s+.+$/gm) || [];
 92    const bulletCounts = {};
 93  
 94    bullets.forEach(bullet => {
 95      const normalized = bullet.replace(/^[-*]\s+/, '').trim();
 96      bulletCounts[normalized] = (bulletCounts[normalized] || 0) + 1;
 97    });
 98  
 99    return Object.entries(bulletCounts)
100      .filter(([_, count]) => count > 1)
101      .sort((a, b) => b[1] - a[1])
102      .map(([text, count]) => ({ text, count, savings: text.length * (count - 1) }));
103  }
104  
/**
 * Find repeated command examples.
 *
 * Tallies every backtick-delimited span in the document (the backticks are
 * kept as part of the reported text) and returns those seen at least four
 * times, ordered from most to least frequent.
 *
 * @param {string} content - Raw markdown text.
 * @returns {{ text: string, count: number }[]} Frequently repeated spans.
 */
function findRepeatedCommands(content) {
  const tally = new Map();

  for (const span of content.match(/`[^`]+`/g) ?? []) {
    tally.set(span, (tally.get(span) ?? 0) + 1);
  }

  const frequent = [];
  for (const [text, count] of tally) {
    // Only show if mentioned 4+ times
    if (count > 3) {
      frequent.push({ text, count });
    }
  }

  frequent.sort((left, right) => right.count - left.count);
  return frequent;
}
121  
/**
 * Analyze section structure and find potential consolidations.
 *
 * The document is cut at every line that begins with "## "; each non-blank
 * piece (including any text that precedes the first such heading) is
 * summarised.
 *
 * @param {string} content - Raw markdown text.
 * @returns {{ title: string, length: number, subsections: number,
 *   bullets: number, codeBlocks: number }[]} One record per piece: its first
 *   line as `title`, its character length, the number of lines starting with
 *   "###", the number of lines starting with "-" or "*", and half the number
 *   of triple-backtick fences.
 */
function analyzeSections(content) {
  const countMatches = (text, pattern) => (text.match(pattern) || []).length;

  const summaries = [];
  for (const chunk of content.split(/^## /gm)) {
    if (!chunk.trim()) {
      continue; // skip empty / whitespace-only pieces
    }

    // The title is everything up to the first newline of the piece.
    const firstBreak = chunk.indexOf('\n');
    const title = firstBreak === -1 ? chunk : chunk.slice(0, firstBreak);

    summaries.push({
      title,
      length: chunk.length,
      subsections: countMatches(chunk, /^###/gm),
      bullets: countMatches(chunk, /^[-*]/gm),
      codeBlocks: countMatches(chunk, /```/g) / 2,
    });
  }

  return summaries;
}
141  
/**
 * Generate comprehensive analysis report.
 *
 * Runs every analysis over `content`, renders the findings as a markdown
 * report, writes it to `REPORT_DIR/analysis-YYYY-MM-DD.md` (UTC date; an
 * existing report with the same name is overwritten) and echoes it to stdout.
 * The analysed file itself is never written.
 *
 * The "potential savings" figure is an estimate: every similar pair
 * contributes the length of its shorter paragraph, so a paragraph that takes
 * part in several pairs is counted more than once.
 *
 * @param {string} content - Full text of CLAUDE.md.
 * @returns {void}
 */
function generateReport(content) {
  console.log('\n📊 Analyzing CLAUDE.md...\n');

  const paragraphs = splitIntoParagraphs(content);
  const duplicates = findDuplicates(paragraphs, 0.7);
  const repeatedBullets = findRepeatedBullets(content);
  const repeatedCommands = findRepeatedCommands(content);
  const sections = analyzeSections(content);

  const totalSavings =
    duplicates.reduce((sum, d) => sum + d.potential_savings, 0) +
    repeatedBullets.reduce((sum, b) => sum + b.savings, 0);

  // An empty file would otherwise divide by zero and render "NaN%".
  const savingsPercent =
    content.length > 0 ? ((totalSavings / content.length) * 100).toFixed(1) : '0.0';

  // Take the timestamp once so the report header and the file name always agree.
  const generatedAt = new Date().toISOString();

  const report = `# CLAUDE.md Analysis Report
Generated: ${generatedAt}

## Summary
- Total size: ${content.length.toLocaleString()} characters (≈${Math.ceil(content.length / 4)} tokens)
- Sections: ${sections.length}
- Potential savings: ~${totalSavings.toLocaleString()} characters (${savingsPercent}%)

## Duplicate/Similar Paragraphs (${duplicates.length} found)

${duplicates
  .slice(0, 10)
  .map(
    (d, i) => `### ${i + 1}. Similarity: ${d.similarity}%
**Paragraph ${d.paragraph1}:** "${d.text1}"
**Paragraph ${d.paragraph2}:** "${d.text2}"
**Potential savings:** ${d.potential_savings} characters

`
  )
  .join('')}
${duplicates.length > 10 ? `\n... and ${duplicates.length - 10} more\n` : ''}

## Repeated Bullet Points (${repeatedBullets.length} found)

${repeatedBullets
  .slice(0, 15)
  .map(
    (b, i) =>
      `${i + 1}. **${b.count}x** - "${b.text.substring(0, 80)}${b.text.length > 80 ? '...' : ''}" (saves ${b.savings} chars)`
  )
  .join('\n')}
${repeatedBullets.length > 15 ? `\n... and ${repeatedBullets.length - 15} more` : ''}

## Frequently Mentioned Commands (${repeatedCommands.length} found)

${repeatedCommands
  .slice(0, 10)
  .map((c, i) => `${i + 1}. **${c.count}x** - ${c.text}`)
  .join('\n')}

## Section Analysis

${sections
  .map(
    (s, i) => `${i + 1}. **${s.title}**
   - ${s.length} chars, ${s.subsections} subsections, ${s.bullets} bullets, ${s.codeBlocks} code blocks`
  )
  .join('\n')}

## Recommendations

### High Priority
1. **Consolidate testing guidance** - Coverage targets and test commands appear in multiple sections
2. **Merge git workflow guidance** - Similar pre-commit steps repeated across sections
3. **Create integration testing reference** - Test credentials mentioned in 3+ places

### Medium Priority
4. **Remove duplicate bullet points** - ${repeatedBullets.length} bullets appear multiple times
5. **Consolidate common commands** - Some commands referenced ${repeatedCommands[0]?.count ?? 0}+ times

### Low Priority
6. **Add cross-references** - Instead of repeating, use "See [Section Name]"
7. **Consider moving examples** - Some detailed examples could move to dedicated docs

## Manual Optimization Steps

1. **Review duplicate paragraphs** above and consolidate similar content
2. **Audit repeated bullets** and keep only in one canonical location
3. **Check testing sections** - Merge "Testing Automation" and testing parts of "Development Commands"
4. **Consolidate git workflow** - Create single authoritative git section
5. **Add cross-references** where appropriate instead of duplicating

## Notes

This is a NON-DESTRUCTIVE analysis. No changes have been made to CLAUDE.md.
Review these findings and make manual edits as appropriate.

To back up the current version before making changes:
\`\`\`bash
cp CLAUDE.md .claude-analysis/CLAUDE-$(date +%Y%m%d).md.backup
\`\`\`
`;

  const reportPath = path.join(REPORT_DIR, `analysis-${generatedAt.split('T')[0]}.md`);
  fs.writeFileSync(reportPath, report);

  console.log(report);
  console.log(`\n✅ Analysis complete!`);
  console.log(`📄 Full report saved to: ${reportPath}\n`);
  console.log(`ℹ️  No changes were made to CLAUDE.md`);
  console.log(`ℹ️  Review the report and make manual optimizations as needed\n`);
}
251  
/**
 * Main execution: read CLAUDE.md and generate the analysis report.
 *
 * Any failure while reading the file or producing the report is printed to
 * stderr and the process exits with status 1.
 */
function main() {
  try {
    console.log('🔍 CLAUDE.md Analysis (Non-Destructive)\n');

    const content = fs.readFileSync(CLAUDE_MD_PATH, 'utf-8');
    generateReport(content);
  } catch (error) {
    // A thrown value is not guaranteed to be an Error; avoid printing "undefined".
    const reason = error instanceof Error ? error.message : String(error);
    console.error('\n❌ Error during analysis:', reason);
    process.exit(1);
  }
}

main();