analyze-claude-md.js
#!/usr/bin/env node

/**
 * CLAUDE.md Analysis Script (Non-Destructive)
 *
 * Analyzes CLAUDE.md for duplication and optimization opportunities WITHOUT modifying it.
 * Creates a detailed report of findings for manual review.
 *
 * Features:
 * - Identifies duplicate or near-duplicate paragraphs
 * - Finds repeated bullet points and list items
 * - Detects overlapping sections
 * - Calculates potential token savings
 * - Generates actionable recommendations
 * - DOES NOT modify the original file
 */

import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import dotenv from 'dotenv';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Load environment variables
dotenv.config({ path: path.join(__dirname, '..', '.env') });

// Configuration
const CLAUDE_MD_PATH = path.join(__dirname, '..', 'CLAUDE.md');
const REPORT_DIR = path.join(__dirname, '..', '.claude-analysis');

// Ensure report directory exists. With `recursive: true` this is a no-op when
// the directory is already present, so no separate existence check is needed.
fs.mkdirSync(REPORT_DIR, { recursive: true });

/**
 * Shorten text for display, appending "..." only when something was cut off.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxLength - Maximum number of characters to keep.
 * @returns {string} The original text, or its first `maxLength` characters plus "...".
 */
function truncate(text, maxLength) {
  return text.length > maxLength ? `${text.substring(0, maxLength)}...` : text;
}

/**
 * Count how many times each string occurs.
 *
 * A Map is used instead of a plain object so that document text such as
 * "constructor" or "toString" cannot collide with inherited Object.prototype
 * properties and corrupt the count.
 *
 * @param {string[]} items - Strings to tally.
 * @returns {Map<string, number>} Occurrence count per distinct string, in first-seen order.
 */
function countOccurrences(items) {
  const counts = new Map();
  for (const item of items) {
    counts.set(item, (counts.get(item) ?? 0) + 1);
  }
  return counts;
}

/**
 * Split content into paragraphs for analysis.
 *
 * Paragraphs are separated by a blank line; both LF and CRLF line endings are
 * accepted. Paragraphs of 20 characters or fewer (after trimming) are dropped.
 *
 * @param {string} content - Full document text.
 * @returns {{index: number, text: string, lines: number}[]} Paragraphs, where
 *   `index` is the paragraph's position in the unfiltered split.
 */
function splitIntoParagraphs(content) {
  return content
    .split(/\r?\n\r?\n/)
    .map((p, i) => ({ index: i, text: p.trim(), lines: p.split('\n').length }))
    .filter(p => p.text.length > 20); // Ignore very short paragraphs
}

/**
 * Calculate similarity between two strings (Jaccard similarity over
 * lower-cased, whitespace-separated words).
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} |intersection| / |union| of the two word sets, in [0, 1].
 */
function calculateSimilarity(str1, str2) {
  const set1 = new Set(str1.toLowerCase().split(/\s+/));
  const set2 = new Set(str2.toLowerCase().split(/\s+/));

  const intersection = new Set([...set1].filter(x => set2.has(x)));
  const union = new Set([...set1, ...set2]);

  // split() always yields at least one element, so the union is never empty;
  // the guard only protects against a future change to the tokenizer.
  return union.size === 0 ? 0 : intersection.size / union.size;
}

/**
 * Find duplicate or similar paragraphs by comparing every pair.
 *
 * @param {{index?: number, text: string}[]} paragraphs - Paragraphs to compare.
 * @param {number} [threshold=0.8] - Minimum Jaccard similarity (0-1) to report.
 * @returns {object[]} Matching pairs sorted by descending similarity. Each entry
 *   has `paragraph1`/`paragraph2` (the paragraphs' own `index` when present,
 *   otherwise their position in the input array), `similarity` (percentage
 *   string with one decimal), `text1`/`text2` (previews of up to 100
 *   characters) and `potential_savings` (length of the shorter paragraph).
 */
function findDuplicates(paragraphs, threshold = 0.8) {
  const duplicates = [];

  for (let i = 0; i < paragraphs.length; i++) {
    for (let j = i + 1; j < paragraphs.length; j++) {
      const similarity = calculateSimilarity(paragraphs[i].text, paragraphs[j].text);

      if (similarity >= threshold) {
        duplicates.push({
          // Report the position in the document, not in the filtered list.
          paragraph1: paragraphs[i].index ?? i,
          paragraph2: paragraphs[j].index ?? j,
          similarity: (similarity * 100).toFixed(1),
          text1: truncate(paragraphs[i].text, 100),
          text2: truncate(paragraphs[j].text, 100),
          potential_savings: Math.min(paragraphs[i].text.length, paragraphs[j].text.length),
        });
      }
    }
  }

  // `similarity` is a numeric string; convert explicitly for the comparison.
  return duplicates.sort((a, b) => Number(b.similarity) - Number(a.similarity));
}

/**
 * Find repeated bullet points across the document.
 *
 * Only unindented lines starting with "-" or "*" followed by whitespace are
 * considered bullets.
 *
 * @param {string} content - Full document text.
 * @returns {{text: string, count: number, savings: number}[]} Bullets that occur
 *   more than once, most frequent first; `savings` is the character count of
 *   the redundant copies.
 */
function findRepeatedBullets(content) {
  const bullets = content.match(/^[-*]\s+.+$/gm) || [];
  const normalized = bullets.map(bullet => bullet.replace(/^[-*]\s+/, '').trim());

  return [...countOccurrences(normalized)]
    .filter(([, count]) => count > 1)
    .sort((a, b) => b[1] - a[1])
    .map(([text, count]) => ({ text, count, savings: text.length * (count - 1) }));
}

/**
 * Find repeated command examples (inline code spans delimited by backticks).
 *
 * @param {string} content - Full document text.
 * @returns {{text: string, count: number}[]} Spans (backticks included) that
 *   occur four or more times, most frequent first.
 */
function findRepeatedCommands(content) {
  const commands = content.match(/`[^`]+`/g) || [];

  return [...countOccurrences(commands)]
    .filter(([, count]) => count > 3) // Only show if mentioned 4+ times
    .sort((a, b) => b[1] - a[1])
    .map(([text, count]) => ({ text, count }));
}

/**
 * Analyze section structure and find potential consolidations.
 *
 * The document is split at every line starting with "## "; any text before the
 * first such heading is reported as a section of its own.
 *
 * @param {string} content - Full document text.
 * @returns {{title: string, length: number, subsections: number, bullets: number, codeBlocks: number}[]}
 *   One entry per non-empty section.
 */
function analyzeSections(content) {
  const sections = content.split(/^## /gm).filter(s => s.trim());

  return sections.map(section => {
    const [title] = section.split('\n');

    return {
      title,
      length: section.length,
      subsections: (section.match(/^###/gm) || []).length,
      // Same bullet pattern as findRepeatedBullets, so "---" rules and
      // "**bold**" lines are not miscounted as bullets.
      bullets: (section.match(/^[-*]\s+/gm) || []).length,
      // Two fences delimit one block; floor so an unbalanced fence does not
      // produce a fractional count.
      codeBlocks: Math.floor((section.match(/```/g) || []).length / 2),
    };
  });
}

/**
 * Generate the comprehensive analysis report, write it to REPORT_DIR as
 * `analysis-YYYY-MM-DD.md`, and echo it to the console.
 *
 * @param {string} content - Full text of CLAUDE.md.
 * @returns {void}
 */
function generateReport(content) {
  console.log('\n🔍 Analyzing CLAUDE.md...\n');

  const paragraphs = splitIntoParagraphs(content);
  const duplicates = findDuplicates(paragraphs, 0.7);
  const repeatedBullets = findRepeatedBullets(content);
  const repeatedCommands = findRepeatedCommands(content);
  const sections = analyzeSections(content);

  // Rough estimate: savings are summed per duplicate pair, so a paragraph that
  // matches several others may be counted more than once.
  const totalSavings =
    duplicates.reduce((sum, d) => sum + d.potential_savings, 0) +
    repeatedBullets.reduce((sum, b) => sum + b.savings, 0);

  // Avoid "NaN%" when the file is empty.
  const savingsPercent =
    content.length > 0 ? ((totalSavings / content.length) * 100).toFixed(1) : '0.0';

  // Take the timestamp once so the report header and the file name agree.
  const generatedAt = new Date().toISOString();

  const duplicateEntries = duplicates
    .slice(0, 10)
    .map(
      (d, i) => `### ${i + 1}. Similarity: ${d.similarity}%
**Paragraph ${d.paragraph1}:** "${d.text1}"
**Paragraph ${d.paragraph2}:** "${d.text2}"
**Potential savings:** ${d.potential_savings} characters

`
    )
    .join('');
  const moreDuplicates = duplicates.length > 10 ? `\n... and ${duplicates.length - 10} more\n` : '';

  const bulletEntries = repeatedBullets
    .slice(0, 15)
    .map((b, i) => `${i + 1}. **${b.count}x** - "${truncate(b.text, 80)}" (saves ${b.savings} chars)`)
    .join('\n');
  const moreBullets =
    repeatedBullets.length > 15 ? `\n... and ${repeatedBullets.length - 15} more` : '';

  const commandEntries = repeatedCommands
    .slice(0, 10)
    .map((c, i) => `${i + 1}. **${c.count}x** - ${c.text}`)
    .join('\n');

  const sectionEntries = sections
    .map(
      (s, i) => `${i + 1}. **${s.title}**
   - ${s.length} chars, ${s.subsections} subsections, ${s.bullets} bullets, ${s.codeBlocks} code blocks`
    )
    .join('\n');

  const report = `# CLAUDE.md Analysis Report
Generated: ${generatedAt}

## Summary
- Total size: ${content.length.toLocaleString()} characters (≈${Math.ceil(content.length / 4)} tokens)
- Sections: ${sections.length}
- Potential savings: ~${totalSavings.toLocaleString()} characters (${savingsPercent}%)

## Duplicate/Similar Paragraphs (${duplicates.length} found)

${duplicateEntries}
${moreDuplicates}

## Repeated Bullet Points (${repeatedBullets.length} found)

${bulletEntries}
${moreBullets}

## Frequently Mentioned Commands (${repeatedCommands.length} found)

${commandEntries}

## Section Analysis

${sectionEntries}

## Recommendations

### High Priority
1. **Consolidate testing guidance** - Coverage targets and test commands appear in multiple sections
2. **Merge git workflow guidance** - Similar pre-commit steps repeated across sections
3. **Create integration testing reference** - Test credentials mentioned in 3+ places

### Medium Priority
4. **Remove duplicate bullet points** - ${repeatedBullets.length} bullets appear multiple times
5. **Consolidate common commands** - Some commands referenced ${repeatedCommands[0]?.count || 0}+ times

### Low Priority
6. **Add cross-references** - Instead of repeating, use "See [Section Name]"
7. **Consider moving examples** - Some detailed examples could move to dedicated docs

## Manual Optimization Steps

1. **Review duplicate paragraphs** above and consolidate similar content
2. **Audit repeated bullets** and keep only in one canonical location
3. **Check testing sections** - Merge "Testing Automation" and testing parts of "Development Commands"
4. **Consolidate git workflow** - Create single authoritative git section
5. **Add cross-references** where appropriate instead of duplicating

## Notes

This is a NON-DESTRUCTIVE analysis. No changes have been made to CLAUDE.md.
Review these findings and make manual edits as appropriate.

To back up the current version before you make changes:
\`\`\`bash
cp CLAUDE.md .claude-analysis/CLAUDE-$(date +%Y%m%d).md.backup
\`\`\`
`;

  const reportPath = path.join(REPORT_DIR, `analysis-${generatedAt.split('T')[0]}.md`);
  fs.writeFileSync(reportPath, report);

  console.log(report);
  console.log(`\n✅ Analysis complete!`);
  console.log(`📄 Full report saved to: ${reportPath}\n`);
  console.log(`ℹ️ No changes were made to CLAUDE.md`);
  console.log(`ℹ️ Review the report and make manual optimizations as needed\n`);
}

/**
 * Main execution: read CLAUDE.md and generate the report. Any failure is
 * logged and the process exits with status 1.
 */
function main() {
  try {
    console.log('📊 CLAUDE.md Analysis (Non-Destructive)\n');

    const content = fs.readFileSync(CLAUDE_MD_PATH, 'utf-8');
    generateReport(content);
  } catch (error) {
    console.error('\n❌ Error during analysis:', error.message);
    process.exit(1);
  }
}

main();