/ scripts / optimize-claude-md.js
optimize-claude-md.js
  1  #!/usr/bin/env node
  2  
  3  /**
  4   * CLAUDE.md Optimization Script
  5   *
  6   * Removes duplication and optimizes the size of CLAUDE.md while preserving all important information.
  7   * Uses LLM to intelligently consolidate redundant content.
  8   *
  9   * Features:
 10   * - Backs up original file before changes
 11   * - Detects and removes duplicate content
 12   * - Consolidates overlapping sections
 13   * - Generates detailed change report
 14   * - Preserves all critical information
 15   */
 16  
 17  import fs from 'fs';
 18  import path from 'path';
 19  import { fileURLToPath } from 'url';
 20  import dotenv from 'dotenv';
 21  
 22  const __filename = fileURLToPath(import.meta.url);
 23  const __dirname = path.dirname(__filename);
 24  
 25  // Load environment variables
 26  dotenv.config({ path: path.join(__dirname, '..', '.env') });
 27  
 28  // Configuration
 29  const CLAUDE_MD_PATH = path.join(__dirname, '..', 'CLAUDE.md');
 30  const BACKUP_DIR = path.join(__dirname, '..', '.claude-backups');
 31  const { OPENROUTER_API_KEY } = process.env;
 32  const OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1';
 33  
 34  // Ensure backup directory exists
 35  if (!fs.existsSync(BACKUP_DIR)) {
 36    fs.mkdirSync(BACKUP_DIR, { recursive: true });
 37  }
 38  
 39  /**
 40   * Call OpenRouter API
 41   */
 42  async function callOpenRouter(messages, maxTokens = 4096) {
 43    const response = await fetch(`${OPENROUTER_BASE_URL}/chat/completions`, {
 44      method: 'POST',
 45      headers: {
 46        Authorization: `Bearer ${OPENROUTER_API_KEY}`,
 47        'Content-Type': 'application/json',
 48        'HTTP-Referer': 'https://github.com/333method',
 49        'X-Title': '333 Method - CLAUDE.md Optimizer',
 50      },
 51      body: JSON.stringify({
 52        model: 'anthropic/claude-3.5-sonnet',
 53        messages,
 54        max_tokens: maxTokens,
 55        temperature: 0.3,
 56      }),
 57    });
 58  
 59    if (!response.ok) {
 60      const error = await response.text();
 61      throw new Error(`OpenRouter API error: ${response.status} ${error}`);
 62    }
 63  
 64    const data = await response.json();
 65    return data.choices[0].message.content;
 66  }
 67  
 68  /**
 69   * Create a timestamped backup of CLAUDE.md
 70   */
 71  function backupClaudeMd() {
 72    const timestamp = new Date().toISOString().replace(/[:.]/g, '-').split('T')[0];
 73    const backupPath = path.join(BACKUP_DIR, `CLAUDE-${timestamp}.md`);
 74  
 75    const content = fs.readFileSync(CLAUDE_MD_PATH, 'utf-8');
 76    fs.writeFileSync(backupPath, content);
 77  
 78    console.log(`āœ“ Backup created: ${backupPath}`);
 79    return backupPath;
 80  }
 81  
 82  /**
 83   * Analyze CLAUDE.md for duplication and optimization opportunities
 84   */
 85  async function analyzeForOptimization(content) {
 86    console.log('\nšŸ“Š Analyzing CLAUDE.md for optimization opportunities...\n');
 87  
 88    const prompt = `You are analyzing a CLAUDE.md file (project documentation for AI assistants) for duplication and optimization opportunities.
 89  
 90  Current file size: ${content.length} characters
 91  
 92  Please analyze this content and identify:
 93  
 94  1. **Duplicate Content**: Sections or paragraphs that repeat the same information
 95  2. **Overlapping Sections**: Different sections that cover similar topics and could be consolidated
 96  3. **Redundant Details**: Information that is stated multiple times in different ways
 97  4. **Optimization Opportunities**: Ways to preserve all information while reducing token count
 98  
 99  Focus on:
100  - Testing/coverage targets mentioned multiple times
101  - Git workflow guidance repeated across sections
102  - Integration testing details duplicated
103  - Command examples that overlap
104  - Similar checklists or protocols
105  
106  Provide specific line numbers or section names where duplication exists.
107  
108  Here's the content:
109  
110  ${content}
111  
112  Return your analysis in this JSON format:
113  {
114    "duplicates": [
115      {
116        "type": "exact|similar|overlapping",
117        "sections": ["Section A", "Section B"],
118        "description": "What's duplicated",
119        "suggestion": "How to consolidate"
120      }
121    ],
122    "metrics": {
123      "current_length": ${content.length},
124      "estimated_reduction_percent": 15,
125      "token_count_estimate": 20000
126    },
127    "priority_areas": [
128      {
129        "area": "Area name",
130        "reason": "Why this is a priority for consolidation"
131      }
132    ]
133  }`;
134  
135    const analysisText = await callOpenRouter([{ role: 'user', content: prompt }], 4096);
136  
137    // Extract JSON from response (may be wrapped in markdown code blocks)
138    const jsonMatch =
139      analysisText.match(/```json\n([\s\S]+?)\n```/) || analysisText.match(/({[\s\S]+})/);
140  
141    if (!jsonMatch) {
142      console.error('Could not parse analysis response');
143      return null;
144    }
145  
146    return JSON.parse(jsonMatch[1]);
147  }
148  
/**
 * Ask the LLM to remove the duplication listed in `analysis` from CLAUDE.md.
 *
 * @param {string} content - Current text of CLAUDE.md.
 * @param {object} analysis - Analysis result; only `analysis.duplicates` is
 *   read. Each entry may carry `type`, `description`, `sections` and
 *   `suggestion`; missing fields are replaced by placeholders because the
 *   entries are LLM-produced and not guaranteed to be complete.
 * @returns {Promise<string>} The optimized text, or `content` unchanged when
 *   the analysis lists no duplicates (no request is made in that case).
 * @throws {Error} Propagates failures from `callOpenRouter`.
 */
async function optimizeContent(content, analysis) {
  const duplicates = Array.isArray(analysis?.duplicates) ? analysis.duplicates : [];

  // Nothing to consolidate: do not ask the model to rewrite the file.
  if (duplicates.length === 0) {
    console.log('\nNo duplication reported; leaving CLAUDE.md content unchanged.\n');
    return content;
  }

  console.log('\n🔧 Optimizing CLAUDE.md content...\n');

  const duplicatesDescription = duplicates
    .map((d, i) => {
      const type = String(d?.type ?? 'unknown').toUpperCase();
      const description = d?.description ?? '(no description)';
      const sections = Array.isArray(d?.sections)
        ? d.sections.join(', ')
        : String(d?.sections ?? '(unspecified)');
      const suggestion = d?.suggestion ?? '(no suggestion)';
      return `${i + 1}. ${type}: ${description}\n   Sections: ${sections}\n   Suggestion: ${suggestion}`;
    })
    .join('\n\n');

  const prompt = `You are optimizing a CLAUDE.md file to remove duplication while preserving ALL important information.

**Your Goal:**
Make TARGETED, CONSERVATIVE edits to remove ONLY obvious duplication. Preserve 90%+ of the content.

**Analysis Results:**
${duplicatesDescription}

**CRITICAL RULES - READ CAREFULLY:**
1. DO NOT rewrite sections - only remove exact duplicates
2. DO NOT summarize or condense unique information
3. DO NOT remove any code examples, commands, or specific instructions
4. DO NOT change section structure or headers
5. ONLY merge content when it's nearly identical
6. ONLY remove information if it appears verbatim elsewhere
7. Target 10-20% reduction MAX - if you're reducing more, you're removing too much
8. When in doubt, KEEP the content

**Allowed Optimizations (ONLY these):**
- Remove exact duplicate paragraphs or bullet points
- Consolidate identical coverage targets mentioned 2-3 times
- Merge identical command examples if they appear multiple times
- Add cross-references like "See [Section]" ONLY if content is truly identical
- Fix minor formatting issues

**FORBIDDEN Actions:**
- Rewriting entire sections in a more concise way
- Removing "redundant" details that provide context
- Consolidating similar-but-different information
- Removing examples, workflows, or detailed procedures
- Changing the structure or organization

**Original Content:**
${content}

**Output:**
Return the optimized CLAUDE.md content as plain markdown (not in a code block).
Start with "# CLAUDE.md" and include all content.`;

  return callOpenRouter([{ role: 'user', content: prompt }], 16000);
}
203  
/**
 * Build a markdown change report, save it in the backup directory, and print it.
 *
 * The file name carries the full ISO timestamp so several runs on the same
 * day do not overwrite each other's reports.
 *
 * @param {string} original - CLAUDE.md text before optimization.
 * @param {string} optimized - CLAUDE.md text after optimization.
 * @param {object} analysis - Analysis result; `duplicates` and
 *   `priority_areas` are read and may be missing or incomplete.
 * @returns {void}
 * @throws {Error} Propagates file-system errors from writing the report.
 */
function generateReport(original, optimized, analysis) {
  const originalLines = original.split('\n').length;
  const optimizedLines = optimized.split('\n').length;
  const originalChars = original.length;
  const optimizedChars = optimized.length;

  const savedChars = originalChars - optimizedChars;
  // An empty original would otherwise produce "NaN%".
  const savedPercent = originalChars === 0 ? 0 : (savedChars / originalChars) * 100;

  const duplicates = Array.isArray(analysis?.duplicates) ? analysis.duplicates : [];
  const priorityAreas = Array.isArray(analysis?.priority_areas) ? analysis.priority_areas : [];

  // Entries are LLM-produced, so any field may be absent.
  const duplicatesSection =
    duplicates.length === 0
      ? '_None reported._'
      : duplicates
          .map((d, i) => {
            const type = String(d?.type ?? 'unknown').toUpperCase();
            const description = d?.description ?? '(no description)';
            const sections = Array.isArray(d?.sections)
              ? d.sections.join(', ')
              : String(d?.sections ?? '(unspecified)');
            const suggestion = d?.suggestion ?? '(no suggestion)';
            return `### ${i + 1}. ${type}: ${description}
**Sections:** ${sections}
**Action:** ${suggestion}
`;
          })
          .join('\n');

  const prioritySection =
    priorityAreas.length === 0
      ? '_None reported._'
      : priorityAreas
          .map((p, i) => `${i + 1}. **${p?.area ?? '(unnamed)'}**: ${p?.reason ?? '(no reason given)'}`)
          .join('\n');

  // One instant for both the report body and the file name.
  const generatedAt = new Date().toISOString();

  const report = `# CLAUDE.md Optimization Report
Generated: ${generatedAt}

## Metrics
- Original: ${originalChars.toLocaleString()} characters, ${originalLines.toLocaleString()} lines
- Optimized: ${optimizedChars.toLocaleString()} characters, ${optimizedLines.toLocaleString()} lines
- Reduction: ${savedChars.toLocaleString()} characters (${savedPercent.toFixed(1)}%)
- Lines saved: ${originalLines - optimizedLines}

## Duplication Analysis
${duplicatesSection}

## Priority Areas Addressed
${prioritySection}

## Next Steps
- Review the optimized CLAUDE.md
- Test with Claude Code to ensure all important information is accessible
- If satisfied, the optimized version has been saved
- If not, restore from backup: \`.claude-backups/\`
`;

  const reportPath = path.join(
    BACKUP_DIR,
    `optimization-report-${generatedAt.replace(/[:.]/g, '-')}.md`
  );
  fs.writeFileSync(reportPath, report);

  console.log(report);
  console.log(`\n📄 Full report saved to: ${reportPath}`);
}
251  
/**
 * Run the whole optimization: check the API key, read and back up CLAUDE.md,
 * analyze it, ask the LLM for an optimized version, validate that version,
 * overwrite CLAUDE.md, and write the change report.
 *
 * Any failure is logged and ends the process with exit code 1. CLAUDE.md is
 * only overwritten after the optimized text passed the sanity checks below.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // The optimizer is instructed to cut at most 10-20%. Output that keeps less
  // than half of the original is treated as truncated or over-aggressive and
  // is never written to disk.
  const MIN_RETAINED_RATIO = 0.5;

  try {
    console.log('🚀 CLAUDE.md Optimization Script\n');

    // Check for API key
    if (!OPENROUTER_API_KEY) {
      console.error('❌ Error: OPENROUTER_API_KEY not found in environment');
      process.exit(1);
    }

    // Read current content
    const originalContent = fs.readFileSync(CLAUDE_MD_PATH, 'utf-8');
    console.log(`📖 Current CLAUDE.md: ${originalContent.length.toLocaleString()} characters\n`);

    // Backup before anything can modify the file
    const backupPath = backupClaudeMd();

    // Analyze
    const analysis = await analyzeForOptimization(originalContent);

    if (!analysis) {
      console.error('❌ Analysis failed');
      process.exit(1);
    }

    // The analysis is LLM-produced, so individual fields may be missing.
    const duplicateCount = Array.isArray(analysis.duplicates) ? analysis.duplicates.length : 0;
    const estimatedReduction = analysis.metrics?.estimated_reduction_percent ?? '?';

    console.log(`\n📊 Analysis complete:`);
    console.log(`   - Found ${duplicateCount} duplication issues`);
    console.log(`   - Estimated reduction: ~${estimatedReduction}%\n`);

    // Optimize
    const optimizedContent = await optimizeContent(originalContent, analysis);

    // Validate before overwriting: thrown errors land in the catch below,
    // which reports them and exits without touching CLAUDE.md.
    if (typeof optimizedContent !== 'string' || optimizedContent.trim().length === 0) {
      throw new Error('Optimizer returned empty content; CLAUDE.md was left unchanged');
    }
    if (
      originalContent.length > 0 &&
      optimizedContent.length / originalContent.length < MIN_RETAINED_RATIO
    ) {
      throw new Error(
        `Optimized content is only ${optimizedContent.length} of ${originalContent.length} characters ` +
          '(likely truncated); CLAUDE.md was left unchanged'
      );
    }

    // Save optimized version
    fs.writeFileSync(CLAUDE_MD_PATH, optimizedContent);

    console.log(`✓ Optimized CLAUDE.md saved\n`);

    // Generate report
    generateReport(originalContent, optimizedContent, analysis);

    console.log('\n✅ Optimization complete!');
    console.log(`   Original backup: ${backupPath}`);
    console.log(`   New CLAUDE.md: ${CLAUDE_MD_PATH}`);
  } catch (error) {
    console.error('\n❌ Error during optimization:', error.message);
    console.error(error.stack);
    process.exit(1);
  }
}

main();