export-to-html.py
#!/usr/bin/env python3
"""
Markdown to HTML Converter

Converts markdown files to styled HTML with working anchor links.
Designed for business plan exports but works with any markdown file.

Supported markdown: ATX headings (levels 1-4), pipe tables, bold, italic,
links, horizontal rules, "- " bullet lists and paragraphs. Anything else
(fenced code blocks, ordered lists, ...) is passed through unchanged.

Usage:
    python3 export-to-html.py input.md output.html
    python3 export-to-html.py 333Method-Business-Plan.md 333Method-Business-Plan.html
"""

import re
import sys

# One ATX heading per line, levels 1-4. Shared by the TOC extractor and the
# body converter so both always agree on what counts as a heading.
_HEADING_RE = re.compile(r'^(#{1,4})[ \t]+(.+)$', re.MULTILINE)

# Header row, separator row, then one or more body rows.
_TABLE_RE = re.compile(r'\|(.+)\|\n\|[-:\| ]+\|\n((?:\|.+\|\n?)+)')

# Tags that may legitimately open a line of running text. A line starting
# with one of these is paragraph content, not block-level HTML.
_INLINE_TAG_RE = re.compile(
    r'</?(?:a|abbr|b|code|del|em|i|img|ins|kbd|mark|s|small|span|strong'
    r'|sub|sup|u)\b',
    re.IGNORECASE,
)


def slugify(text):
    """Convert heading text to a URL-friendly slug.

    HTML tags are removed, the text is lower-cased, every character that is
    not a word character, whitespace or hyphen is dropped, and runs of
    whitespace/underscores/hyphens collapse to a single hyphen.

    Returns:
        The slug with leading/trailing hyphens stripped; may be empty when
        the text contains no word characters.
    """
    text = re.sub(r'<[^>]+>', '', text)
    text = text.lower()
    text = re.sub(r'[^\w\s-]', '', text)
    text = re.sub(r'[\s_]+', '-', text)
    text = re.sub(r'-+', '-', text)
    return text.strip('-')


def _unique_slug(text, seen):
    """Return a slug for *text* that is not yet in *seen*, and record it.

    The first occurrence keeps the plain slug; later collisions get a
    numeric suffix ("intro", "intro-1", "intro-2", ...). Headings that
    slugify to an empty string fall back to "section" so the id attribute
    is never empty.
    """
    base = slugify(text) or 'section'
    slug = base
    suffix = 1
    while slug in seen:
        slug = f'{base}-{suffix}'
        suffix += 1
    seen.add(slug)
    return slug


def extract_headings(md_content):
    """Extract level 2-4 headings for TOC generation.

    Args:
        md_content: The raw markdown text.

    Returns:
        A list of dicts with keys 'level' (2-4), 'text' (raw heading text)
        and 'slug' (the id that markdown_to_html assigns to the same
        heading), in document order.
    """
    headings = []
    seen = set()
    for match in _HEADING_RE.finditer(md_content):
        level = len(match.group(1))
        text = match.group(2).strip()
        # Level-1 headings are not listed, but they still consume a slug so
        # the numbering of duplicates stays in step with markdown_to_html.
        slug = _unique_slug(text, seen)
        if level >= 2:
            headings.append({
                'level': level,
                'text': text,
                'slug': slug
            })
    return headings


def _split_table_row(row):
    """Split a '| a | b |' row into stripped cell strings, keeping empties."""
    inner = row.strip()
    if inner.startswith('|'):
        inner = inner[1:]
    if inner.endswith('|'):
        inner = inner[:-1]
    return [cell.strip() for cell in inner.split('|')]


def markdown_to_html(md_content):
    """Convert markdown text to an HTML fragment.

    Conversion is a sequence of regex passes (headings, tables, bold,
    italic, links, horizontal rules, bullet lists) followed by a line scan
    that wraps remaining text lines in <p>...</p>.

    Args:
        md_content: The raw markdown text.

    Returns:
        The HTML body as a string (no <html>/<body> wrapper).
    """
    html = md_content

    # Headings: one pass in document order so duplicate titles receive the
    # same unique ids that extract_headings() hands to the TOC.
    seen = set()

    def replace_heading(match):
        level = len(match.group(1))
        text = match.group(2).strip()
        slug = _unique_slug(text, seen)
        return f'<h{level} id="{slug}">{text}</h{level}>'

    html = _HEADING_RE.sub(replace_heading, html)

    # Tables (basic support)
    def convert_table(match):
        # group(1) is the header line without its outer pipes; empty cells
        # are kept so columns stay aligned with the body rows.
        header_cells = ''.join(
            f'<th>{cell.strip()}</th>' for cell in match.group(1).split('|')
        )

        body_rows = []
        for row in match.group(2).strip().split('\n'):
            cells = _split_table_row(row)
            if any(cells):  # skip rows that contain no content at all
                row_html = ''.join(f'<td>{cell}</td>' for cell in cells)
                body_rows.append(f'<tr>{row_html}</tr>')
        body_html = ''.join(body_rows)

        table = (
            f'<table>\n<thead><tr>{header_cells}</tr></thead>\n'
            f'<tbody>{body_html}</tbody>\n</table>'
        )
        # The pattern swallows the newline after the last row; put it back
        # so following text does not end up glued to </table>.
        if match.group(0).endswith('\n'):
            table += '\n'
        return table

    html = _TABLE_RE.sub(convert_table, html)

    # Bold must run before italic, otherwise '**x**' is eaten as '*' pairs.
    html = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', html)
    html = re.sub(r'\*(.+?)\*', r'<em>\1</em>', html)

    # Links [text](url) -> <a href="url">text</a>
    html = re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', r'<a href="\2">\1</a>', html)

    # Horizontal rules
    html = re.sub(r'^---+$', '<hr>', html, flags=re.MULTILINE)

    # Bullet lists: turn each "- item" into <li>, then wrap every run of
    # consecutive <li> lines in a single <ul>.
    html = re.sub(r'^\- (.+)$', r'<li>\1</li>', html, flags=re.MULTILINE)
    html = re.sub(r'(<li>.*</li>\n?)+', r'<ul>\n\g<0></ul>\n', html)

    # Paragraphs: wrap runs of plain text lines in <p>...</p>.
    result = []
    in_paragraph = False

    for line in html.split('\n'):
        stripped = line.strip()
        # Block-level markup: a line starting with a tag that is not an
        # inline tag, or a leftover pipe-table line.
        is_block = (
            (stripped.startswith('<') and not _INLINE_TAG_RE.match(stripped))
            or '|' in stripped
        )
        if not stripped:
            if in_paragraph:
                result.append('</p>')
                in_paragraph = False
            result.append('')
        elif is_block:
            if in_paragraph:
                result.append('</p>')
                in_paragraph = False
            result.append(line)
        else:
            if not in_paragraph:
                result.append('<p>')
                in_paragraph = True
            result.append(line)

    if in_paragraph:
        result.append('</p>')

    return '\n'.join(result)


def _toc_label(text):
    """Reduce heading markdown to plain text for display in the TOC.

    Links become their link text (avoiding an <a> nested inside the TOC
    <a>), bold/italic markers are removed and HTML tags are stripped.
    """
    text = re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', r'\1', text)
    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
    text = re.sub(r'\*(.+?)\*', r'\1', text)
    return re.sub(r'<[^>]+>', '', text)


def build_toc_html(headings):
    """Build the TOC sidebar markup from a headings list.

    Args:
        headings: Dicts with 'level', 'text' and 'slug' keys, as returned
            by extract_headings().

    Returns:
        A <nav id="toc"> element as a string, or "" when there are no
        headings. Nesting is expressed through a toc-level-N class on each
        <li>, not through nested lists.
    """
    if not headings:
        return ""

    toc_html = ['<nav id="toc" class="toc">']
    toc_html.append('<div class="toc-header">Contents</div>')
    toc_html.append('<ul class="toc-list">')

    for heading in headings:
        level_class = f"toc-level-{heading['level']}"
        slug = heading['slug']
        label = _toc_label(heading['text'])
        toc_html.append(
            f'<li class="{level_class}">'
            f'<a href="#{slug}" class="toc-link" data-target="{slug}">'
            f'{label}'
            f'</a></li>'
        )

    toc_html.append('</ul>')
    toc_html.append('</nav>')
    return '\n'.join(toc_html)


def create_html_document(body_html, toc_html, title="Audit&Fix Business Plan"):
    """Wrap an HTML body in a complete, styled document.

    Args:
        body_html: Markup placed inside <main>.
        toc_html: Markup for the sidebar, placed before <main>; may be "".
        title: Text for the <title> element (inserted as-is).

    Returns:
        The full HTML document as a string, including the stylesheet and
        the scroll-spy / smooth-scroll script for the TOC links.
    """
    return f'''<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{title}</title>
    <style>
        * {{
            box-sizing: border-box;
        }}
        body {{
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
            line-height: 1.6;
            margin: 0;
            padding: 0;
            color: #333;
            display: flex;
            min-height: 100vh;
        }}
        .toc {{
            position: fixed;
            left: 0;
            top: 0;
            width: 280px;
            height: 100vh;
            overflow-y: auto;
            background: #f8f9fa;
            border-right: 1px solid #e1e4e8;
            padding: 20px;
            z-index: 100;
        }}
        .toc-header {{
            font-size: 18px;
            font-weight: 600;
            color: #2c3e50;
            margin-bottom: 15px;
            padding-bottom: 10px;
            border-bottom: 2px solid #3498db;
        }}
        .toc-list {{
            list-style: none;
            padding: 0;
            margin: 0;
        }}
        .toc-list li {{
            margin: 0;
            padding: 0;
        }}
        .toc-link {{
            display: block;
            padding: 6px 12px;
            color: #586069;
            text-decoration: none;
            font-size: 14px;
            border-left: 3px solid transparent;
            transition: all 0.2s ease;
        }}
        .toc-link:hover {{
            background: #e8eaed;
            color: #2c3e50;
            border-left-color: #3498db;
        }}
        .toc-link.active {{
            background: #e3f2fd;
            color: #1976d2;
            border-left-color: #1976d2;
            font-weight: 500;
        }}
        .toc-level-2 {{
            padding-left: 0;
        }}
        .toc-level-3 .toc-link {{
            padding-left: 24px;
            font-size: 13px;
        }}
        .toc-level-4 .toc-link {{
            padding-left: 36px;
            font-size: 12px;
        }}
        main {{
            margin-left: 280px;
            padding: 40px;
            max-width: 900px;
            width: 100%;
        }}
        h1, h2, h3, h4 {{
            scroll-margin-top: 20px;
        }}
        h1 {{
            color: #2c3e50;
            border-bottom: 3px solid #3498db;
            padding-bottom: 10px;
            margin-top: 0;
        }}
        h2 {{
            color: #34495e;
            border-bottom: 2px solid #95a5a6;
            padding-bottom: 8px;
            margin-top: 40px;
        }}
        h3 {{
            color: #7f8c8d;
            margin-top: 30px;
        }}
        h4 {{
            color: #95a5a6;
            margin-top: 20px;
        }}
        table {{
            width: 100%;
            border-collapse: collapse;
            margin: 20px 0;
        }}
        th, td {{
            border: 1px solid #ddd;
            padding: 12px;
            text-align: left;
        }}
        th {{
            background-color: #3498db;
            color: white;
        }}
        tr:nth-child(even) {{
            background-color: #f2f2f2;
        }}
        pre {{
            background-color: #f4f4f4;
            border: 1px solid #ddd;
            border-radius: 4px;
            padding: 15px;
            overflow-x: auto;
        }}
        code {{
            font-family: 'Courier New', monospace;
            font-size: 14px;
        }}
        hr {{
            border: none;
            border-top: 2px solid #ecf0f1;
            margin: 30px 0;
        }}
        main ul {{
            margin-left: 20px;
        }}
        main li {{
            margin: 8px 0;
        }}
        a {{
            color: #3498db;
            text-decoration: none;
        }}
        a:hover {{
            text-decoration: underline;
        }}
        /* Mobile responsive */
        @media (max-width: 768px) {{
            /* The TOC becomes an in-flow element here; drop the flex row so
               it stacks above the content instead of sitting beside it. */
            body {{
                display: block;
            }}
            .toc {{
                width: 100%;
                height: auto;
                position: relative;
                border-right: none;
                border-bottom: 1px solid #e1e4e8;
            }}
            main {{
                margin-left: 0;
                padding: 20px;
            }}
        }}
        @media print {{
            .toc {{
                display: none;
            }}
            main {{
                margin-left: 0;
                max-width: 100%;
            }}
        }}
    </style>
</head>
<body>
{toc_html}
<main>
{body_html}
</main>
<script>
    // Scroll spy - highlight current section in TOC
    document.addEventListener('DOMContentLoaded', function() {{
        const tocLinks = document.querySelectorAll('.toc-link');
        const headings = Array.from(document.querySelectorAll('h2, h3, h4')).filter(h => h.id);

        function updateActiveLink() {{
            let currentHeading = null;
            const scrollPos = window.scrollY + 100; // offset for better UX

            // Find the current heading based on scroll position
            for (let i = headings.length - 1; i >= 0; i--) {{
                if (headings[i].offsetTop <= scrollPos) {{
                    currentHeading = headings[i];
                    break;
                }}
            }}

            // Update active state
            tocLinks.forEach(link => {{
                link.classList.remove('active');
                if (currentHeading && link.dataset.target === currentHeading.id) {{
                    link.classList.add('active');
                    // Scroll TOC to show active item
                    link.scrollIntoView({{ block: 'nearest', behavior: 'smooth' }});
                }}
            }});
        }}

        // Update on scroll (throttled)
        let ticking = false;
        window.addEventListener('scroll', function() {{
            if (!ticking) {{
                window.requestAnimationFrame(function() {{
                    updateActiveLink();
                    ticking = false;
                }});
                ticking = true;
            }}
        }});

        // Update on load
        updateActiveLink();

        // Smooth scroll on TOC link click
        tocLinks.forEach(link => {{
            link.addEventListener('click', function(e) {{
                e.preventDefault();
                const targetId = this.getAttribute('href').substring(1);
                const targetElement = document.getElementById(targetId);
                if (targetElement) {{
                    targetElement.scrollIntoView({{ behavior: 'smooth', block: 'start' }});
                    // Update URL without triggering navigation
                    history.pushState(null, null, '#' + targetId);
                }}
            }});
        }});
    }});
</script>
</body>
</html>'''


def main():
    """Command-line entry point: convert argv[1] (markdown) to argv[2] (HTML).

    Exits with status 1 on wrong usage or when the input cannot be read or
    the output cannot be written; error messages go to stderr.
    """
    if len(sys.argv) != 3:
        print("Usage: python3 export-to-html.py input.md output.html")
        print("Example: python3 export-to-html.py 333Method-Business-Plan.md 333Method-Business-Plan.html")
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]

    # Read markdown file
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            md_content = f.read()
    except FileNotFoundError:
        print(f"Error: Input file '{input_file}' not found", file=sys.stderr)
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as e:
        # Permission problems, directories, non-UTF-8 content, ...
        print(f"Error reading input file: {e}", file=sys.stderr)
        sys.exit(1)

    # Extract headings for TOC
    headings = extract_headings(md_content)

    # Convert to HTML
    html_body = markdown_to_html(md_content)

    # Build TOC
    toc_html = build_toc_html(headings)

    # Title comes from the first H1; inline tags inside it (e.g. <strong>)
    # are stripped rather than causing a fallback to the default.
    title = ""
    title_match = re.search(r'<h1[^>]*>(.*?)</h1>', html_body)
    if title_match:
        title = re.sub(r'<[^>]+>', '', title_match.group(1)).strip()
    if not title:
        title = "Document"

    # Create full HTML document
    html_document = create_html_document(html_body, toc_html, title)

    # Write output
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(html_document)
    except OSError as e:
        print(f"Error writing output file: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"✅ HTML file created: {output_file}")
    # Report the encoded size, which is what actually lands on disk.
    print(f" Size: {len(html_document.encode('utf-8')):,} bytes")


if __name__ == '__main__':
    main()