/ docs / 09-business / export-to-html.py
export-to-html.py
  1  #!/usr/bin/env python3
  2  """
  3  Markdown to HTML Converter
  4  
  5  Converts markdown files to styled HTML with working anchor links.
  6  Designed for business plan exports but works with any markdown file.
  7  
  8  Usage:
  9      python3 export-to-html.py input.md output.html
 10      python3 export-to-html.py 333Method-Business-Plan.md 333Method-Business-Plan.html
 11  """
 12  
 13  import re
 14  import sys
 15  
 16  def slugify(text):
 17      """Convert heading text to URL-friendly slug"""
 18      # Remove HTML tags
 19      text = re.sub(r'<[^>]+>', '', text)
 20      # Convert to lowercase
 21      text = text.lower()
 22      # Replace spaces and special chars with hyphens
 23      text = re.sub(r'[^\w\s-]', '', text)
 24      text = re.sub(r'[\s_]+', '-', text)
 25      text = re.sub(r'-+', '-', text)
 26      return text.strip('-')
 27  
 28  def extract_headings(md_content):
 29      """Extract all headings for TOC generation"""
 30      headings = []
 31      for match in re.finditer(r'^(#{2,4})\s+(.+)$', md_content, re.MULTILINE):
 32          level = len(match.group(1))
 33          text = match.group(2)
 34          slug = slugify(text)
 35          headings.append({
 36              'level': level,
 37              'text': text,
 38              'slug': slug
 39          })
 40      return headings
 41  
 42  def markdown_to_html(md_content):
 43      html = md_content
 44  
 45      # Convert headers with slugified IDs
 46      def replace_h1(match):
 47          text = match.group(1)
 48          slug = slugify(text)
 49          return f'<h1 id="{slug}">{text}</h1>'
 50  
 51      def replace_h2(match):
 52          text = match.group(1)
 53          slug = slugify(text)
 54          return f'<h2 id="{slug}">{text}</h2>'
 55  
 56      def replace_h3(match):
 57          text = match.group(1)
 58          slug = slugify(text)
 59          return f'<h3 id="{slug}">{text}</h3>'
 60  
 61      def replace_h4(match):
 62          text = match.group(1)
 63          slug = slugify(text)
 64          return f'<h4 id="{slug}">{text}</h4>'
 65  
 66      html = re.sub(r'^# (.+)$', replace_h1, html, flags=re.MULTILINE)
 67      html = re.sub(r'^## (.+)$', replace_h2, html, flags=re.MULTILINE)
 68      html = re.sub(r'^### (.+)$', replace_h3, html, flags=re.MULTILINE)
 69      html = re.sub(r'^#### (.+)$', replace_h4, html, flags=re.MULTILINE)
 70  
 71      # Convert tables (basic support)
 72      table_pattern = r'\|(.+)\|\n\|[-:\| ]+\|\n((?:\|.+\|\n?)+)'
 73  
 74      def convert_table(match):
 75          header = match.group(1)
 76          rows = match.group(2)
 77  
 78          # Process header
 79          header_cells = [f'<th>{cell.strip()}</th>' for cell in header.split('|') if cell.strip()]
 80          header_row = f'<tr>{"".join(header_cells)}</tr>'
 81  
 82          # Process rows
 83          body_rows = []
 84          for row in rows.strip().split('\n'):
 85              cells = [f'<td>{cell.strip()}</td>' for cell in row.split('|') if cell.strip()]
 86              if cells:
 87                  body_rows.append(f'<tr>{"".join(cells)}</tr>')
 88  
 89          return f'<table>\n<thead>{header_row}</thead>\n<tbody>{"".join(body_rows)}</tbody>\n</table>'
 90  
 91      html = re.sub(table_pattern, convert_table, html, flags=re.MULTILINE)
 92  
 93      # Convert bold
 94      html = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', html)
 95  
 96      # Convert italic
 97      html = re.sub(r'\*(.+?)\*', r'<em>\1</em>', html)
 98  
 99      # Convert links [text](url) to <a href="url">text</a>
100      html = re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', r'<a href="\2">\1</a>', html)
101  
102      # Convert horizontal rules
103      html = re.sub(r'^---+$', '<hr>', html, flags=re.MULTILINE)
104  
105      # Convert line breaks
106      html = re.sub(r'^<br>$', '<br>', html, flags=re.MULTILINE)
107  
108      # Convert lists
109      html = re.sub(r'^\- (.+)$', r'<li>\1</li>', html, flags=re.MULTILINE)
110      html = re.sub(r'(<li>.*</li>\n?)+', r'<ul>\n\g<0></ul>\n', html)
111  
112      # Convert paragraphs (lines that aren't HTML tags)
113      lines = html.split('\n')
114      result = []
115      in_paragraph = False
116  
117      for line in lines:
118          stripped = line.strip()
119          if not stripped:
120              if in_paragraph:
121                  result.append('</p>')
122                  in_paragraph = False
123              result.append('')
124          elif stripped.startswith('<') or '|' in stripped:
125              if in_paragraph:
126                  result.append('</p>')
127                  in_paragraph = False
128              result.append(line)
129          else:
130              if not in_paragraph:
131                  result.append('<p>')
132                  in_paragraph = True
133              result.append(line)
134  
135      if in_paragraph:
136          result.append('</p>')
137  
138      return '\n'.join(result)
139  
def build_toc_html(headings):
    """Render the table-of-contents sidebar for a list of headings.

    Each heading is a dict with ``level``, ``text`` and ``slug`` keys. The
    result is a ``<nav>`` holding one flat ``<ul>``; nesting is conveyed only
    through a ``toc-level-N`` class on each ``<li>``. Returns an empty string
    when there are no headings.
    """
    if not headings:
        return ""

    def render_entry(entry):
        slug = entry["slug"]
        return (
            f'<li class="toc-level-{entry["level"]}">'
            f'<a href="#{slug}" class="toc-link" data-target="{slug}">'
            f'{entry["text"]}'
            f'</a></li>'
        )

    parts = [
        '<nav id="toc" class="toc">',
        '<div class="toc-header">Contents</div>',
        '<ul class="toc-list">',
    ]
    parts.extend(render_entry(entry) for entry in headings)
    parts += ['</ul>', '</nav>']
    return '\n'.join(parts)
161  
def create_html_document(body_html, toc_html, title="Audit&Fix Business Plan"):
    """Wrap an HTML body and TOC in a complete, self-contained HTML document.

    The returned page embeds its own stylesheet and script:

    * CSS lays the TOC out as a fixed 280px sidebar with the content in
      ``<main>`` beside it, stacks the TOC above the content at widths of
      768px or less, and hides the TOC when printing.
    * JavaScript marks the ``.toc-link`` whose ``data-target`` matches the
      current ``h2``/``h3``/``h4`` as ``active`` while scrolling, and makes
      TOC clicks scroll smoothly and update the URL hash.

    Args:
        body_html: HTML placed inside ``<main>``.
        toc_html: HTML placed at the start of ``<body>``, before ``<main>``.
        title: Text placed inside ``<title>``.

    All three values are interpolated as-is; no HTML escaping is applied.

    Returns:
        The full HTML document as a string.
    """
    # This is an f-string: every literal brace in the CSS/JS below is doubled
    # ({{ and }}) so that only {title}, {toc_html} and {body_html} are
    # substituted.
    return f'''<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{title}</title>
    <style>
        * {{
            box-sizing: border-box;
        }}
        body {{
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
            line-height: 1.6;
            margin: 0;
            padding: 0;
            color: #333;
            display: flex;
            min-height: 100vh;
        }}
        .toc {{
            position: fixed;
            left: 0;
            top: 0;
            width: 280px;
            height: 100vh;
            overflow-y: auto;
            background: #f8f9fa;
            border-right: 1px solid #e1e4e8;
            padding: 20px;
            z-index: 100;
        }}
        .toc-header {{
            font-size: 18px;
            font-weight: 600;
            color: #2c3e50;
            margin-bottom: 15px;
            padding-bottom: 10px;
            border-bottom: 2px solid #3498db;
        }}
        .toc-list {{
            list-style: none;
            padding: 0;
            margin: 0;
        }}
        .toc-list li {{
            margin: 0;
            padding: 0;
        }}
        .toc-link {{
            display: block;
            padding: 6px 12px;
            color: #586069;
            text-decoration: none;
            font-size: 14px;
            border-left: 3px solid transparent;
            transition: all 0.2s ease;
        }}
        .toc-link:hover {{
            background: #e8eaed;
            color: #2c3e50;
            border-left-color: #3498db;
        }}
        .toc-link.active {{
            background: #e3f2fd;
            color: #1976d2;
            border-left-color: #1976d2;
            font-weight: 500;
        }}
        .toc-level-2 {{
            padding-left: 0;
        }}
        .toc-level-3 .toc-link {{
            padding-left: 24px;
            font-size: 13px;
        }}
        .toc-level-4 .toc-link {{
            padding-left: 36px;
            font-size: 12px;
        }}
        main {{
            margin-left: 280px;
            padding: 40px;
            max-width: 900px;
            width: 100%;
        }}
        h1, h2, h3, h4 {{
            scroll-margin-top: 20px;
        }}
        h1 {{
            color: #2c3e50;
            border-bottom: 3px solid #3498db;
            padding-bottom: 10px;
            margin-top: 0;
        }}
        h2 {{
            color: #34495e;
            border-bottom: 2px solid #95a5a6;
            padding-bottom: 8px;
            margin-top: 40px;
        }}
        h3 {{
            color: #7f8c8d;
            margin-top: 30px;
        }}
        h4 {{
            color: #95a5a6;
            margin-top: 20px;
        }}
        table {{
            width: 100%;
            border-collapse: collapse;
            margin: 20px 0;
        }}
        th, td {{
            border: 1px solid #ddd;
            padding: 12px;
            text-align: left;
        }}
        th {{
            background-color: #3498db;
            color: white;
        }}
        tr:nth-child(even) {{
            background-color: #f2f2f2;
        }}
        pre {{
            background-color: #f4f4f4;
            border: 1px solid #ddd;
            border-radius: 4px;
            padding: 15px;
            overflow-x: auto;
        }}
        code {{
            font-family: 'Courier New', monospace;
            font-size: 14px;
        }}
        hr {{
            border: none;
            border-top: 2px solid #ecf0f1;
            margin: 30px 0;
        }}
        main ul {{
            margin-left: 20px;
        }}
        main li {{
            margin: 8px 0;
        }}
        a {{
            color: #3498db;
            text-decoration: none;
        }}
        a:hover {{
            text-decoration: underline;
        }}
        /* Mobile responsive */
        @media (max-width: 768px) {{
            .toc {{
                width: 100%;
                height: auto;
                position: relative;
                border-right: none;
                border-bottom: 1px solid #e1e4e8;
            }}
            main {{
                margin-left: 0;
                padding: 20px;
            }}
        }}
        @media print {{
            .toc {{
                display: none;
            }}
            main {{
                margin-left: 0;
                max-width: 100%;
            }}
        }}
    </style>
</head>
<body>
{toc_html}
<main>
{body_html}
</main>
<script>
// Scroll spy - highlight current section in TOC
document.addEventListener('DOMContentLoaded', function() {{
    const tocLinks = document.querySelectorAll('.toc-link');
    const headings = Array.from(document.querySelectorAll('h2, h3, h4')).filter(h => h.id);

    function updateActiveLink() {{
        let currentHeading = null;
        const scrollPos = window.scrollY + 100; // offset for better UX

        // Find the current heading based on scroll position
        for (let i = headings.length - 1; i >= 0; i--) {{
            if (headings[i].offsetTop <= scrollPos) {{
                currentHeading = headings[i];
                break;
            }}
        }}

        // Update active state
        tocLinks.forEach(link => {{
            link.classList.remove('active');
            if (currentHeading && link.dataset.target === currentHeading.id) {{
                link.classList.add('active');
                // Scroll TOC to show active item
                link.scrollIntoView({{ block: 'nearest', behavior: 'smooth' }});
            }}
        }});
    }}

    // Update on scroll (throttled)
    let ticking = false;
    window.addEventListener('scroll', function() {{
        if (!ticking) {{
            window.requestAnimationFrame(function() {{
                updateActiveLink();
                ticking = false;
            }});
            ticking = true;
        }}
    }});

    // Update on load
    updateActiveLink();

    // Smooth scroll on TOC link click
    tocLinks.forEach(link => {{
        link.addEventListener('click', function(e) {{
            e.preventDefault();
            const targetId = this.getAttribute('href').substring(1);
            const targetElement = document.getElementById(targetId);
            if (targetElement) {{
                targetElement.scrollIntoView({{ behavior: 'smooth', block: 'start' }});
                // Update URL without triggering navigation
                history.pushState(null, null, '#' + targetId);
            }}
        }});
    }});
}});
</script>
</body>
</html>'''
409  
def main():
    """Command-line entry point: convert a markdown file to a styled HTML page.

    Expects exactly two command-line arguments: the input markdown path and
    the output HTML path. The page title is taken from the first ``<h1>`` of
    the converted body, falling back to "Document".

    Usage and error messages are written to stderr, and the process exits
    with status 1 on bad arguments, when the input cannot be read, or when
    the output cannot be written.
    """
    if len(sys.argv) != 3:
        print("Usage: python3 export-to-html.py input.md output.html", file=sys.stderr)
        print("Example: python3 export-to-html.py 333Method-Business-Plan.md 333Method-Business-Plan.html", file=sys.stderr)
        sys.exit(1)

    input_file, output_file = sys.argv[1:]

    # Read markdown file
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            md_content = f.read()
    except FileNotFoundError:
        print(f"Error: Input file '{input_file}' not found", file=sys.stderr)
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as e:
        # Permission problems, a directory passed as input, or a file that is
        # not valid UTF-8 get a clean message instead of a traceback.
        print(f"Error reading input file: {e}", file=sys.stderr)
        sys.exit(1)

    # Extract headings for TOC
    headings = extract_headings(md_content)

    # Convert to HTML
    html_body = markdown_to_html(md_content)

    # Build TOC
    toc_html = build_toc_html(headings)

    # Extract title from first H1 if present
    title_match = re.search(r'<h1[^>]*>([^<]+)</h1>', html_body)
    title = title_match.group(1) if title_match else "Document"

    # Create full HTML document
    html_document = create_html_document(html_body, toc_html, title)

    # Write output; only the file operations sit inside the try so that an
    # unrelated failure is never reported as a write error.
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(html_document)
    except (OSError, UnicodeError) as e:
        print(f"Error writing output file: {e}", file=sys.stderr)
        sys.exit(1)
    else:
        # Report the encoded size: len() of a str counts characters, which
        # differs from the bytes on disk as soon as the text is not pure ASCII.
        size_in_bytes = len(html_document.encode('utf-8'))
        print(f"✅ HTML file created: {output_file}")
        print(f"   Size: {size_in_bytes:,} bytes")
452  
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()