Cradicle Explorer

generate-docx.py
  1  #!/usr/bin/env python3
  2  """
  3  Generate auditandfix-business-plan.docx from the markdown source.
  4  Maps sections to the SBT Business Plan Template structure.
  5  
  6  Usage: python3 docs/09-business/generate-docx.py
  7  """
  8  
  9  import re
 10  from docx import Document
 11  from docx.shared import Pt, Inches, RGBColor
 12  from docx.enum.text import WD_ALIGN_PARAGRAPH
 13  from docx.enum.table import WD_TABLE_ALIGNMENT
 14  from docx.oxml.ns import qn
 15  
 16  
 17  def read_markdown(path):
 18      with open(path, 'r', encoding='utf-8') as f:
 19          return f.read()
 20  
 21  
 22  def add_formatted_text(paragraph, text):
 23      """Add text to a paragraph, handling **bold** and *italic* markdown."""
 24      # Split on bold and italic markers
 25      parts = re.split(r'(\*\*\*[^*]+\*\*\*|\*\*[^*]+\*\*|\*[^*]+\*)', text)
 26      for part in parts:
 27          if part.startswith('***') and part.endswith('***'):
 28              run = paragraph.add_run(part[3:-3])
 29              run.bold = True
 30              run.italic = True
 31          elif part.startswith('**') and part.endswith('**'):
 32              run = paragraph.add_run(part[2:-2])
 33              run.bold = True
 34          elif part.startswith('*') and part.endswith('*'):
 35              run = paragraph.add_run(part[1:-1])
 36              run.italic = True
 37          else:
 38              paragraph.add_run(part)
 39  
 40  
 41  def clean_text(text):
 42      """Remove markdown links but keep text, remove HTML comments."""
 43      # Remove HTML comments
 44      text = re.sub(r'<!--.*?-->', '', text)
 45      # Convert markdown links [text](url) to just text
 46      text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
 47      # Remove ~~strikethrough~~
 48      text = re.sub(r'~~([^~]+)~~', r'\1', text)
 49      # Clean up checkmarks and similar
 50      text = text.replace('✅ ', '')
 51      return text.strip()
 52  
 53  
 54  def parse_table(lines, start_idx):
 55      """Parse a markdown table starting at start_idx. Returns (rows, end_idx)."""
 56      rows = []
 57      i = start_idx
 58      while i < len(lines):
 59          line = lines[i].strip()
 60          if not line.startswith('|'):
 61              break
 62          # Skip separator line
 63          if re.match(r'^\|[\s\-:]+\|', line):
 64              i += 1
 65              continue
 66          cells = [clean_text(c.strip()) for c in line.split('|')[1:-1]]
 67          rows.append(cells)
 68          i += 1
 69      return rows, i
 70  
 71  
 72  def add_table_to_doc(doc, rows, first_row_header=True):
 73      """Add a table to the document from parsed rows."""
 74      if not rows:
 75          return
 76  
 77      num_cols = max(len(r) for r in rows)
 78      # Pad rows to same length
 79      for r in rows:
 80          while len(r) < num_cols:
 81              r.append('')
 82  
 83      table = doc.add_table(rows=len(rows), cols=num_cols, style='Table Grid')
 84      table.alignment = WD_TABLE_ALIGNMENT.LEFT
 85  
 86      for i, row_data in enumerate(rows):
 87          for j, cell_text in enumerate(row_data):
 88              cell = table.cell(i, j)
 89              cell.text = ''
 90              p = cell.paragraphs[0]
 91              # Handle <br> tags in cell content
 92              parts = re.split(r'<br\s*/?>', cell_text)
 93              for k, part in enumerate(parts):
 94                  if k > 0:
 95                      p.add_run('\n')
 96                  add_formatted_text(p, part.strip())
 97              p.style = doc.styles['Normal']
 98              # Bold header row
 99              if i == 0 and first_row_header:
100                  for run in p.runs:
101                      run.bold = True
102  
103      doc.add_paragraph()  # spacing after table
104  
105  
def get_sections(md_text):
    """Group markdown text by its ``## `` headers.

    Returns a dict mapping each cleaned header title (surrounding ``*``
    removed, then passed through ``clean_text``) to the newline-joined lines
    that follow it. Lines before the first header, and lines under a header
    whose cleaned title is empty, are not stored. A repeated title overwrites
    the earlier entry.
    """
    sections = {}
    title = None
    body = []

    for raw_line in md_text.split('\n'):
        if not raw_line.startswith('## '):
            body.append(raw_line)
            continue

        # A new header closes the section collected so far.
        if title:
            sections[title] = '\n'.join(body)
        title = clean_text(raw_line[3:].strip().strip('*'))
        body = []

    # Store the final section, which no later header closes.
    if title:
        sections[title] = '\n'.join(body)

    return sections
126  
127  
def process_section_content(doc, content, heading_level='Heading 4'):
    """Render the markdown body of one section into ``doc``.

    Lines are handled one at a time, in this order of precedence:

    * fenced code blocks (``` ... ```): each inner line becomes a 'Normal'
      paragraph in 9pt Courier New; the fence lines themselves are dropped
    * ``---`` rules and blank lines: skipped
    * pipe tables (a ``|`` line followed by another ``|`` line): parsed with
      ``parse_table`` and emitted with ``add_table_to_doc``
    * ``### `` headers: level-4 heading
    * ``#### `` headers: bold 11pt paragraph
    * ``- `` / ``* `` bullets: 'List Bullet', or 'List Bullet 2' when the
      line is indented by a tab or at least two characters
    * ``N. `` items: 'List Number'
    * ``> `` quotes: paragraph indented 0.5in with every run italic
    * anything else: plain paragraph with inline emphasis

    Args:
        doc: Document object to append to.
        content: Markdown text of the section body.
        heading_level: Kept for backward compatibility; this function never
            reads it (``### `` headers always use level 4).
    """
    lines = content.split('\n')
    i = 0
    in_code_block = False

    while i < len(lines):
        line = lines[i]
        stripped = line.strip()

        # A fence line toggles code mode and is never emitted itself.
        if stripped.startswith('```'):
            in_code_block = not in_code_block
            i += 1
            continue

        if in_code_block:
            # The raw line is used so code indentation is preserved.
            p = doc.add_paragraph(line, style='Normal')
            for run in p.runs:
                run.font.name = 'Courier New'
                run.font.size = Pt(9)
            i += 1
            continue

        # Horizontal rules and blank lines produce no output.
        if not stripped or stripped == '---':
            i += 1
            continue

        # Tables need at least two consecutive pipe lines.
        if (stripped.startswith('|') and i + 1 < len(lines)
                and lines[i + 1].strip().startswith('|')):
            rows, i = parse_table(lines, i)
            add_table_to_doc(doc, rows)
            continue

        # Subsection headers (### level)
        if line.startswith('### '):
            doc.add_heading(clean_text(line[4:].strip().strip('*')), level=4)
            i += 1
            continue

        # Sub-subsection headers (#### level) are styled as bold text.
        if line.startswith('#### '):
            p = doc.add_paragraph()
            run = p.add_run(clean_text(line[5:].strip().strip('*')))
            run.bold = True
            run.font.size = Pt(11)
            i += 1
            continue

        # Bullets. Nesting has to be read from the raw line: once the line is
        # stripped the indentation is gone, so a test on the stripped text for
        # a leading "  - " can never succeed.
        if stripped.startswith(('- ', '* ')):
            indent = line[:len(line) - len(line.lstrip())]
            nested = '\t' in indent or len(indent) >= 2
            # Assumes the document template defines 'List Bullet 2'.
            p = doc.add_paragraph(
                style='List Bullet 2' if nested else 'List Bullet')
            add_formatted_text(p, clean_text(stripped[2:]))
            i += 1
            continue

        # Numbered lists
        m = re.match(r'^\s*(\d+)\.\s+(.*)', line)
        if m:
            p = doc.add_paragraph(style='List Number')
            add_formatted_text(p, clean_text(m.group(2)))
            i += 1
            continue

        # Block quote (> )
        if stripped.startswith('> '):
            p = doc.add_paragraph()
            add_formatted_text(p, clean_text(stripped[2:]))
            p.paragraph_format.left_indent = Inches(0.5)
            # Italicise the whole quote, not only its first run; the first
            # run may be an empty or very short fragment.
            for run in p.runs:
                run.italic = True
            i += 1
            continue

        # Regular paragraph
        text = clean_text(stripped)
        if text:
            p = doc.add_paragraph()
            add_formatted_text(p, text)
        i += 1
234  
235  
# Titles of "## " sections that main() leaves out of the Word document.
# The financial spreadsheet content belongs in Excel, so those sections are
# excluded along with the table of contents.
SKIP_SECTIONS = {
    # Front matter and wrappers
    'Table of Contents',
    'Financial Spreadsheets',
    'Summary',
    # Numbered financial spreadsheet sections
    '1. Establishment (Start-Up) Costs',
    '2. Break-Even Point Calculation',
    '3. Personal Budget (Cost of Living)',
    '4. Cash Flow Forecast - Year 1 (Monthly)',
    '5. Cash Flow Forecast - Year 2 (Quarterly)',
    '6. Profit & Loss Forecast - Year 1 (Quarterly)',
    '7. Profit & Loss Forecast - Year 2 (Quarterly)',
    '8. Assumptions and Notes',
    '9. Key Performance Indicators (KPIs)',
    '11. Sources and Assumptions',
}
252  
253  
def main(md_path=None, out_path=None):
    """Convert the business-plan markdown into a .docx file and summarise it.

    Args:
        md_path: Markdown source to read. Defaults to
            ``auditandfix-business-plan.md`` in this script's directory.
        out_path: Destination .docx path. Defaults to
            ``auditandfix-business-plan.docx`` in this script's directory.

    The markdown is walked line by line. Each ``## `` header starts a new
    section: it is skipped when its title is ``Contents``, starts with
    ``Date:`` or is listed in ``SKIP_SECTIONS``; otherwise it becomes a
    level-1 heading and its body is rendered with
    ``process_section_content``. Lines before the first ``## `` header are
    discarded, as is a ``# `` title within the first three lines.

    After saving, the file is reopened and paragraph/table counts plus the
    Heading 1 titles are printed as a sanity check.
    """
    import os

    # Resolve defaults next to this script rather than against one user's
    # absolute home path, so the generator runs from any checkout location.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    if md_path is None:
        md_path = os.path.join(script_dir, 'auditandfix-business-plan.md')
    if out_path is None:
        out_path = os.path.join(script_dir, 'auditandfix-business-plan.docx')

    md_text = read_markdown(md_path)

    doc = Document()

    # Set default font
    style = doc.styles['Normal']
    style.font.name = 'Calibri'
    style.font.size = Pt(11)

    # Title block
    title = doc.add_heading('Audit&Fix Business Plan', level=0)
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run('Date: March 9, 2026')
    run.font.size = Pt(12)

    doc.add_paragraph()  # spacing

    # Track the current "## " section and whether its body is being skipped.
    current_h2 = None
    skip_current = False
    section_lines = []

    for index, line in enumerate(md_text.split('\n')):
        if line.startswith('## '):
            # Flush the body collected for the previous (kept) section.
            if current_h2 and not skip_current and section_lines:
                process_section_content(doc, '\n'.join(section_lines))

            header_text = clean_text(line[3:].strip().strip('*'))
            current_h2 = header_text
            section_lines = []
            skip_current = (
                header_text == 'Contents'
                or header_text.startswith('Date:')
                or header_text in SKIP_SECTIONS
            )
            if not skip_current:
                # Kept sections become Heading 1 (major sections).
                doc.add_heading(header_text, level=1)
            continue

        # Skip the document title line near the top of the file.
        if line.startswith('# ') and index < 3:
            continue

        if not skip_current:
            section_lines.append(line)

    # Flush the last section, which no later header closes.
    if current_h2 and not skip_current and section_lines:
        process_section_content(doc, '\n'.join(section_lines))

    # Save
    doc.save(out_path)
    print(f'Created: {out_path}')

    # Verify by reopening the saved file.
    verify_doc = Document(out_path)
    paragraphs = verify_doc.paragraphs
    print(f'Total paragraphs: {len(paragraphs)}')
    print(f'Total tables: {len(verify_doc.tables)}')
    print('Heading 1 sections:')
    # Single pass over the paragraphs; lists exactly the Heading 1 titles.
    for paragraph in paragraphs:
        if paragraph.style.name == 'Heading 1':
            print(f'  - {paragraph.text}')
348  
349  
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()