#!/usr/bin/env python3
"""
Generate auditandfix-business-plan.docx from the markdown source.
Maps sections to the SBT Business Plan Template structure.

Script file: generate-docx.py

Usage: python3 docs/09-business/generate-docx.py
"""

import re

from docx import Document
from docx.shared import Pt, Inches, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.oxml.ns import qn


# Default input/output locations used when main() is called without arguments.
DEFAULT_MD_PATH = '/home/jason/code/333Method/docs/09-business/auditandfix-business-plan.md'
DEFAULT_OUT_PATH = '/home/jason/code/333Method/docs/09-business/auditandfix-business-plan.docx'

# ***bold italic***, **bold** or *italic* spans. The capture group makes
# re.split() return the matched spans at the odd indexes of its result.
_INLINE_MARKUP_RE = re.compile(r'(\*\*\*[^*]+\*\*\*|\*\*[^*]+\*\*|\*[^*]+\*)')

# A table delimiter row such as |---|:---:|. Every cell must contain at least
# one dash, so a data row with an empty (or "-") first cell is not mistaken
# for the delimiter.
_TABLE_SEPARATOR_RE = re.compile(r'^\|?\s*:?-+:?\s*(\|\s*:?-+:?\s*)*\|?$')

_LINE_BREAK_RE = re.compile(r'<br\s*/?>')
_NUMBERED_ITEM_RE = re.compile(r'^\s*(\d+)\.\s+(.*)')


def read_markdown(path):
    """Return the full text of the UTF-8 file at *path*."""
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()


def add_formatted_text(paragraph, text):
    """Add text to a paragraph, handling **bold** and *italic* markdown.

    Each ***x***, **x** or *x* span becomes its own run with the matching
    bold/italic flags; the text between spans is added as plain runs.
    Empty fragments are skipped so no empty runs are created.
    """
    for index, part in enumerate(_INLINE_MARKUP_RE.split(text)):
        if not part:
            continue
        if index % 2 == 0:
            # Even indexes are the text between markup matches. Stray
            # asterisks that did not form a span stay here verbatim.
            paragraph.add_run(part)
        elif part.startswith('***'):
            run = paragraph.add_run(part[3:-3])
            run.bold = True
            run.italic = True
        elif part.startswith('**'):
            run = paragraph.add_run(part[2:-2])
            run.bold = True
        else:
            run = paragraph.add_run(part[1:-1])
            run.italic = True


def clean_text(text):
    """Remove markdown links but keep text, remove HTML comments.

    Also unwraps ~~strikethrough~~ (keeping the inner text), drops the
    '✅ ' marker, and strips surrounding whitespace.
    """
    # Remove HTML comments
    text = re.sub(r'<!--.*?-->', '', text)
    # Convert markdown links [text](url) to just text
    text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
    # Remove ~~strikethrough~~ markers, keeping the text
    text = re.sub(r'~~([^~]+)~~', r'\1', text)
    # Clean up checkmarks and similar
    text = text.replace('✅ ', '')
    return text.strip()


def parse_table(lines, start_idx):
    """Parse a markdown table starting at start_idx.

    Consumes consecutive lines that start with '|'. The delimiter row is
    skipped; every other row becomes a list of cleaned cell strings.

    Returns (rows, end_idx), where end_idx is the index of the first line
    that is not part of the table.
    """
    rows = []
    i = start_idx
    while i < len(lines):
        line = lines[i].strip()
        if not line.startswith('|'):
            break
        i += 1
        # Skip separator line
        if _TABLE_SEPARATOR_RE.match(line):
            continue
        # Drop only the outer pipes, so a row written without a trailing
        # pipe still keeps its last cell.
        inner = line[1:]
        if inner.endswith('|'):
            inner = inner[:-1]
        rows.append([clean_text(cell) for cell in inner.split('|')])
    return rows, i


def add_table_to_doc(doc, rows, first_row_header=True):
    """Add a table to the document from parsed rows.

    Short rows are padded with empty cells to the widest row. The caller's
    lists are left untouched. '<br>' tags inside a cell become line breaks,
    and the first row is bolded when first_row_header is true. An empty
    paragraph is appended after the table as spacing.
    """
    if not rows:
        return

    num_cols = max(len(r) for r in rows)
    if num_cols == 0:
        return
    padded_rows = [list(r) + [''] * (num_cols - len(r)) for r in rows]

    table = doc.add_table(rows=len(padded_rows), cols=num_cols, style='Table Grid')
    table.alignment = WD_TABLE_ALIGNMENT.LEFT

    for i, row_data in enumerate(padded_rows):
        for j, cell_text in enumerate(row_data):
            cell = table.cell(i, j)
            cell.text = ''
            p = cell.paragraphs[0]
            # Handle <br> tags in cell content
            for k, part in enumerate(_LINE_BREAK_RE.split(cell_text)):
                if k > 0:
                    p.add_run('\n')
                add_formatted_text(p, part.strip())
            p.style = doc.styles['Normal']
            # Bold header row
            if i == 0 and first_row_header:
                for run in p.runs:
                    run.bold = True

    doc.add_paragraph()  # spacing after table


def get_sections(md_text):
    """Split markdown into sections by ## headers.

    Returns a dict mapping each cleaned '## ' header text to the lines that
    follow it, joined with newlines. Lines before the first '## ' header are
    collected but never stored, and a header whose cleaned text is empty is
    not stored either.
    """
    sections = {}
    current_section = None
    current_lines = []

    for line in md_text.split('\n'):
        if line.startswith('## '):
            if current_section:
                sections[current_section] = '\n'.join(current_lines)
            current_section = clean_text(line[3:].strip().strip('*'))
            current_lines = []
        else:
            current_lines.append(line)

    if current_section:
        sections[current_section] = '\n'.join(current_lines)

    return sections


def _add_markdown_line(doc, line):
    """Render one non-blank, non-table, non-code markdown line into doc."""
    stripped = line.strip()

    # Subsection headers (### level)
    if line.startswith('### '):
        doc.add_heading(clean_text(line[4:].strip().strip('*')), level=4)
        return

    # Sub-subsection headers (#### level) are rendered as bold 11pt text
    if line.startswith('#### '):
        run = doc.add_paragraph().add_run(clean_text(line[5:].strip().strip('*')))
        run.bold = True
        run.font.size = Pt(11)
        return

    # Bullet points. Nesting has to be read from the raw line's leading
    # whitespace, because the stripped text no longer carries it.
    if stripped.startswith(('- ', '* ')):
        expanded = line.expandtabs(4)
        indent = len(expanded) - len(expanded.lstrip())
        style = 'List Bullet 2' if indent >= 2 else 'List Bullet'
        p = doc.add_paragraph(style=style)
        add_formatted_text(p, clean_text(stripped[2:]))
        return

    # Numbered lists
    m = _NUMBERED_ITEM_RE.match(line)
    if m:
        p = doc.add_paragraph(style='List Number')
        add_formatted_text(p, clean_text(m.group(2)))
        return

    # Block quote (> )
    if stripped.startswith('> '):
        p = doc.add_paragraph()
        add_formatted_text(p, clean_text(stripped[2:]))
        p.paragraph_format.left_indent = Inches(0.5)
        # Italicise every run so a quote that opens with **bold** text is
        # still italic throughout.
        for run in p.runs:
            run.italic = True
        return

    # Regular paragraph
    text = clean_text(stripped)
    if text:
        add_formatted_text(doc.add_paragraph(), text)


def process_section_content(doc, content, heading_level='Heading 4'):
    """Process markdown content and add it to the document.

    Handles fenced code blocks (9pt Courier New), tables, ### and ####
    headers, one level of nested bullets, numbered lists, block quotes and
    plain paragraphs. Blank lines and '---' rules are skipped.

    heading_level is accepted for backward compatibility but is not used;
    '### ' headers are always added with level=4.
    """
    lines = content.split('\n')
    i = 0
    in_code_block = False

    while i < len(lines):
        line = lines[i]
        stripped = line.strip()

        # Code fences toggle code mode; the fence line itself is not emitted
        if stripped.startswith('```'):
            in_code_block = not in_code_block
            i += 1
            continue

        if in_code_block:
            p = doc.add_paragraph(line, style='Normal')
            # Make code slightly different
            for run in p.runs:
                run.font.name = 'Courier New'
                run.font.size = Pt(9)
            i += 1
            continue

        # Skip empty lines and horizontal rules
        if not stripped or stripped == '---':
            i += 1
            continue

        # Tables need at least two consecutive '|' lines
        if (stripped.startswith('|') and i + 1 < len(lines)
                and lines[i + 1].strip().startswith('|')):
            rows, end_idx = parse_table(lines, i)
            add_table_to_doc(doc, rows)
            i = end_idx
            continue

        _add_markdown_line(doc, line)
        i += 1


# Sections to skip (financial spreadsheets content belongs in Excel)
SKIP_SECTIONS = {
    'Financial Spreadsheets',
    'Table of Contents',
    '1. Establishment (Start-Up) Costs',
    '2. Break-Even Point Calculation',
    '3. Personal Budget (Cost of Living)',
    '4. Cash Flow Forecast - Year 1 (Monthly)',
    '5. Cash Flow Forecast - Year 2 (Quarterly)',
    '6. Profit & Loss Forecast - Year 1 (Quarterly)',
    '7. Profit & Loss Forecast - Year 2 (Quarterly)',
    '8. Assumptions and Notes',
    '9. Key Performance Indicators (KPIs)',
    'Summary',
    '11. Sources and Assumptions',
}


def _should_skip_section(header_text):
    """Return True for '## ' sections that must not appear in the .docx."""
    return (
        header_text == 'Contents'
        or header_text.startswith('Date:')
        or header_text in SKIP_SECTIONS
    )


def main(md_path=DEFAULT_MD_PATH, out_path=DEFAULT_OUT_PATH):
    """Convert the markdown business plan at md_path into a .docx at out_path.

    Each '## ' header that is not skipped becomes a Heading 1, and the lines
    beneath it are rendered by process_section_content(). After saving, the
    document is reopened and a short summary is printed.
    """
    lines = read_markdown(md_path).split('\n')

    doc = Document()

    # Set default font
    style = doc.styles['Normal']
    style.font.name = 'Calibri'
    style.font.size = Pt(11)

    # Title
    title = doc.add_heading('Audit&Fix Business Plan', level=0)
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run('Date: March 9, 2026')
    run.font.size = Pt(12)

    doc.add_paragraph()  # spacing

    # Track which ## section we are in; content of skipped sections is
    # never accumulated.
    current_h2 = None
    skip_current = False
    section_lines = []

    for i, line in enumerate(lines):
        if line.startswith('## '):
            # Process any accumulated content from the previous section
            if current_h2 and not skip_current and section_lines:
                process_section_content(doc, '\n'.join(section_lines))

            current_h2 = clean_text(line[3:].strip().strip('*'))
            skip_current = _should_skip_section(current_h2)
            section_lines = []
            if not skip_current:
                # Add as Heading 1 (major sections)
                doc.add_heading(current_h2, level=1)
            continue

        # Skip the title line (only looked for in the first three lines)
        if line.startswith('# ') and i < 3:
            continue

        if not skip_current:
            section_lines.append(line)

    # Process remaining content
    if current_h2 and not skip_current and section_lines:
        process_section_content(doc, '\n'.join(section_lines))

    # Save
    doc.save(out_path)
    print(f'Created: {out_path}')

    # Verify by reopening the saved file
    verify_doc = Document(out_path)
    print(f'Total paragraphs: {len(verify_doc.paragraphs)}')
    print(f'Total tables: {len(verify_doc.tables)}')
    print('Heading 1 sections:')
    for para in verify_doc.paragraphs:
        if para.style.name == 'Heading 1':
            print(f' - {para.text}')


if __name__ == '__main__':
    main()