# conversion-ops / survey_lead_magnet.py
#!/usr/bin/env python3
"""
Survey-to-Lead-Magnet Engine
==============================
Takes survey response data (CSV), segments respondents by pain point clusters,
ranks segments by size and commercial potential, and auto-generates lead magnet
briefs targeting each segment.

Usage:
    python survey_lead_magnet.py --csv survey_responses.csv
    python survey_lead_magnet.py --csv survey.csv --pain-columns "biggest_challenge" "top_frustration"
    python survey_lead_magnet.py --csv survey.csv --top-segments 5 --json
    python survey_lead_magnet.py --csv survey.csv --output lead_magnets.json
"""

import argparse
import csv
import json
import os
import re
import sys
from collections import Counter
from dataclasses import dataclass, field, asdict
from typing import Optional

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# NOTE(review): `csv`, `Counter` and `field` are imported but not referenced
# anywhere in this file — candidates for removal.

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Column-name heuristic: headers matching any of these stems are assumed to
# hold pain point / challenge free-text responses.
PAIN_COLUMN_PATTERNS = re.compile(
    r"(challenge|pain|frustrat|struggle|problem|difficult|obstacle|"
    r"barrier|concern|issue|blocker|worry|fear|hard|tough|"
    r"biggest|main|top|primary|key|major|worst)",
    re.IGNORECASE,
)

# Words that signal commercial intent / buying readiness inside a response.
COMMERCIAL_SIGNALS = re.compile(
    r"\b(budget|cost|price|invest|spend|pay|afford|roi|revenue|"
    r"software|tool|platform|solution|vendor|agency|consultant|"
    r"hire|outsource|automate|scale|grow|implement|upgrade|"
    r"need|want|looking for|searching|evaluating|considering)\b",
    re.IGNORECASE,
)

# Lead magnet format heuristics: trigger words that suggest which deliverable
# format best fits a pain cluster.
FORMAT_KEYWORDS = {
    "guide": ["understand", "learn", "how", "why",
              "strategy", "approach", "framework", "concept", "complex"],
    "checklist": ["process", "steps", "workflow", "setup", "launch", "implement", "execute", "routine", "daily"],
    "template": ["create", "write", "build", "design", "plan", "proposal", "email", "message", "document"],
    "calculator": ["cost", "budget", "roi", "numbers", "forecast", "estimate", "pricing", "revenue", "metrics"],
    "swipe_file": ["examples", "inspiration", "copy", "ads", "headlines", "subject lines", "creative", "ideas"],
}

# Stopwords for clustering (extend sklearn's default English list).
# NOTE(review): preprocessing strips apostrophes, so "don't"/"can't" here can
# never match a token — harmless but dead entries.
EXTRA_STOPWORDS = [
    "really", "just", "like", "thing", "things", "lot", "also",
    "get", "getting", "got", "know", "dont", "don't", "can't",
    "want", "need", "think", "feel", "make", "much", "many",
    "very", "would", "could", "should", "way", "able",
    "one", "two", "first", "new", "good", "bad", "hard",
    "well", "time", "still", "even", "right", "going",
]


# ---------------------------------------------------------------------------
# Data Classes
# ---------------------------------------------------------------------------

@dataclass
class PainSegment:
    """One cluster of respondents sharing a pain-point theme."""
    segment_id: int
    theme: str
    top_keywords: list
    respondent_count: int
    respondent_pct: float          # share of all responses, 0-100
    commercial_score: float        # 0-100
    sample_responses: list
    representative_quotes: list


@dataclass
class LeadMagnetBrief:
    """Auto-generated production brief for one lead magnet."""
    segment_id: int
    segment_theme: str
    title: str
    format: str                    # guide, checklist, template, calculator, swipe_file
    hook: str
    outline: list
    target_cta: str
    distribution_channel: str
    viral_potential: int           # 0-100
    conversion_potential: int      # 0-100
    combined_score: float          # weighted blend of viral + conversion
    implementation_notes: str


@dataclass
class AnalysisResult:
    """Top-level output of the analysis pipeline (JSON-serializable via asdict)."""
    total_respondents: int
    columns_analyzed: list
    segments: list                 # list of asdict(PainSegment)
    lead_magnets: list             # list of asdict(LeadMagnetBrief)
    implementation_roadmap: list
# ---------------------------------------------------------------------------
# Data Ingestion
# ---------------------------------------------------------------------------

def load_survey_data(csv_path: str) -> pd.DataFrame:
    """Load a survey CSV, trying multiple encodings in order.

    Args:
        csv_path: Path to the CSV file.

    Returns:
        The parsed DataFrame.

    Raises:
        ValueError: If no encoding attempt succeeds.  The last underlying
            pandas/codec error is chained as ``__cause__`` so the real
            failure is not masked (the original code swallowed it).
    """
    last_err: Optional[Exception] = None
    for encoding in ["utf-8", "utf-8-sig", "latin-1", "cp1252"]:
        try:
            return pd.read_csv(csv_path, encoding=encoding)
        except (UnicodeDecodeError, pd.errors.ParserError) as err:
            last_err = err
            continue
    raise ValueError(f"Could not read CSV file: {csv_path}") from last_err


def detect_pain_columns(df: pd.DataFrame) -> list:
    """Auto-detect columns that likely contain pain point / challenge data.

    First pass: header names matching PAIN_COLUMN_PATTERNS.  Fallback: any
    object-dtype column whose average text length exceeds 30 characters
    (heuristic for free-text answers).
    """
    pain_cols = [col for col in df.columns if PAIN_COLUMN_PATTERNS.search(col)]

    if not pain_cols:
        for col in df.columns:
            if df[col].dtype == object:
                avg_len = df[col].dropna().astype(str).str.len().mean()
                # NaN (all-empty column) compares False here, which is intended.
                if avg_len > 30:
                    pain_cols.append(col)

    return pain_cols


def extract_responses(df: pd.DataFrame, pain_columns: list) -> list:
    """Extract and combine text responses from the pain columns.

    Each respondent's non-empty answers across ``pain_columns`` are joined
    with a space; respondents with no text at all are dropped, so the
    returned list may be shorter than ``len(df)``.
    """
    responses = []
    for _, row in df.iterrows():
        parts = []
        for col in pain_columns:
            # row.get tolerates a column missing from this row/frame.
            val = row.get(col)
            if pd.notna(val) and str(val).strip():
                parts.append(str(val).strip())
        combined = " ".join(parts)
        if combined:
            responses.append(combined)
    return responses


# ---------------------------------------------------------------------------
# Clustering
# ---------------------------------------------------------------------------

def preprocess_text(text: str) -> str:
    """Lowercase, strip non-letters, and collapse whitespace for clustering."""
    text = text.lower()
    text = re.sub(r"[^a-z\s]", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text
def cluster_responses(responses: list, n_clusters: Optional[int] = None) -> tuple:
    """Cluster responses using TF-IDF + KMeans.

    Args:
        responses: Raw (unpreprocessed) response strings.
        n_clusters: Fixed cluster count, or None to auto-select via
            silhouette score over k in [2, min(10, n/3)].

    Returns:
        Tuple ``(labels, vectorizer, tfidf_matrix, n_clusters)``.  When
        clustering is skipped (too few / degenerate responses), labels are
        all zero and vectorizer/matrix are None.
    """
    if len(responses) < 5:
        # Too few responses to cluster meaningfully — single segment.
        return [0] * len(responses), None, None, 1

    cleaned = [preprocess_text(r) for r in responses]

    # sklearn's English stopwords extended with survey-noise words.
    stop_words = list(TfidfVectorizer(stop_words="english").get_stop_words()) + EXTRA_STOPWORDS
    vectorizer = TfidfVectorizer(
        max_features=500,
        stop_words=stop_words,
        min_df=2 if len(responses) > 20 else 1,
        max_df=0.85,
        ngram_range=(1, 2),
    )

    try:
        tfidf_matrix = vectorizer.fit_transform(cleaned)
    except ValueError:
        # Empty vocabulary: all responses too similar/empty after cleaning.
        return [0] * len(responses), None, None, 1

    if n_clusters is None:
        # Auto-select k by silhouette score over a small candidate range.
        max_k = min(10, len(responses) // 3, tfidf_matrix.shape[0] - 1)
        max_k = max(2, max_k)

        best_k = 3
        best_score = -1

        for k in range(2, max_k + 1):
            try:
                km = KMeans(n_clusters=k, random_state=42, n_init=10)
                labels = km.fit_predict(tfidf_matrix)
                score = silhouette_score(tfidf_matrix, labels)
                if score > best_score:
                    best_score = score
                    best_k = k
            except ValueError:
                continue

        n_clusters = best_k

    km = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    labels = km.fit_predict(tfidf_matrix)

    return labels, vectorizer, tfidf_matrix, n_clusters


def extract_cluster_keywords(
    vectorizer: "TfidfVectorizer",
    tfidf_matrix,
    labels: list,
    cluster_id: int,
    top_n: int = 8,
) -> list:
    """Return the top TF-IDF keywords for one cluster.

    A None vectorizer means clustering was skipped; returns the sentinel
    ["general"] so downstream labeling still works.
    """
    if vectorizer is None:
        return ["general"]

    mask = np.array(labels) == cluster_id
    cluster_matrix = tfidf_matrix[mask]

    if cluster_matrix.shape[0] == 0:
        return []

    # Sparse .mean returns np.matrix; .A1 flattens it to a 1-D ndarray.
    mean_tfidf = cluster_matrix.mean(axis=0).A1
    feature_names = vectorizer.get_feature_names_out()
    top_indices = mean_tfidf.argsort()[-top_n:][::-1]

    return [feature_names[i] for i in top_indices if mean_tfidf[i] > 0]


def generate_theme_label(keywords: list) -> str:
    """Generate a human-readable theme label from top keywords."""
    if not keywords:
        return "General Challenges"

    # Title-case the top 3 keywords and join them into a label.
    top = keywords[:3]
    theme = " & ".join(word.replace("_", " ").title() for word in top)
    return theme


# ---------------------------------------------------------------------------
# Scoring
# ---------------------------------------------------------------------------

def score_commercial_potential(responses: list) -> float:
    """Score how commercially valuable a segment is (0-100).

    Counts COMMERCIAL_SIGNALS matches per response; an average of 4+
    signals per response maps to 100.  Always returns a float (the
    original returned int 0 for the empty case).
    """
    if not responses:
        return 0.0

    total_signals = sum(len(COMMERCIAL_SIGNALS.findall(resp)) for resp in responses)

    avg_signals = total_signals / len(responses)
    score = min(100, avg_signals * 25)  # 4+ avg signals = 100
    return round(score, 1)


def recommend_format(keywords: list, responses: list) -> str:
    """Recommend the best lead magnet format for a pain cluster.

    Trigger words are matched on word boundaries (fixes the original
    substring test, where e.g. "how" matched inside "show" and "cost"
    inside "costume", inflating scores).
    """
    combined_text = " ".join(keywords) + " " + " ".join(responses[:10])
    combined_lower = combined_text.lower()

    scores = {}
    for fmt, trigger_words in FORMAT_KEYWORDS.items():
        scores[fmt] = sum(
            1
            for word in trigger_words
            if re.search(r"\b" + re.escape(word) + r"\b", combined_lower)
        )

    best = max(scores, key=scores.get)
    if scores[best] == 0:
        return "guide"  # default when nothing matched
    return best


def score_viral_potential(title: str, fmt: str, segment_size_pct: float) -> int:
    """Score how likely a lead magnet is to be shared (0-100)."""
    score = 30  # baseline

    # Larger segments = more sharing potential (capped contribution).
    score += min(25, segment_size_pct * 1.5)

    # Templates / swipe files are inherently more shareable than guides.
    format_boost = {
        "template": 15,
        "checklist": 12,
        "swipe_file": 18,
        "calculator": 10,
        "guide": 5,
    }
    score += format_boost.get(fmt, 0)

    # Titles with numbers or power words get a small lift.
    if re.search(r"\d+", title):
        score += 10
    if re.search(r"(ultimate|complete|definitive|proven|secret)", title, re.IGNORECASE):
        score += 5

    return min(100, int(score))


def score_conversion_potential(commercial_score: float, segment_size_pct: float, fmt: str) -> int:
    """Score how likely a lead magnet is to convert to leads/customers (0-100)."""
    score = 20  # baseline

    # Commercial intent is the strongest signal.
    score += commercial_score * 0.4

    # Segment size matters but with diminishing returns.
    score += min(15, segment_size_pct * 0.8)

    # Some formats convert better than others.
    conversion_boost = {
        "calculator": 15,
        "template": 12,
        "checklist": 10,
        "guide": 5,
        "swipe_file": 8,
    }
    score += conversion_boost.get(fmt, 0)

    return min(100, int(score))


# ---------------------------------------------------------------------------
# Lead Magnet Brief Generator
# ---------------------------------------------------------------------------

FORMAT_LABELS = {
    "guide": "Comprehensive Guide",
    "checklist": "Actionable Checklist",
    "template": "Ready-to-Use Template",
    "calculator": "Interactive Calculator",
    "swipe_file": "Swipe File Collection",
}


def generate_title(theme: str, fmt: str, keywords: list) -> str:
    """Generate a lead magnet title for the given format and theme.

    NOTE(review): only the first template per format is ever used; the
    alternates are retained for future variation/A-B testing.
    """
    templates = {
        "guide": [
            f"The Complete Guide to {theme}",
            f"How to Solve {theme}: A Step-by-Step Guide",
            f"{theme} Mastery: Everything You Need to Know",
        ],
        "checklist": [
            f"The {theme} Checklist: {min(15, 5 + len(keywords))} Steps to Success",
            f"Your {theme} Pre-Launch Checklist",
            f"{theme}: The Essential Checklist",
        ],
        "template": [
            f"{theme} Template Pack: Copy, Customize, Launch",
            f"The {theme} Template That Saves 10+ Hours/Week",
            f"Plug-and-Play {theme} Templates",
        ],
        "calculator": [
            f"{theme} Calculator: Know Your Numbers in 5 Minutes",
            f"The {theme} ROI Calculator",
            f"Calculate Your {theme} Score",
        ],
        "swipe_file": [
            f"50+ {theme} Examples That Actually Work",
            f"The {theme} Swipe File: Steal These Ideas",
            f"Best-in-Class {theme} Examples (Curated Collection)",
        ],
    }

    options = templates.get(fmt, templates["guide"])
    return options[0]


def generate_hook(theme: str, keywords: list, sample_responses: list) -> str:
    """Generate a compelling hook for the lead magnet landing copy."""
    # Prefer quoting a real respondent's pain (medium-length phrase).
    pain_phrase = ""
    if sample_responses:
        for resp in sample_responses[:5]:
            if 20 < len(resp) < 150:
                pain_phrase = resp
                break

    if pain_phrase:
        return (
            f"If you've ever thought \"{pain_phrase[:80]}{'...' if len(pain_phrase) > 80 else ''}\" "
            f"— this is for you. We analyzed hundreds of responses and found the exact "
            f"patterns that separate those who overcome {keywords[0] if keywords else 'this challenge'} "
            f"from those who stay stuck."
        )
    else:
        return (
            f"Most teams waste months trying to figure out {theme.lower()} on their own. "
            f"This resource distills proven strategies into actionable steps you can "
            f"implement today."
        )


def generate_outline(theme: str, fmt: str, keywords: list) -> list:
    """Generate a content outline (list of section strings) for the lead magnet."""
    sections = [f"Section 1: Why {theme} Matters Now (The Landscape)"]

    if fmt == "guide":
        sections.extend([
            f"Section 2: The Core Framework for {keywords[0].title() if keywords else 'Success'}",
            f"Section 3: Common Mistakes (And How to Avoid Them)",
            f"Section 4: Step-by-Step Implementation Plan",
            f"Section 5: Tools & Resources You'll Need",
            f"Section 6: Case Studies — What Good Looks Like",
            f"Section 7: Quick-Start Action Plan",
        ])
    elif fmt == "checklist":
        sections.extend([
            f"Section 2: Pre-Work — What to Have Ready",
            f"Section 3: Phase 1 — Foundation ({keywords[0].title() if keywords else 'Setup'})",
            f"Section 4: Phase 2 — Execution ({keywords[1].title() if len(keywords) > 1 else 'Build'})",
            f"Section 5: Phase 3 — Optimization & Measurement",
            f"Section 6: Common Gotchas to Watch For",
        ])
    elif fmt == "template":
        sections.extend([
            f"Section 2: How to Use This Template",
            f"Section 3: Template A — {keywords[0].title() if keywords else 'Standard'} Version",
            f"Section 4: Template B — Advanced Version",
            f"Section 5: Customization Guide",
            f"Section 6: Real Examples (Filled-In Templates)",
        ])
    elif fmt == "calculator":
        sections.extend([
            f"Section 2: Key Metrics You Need to Track",
            f"Section 3: Input Your Numbers",
            f"Section 4: Understanding Your Results",
            f"Section 5: Benchmarks — How You Compare",
            f"Section 6: Action Steps Based on Your Score",
        ])
    elif fmt == "swipe_file":
        sections.extend([
            f"Section 2: What Makes These Examples Work",
            f"Section 3: Category A — {keywords[0].title() if keywords else 'Top Performers'}",
            f"Section 4: Category B — {keywords[1].title() if len(keywords) > 1 else 'Rising Stars'}",
            f"Section 5: How to Adapt These for Your Business",
            f"Section 6: Blank Templates to Get Started",
        ])

    return sections


def generate_cta(fmt: str, theme: str) -> str:
    """Generate the target CTA for the lead magnet landing page."""
    ctas = {
        "guide": f"Download the Free {theme} Guide",
        "checklist": f"Get Your Free {theme} Checklist",
        "template": f"Grab the Free {theme} Templates",
        "calculator": f"Try the Free {theme} Calculator",
        "swipe_file": f"Download {theme} Swipe File",
    }
    return ctas.get(fmt, f"Get Free {theme} Resource")


def recommend_distribution(fmt: str, segment_size_pct: float) -> str:
    """Recommend the primary distribution channel by segment size.

    NOTE(review): ``fmt`` is currently unused; kept for interface
    compatibility with existing callers.
    """
    if segment_size_pct > 25:
        return "Homepage popup + dedicated landing page + paid social"
    elif segment_size_pct > 15:
        return "Blog content upgrade + email nurture sequence"
    elif segment_size_pct > 8:
        return "Targeted blog posts + LinkedIn organic"
    else:
        return "Niche community posts + targeted email segment"


def build_lead_magnet_brief(segment: "PainSegment") -> "LeadMagnetBrief":
    """Generate a complete lead magnet brief for a pain segment."""
    fmt = recommend_format(segment.top_keywords, segment.sample_responses)
    title = generate_title(segment.theme, fmt, segment.top_keywords)
    hook = generate_hook(segment.theme, segment.top_keywords, segment.sample_responses)
    outline = generate_outline(segment.theme, fmt, segment.top_keywords)
    cta = generate_cta(fmt, segment.theme)
    channel = recommend_distribution(fmt, segment.respondent_pct)

    viral = score_viral_potential(title, fmt, segment.respondent_pct)
    conversion = score_conversion_potential(
        segment.commercial_score, segment.respondent_pct, fmt,
    )
    # Conversion is weighted above virality: leads beat shares.
    combined = (viral * 0.4 + conversion * 0.6)

    impl_notes = (
        f"Target segment: {segment.respondent_count} respondents ({segment.respondent_pct:.1f}% of total). "
        f"Commercial intent score: {segment.commercial_score}/100. "
        f"Recommended format: {FORMAT_LABELS.get(fmt, fmt)}. "
        f"Estimated production time: {'1-2 days' if fmt in ('checklist', 'template') else '3-5 days'}."
    )

    return LeadMagnetBrief(
        segment_id=segment.segment_id,
        segment_theme=segment.theme,
        title=title,
        format=FORMAT_LABELS.get(fmt, fmt),
        hook=hook,
        outline=outline,
        target_cta=cta,
        distribution_channel=channel,
        viral_potential=viral,
        conversion_potential=conversion,
        combined_score=round(combined, 1),
        implementation_notes=impl_notes,
    )


# ---------------------------------------------------------------------------
# Analysis Pipeline
# ---------------------------------------------------------------------------

def analyze_survey(
    csv_path: str,
    pain_columns: Optional[list] = None,
    top_segments: int = 5,
) -> "AnalysisResult":
    """Full analysis pipeline: load → cluster → score → generate briefs.

    Args:
        csv_path: Path to the survey responses CSV.
        pain_columns: Explicit column names (fuzzy-matched), or None to
            auto-detect.
        top_segments: Maximum number of segments to keep.

    Raises:
        ValueError: On unreadable CSV, unknown columns, undetectable pain
            columns, or no non-empty responses.
    """

    # Load data
    df = load_survey_data(csv_path)
    total_respondents = len(df)

    # Detect or use specified pain columns
    if pain_columns:
        missing = [c for c in pain_columns if c not in df.columns]
        if missing:
            # Fuzzy match: case-insensitive substring against real headers.
            actual_cols = []
            for pc in pain_columns:
                matches = [c for c in df.columns if pc.lower() in c.lower()]
                if matches:
                    actual_cols.append(matches[0])
                else:
                    raise ValueError(f"Column not found: '{pc}'. Available: {list(df.columns)}")
            pain_columns = actual_cols
    else:
        pain_columns = detect_pain_columns(df)
        if not pain_columns:
            raise ValueError(
                "Could not auto-detect pain point columns. "
                "Use --pain-columns to specify which columns contain challenge/pain responses.\n"
                f"Available columns: {list(df.columns)}"
            )

    print(f"Analyzing columns: {pain_columns}", file=sys.stderr)

    # Extract responses
    responses = extract_responses(df, pain_columns)
    if not responses:
        raise ValueError("No non-empty responses found in the specified columns")

    print(f"Found {len(responses)} responses from {total_respondents} respondents", file=sys.stderr)

    # Cluster — with few responses, cap k explicitly; otherwise auto-select.
    labels, vectorizer, tfidf_matrix, n_clusters = cluster_responses(
        responses, n_clusters=min(top_segments, len(responses) // 2) if len(responses) < 30 else None,
    )

    # Build segments
    segments = []
    for cluster_id in range(n_clusters):
        mask = [i for i, l in enumerate(labels) if l == cluster_id]
        cluster_responses_list = [responses[i] for i in mask]

        keywords = extract_cluster_keywords(vectorizer, tfidf_matrix, labels, cluster_id)
        theme = generate_theme_label(keywords)
        commercial = score_commercial_potential(cluster_responses_list)

        # Pick representative quotes: prefer responses near ~80 chars.
        quotes = sorted(
            cluster_responses_list,
            key=lambda r: abs(len(r) - 80),
        )[:3]

        segment = PainSegment(
            segment_id=cluster_id + 1,
            theme=theme,
            top_keywords=keywords,
            respondent_count=len(mask),
            respondent_pct=round(len(mask) / len(responses) * 100, 1),
            commercial_score=commercial,
            sample_responses=cluster_responses_list[:5],
            representative_quotes=quotes,
        )
        segments.append(segment)

    # Rank by size × commercial score (+10 keeps zero-commercial segments ranked by size).
    segments.sort(key=lambda s: s.respondent_count * (s.commercial_score + 10), reverse=True)

    # Limit to top N and re-number after sorting.
    segments = segments[:top_segments]
    for i, seg in enumerate(segments):
        seg.segment_id = i + 1

    # Generate lead magnet briefs
    lead_magnets = [build_lead_magnet_brief(seg) for seg in segments]

    # Sort briefs by combined score
    lead_magnets.sort(key=lambda b: b.combined_score, reverse=True)

    # Implementation roadmap — segment_id indexes into the re-numbered segments list.
    roadmap = []
    for i, lm in enumerate(lead_magnets, 1):
        roadmap.append({
            "priority": i,
            "title": lm.title,
            "format": lm.format,
            "segment_size": f"{lm.segment_theme} ({segments[lm.segment_id - 1].respondent_pct:.1f}%)",
            "combined_score": lm.combined_score,
            "estimated_effort": "1-2 days" if "Checklist" in lm.format or "Template" in lm.format else "3-5 days",
        })

    return AnalysisResult(
        total_respondents=total_respondents,
        columns_analyzed=pain_columns,
        segments=[asdict(s) for s in segments],
        lead_magnets=[asdict(lm) for lm in lead_magnets],
        implementation_roadmap=roadmap,
    )


# ---------------------------------------------------------------------------
# Output Formatters
# ---------------------------------------------------------------------------

def format_analysis_text(result: "AnalysisResult") -> str:
    """Format the analysis as a human-readable text report."""
    lines = []
    lines.append("=" * 70)
    lines.append("  SURVEY-TO-LEAD-MAGNET ANALYSIS")
    lines.append("=" * 70)
    lines.append("")
    lines.append(f"  Total respondents: {result.total_respondents}")
    lines.append(f"  Columns analyzed: {', '.join(result.columns_analyzed)}")
    lines.append(f"  Segments identified: {len(result.segments)}")
    lines.append("")

    # Segments
    lines.append("-" * 70)
    lines.append("  PAIN POINT SEGMENTS (ranked by opportunity)")
    lines.append("-" * 70)

    for seg in result.segments:
        lines.append("")
        lines.append(f"  Segment #{seg['segment_id']}: {seg['theme']}")
        lines.append(f"    Respondents: {seg['respondent_count']} ({seg['respondent_pct']}%)")
        lines.append(f"    Commercial Score: {seg['commercial_score']}/100")
        lines.append(f"    Top Keywords: {', '.join(seg['top_keywords'][:5])}")
        lines.append("")
        lines.append("    Representative Quotes:")
        for q in seg["representative_quotes"]:
            lines.append(f"      \"{q[:100]}{'...' if len(q) > 100 else ''}\"")
    lines.append("")

    # Lead Magnet Briefs
    lines.append("=" * 70)
    lines.append("  LEAD MAGNET BRIEFS (ranked by combined score)")
    lines.append("=" * 70)

    for lm in result.lead_magnets:
        lines.append("")
        lines.append(f"  📦 {lm['title']}")
        lines.append(f"     Format: {lm['format']}")
        lines.append(f"     Segment: {lm['segment_theme']}")
        lines.append(f"     Viral Potential: {lm['viral_potential']}/100 | Conversion Potential: {lm['conversion_potential']}/100")
        lines.append(f"     Combined Score: {lm['combined_score']}/100")
        lines.append("")
        lines.append(f"     Hook: {lm['hook'][:200]}{'...' if len(lm['hook']) > 200 else ''}")
        lines.append("")
        lines.append("     Outline:")
        for section in lm["outline"]:
            lines.append(f"       • {section}")
        lines.append("")
        lines.append(f"     CTA: {lm['target_cta']}")
        lines.append(f"     Distribution: {lm['distribution_channel']}")
        lines.append(f"     Notes: {lm['implementation_notes']}")
        lines.append("")
        lines.append("  " + "-" * 50)

    # Roadmap
    lines.append("")
    lines.append("=" * 70)
    lines.append("  IMPLEMENTATION ROADMAP")
    lines.append("=" * 70)
    lines.append("")

    for item in result.implementation_roadmap:
        lines.append(f"  #{item['priority']} [{item['estimated_effort']}] {item['title']}")
        lines.append(f"     Format: {item['format']} | Segment: {item['segment_size']} | Score: {item['combined_score']}")
        lines.append("")

    lines.append("=" * 70)
    return "\n".join(lines)


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main():
    """CLI entry point: parse args, run the pipeline, emit text or JSON."""
    parser = argparse.ArgumentParser(
        description="Survey-to-Lead-Magnet Engine — Turn survey data into targeted lead magnet briefs",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python survey_lead_magnet.py --csv survey_responses.csv
  python survey_lead_magnet.py --csv survey.csv --pain-columns "biggest_challenge" "frustrations"
  python survey_lead_magnet.py --csv survey.csv --top-segments 3 --json --output briefs.json

CSV Format:
  Questions as column headers, one respondent per row.
  Works with exports from Typeform, Google Forms, SurveyMonkey, etc.
""",
    )
    parser.add_argument("--csv", required=True, help="Path to survey responses CSV")
    parser.add_argument(
        "--pain-columns", nargs="+",
        help="Column names containing pain point / challenge responses (auto-detected if not specified)",
    )
    parser.add_argument(
        "--top-segments", type=int, default=5,
        help="Number of top segments to analyze (default: 5)",
    )
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--output", help="Save output to file")

    args = parser.parse_args()

    if not os.path.exists(args.csv):
        print(f"Error: File not found: {args.csv}", file=sys.stderr)
        sys.exit(1)

    try:
        result = analyze_survey(
            csv_path=args.csv,
            pain_columns=args.pain_columns,
            top_segments=args.top_segments,
        )
    except ValueError as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    # Output.  Files are written as UTF-8 explicitly: the text report
    # contains non-ASCII characters (e.g. 📦), which would crash under a
    # cp1252 default locale.
    if args.json:
        output = json.dumps(asdict(result), indent=2, default=str)
        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(output)
            print(f"Output saved to {args.output}", file=sys.stderr)
        else:
            print(output)
    else:
        text_output = format_analysis_text(result)
        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(text_output)
            print(f"Output saved to {args.output}", file=sys.stderr)
        else:
            print(text_output)


if __name__ == "__main__":
    main()