/ conversion-ops / survey_lead_magnet.py
survey_lead_magnet.py
  1  #!/usr/bin/env python3
  2  """
  3  Survey-to-Lead-Magnet Engine
  4  ==============================
  5  Takes survey response data (CSV), segments respondents by pain point clusters,
  6  ranks segments by size and commercial potential, and auto-generates lead magnet
  7  briefs targeting each segment.
  8  
  9  Usage:
 10      python survey_lead_magnet.py --csv survey_responses.csv
 11      python survey_lead_magnet.py --csv survey.csv --pain-columns "biggest_challenge" "top_frustration"
 12      python survey_lead_magnet.py --csv survey.csv --top-segments 5 --json
 13      python survey_lead_magnet.py --csv survey.csv --output lead_magnets.json
 14  """
 15  
 16  import argparse
 17  import csv
 18  import json
 19  import os
 20  import re
 21  import sys
 22  from collections import Counter
 23  from dataclasses import dataclass, field, asdict
 24  from typing import Optional
 25  
 26  import numpy as np
 27  import pandas as pd
 28  from sklearn.feature_extraction.text import TfidfVectorizer
 29  from sklearn.cluster import KMeans
 30  from sklearn.metrics import silhouette_score
 31  
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Columns that likely contain pain point / challenge responses.
# Matched case-insensitively against column *names* in detect_pain_columns().
PAIN_COLUMN_PATTERNS = re.compile(
    r"(challenge|pain|frustrat|struggle|problem|difficult|obstacle|"
    r"barrier|concern|issue|blocker|worry|fear|hard|tough|"
    r"biggest|main|top|primary|key|major|worst)",
    re.IGNORECASE,
)

# Words that signal commercial intent / buying readiness.
# Counted per response in score_commercial_potential(); an average of ~4
# hits per response maps to the maximum score of 100.
COMMERCIAL_SIGNALS = re.compile(
    r"\b(budget|cost|price|invest|spend|pay|afford|roi|revenue|"
    r"software|tool|platform|solution|vendor|agency|consultant|"
    r"hire|outsource|automate|scale|grow|implement|upgrade|"
    r"need|want|looking for|searching|evaluating|considering)\b",
    re.IGNORECASE,
)

# Lead magnet format heuristics — trigger words tallied in recommend_format()
# to pick the best-fitting format key for a segment.
FORMAT_KEYWORDS = {
    "guide": ["understand", "learn", "how", "why", "strategy", "approach", "framework", "concept", "complex"],
    "checklist": ["process", "steps", "workflow", "setup", "launch", "implement", "execute", "routine", "daily"],
    "template": ["create", "write", "build", "design", "plan", "proposal", "email", "message", "document"],
    "calculator": ["cost", "budget", "roi", "numbers", "forecast", "estimate", "pricing", "revenue", "metrics"],
    "swipe_file": ["examples", "inspiration", "copy", "ads", "headlines", "subject lines", "creative", "ideas"],
}

# Stopwords for clustering (extend sklearn's default English list inside
# cluster_responses()) — filler words common in free-text survey answers.
EXTRA_STOPWORDS = [
    "really", "just", "like", "thing", "things", "lot", "also",
    "get", "getting", "got", "know", "dont", "don't", "can't",
    "want", "need", "think", "feel", "make", "much", "many",
    "very", "would", "could", "should", "way", "able",
    "one", "two", "first", "new", "good", "bad", "hard",
    "well", "time", "still", "even", "right", "going",
]
 71  
 72  
 73  # ---------------------------------------------------------------------------
 74  # Data Classes
 75  # ---------------------------------------------------------------------------
 76  
@dataclass
class PainSegment:
    """One cluster of survey respondents sharing a pain-point theme."""

    segment_id: int  # 1-based; re-numbered by rank after sorting in analyze_survey()
    theme: str  # human-readable label built from the top keywords
    top_keywords: list  # highest-weighted TF-IDF terms for the cluster
    respondent_count: int  # number of responses assigned to this cluster
    respondent_pct: float  # share of all non-empty responses, 0-100
    commercial_score: float  # 0-100
    sample_responses: list  # up to 5 raw combined responses from the cluster
    representative_quotes: list  # up to 3 responses closest to ~80 chars
 87  
 88  
@dataclass
class LeadMagnetBrief:
    """Production-ready brief for one lead magnet targeting a PainSegment."""

    segment_id: int  # matches PainSegment.segment_id
    segment_theme: str
    title: str
    format: str  # display label from FORMAT_LABELS (e.g. "Comprehensive Guide"), not the raw key
    hook: str  # landing-page opening copy
    outline: list  # section-by-section content outline
    target_cta: str  # call-to-action text for the landing page
    distribution_channel: str  # recommended primary promotion channel
    viral_potential: int  # 0-100
    conversion_potential: int  # 0-100
    combined_score: float  # 0.4 * viral_potential + 0.6 * conversion_potential
    implementation_notes: str
103  
104  
@dataclass
class AnalysisResult:
    """Aggregate output of analyze_survey(); JSON-serializable via asdict()."""

    total_respondents: int  # number of rows in the input CSV
    columns_analyzed: list  # pain columns actually used for extraction
    segments: list  # PainSegment dicts (already asdict()-converted)
    lead_magnets: list  # LeadMagnetBrief dicts, sorted by combined_score desc
    implementation_roadmap: list  # priority-ordered dicts (title/format/effort/score)
112  
113  
114  # ---------------------------------------------------------------------------
115  # Data Ingestion
116  # ---------------------------------------------------------------------------
117  
def load_survey_data(csv_path: str) -> pd.DataFrame:
    """Load a survey CSV, retrying with progressively laxer encodings.

    Tries utf-8, utf-8-sig, latin-1, then cp1252. Only decoding failures
    trigger a retry; a structurally malformed CSV (ParserError) is not an
    encoding problem, so retrying other encodings would just mask the real
    error — it is surfaced immediately instead.

    Args:
        csv_path: Path to the survey responses CSV.

    Returns:
        The parsed DataFrame.

    Raises:
        ValueError: if the file cannot be decoded with any known encoding,
            or if the CSV is malformed (cause is chained for debugging).
    """
    last_err = None
    for encoding in ["utf-8", "utf-8-sig", "latin-1", "cp1252"]:
        try:
            return pd.read_csv(csv_path, encoding=encoding)
        except UnicodeDecodeError as err:
            # Wrong encoding — remember the error and try the next one.
            last_err = err
        except pd.errors.ParserError as err:
            # Structural problem; no encoding will fix it. Fail fast.
            raise ValueError(f"Could not read CSV file: {csv_path}") from err
    raise ValueError(f"Could not read CSV file: {csv_path}") from last_err
127  
128  
def detect_pain_columns(df: pd.DataFrame) -> list:
    """Auto-detect columns that likely contain pain point / challenge data.

    First pass matches column names against PAIN_COLUMN_PATTERNS; if nothing
    matches, falls back to object-dtype columns whose average text length
    suggests free-text answers rather than short categorical picks.
    """
    matched = [col for col in df.columns if PAIN_COLUMN_PATTERNS.search(col)]
    if matched:
        return matched

    fallback = []
    for col in df.columns:
        if df[col].dtype != object:
            continue
        # NaN mean (fully empty column) compares False against 30, so it's skipped.
        if df[col].dropna().astype(str).str.len().mean() > 30:
            fallback.append(col)
    return fallback
145  
146  
def extract_responses(df: pd.DataFrame, pain_columns: list) -> list:
    """Combine each respondent's answers across pain columns into one string.

    Rows with no usable text in any pain column are dropped entirely.
    """
    combined_texts = []
    for _, row in df.iterrows():
        answers = []
        for column in pain_columns:
            value = row.get(column)
            if pd.notna(value):
                text = str(value).strip()
                if text:
                    answers.append(text)
        if answers:
            combined_texts.append(" ".join(answers))
    return combined_texts
160  
161  
162  # ---------------------------------------------------------------------------
163  # Clustering
164  # ---------------------------------------------------------------------------
165  
def preprocess_text(text: str) -> str:
    """Lowercase, strip everything but letters, and collapse whitespace."""
    letters_only = re.sub(r"[^a-z\s]", " ", text.lower())
    # split()/join collapses any whitespace runs and trims both ends.
    return " ".join(letters_only.split())
172  
173  
def cluster_responses(responses: list, n_clusters: Optional[int] = None) -> tuple:
    """
    Cluster responses using TF-IDF + KMeans.

    Args:
        responses: Raw combined response strings (one per respondent).
        n_clusters: Fixed cluster count; when None, k is chosen by the best
            silhouette score over k = 2..min(10, len(responses)//3, n_docs-1)
            (defaulting to 3 if every candidate k fails to score).

    Returns (labels, vectorizer, tfidf_matrix, n_clusters).
    Falls back to a single cluster — with vectorizer and matrix as None —
    when there are fewer than 5 responses or vectorization fails.
    """
    if len(responses) < 5:
        # Too few responses — treat as single cluster
        return [0] * len(responses), None, None, 1

    cleaned = [preprocess_text(r) for r in responses]

    # Build TF-IDF matrix
    # Extend sklearn's built-in English stopword list with survey filler words.
    stop_words = list(TfidfVectorizer(stop_words="english").get_stop_words()) + EXTRA_STOPWORDS
    vectorizer = TfidfVectorizer(
        max_features=500,
        stop_words=stop_words,
        min_df=2 if len(responses) > 20 else 1,  # require repeated terms only on larger samples
        max_df=0.85,  # drop near-ubiquitous terms
        ngram_range=(1, 2),
    )

    try:
        tfidf_matrix = vectorizer.fit_transform(cleaned)
    except ValueError:
        # All responses too similar or empty after preprocessing
        return [0] * len(responses), None, None, 1

    # Auto-determine cluster count if not specified
    if n_clusters is None:
        max_k = min(10, len(responses) // 3, tfidf_matrix.shape[0] - 1)
        max_k = max(2, max_k)

        best_k = 3
        best_score = -1

        for k in range(2, max_k + 1):
            try:
                km = KMeans(n_clusters=k, random_state=42, n_init=10)
                labels = km.fit_predict(tfidf_matrix)
                score = silhouette_score(tfidf_matrix, labels)
                if score > best_score:
                    best_score = score
                    best_k = k
            except ValueError:
                # This k is infeasible for the data — skip it and try the next.
                continue

        n_clusters = best_k

    # Final fit with the chosen k; fixed seed keeps segments reproducible.
    km = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    labels = km.fit_predict(tfidf_matrix)

    return labels, vectorizer, tfidf_matrix, n_clusters
226  
227  
def extract_cluster_keywords(
    vectorizer: TfidfVectorizer,
    tfidf_matrix,
    labels: list,
    cluster_id: int,
    top_n: int = 8,
) -> list:
    """Return the highest-weighted TF-IDF terms for one cluster.

    A None vectorizer (the single-cluster fallback path) yields the
    placeholder keyword list ["general"].
    """
    if vectorizer is None:
        return ["general"]

    member_rows = tfidf_matrix[np.array(labels) == cluster_id]
    if member_rows.shape[0] == 0:
        return []

    # Mean TF-IDF weight per term across the cluster's rows.
    avg_weights = member_rows.mean(axis=0).A1
    terms = vectorizer.get_feature_names_out()
    # Descending by weight; zero-weight terms are filtered out below.
    ranked = avg_weights.argsort()[::-1][:top_n]

    return [terms[i] for i in ranked if avg_weights[i] > 0]
250  
251  
def generate_theme_label(keywords: list) -> str:
    """Turn the top cluster keywords into a short human-readable theme name."""
    if not keywords:
        return "General Challenges"
    # Top 3 keywords, title-cased, underscores expanded, joined with " & ".
    pretty = [kw.replace("_", " ").title() for kw in keywords[:3]]
    return " & ".join(pretty)
262  
263  
264  # ---------------------------------------------------------------------------
265  # Scoring
266  # ---------------------------------------------------------------------------
267  
def score_commercial_potential(responses: list) -> float:
    """Score how commercially valuable a segment is (0-100).

    Counts COMMERCIAL_SIGNALS matches across the segment's responses;
    an average of 4+ signals per response caps the score at 100.
    """
    if not responses:
        return 0

    signal_count = sum(len(COMMERCIAL_SIGNALS.findall(text)) for text in responses)
    scaled = min(100, (signal_count / len(responses)) * 25)
    return round(scaled, 1)
282  
283  
def recommend_format(keywords: list, responses: list) -> str:
    """Recommend the best lead magnet format based on pain cluster.

    Tallies FORMAT_KEYWORDS trigger words over the cluster keywords plus the
    first ten responses; falls back to "guide" when nothing matches.
    """
    haystack = (" ".join(keywords) + " " + " ".join(responses[:10])).lower()

    tallies = {
        fmt: sum(word in haystack for word in triggers)
        for fmt, triggers in FORMAT_KEYWORDS.items()
    }
    # max() keeps the first key on ties, matching FORMAT_KEYWORDS order.
    winner = max(tallies, key=tallies.get)
    return winner if tallies[winner] else "guide"
298  
299  
def score_viral_potential(title: str, fmt: str, segment_size_pct: float) -> int:
    """Score how likely a lead magnet is to be shared (0-100)."""
    # Templates/swipe files travel better than long-form guides.
    format_bonus = {
        "template": 15,
        "checklist": 12,
        "swipe_file": 18,
        "calculator": 10,
        "guide": 5,
    }

    total = 30  # baseline
    total += min(25, segment_size_pct * 1.5)  # bigger audience, more sharing
    total += format_bonus.get(fmt, 0)

    # Numbered titles and power words ("ultimate", "proven", ...) share better.
    if re.search(r"\d+", title):
        total += 10
    if re.search(r"(ultimate|complete|definitive|proven|secret)", title, re.IGNORECASE):
        total += 5

    return min(100, int(total))
324  
325  
def score_conversion_potential(commercial_score: float, segment_size_pct: float, fmt: str) -> int:
    """Score how likely a lead magnet is to convert to leads/customers (0-100)."""
    # Interactive/actionable formats convert better than long reads.
    format_bonus = {
        "calculator": 15,
        "template": 12,
        "checklist": 10,
        "guide": 5,
        "swipe_file": 8,
    }

    total = (
        20  # baseline
        + commercial_score * 0.4  # buying intent is the strongest signal
        + min(15, segment_size_pct * 0.8)  # size helps, with diminishing returns
        + format_bonus.get(fmt, 0)
    )
    return min(100, int(total))
347  
348  
349  # ---------------------------------------------------------------------------
350  # Lead Magnet Brief Generator
351  # ---------------------------------------------------------------------------
352  
# Human-readable display names for the internal format keys; stored in
# LeadMagnetBrief.format and echoed in implementation notes.
FORMAT_LABELS = {
    "guide": "Comprehensive Guide",
    "checklist": "Actionable Checklist",
    "template": "Ready-to-Use Template",
    "calculator": "Interactive Calculator",
    "swipe_file": "Swipe File Collection",
}
360  
361  
def generate_title(theme: str, fmt: str, keywords: list) -> str:
    """Generate a lead magnet title for the given theme and format.

    One fixed title pattern per format; unknown formats fall back to the
    guide pattern.
    """
    if fmt == "checklist":
        # Claimed step count scales with keyword richness, capped at 15.
        step_count = min(15, 5 + len(keywords))
        return f"The {theme} Checklist: {step_count} Steps to Success"
    if fmt == "template":
        return f"{theme} Template Pack: Copy, Customize, Launch"
    if fmt == "calculator":
        return f"{theme} Calculator: Know Your Numbers in 5 Minutes"
    if fmt == "swipe_file":
        return f"50+ {theme} Examples That Actually Work"
    # "guide" and any unrecognized format.
    return f"The Complete Guide to {theme}"
394  
395  
def generate_hook(theme: str, keywords: list, sample_responses: list) -> str:
    """Generate a compelling hook for the lead magnet.

    Prefers quoting a medium-length (21-149 char) pain phrase from the first
    five sample responses; otherwise falls back to generic theme copy.
    """
    quote = next(
        (resp for resp in sample_responses[:5] if 20 < len(resp) < 150),
        "",
    )

    if not quote:
        return (
            f"Most teams waste months trying to figure out {theme.lower()} on their own. "
            f"This resource distills proven strategies into actionable steps you can "
            f"implement today."
        )

    focus = keywords[0] if keywords else "this challenge"
    ellipsis = "..." if len(quote) > 80 else ""
    return (
        f"If you've ever thought \"{quote[:80]}{ellipsis}\" "
        f"— this is for you. We analyzed hundreds of responses and found the exact "
        f"patterns that separate those who overcome {focus} "
        f"from those who stay stuck."
    )
420  
421  
def generate_outline(theme: str, fmt: str, keywords: list) -> list:
    """Generate a content outline for the lead magnet.

    Always starts with a "why it matters" section; format-specific sections
    follow. Unknown formats get only the opening section.
    """

    def kw(idx: int, fallback: str) -> str:
        # Title-cased cluster keyword at idx, or the fallback when absent.
        return keywords[idx].title() if len(keywords) > idx else fallback

    sections = [f"Section 1: Why {theme} Matters Now (The Landscape)"]

    per_format = {
        "guide": [
            f"Section 2: The Core Framework for {kw(0, 'Success')}",
            "Section 3: Common Mistakes (And How to Avoid Them)",
            "Section 4: Step-by-Step Implementation Plan",
            "Section 5: Tools & Resources You'll Need",
            "Section 6: Case Studies — What Good Looks Like",
            "Section 7: Quick-Start Action Plan",
        ],
        "checklist": [
            "Section 2: Pre-Work — What to Have Ready",
            f"Section 3: Phase 1 — Foundation ({kw(0, 'Setup')})",
            f"Section 4: Phase 2 — Execution ({kw(1, 'Build')})",
            "Section 5: Phase 3 — Optimization & Measurement",
            "Section 6: Common Gotchas to Watch For",
        ],
        "template": [
            "Section 2: How to Use This Template",
            f"Section 3: Template A — {kw(0, 'Standard')} Version",
            "Section 4: Template B — Advanced Version",
            "Section 5: Customization Guide",
            "Section 6: Real Examples (Filled-In Templates)",
        ],
        "calculator": [
            "Section 2: Key Metrics You Need to Track",
            "Section 3: Input Your Numbers",
            "Section 4: Understanding Your Results",
            "Section 5: Benchmarks — How You Compare",
            "Section 6: Action Steps Based on Your Score",
        ],
        "swipe_file": [
            "Section 2: What Makes These Examples Work",
            f"Section 3: Category A — {kw(0, 'Top Performers')}",
            f"Section 4: Category B — {kw(1, 'Rising Stars')}",
            "Section 5: How to Adapt These for Your Business",
            "Section 6: Blank Templates to Get Started",
        ],
    }

    sections.extend(per_format.get(fmt, []))
    return sections
469  
470  
def generate_cta(fmt: str, theme: str) -> str:
    """Generate the target CTA for the lead magnet landing page."""
    if fmt == "guide":
        return f"Download the Free {theme} Guide"
    if fmt == "checklist":
        return f"Get Your Free {theme} Checklist"
    if fmt == "template":
        return f"Grab the Free {theme} Templates"
    if fmt == "calculator":
        return f"Try the Free {theme} Calculator"
    if fmt == "swipe_file":
        return f"Download {theme} Swipe File"
    # Generic fallback for unrecognized formats.
    return f"Get Free {theme} Resource"
481  
482  
def recommend_distribution(fmt: str, segment_size_pct: float) -> str:
    """Recommend primary distribution channel by segment size.

    Note: fmt is currently unused; kept for interface stability.
    """
    tiers = (
        (25, "Homepage popup + dedicated landing page + paid social"),
        (15, "Blog content upgrade + email nurture sequence"),
        (8, "Targeted blog posts + LinkedIn organic"),
    )
    for threshold, channel in tiers:
        if segment_size_pct > threshold:
            return channel
    return "Niche community posts + targeted email segment"
493  
494  
def build_lead_magnet_brief(segment: PainSegment) -> LeadMagnetBrief:
    """Generate a complete lead magnet brief for a pain segment.

    Derives the format, copy (title/hook/outline/CTA), distribution channel,
    and scores from the segment's keywords, sample responses, size, and
    commercial score.
    """
    fmt = recommend_format(segment.top_keywords, segment.sample_responses)
    title = generate_title(segment.theme, fmt, segment.top_keywords)
    hook = generate_hook(segment.theme, segment.top_keywords, segment.sample_responses)
    outline = generate_outline(segment.theme, fmt, segment.top_keywords)
    cta = generate_cta(fmt, segment.theme)
    channel = recommend_distribution(fmt, segment.respondent_pct)

    viral = score_viral_potential(title, fmt, segment.respondent_pct)
    conversion = score_conversion_potential(
        segment.commercial_score, segment.respondent_pct, fmt,
    )
    # Conversion is weighted above virality (0.6 vs 0.4) in the combined score.
    combined = (viral * 0.4 + conversion * 0.6)

    impl_notes = (
        f"Target segment: {segment.respondent_count} respondents ({segment.respondent_pct:.1f}% of total). "
        f"Commercial intent score: {segment.commercial_score}/100. "
        f"Recommended format: {FORMAT_LABELS.get(fmt, fmt)}. "
        f"Estimated production time: {'1-2 days' if fmt in ('checklist', 'template') else '3-5 days'}."
    )

    return LeadMagnetBrief(
        segment_id=segment.segment_id,
        segment_theme=segment.theme,
        title=title,
        format=FORMAT_LABELS.get(fmt, fmt),  # display label, not the raw key
        hook=hook,
        outline=outline,
        target_cta=cta,
        distribution_channel=channel,
        viral_potential=viral,
        conversion_potential=conversion,
        combined_score=round(combined, 1),
        implementation_notes=impl_notes,
    )
531  
532  
533  # ---------------------------------------------------------------------------
534  # Analysis Pipeline
535  # ---------------------------------------------------------------------------
536  
def analyze_survey(
    csv_path: str,
    pain_columns: Optional[list] = None,
    top_segments: int = 5,
) -> AnalysisResult:
    """Full analysis pipeline: load → cluster → score → generate briefs.

    Args:
        csv_path: Path to the survey responses CSV (one respondent per row).
        pain_columns: Explicit column names to analyze; fuzzy substring
            matching is attempted for names not found verbatim. Auto-detected
            when omitted.
        top_segments: Maximum number of segments to keep after ranking.

    Returns:
        AnalysisResult whose segments and lead_magnets are already converted
        to plain dicts via asdict().

    Raises:
        ValueError: unreadable CSV, unknown column, no detectable pain
            columns, or no non-empty responses.
    """

    # Load data
    df = load_survey_data(csv_path)
    total_respondents = len(df)

    # Detect or use specified pain columns
    if pain_columns:
        # Validate columns exist
        missing = [c for c in pain_columns if c not in df.columns]
        if missing:
            # Try fuzzy match: case-insensitive substring against real headers.
            actual_cols = []
            for pc in pain_columns:
                matches = [c for c in df.columns if pc.lower() in c.lower()]
                if matches:
                    actual_cols.append(matches[0])
                else:
                    raise ValueError(f"Column not found: '{pc}'. Available: {list(df.columns)}")
            pain_columns = actual_cols
    else:
        pain_columns = detect_pain_columns(df)
        if not pain_columns:
            raise ValueError(
                "Could not auto-detect pain point columns. "
                "Use --pain-columns to specify which columns contain challenge/pain responses.\n"
                f"Available columns: {list(df.columns)}"
            )

    # Progress goes to stderr so stdout stays clean for piped output.
    print(f"Analyzing columns: {pain_columns}", file=sys.stderr)

    # Extract responses
    responses = extract_responses(df, pain_columns)
    if not responses:
        raise ValueError("No non-empty responses found in the specified columns")

    print(f"Found {len(responses)} responses from {total_respondents} respondents", file=sys.stderr)

    # Cluster: small samples get an explicit k cap; larger samples let
    # cluster_responses() pick k via silhouette score.
    labels, vectorizer, tfidf_matrix, n_clusters = cluster_responses(
        responses, n_clusters=min(top_segments, len(responses) // 2) if len(responses) < 30 else None,
    )

    # Build segments
    segments = []
    for cluster_id in range(n_clusters):
        mask = [i for i, l in enumerate(labels) if l == cluster_id]
        cluster_responses_list = [responses[i] for i in mask]

        keywords = extract_cluster_keywords(vectorizer, tfidf_matrix, labels, cluster_id)
        theme = generate_theme_label(keywords)
        commercial = score_commercial_potential(cluster_responses_list)

        # Pick representative quotes (medium length, most representative)
        quotes = sorted(
            cluster_responses_list,
            key=lambda r: abs(len(r) - 80),  # prefer ~80 char responses
        )[:3]

        segment = PainSegment(
            segment_id=cluster_id + 1,
            theme=theme,
            top_keywords=keywords,
            respondent_count=len(mask),
            respondent_pct=round(len(mask) / len(responses) * 100, 1),
            commercial_score=commercial,
            sample_responses=cluster_responses_list[:5],
            representative_quotes=quotes,
        )
        segments.append(segment)

    # Sort by size × commercial score (+10 so zero-score segments still rank by size)
    segments.sort(key=lambda s: s.respondent_count * (s.commercial_score + 10), reverse=True)

    # Limit to top N
    segments = segments[:top_segments]

    # Re-number after sorting
    for i, seg in enumerate(segments):
        seg.segment_id = i + 1

    # Generate lead magnet briefs
    lead_magnets = []
    for seg in segments:
        brief = build_lead_magnet_brief(seg)
        lead_magnets.append(brief)

    # Sort briefs by combined score
    lead_magnets.sort(key=lambda b: b.combined_score, reverse=True)

    # Implementation roadmap
    roadmap = []
    for i, lm in enumerate(lead_magnets, 1):
        roadmap.append({
            "priority": i,
            "title": lm.title,
            "format": lm.format,
            # lm.segment_id was assigned from the segments list order before
            # the briefs were re-sorted, so segment_id - 1 still indexes the
            # correct PainSegment here.
            "segment_size": f"{lm.segment_theme} ({segments[lm.segment_id - 1].respondent_pct:.1f}%)",
            "combined_score": lm.combined_score,
            "estimated_effort": "1-2 days" if "Checklist" in lm.format or "Template" in lm.format else "3-5 days",
        })

    return AnalysisResult(
        total_respondents=total_respondents,
        columns_analyzed=pain_columns,
        segments=[asdict(s) for s in segments],
        lead_magnets=[asdict(lm) for lm in lead_magnets],
        implementation_roadmap=roadmap,
    )
651  
652  
653  # ---------------------------------------------------------------------------
654  # Output Formatters
655  # ---------------------------------------------------------------------------
656  
def format_analysis_text(result: AnalysisResult) -> str:
    """Format analysis as human-readable text.

    Renders three sections — pain segments, lead magnet briefs, and the
    implementation roadmap. Note that result.segments / result.lead_magnets
    hold dicts (converted via asdict() in analyze_survey), hence the
    subscript access below.
    """
    lines = []
    lines.append("=" * 70)
    lines.append("  SURVEY-TO-LEAD-MAGNET ANALYSIS")
    lines.append("=" * 70)
    lines.append("")
    lines.append(f"  Total respondents: {result.total_respondents}")
    lines.append(f"  Columns analyzed: {', '.join(result.columns_analyzed)}")
    lines.append(f"  Segments identified: {len(result.segments)}")
    lines.append("")

    # Segments
    lines.append("-" * 70)
    lines.append("  PAIN POINT SEGMENTS (ranked by opportunity)")
    lines.append("-" * 70)

    for seg in result.segments:
        lines.append("")
        lines.append(f"  Segment #{seg['segment_id']}: {seg['theme']}")
        lines.append(f"  Respondents: {seg['respondent_count']} ({seg['respondent_pct']}%)")
        lines.append(f"  Commercial Score: {seg['commercial_score']}/100")
        lines.append(f"  Top Keywords: {', '.join(seg['top_keywords'][:5])}")
        lines.append("")
        lines.append("  Representative Quotes:")
        for q in seg["representative_quotes"]:
            # Truncate long quotes to keep the report scannable.
            lines.append(f"    \"{q[:100]}{'...' if len(q) > 100 else ''}\"")
        lines.append("")

    # Lead Magnet Briefs
    lines.append("=" * 70)
    lines.append("  LEAD MAGNET BRIEFS (ranked by combined score)")
    lines.append("=" * 70)

    for lm in result.lead_magnets:
        lines.append("")
        lines.append(f"  📦 {lm['title']}")
        lines.append(f"  Format: {lm['format']}")
        lines.append(f"  Segment: {lm['segment_theme']}")
        lines.append(f"  Viral Potential: {lm['viral_potential']}/100  |  Conversion Potential: {lm['conversion_potential']}/100")
        lines.append(f"  Combined Score: {lm['combined_score']}/100")
        lines.append("")
        lines.append(f"  Hook: {lm['hook'][:200]}{'...' if len(lm['hook']) > 200 else ''}")
        lines.append("")
        lines.append("  Outline:")
        for section in lm["outline"]:
            lines.append(f"    • {section}")
        lines.append("")
        lines.append(f"  CTA: {lm['target_cta']}")
        lines.append(f"  Distribution: {lm['distribution_channel']}")
        lines.append(f"  Notes: {lm['implementation_notes']}")
        lines.append("")
        lines.append("  " + "-" * 50)

    # Roadmap
    lines.append("")
    lines.append("=" * 70)
    lines.append("  IMPLEMENTATION ROADMAP")
    lines.append("=" * 70)
    lines.append("")

    for item in result.implementation_roadmap:
        lines.append(f"  #{item['priority']}  [{item['estimated_effort']}]  {item['title']}")
        lines.append(f"       Format: {item['format']}  |  Segment: {item['segment_size']}  |  Score: {item['combined_score']}")
        lines.append("")

    lines.append("=" * 70)
    return "\n".join(lines)
725  
726  
727  # ---------------------------------------------------------------------------
728  # Main
729  # ---------------------------------------------------------------------------
730  
def _write_or_print(text: str, output_path) -> None:
    """Write *text* to output_path if given, else print it to stdout.

    Files are written as UTF-8 explicitly: the text report contains
    non-ASCII characters (e.g. 📦 and •) that would raise UnicodeEncodeError
    under a cp1252/latin-1 locale default.
    """
    if output_path:
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(text)
        print(f"Output saved to {output_path}", file=sys.stderr)
    else:
        print(text)


def main():
    """CLI entry point: parse args, run the pipeline, emit text or JSON."""
    parser = argparse.ArgumentParser(
        description="Survey-to-Lead-Magnet Engine — Turn survey data into targeted lead magnet briefs",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python survey_lead_magnet.py --csv survey_responses.csv
  python survey_lead_magnet.py --csv survey.csv --pain-columns "biggest_challenge" "frustrations"
  python survey_lead_magnet.py --csv survey.csv --top-segments 3 --json --output briefs.json

CSV Format:
  Questions as column headers, one respondent per row.
  Works with exports from Typeform, Google Forms, SurveyMonkey, etc.
        """,
    )
    parser.add_argument("--csv", required=True, help="Path to survey responses CSV")
    parser.add_argument(
        "--pain-columns", nargs="+",
        help="Column names containing pain point / challenge responses (auto-detected if not specified)",
    )
    parser.add_argument(
        "--top-segments", type=int, default=5,
        help="Number of top segments to analyze (default: 5)",
    )
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--output", help="Save output to file")

    args = parser.parse_args()

    if not os.path.exists(args.csv):
        print(f"Error: File not found: {args.csv}", file=sys.stderr)
        sys.exit(1)

    try:
        result = analyze_survey(
            csv_path=args.csv,
            pain_columns=args.pain_columns,
            top_segments=args.top_segments,
        )
    except ValueError as e:
        # All expected pipeline failures surface as ValueError (see analyze_survey).
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    # Render once, then route to file or stdout via a single code path.
    if args.json:
        rendered = json.dumps(asdict(result), indent=2, default=str)
    else:
        rendered = format_analysis_text(result)
    _write_or_print(rendered, args.output)


if __name__ == "__main__":
    main()