Cradicle Explorer

/ revenue-intelligence / gong_insight_pipeline.py
gong_insight_pipeline.py
  1  #!/usr/bin/env python3
  2  """
  3  Gong-to-Insight Pipeline
  4  
  5  Extracts structured intelligence from sales call transcripts:
  6  - Objections (pricing, timing, competition, authority, need)
  7  - Buying signals (budget, timeline, decision maker, champion)
  8  - Competitive mentions (who, context)
  9  - Pricing discussions
 10  - Content topic suggestions from recurring patterns
 11  - Personalized follow-up drafts
 12  
 13  Works with Gong API or plain transcript files.
 14  
 15  Usage:
 16      python gong_insight_pipeline.py --file transcript.txt
 17      python gong_insight_pipeline.py --dir ./transcripts/
 18      python gong_insight_pipeline.py --gong --days 7
 19      python gong_insight_pipeline.py --file transcript.txt --content-topics --follow-ups
 20  """
 21  
import argparse
import json
import os
import re
import sys
from collections import Counter, defaultdict
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional
 31  
 32  # ---------------------------------------------------------------------------
 33  # Gong API client
 34  # ---------------------------------------------------------------------------
 35  
 36  # To use the Gong API:
 37  # 1. Set GONG_API_KEY (your Gong access key)
 38  # 2. Set GONG_API_BASE_URL (default: https://api.gong.io/v2)
 39  # 3. Generate API credentials in Gong > Settings > API
 40  
 41  GONG_API_KEY = os.environ.get("GONG_API_KEY", "")
 42  GONG_API_BASE_URL = os.environ.get("GONG_API_BASE_URL", "https://api.gong.io/v2")
 43  
 44  
 45  def _gong_headers() -> dict:
 46      """Build authorization headers for Gong API."""
 47      if not GONG_API_KEY:
 48          print("ERROR: GONG_API_KEY not set. Export it or pass --file/--dir instead.", file=sys.stderr)
 49          sys.exit(1)
 50      return {
 51          "Authorization": f"Bearer {GONG_API_KEY}",
 52          "Content-Type": "application/json",
 53      }
 54  
 55  
 56  def fetch_calls_from_gong(days: int = 7, call_id: Optional[str] = None) -> list[dict]:
 57      """
 58      Fetch call transcripts from Gong API.
 59  
 60      Returns list of dicts: [{"id": ..., "title": ..., "transcript": ..., "participants": [...]}]
 61  
 62      NOTE: This uses the Gong v2 API. You need:
 63      - API credentials with 'api:calls:read:transcript' scope
 64      - Calls must be processed (transcription complete)
 65      """
 66      try:
 67          import requests
 68      except ImportError:
 69          print("ERROR: 'requests' required for Gong API. Run: pip install requests", file=sys.stderr)
 70          sys.exit(1)
 71  
 72      headers = _gong_headers()
 73      calls = []
 74  
 75      if call_id:
 76          # Fetch a specific call
 77          # Step 1: Get call metadata
 78          resp = requests.get(f"{GONG_API_BASE_URL}/calls/{call_id}", headers=headers)
 79          resp.raise_for_status()
 80          call_data = resp.json()
 81  
 82          # Step 2: Get transcript
 83          transcript_resp = requests.post(
 84              f"{GONG_API_BASE_URL}/calls/transcript",
 85              headers=headers,
 86              json={"filter": {"callIds": [call_id]}},
 87          )
 88          transcript_resp.raise_for_status()
 89          transcript_data = transcript_resp.json()
 90  
 91          transcript_text = _assemble_transcript(transcript_data.get("callTranscripts", []))
 92          calls.append({
 93              "id": call_id,
 94              "title": call_data.get("metaData", {}).get("title", "Unknown"),
 95              "transcript": transcript_text,
 96              "participants": [p.get("name", "") for p in call_data.get("parties", [])],
 97          })
 98      else:
 99          # Fetch recent calls
100          from_dt = (datetime.utcnow() - timedelta(days=days)).strftime("%Y-%m-%dT%H:%M:%SZ")
101          to_dt = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
102  
103          # Step 1: List calls in date range
104          list_resp = requests.post(
105              f"{GONG_API_BASE_URL}/calls",
106              headers=headers,
107              json={"filter": {"fromDateTime": from_dt, "toDateTime": to_dt}},
108          )
109          list_resp.raise_for_status()
110          call_list = list_resp.json().get("calls", [])
111  
112          if not call_list:
113              print(f"No calls found in the last {days} days.", file=sys.stderr)
114              return []
115  
116          call_ids = [c["id"] for c in call_list]
117  
118          # Step 2: Batch fetch transcripts (Gong supports up to 100 per request)
119          for batch_start in range(0, len(call_ids), 100):
120              batch = call_ids[batch_start : batch_start + 100]
121              transcript_resp = requests.post(
122                  f"{GONG_API_BASE_URL}/calls/transcript",
123                  headers=headers,
124                  json={"filter": {"callIds": batch}},
125              )
126              transcript_resp.raise_for_status()
127              transcripts_by_id = {}
128              for ct in transcript_resp.json().get("callTranscripts", []):
129                  cid = ct.get("callId")
130                  text = "\n".join(
131                      f"{s.get('speakerName', 'Unknown')}: {' '.join(sent.get('text', '') for sent in s.get('sentences', []))}"
132                      for s in ct.get("transcript", [])
133                  )
134                  transcripts_by_id[cid] = text
135  
136              for c in call_list:
137                  if c["id"] in transcripts_by_id:
138                      calls.append({
139                          "id": c["id"],
140                          "title": c.get("title", "Unknown"),
141                          "transcript": transcripts_by_id[c["id"]],
142                          "participants": [p.get("name", "") for p in c.get("parties", [])],
143                      })
144  
145      return calls
146  
147  
148  def _assemble_transcript(call_transcripts: list) -> str:
149      """Assemble transcript text from Gong API response format."""
150      lines = []
151      for ct in call_transcripts:
152          for segment in ct.get("transcript", []):
153              speaker = segment.get("speakerName", "Unknown")
154              text = " ".join(s.get("text", "") for s in segment.get("sentences", []))
155              lines.append(f"{speaker}: {text}")
156      return "\n".join(lines)
157  
158  
159  # ---------------------------------------------------------------------------
160  # Transcript analysis engine
161  # ---------------------------------------------------------------------------
162  
163  # Objection patterns — maps regex patterns to objection categories
OBJECTION_PATTERNS = {
    # All patterns are case-insensitive and are applied with re.search to one
    # transcript line at a time. Short tokens are anchored with \b so they do
    # not fire inside longer, unrelated words.
    "pricing": [
        # \bexpensive\b: "inexpensive" must not count as a pricing objection.
        r"(?i)(too expensive|over budget|can't afford|cost(s)? too|cheaper|lower price|discount|pricing is|budget.*tight|price.*high|\bexpensive\b)",
        r"(?i)(what('s| is) the (price|cost|pricing)|how much (does|will|would)|investment.*significant)",
        # \broi\b: avoid hits inside words such as "heroic".
        r"(?i)(need to.*justify.*cost|hard to.*justify|\broi\b.*unclear|not sure.*worth)",
    ],
    "timing": [
        r"(?i)(not the right time|bad timing|next quarter|next year|revisit.*later|too soon|not ready|circle back|table this)",
        r"(?i)(busy.*right now|other priorities|roadmap.*full|backlog|bandwidth|tied up)",
        r"(?i)(maybe (in|after) (q[1-4]|january|february|march|april|may|june|july|august|september|october|november|december))",
    ],
    "competition": [
        # \bvs: "devs ", "TVs " etc. previously matched the bare "vs " token.
        r"(?i)(already (using|working with|have)|current (vendor|provider|partner|agency)|locked in|contract.*with|compared to|\bvs\.?\s)",
        r"(?i)(what makes you different|why.*switch|competitor|alternative|other option|looking at.*other)",
    ],
    "authority": [
        r"(?i)(need to (talk to|run.*by|check with|get approval|ask) (my|the|our))",
        # \bboard\b / \bboss\b: "onboarding", "dashboard", "whiteboard" are not
        # references to a board of directors.
        r"(?i)(not my (decision|call)|someone else|\bboss\b|manager|\bboard\b|committee|stakeholder.*approve)",
        r"(?i)(decision.*committee|buying committee|multiple stakeholders|procurement)",
    ],
    "need": [
        r"(?i)(don't (need|see the need|think we need)|not a priority|we're (fine|good|okay) (with|as)|status quo)",
        r"(?i)(what problem.*solve|why would we|not sure.*fit|doesn't apply|not relevant)",
        r"(?i)(happy with.*current|no pain|working well enough)",
    ],
}
190  
191  # Buying signal patterns
BUYING_SIGNAL_PATTERNS = {
    # All patterns are case-insensitive and are applied with re.search to one
    # transcript line at a time. Acronyms and short verbs are anchored with \b
    # because, unanchored, they match inside everyday words and inflate the
    # deal score.
    "budget_confirmed": [
        # Lookbehinds: "don't have budget" / "do not have budget" is the
        # opposite of a confirmed budget.
        r"(?i)(budget.*approved|(?<!n't )(?<!not )have.*budget|allocated.*budget|budget (is|of) \$|earmarked|set aside.*for)",
        # \bcan\b(?!'t): skip "can't invest", "cannot invest" and the "can"
        # inside "significant investment".
        r"(?i)(\bcan\b(?!'t).*invest|willing to (spend|invest|pay)|comfortable with.*price)",
    ],
    "timeline_mentioned": [
        r"(?i)(want.*by (q[1-4]|end of|january|february|march|april|may|june|july|august|september|october|november|december))",
        r"(?i)(need.*live by|launch.*by|deadline|go.?live|start (date|asap|immediately|next week|this month))",
        r"(?i)(sooner.*better|asap|urgent|time.?sensitive|quickly)",
    ],
    "decision_maker_engaged": [
        # Whole-word titles only ("cto" is inside "October"/"director", "vp"
        # inside "rsvp"), and the follow-up verb must start a word ("call" is
        # inside "basically").
        r"(?i)\b(ceo|cmo|cfo|cto|vp|vice president|chief|director|head of|svp|evp)s?\b.*\b(?:join|call|meeting|asked me)",
        r"(?i)(brought.*my (boss|manager|ceo|cmo)|loop(ed|ing) in|invited.*leadership)",
        # "sign ... off" as words, not "design ... offer".
        r"(?i)(decision maker|final say|\bsign(ed|s|ing)?\b.*\boff\b|\bsignoff\b|authorize)",
    ],
    "champion_identified": [
        r"(?i)(love (this|it|what)|really (like|impressed|excited)|sold on|big fan|advocate)",
        # \brally: "generally", "naturally", "literally" all contain "rally".
        r"(?i)(push.*internally|sell.*internally|convince.*team|champion|sponsor|\brally|get.*buy.?in)",
        r"(?i)(exactly what we need|this solves|perfect fit|game.?changer)",
    ],
    "next_steps_agreed": [
        r"(?i)(next step|follow.?up|send.*proposal|schedule.*demo|set up.*call|let's (do|move|proceed))",
        # Whole-word acronyms: "nda" is inside "Monday", "calendar", "agenda".
        r"(?i)(send.*contract|\b(nda|msa|sow)s?\b|statement of work|proposal|agreement)",
    ],
}
217  
218  # Competitive mention patterns — extend with your actual competitors
KNOWN_COMPETITORS = [
    # Example names only (common B2B marketing / agency competitors).
    # Replace or extend this list with the competitors you actually face.
    "HubSpot",
    "Marketo",
    "Salesforce",
    "Drift",
    "6sense",
    "Demandbase",
    "ZoomInfo",
    "Apollo",
    "Outreach",
    "Salesloft",
    "Gartner",
    "Forrester",
    "WebFX",
    "Wpromote",
    "Tinuiti",
    "Power Digital",
    "Directive",
]
225  
# Case-insensitive patterns that mark a line as a pricing discussion; each is
# applied with re.search to a single transcript line.
PRICING_DISCUSSION_PATTERNS = [
    # Dollar amount with optional decimals and unit. Requires a digit right
    # after "$" (so "$," is not an amount), accepts any number of decimals so
    # "$1.5 million" is captured whole, and ends the unit on a word boundary so
    # "/month" is not cut to "/mo" and "$5 keeps" is not read as "$5 k".
    r"(?i)\$\d[\d,]*(\.\d+)?(\s*(k|K|thousand|million|per month|/mo|/month|annually|per year)\b)?",
    r"(?i)(pricing (model|structure|tier|plan)|pay.*per|subscription|retainer|flat fee|hourly rate)",
    # Word-anchored: "arrange"/"strange" contain "range", "underestimate"
    # contains "estimate".
    r"(?i)(proposal|\bquote|\bestimate|ballpark|\branges?\b|starting at|minimum.*engagement)",
    # \broi\b: "android", "heroic" contain "roi".
    r"(?i)(\broi\b|return on investment|payback|break.?even|cost.*benefit)",
]
232  
233  
def analyze_transcript(text: str, source_id: str = "unknown") -> dict:
    """
    Analyze a single transcript and return structured insights.

    The transcript is processed line by line against the module-level pattern
    tables (OBJECTION_PATTERNS, BUYING_SIGNAL_PATTERNS, KNOWN_COMPETITORS,
    PRICING_DISCUSSION_PATTERNS).

    Args:
        text: Full transcript text, one utterance per line.
        source_id: Identifier copied into the result (call ID or file stem).

    Returns:
        dict with keys: source_id, analyzed_at (UTC ISO timestamp ending in
        "Z"), objections, buying_signals, competitive_mentions,
        pricing_discussions, and summary.

        Objections, buying signals and pricing discussions keep at most one
        entry per transcript line (the first one recorded for that line).
        Competitive mentions keep one entry per (line, competitor), so a line
        naming several competitors reports all of them.
    """
    # Sentiment cues are matched as plain substrings of the lower-cased line.
    # Built once here instead of on every competitor hit.
    neg_words = ("problem", "issue", "bad", "worse", "hate", "frustrat", "limit", "lack", "miss", "fail", "leaving", "switch")
    pos_words = ("good", "great", "love", "like", "happy", "better", "best", "strong")

    # One compiled whole-word, case-insensitive matcher per competitor name.
    competitor_res = [
        (name, re.compile(r"\b" + re.escape(name) + r"\b", re.IGNORECASE))
        for name in KNOWN_COMPETITORS
    ]

    lines = text.strip().split("\n")
    insights = {
        "source_id": source_id,
        # Timezone-aware "now", rendered in the same naive-ISO + "Z" format the
        # pipeline has always emitted.
        "analyzed_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
        "objections": [],
        "buying_signals": [],
        "competitive_mentions": [],
        "pricing_discussions": [],
    }

    for i, line in enumerate(lines):
        line_number = i + 1
        quote = line.strip()
        # Previous, current and next line; slicing clamps at the list end.
        context_window = " ".join(lines[max(0, i - 1) : i + 2])

        # --- Objections ---
        for category, patterns in OBJECTION_PATTERNS.items():
            for pattern in patterns:
                match = re.search(pattern, line)
                if match:
                    insights["objections"].append({
                        "category": category,
                        "quote": quote,
                        "match": match.group(),
                        "line_number": line_number,
                        "context": context_window.strip(),
                    })
                    break  # One match per category per line

        # --- Buying Signals ---
        for signal_type, patterns in BUYING_SIGNAL_PATTERNS.items():
            for pattern in patterns:
                match = re.search(pattern, line)
                if match:
                    insights["buying_signals"].append({
                        "type": signal_type,
                        "quote": quote,
                        "match": match.group(),
                        "line_number": line_number,
                    })
                    break

        # --- Competitive Mentions ---
        line_lower = line.lower()
        named_on_line = set()  # guards against duplicate names in KNOWN_COMPETITORS
        for competitor, competitor_re in competitor_res:
            if competitor in named_on_line or not competitor_re.search(line):
                continue
            named_on_line.add(competitor)

            # Basic heuristic: negative cues win over positive ones.
            if any(w in line_lower for w in neg_words):
                sentiment = "negative"
            elif any(w in line_lower for w in pos_words):
                sentiment = "positive"
            else:
                sentiment = "neutral"

            insights["competitive_mentions"].append({
                "competitor": competitor,
                "context_sentiment": sentiment,
                "quote": quote,
                "line_number": line_number,
            })

        # --- Pricing Discussions ---
        for pattern in PRICING_DISCUSSION_PATTERNS:
            match = re.search(pattern, line)
            if match:
                insights["pricing_discussions"].append({
                    "quote": quote,
                    "match": match.group(),
                    "line_number": line_number,
                })
                break

    # Collapse to one entry per line for the quote-oriented lists.
    # Competitive mentions are deliberately NOT collapsed by line: doing so
    # discarded every competitor after the first one named on the same line.
    insights["objections"] = _dedupe_by_line(insights["objections"])
    insights["buying_signals"] = _dedupe_by_line(insights["buying_signals"])
    insights["pricing_discussions"] = _dedupe_by_line(insights["pricing_discussions"])

    # Summary stats
    insights["summary"] = {
        "total_objections": len(insights["objections"]),
        "objection_categories": dict(Counter(o["category"] for o in insights["objections"])),
        "total_buying_signals": len(insights["buying_signals"]),
        "signal_types": dict(Counter(s["type"] for s in insights["buying_signals"])),
        # dict.fromkeys de-duplicates while keeping first-mention order, so the
        # output is stable from run to run (a set's order is not).
        "competitors_mentioned": list(
            dict.fromkeys(c["competitor"] for c in insights["competitive_mentions"])
        ),
        "has_pricing_discussion": len(insights["pricing_discussions"]) > 0,
        "deal_temperature": _score_deal_temperature(insights),
    }

    return insights
330  
331  
332  def _dedupe_by_line(items: list) -> list:
333      """Remove duplicate entries for the same line number."""
334      seen = set()
335      deduped = []
336      for item in items:
337          key = item.get("line_number", id(item))
338          if key not in seen:
339              seen.add(key)
340              deduped.append(item)
341      return deduped
342  
343  
344  def _score_deal_temperature(insights: dict) -> str:
345      """
346      Score deal temperature based on signals vs objections.
347      Returns: hot, warm, cool, cold
348      """
349      signal_count = len(insights["buying_signals"])
350      objection_count = len(insights["objections"])
351  
352      # Weighted scoring
353      score = 0
354      for sig in insights["buying_signals"]:
355          weights = {
356              "budget_confirmed": 3,
357              "decision_maker_engaged": 3,
358              "timeline_mentioned": 2,
359              "champion_identified": 2,
360              "next_steps_agreed": 2,
361          }
362          score += weights.get(sig["type"], 1)
363  
364      for obj in insights["objections"]:
365          penalties = {
366              "need": -3,  # No need = worst signal
367              "authority": -1,
368              "timing": -1,
369              "pricing": -1,
370              "competition": -2,
371          }
372          score += penalties.get(obj["category"], -1)
373  
374      if score >= 6:
375          return "hot"
376      elif score >= 3:
377          return "warm"
378      elif score >= 0:
379          return "cool"
380      else:
381          return "cold"
382  
383  
384  # ---------------------------------------------------------------------------
385  # Content topic generator
386  # ---------------------------------------------------------------------------
387  
def generate_content_topics(all_insights: list[dict]) -> list[dict]:
    """
    Turn objections gathered across calls into content topic suggestions.

    Objection quotes are grouped by category over every insight dict. One
    suggestion is produced per category, carrying its frequency, up to three
    sample quotes, a topic template, recommended content types, a strategic
    angle and a priority (high: 5+, medium: 2-4, low: 1). Suggestions are
    returned most frequent first. Topic templates are returned with their
    ``{placeholders}`` unfilled.
    """
    # Objection category -> content strategy.
    playbook = {
        "pricing": {
            "topic_template": "ROI Calculator: How {product} Pays for Itself in {timeframe}",
            "content_types": ["blog post", "interactive calculator", "case study"],
            "angle": "Address pricing objections with concrete ROI proof",
        },
        "timing": {
            "topic_template": "The Cost of Waiting: What Happens When You Delay {solution}",
            "content_types": ["blog post", "email sequence", "one-pager"],
            "angle": "Create urgency with cost-of-inaction framing",
        },
        "competition": {
            "topic_template": "{product} vs {competitor}: Honest Comparison for {use_case}",
            "content_types": ["comparison page", "blog post", "battle card"],
            "angle": "Win competitive deals with transparent comparison content",
        },
        "authority": {
            "topic_template": "How to Build the Business Case for {product} (Template Included)",
            "content_types": ["template", "guide", "executive summary"],
            "angle": "Arm your champion with materials to sell internally",
        },
        "need": {
            "topic_template": "Why Top {role}s Are Prioritizing {category} in {year}",
            "content_types": ["thought leadership", "industry report", "webinar"],
            "angle": "Build awareness and urgency around the problem",
        },
    }

    quotes_by_category = defaultdict(list)
    for call in all_insights:
        for objection in call.get("objections", []):
            quotes_by_category[objection["category"]].append(objection["quote"])

    suggestions = []
    # Every bucket holds at least one quote: buckets only exist via append.
    for category, quotes in quotes_by_category.items():
        frequency = len(quotes)
        if frequency >= 5:
            priority = "high"
        elif frequency >= 2:
            priority = "medium"
        else:
            priority = "low"

        plan = playbook.get(category, {})
        suggestions.append({
            "category": category,
            "frequency": frequency,
            "sample_quotes": quotes[:3],  # first three examples
            "suggested_topic": plan.get("topic_template", f"Content addressing {category} objections"),
            "recommended_content_types": plan.get("content_types", ["blog post"]),
            "strategic_angle": plan.get("angle", ""),
            "priority": priority,
        })

    # Stable sort: categories with equal frequency keep first-seen order.
    return sorted(suggestions, key=lambda topic: topic["frequency"], reverse=True)
447  
448  
449  # ---------------------------------------------------------------------------
450  # Follow-up generator
451  # ---------------------------------------------------------------------------
452  
def generate_follow_ups(insights: dict) -> list[dict]:
    """
    Build follow-up email suggestions for one analyzed call.

    The first three objections each yield an ``objection_response`` suggestion
    (a generic fallback is used for unknown categories). Of the first two
    buying signals, ``champion_identified`` and ``next_steps_agreed`` each
    yield a suggestion; other signal types are skipped. Subjects and bodies
    are templates whose ``{placeholders}`` are left for the rep to fill in.
    """
    # Objection category -> email template.
    objection_templates = {
        "pricing": {
            "subject": "Quick thought on the investment discussion",
            "body": "Following up on our pricing conversation. I put together a quick ROI model based on what you shared about {context}. The numbers suggest a {x}x return in the first year. Want me to walk through it?",
            "asset": "ROI calculator or case study with similar company metrics",
        },
        "timing": {
            "subject": "Timing + what others in your position did",
            "body": "I hear you on timing. Quick data point: companies that started in a similar position to yours saw {metric} within the first 90 days. Happy to share the case study if helpful.",
            "asset": "Quick-win case study showing fast time-to-value",
        },
        "competition": {
            "subject": "Honest take on {competitor} vs us",
            "body": "You mentioned you're also looking at {competitor}. Totally fair. Here's where we genuinely win and where they might be a better fit. I'd rather you make the right call than the easy one.",
            "asset": "Competitive battle card or comparison one-pager",
        },
        "authority": {
            "subject": "Materials for your team's review",
            "body": "I know you need to loop in {stakeholder}. I put together a one-page executive summary that hits the points they'll care about most: ROI, timeline, and risk. Want me to send it over?",
            "asset": "Executive summary one-pager, tailored to stakeholder concerns",
        },
        "need": {
            "subject": "Something that might change the calculus",
            "body": "I appreciated the honest pushback on whether this is a priority right now. One thing I didn't get to share: {relevant_insight}. Might be worth a 10-minute follow-up if you're open to it.",
            "asset": "Industry report or benchmark data showing peer adoption",
        },
    }

    # Buying-signal type -> fixed parts of the follow-up it triggers.
    signal_plays = {
        "champion_identified": {
            "type": "champion_enablement",
            "subject": "Ammo for your internal pitch",
            "body": "You clearly get the value here. I want to make sure you have everything you need to bring the team along. Here's a deck you can customize + the key metrics that usually close the deal internally.",
            "asset": "Internal pitch deck template + metrics cheat sheet",
            "timing": "Send within 12 hours",
        },
        "next_steps_agreed": {
            "type": "momentum_keeper",
            "subject": "Recap + next steps locked in",
            "body": "Great call. Here's what we agreed on: {next_steps}. I'll have {deliverable} ready by {date}. Let me know if anything changes on your end.",
            "asset": "Meeting summary with action items",
            "timing": "Send within 2 hours of call",
        },
    }

    suggestions = []

    # Address the first three objections.
    for objection in insights.get("objections", [])[:3]:
        category = objection["category"]
        chosen = objection_templates.get(category, {})
        suggestions.append({
            "type": "objection_response",
            "objection_category": category,
            "trigger_quote": objection["quote"],
            "suggested_subject": chosen.get("subject", f"Following up on {category} discussion"),
            "suggested_body": chosen.get("body", "Following up on our conversation..."),
            "recommended_asset": chosen.get("asset", ""),
            "timing": "Send within 24 hours of call",
        })

    # Capitalize on the first two buying signals, where a play exists.
    for signal in insights.get("buying_signals", [])[:2]:
        play = signal_plays.get(signal["type"])
        if play is None:
            continue
        suggestions.append({
            "type": play["type"],
            "signal": signal["quote"],
            "suggested_subject": play["subject"],
            "suggested_body": play["body"],
            "recommended_asset": play["asset"],
            "timing": play["timing"],
        })

    return suggestions
522  
523  
524  # ---------------------------------------------------------------------------
525  # File I/O
526  # ---------------------------------------------------------------------------
527  
def load_transcript_file(filepath: str) -> dict:
    """Load a transcript from a UTF-8 text file.

    Args:
        filepath: Path to the transcript file.

    Returns:
        A call dict: the file stem is used as both ``id`` and ``title``, the
        file contents as ``transcript``, and ``participants`` is empty.

    Exits the process with status 1 (after printing an error to stderr) when
    the path is missing or is not a regular file.
    """
    path = Path(filepath)
    # is_file() rather than exists(): a directory "exists" too, and would
    # otherwise crash with a traceback inside read_text().
    if not path.is_file():
        reason = "Not a regular file" if path.exists() else "File not found"
        print(f"ERROR: {reason}: {filepath}", file=sys.stderr)
        sys.exit(1)
    text = path.read_text(encoding="utf-8")
    return {"id": path.stem, "title": path.stem, "transcript": text, "participants": []}
536  
537  
def load_transcript_dir(dirpath: str) -> list[dict]:
    """Load every ``*.txt`` transcript directly inside ``dirpath``.

    Files are loaded in sorted path order via ``load_transcript_file``. A
    missing directory prints an error and exits with status 1; a directory
    with no ``.txt`` files prints a warning and returns an empty list.
    """
    root = Path(dirpath)
    if not root.is_dir():
        print(f"ERROR: Directory not found: {dirpath}", file=sys.stderr)
        sys.exit(1)

    transcripts = [load_transcript_file(str(txt)) for txt in sorted(root.glob("*.txt"))]
    if not transcripts:
        print(f"WARNING: No .txt files found in {dirpath}", file=sys.stderr)
    return transcripts
549  
550  
551  # ---------------------------------------------------------------------------
552  # Output
553  # ---------------------------------------------------------------------------
554  
def print_summary(insights: dict) -> None:
    """Write a human-readable digest of one call's insights to stdout.

    Shows the call ID and deal temperature, then (only when present) the
    objection counts with up to three sample quotes truncated to 80
    characters, buying-signal counts, competitors, and the number of pricing
    mentions.
    """
    summary = insights["summary"]
    rule = "=" * 60

    print(f"\n{rule}")
    print(f"  Call: {insights['source_id']}")
    print(f"  Temperature: {summary['deal_temperature'].upper()}")
    print(f"{rule}")

    if summary["total_objections"]:
        print(f"\n  🚫 Objections ({summary['total_objections']}):")
        by_count = sorted(
            summary["objection_categories"].items(), key=lambda kv: kv[1], reverse=True
        )
        for category, n in by_count:
            print(f"     {category}: {n}")
        for objection in insights["objections"][:3]:
            quote = objection["quote"]
            # Long quotes are cut at 80 characters and marked with an ellipsis.
            shown = f"{quote[:80]}..." if len(quote) > 80 else quote
            print(f"     → [{objection['category']}] \"{shown}\"")

    if summary["total_buying_signals"]:
        print(f"\n  ✅ Buying Signals ({summary['total_buying_signals']}):")
        by_count = sorted(
            summary["signal_types"].items(), key=lambda kv: kv[1], reverse=True
        )
        for signal_type, n in by_count:
            print(f"     {signal_type}: {n}")

    if summary["competitors_mentioned"]:
        names = ", ".join(summary["competitors_mentioned"])
        print(f"\n  ⚔️  Competitors: {names}")

    if summary["has_pricing_discussion"]:
        mentions = len(insights["pricing_discussions"])
        print(f"\n  💰 Pricing discussed: Yes ({mentions} mentions)")

    print()
582  
583  
584  # ---------------------------------------------------------------------------
585  # Main
586  # ---------------------------------------------------------------------------
587  
def main():
    """Command-line entry point.

    Loads transcripts from exactly one source (--file, --dir or --gong),
    analyzes each one, optionally derives content topics and follow-up
    suggestions, and reports the result as human-readable text on stdout
    (default), as JSON on stdout (--json), and/or as a JSON file (--output).

    Exits with status 1 when there are no transcripts to analyze.
    """
    parser = argparse.ArgumentParser(
        description="Extract structured insights from sales call transcripts.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s --file transcript.txt
  %(prog)s --dir ./transcripts/ --content-topics
  %(prog)s --gong --days 7 --follow-ups
  %(prog)s --file call.txt --output insights.json
        """,
    )

    # Input sources (mutually exclusive)
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--file", help="Path to a single transcript file (.txt)")
    source.add_argument("--dir", help="Path to directory of transcript files (.txt)")
    source.add_argument("--gong", action="store_true", help="Pull transcripts from Gong API")

    # Gong options
    parser.add_argument("--days", type=int, default=7, help="Days of history to pull from Gong (default: 7)")
    parser.add_argument("--call-id", help="Specific Gong call ID to analyze")

    # Output options
    parser.add_argument("--output", "-o", help="Write JSON output to file")
    parser.add_argument("--json", action="store_true", help="Output raw JSON to stdout")
    parser.add_argument("--content-topics", action="store_true", help="Generate content topics from recurring objections")
    parser.add_argument("--follow-ups", action="store_true", help="Generate follow-up suggestions")

    args = parser.parse_args()

    # Load transcripts
    calls = []
    if args.file:
        calls = [load_transcript_file(args.file)]
    elif args.dir:
        calls = load_transcript_dir(args.dir)
    elif args.gong:
        calls = fetch_calls_from_gong(days=args.days, call_id=args.call_id)

    if not calls:
        print("No transcripts to analyze.", file=sys.stderr)
        sys.exit(1)

    # Analyze
    all_insights = []
    for call in calls:
        insights = analyze_transcript(call["transcript"], source_id=call.get("id", "unknown"))
        insights["title"] = call.get("title", "")
        all_insights.append(insights)

        # In --json mode stdout must contain nothing but the JSON document.
        if not args.json:
            print_summary(insights)

    # Content topics (all_insights is never empty here: calls was non-empty)
    content_topics = []
    if args.content_topics:
        content_topics = generate_content_topics(all_insights)
        if not args.json:
            print(f"\n{'='*60}")
            print("  📝 Content Topics from Recurring Objections")
            print(f"{'='*60}")
            for topic in content_topics:
                print(f"\n  [{topic['priority'].upper()}] {topic['category']} (mentioned {topic['frequency']}x)")
                print(f"  Topic: {topic['suggested_topic']}")
                print(f"  Types: {', '.join(topic['recommended_content_types'])}")
                print(f"  Angle: {topic['strategic_angle']}")

    # Follow-ups
    all_follow_ups = []
    if args.follow_ups:
        for insights in all_insights:
            follow_ups = generate_follow_ups(insights)
            # The combined list is flat, so tag each suggestion with its call;
            # otherwise multi-call output cannot be traced back to a call.
            all_follow_ups.extend(
                {**fu, "source_id": insights["source_id"]} for fu in follow_ups
            )
            if not args.json:
                print(f"\n{'='*60}")
                print(f"  📧 Follow-up Suggestions for: {insights['source_id']}")
                print(f"{'='*60}")
                for fu in follow_ups:
                    print(f"\n  Type: {fu['type']}")
                    print(f"  Subject: {fu['suggested_subject']}")
                    print(f"  Timing: {fu['timing']}")
                    if fu.get("recommended_asset"):
                        print(f"  Asset: {fu['recommended_asset']}")

    # Build output
    output = {
        # Timezone-aware "now", rendered in the existing naive-ISO + "Z" format.
        "analyzed_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
        "total_calls": len(all_insights),
        "calls": all_insights,
    }
    if content_topics:
        output["content_topics"] = content_topics
    if all_follow_ups:
        output["follow_ups"] = all_follow_ups

    # Aggregate stats
    output["aggregate"] = {
        "total_objections": sum(i["summary"]["total_objections"] for i in all_insights),
        "total_buying_signals": sum(i["summary"]["total_buying_signals"] for i in all_insights),
        # Sorted so repeated runs over the same input emit identical JSON
        # (iteration order of a set of strings varies between runs).
        "all_competitors": sorted(
            {c for i in all_insights for c in i["summary"]["competitors_mentioned"]}
        ),
        "temperature_distribution": dict(Counter(i["summary"]["deal_temperature"] for i in all_insights)),
    }

    # Output
    if args.json:
        print(json.dumps(output, indent=2))

    if args.output:
        out_path = Path(args.output)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding: do not depend on the platform default.
        out_path.write_text(json.dumps(output, indent=2), encoding="utf-8")
        if not args.json:
            print(f"\n✅ Output written to {args.output}")


if __name__ == "__main__":
    main()