/ revenue-intelligence / client_report_generator.py
client_report_generator.py
   1  #!/usr/bin/env python3
   2  """
   3  Multi-Source Client Report Generator
   4  
   5  Pulls from GA4 + HubSpot + Ahrefs + Gong to generate unified client-ready BI reports.
   6  Includes executive summary, anomaly detection, and multi-format output.
   7  
   8  Usage:
   9      python client_report_generator.py --client "Acme Corp"
  10      python client_report_generator.py --client "Acme Corp" --format markdown --output report.md
  11      python client_report_generator.py --client "Acme Corp" --anomalies --compare previous-month
  12  """
  13  
import argparse
import json
import math
import os
import sys
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional
  23  
# ---------------------------------------------------------------------------
# API Configuration
# ---------------------------------------------------------------------------
# All credentials are read from environment variables at import time. An empty
# string means "not configured"; each fetcher then falls back to sample data
# so the script always produces a report.

# GA4: Google Analytics Data API
# Set GA4_PROPERTY_ID and GA4_CREDENTIALS_JSON (service account JSON path)
GA4_PROPERTY_ID = os.environ.get("GA4_PROPERTY_ID", "")
GA4_CREDENTIALS_JSON = os.environ.get("GA4_CREDENTIALS_JSON", "")

# HubSpot: Private App Token
# Required scopes: crm.objects.deals.read, crm.objects.contacts.read
HUBSPOT_API_KEY = os.environ.get("HUBSPOT_API_KEY", "")

# Ahrefs: API Token
# Get from: https://ahrefs.com/api
AHREFS_TOKEN = os.environ.get("AHREFS_TOKEN", "")

# Gong: API Access Key
# Get from: Gong > Settings > API
GONG_API_KEY = os.environ.get("GONG_API_KEY", "")
GONG_API_BASE_URL = os.environ.get("GONG_API_BASE_URL", "https://api.gong.io/v2")

# Directory where generated reports are written.
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "./output")
  47  
  48  
  49  # ---------------------------------------------------------------------------
  50  # Data Source Fetchers
  51  # ---------------------------------------------------------------------------
  52  
def fetch_ga4_traffic(start_date: str, end_date: str, prev_start: Optional[str] = None, prev_end: Optional[str] = None) -> dict:
    """
    Fetch traffic metrics from GA4 (Google Analytics Data API).

    Args:
        start_date: Reporting period start, "YYYY-MM-DD".
        end_date: Reporting period end, "YYYY-MM-DD".
        prev_start: Optional comparison-period start; when given together with
            prev_end, a second date range is added to the overview request so
            "previous" metrics can be reported alongside "current".
        prev_end: Optional comparison-period end.

    Returns: {
        "current": {"sessions": N, "users": N, "pageviews": N, "bounce_rate": N, ...},
        "previous": {...} or None,
        "top_pages": [...],
        "channels": [...]
    }

    Falls back to _sample_ga4_traffic() when credentials are missing or any
    API call fails — report generation is best-effort and never aborts here.
    """
    if not GA4_PROPERTY_ID or not GA4_CREDENTIALS_JSON:
        print("INFO: GA4 credentials not configured. Using sample data.", file=sys.stderr)
        return _sample_ga4_traffic()

    try:
        # Imported lazily so the script still runs (with sample data) when the
        # google-analytics-data package is not installed.
        from google.analytics.data_v1beta import BetaAnalyticsDataClient
        from google.analytics.data_v1beta.types import (
            DateRange,
            Dimension,
            Metric,
            RunReportRequest,
            OrderBy,
        )

        client = BetaAnalyticsDataClient.from_service_account_json(GA4_CREDENTIALS_JSON)

        # Overall metrics; a second DateRange (if provided) makes GA4 return
        # one row per range in the same response.
        date_ranges = [DateRange(start_date=start_date, end_date=end_date)]
        if prev_start and prev_end:
            date_ranges.append(DateRange(start_date=prev_start, end_date=prev_end))

        overview_req = RunReportRequest(
            property=f"properties/{GA4_PROPERTY_ID}",
            metrics=[
                Metric(name="sessions"),
                Metric(name="totalUsers"),
                Metric(name="screenPageViews"),
                Metric(name="bounceRate"),
                Metric(name="averageSessionDuration"),
                Metric(name="conversions"),
            ],
            date_ranges=date_ranges,
        )
        overview_resp = client.run_report(overview_req)

        # NOTE(review): this assumes rows[0] is the current range and rows[1]
        # the comparison range. With multiple date ranges GA4 tags rows via an
        # implicit "dateRange" dimension ("date_range_0"/"date_range_1");
        # matching on that value would be more robust — TODO confirm ordering.
        current = _parse_ga4_metrics(overview_resp.rows[0] if overview_resp.rows else None)
        previous = _parse_ga4_metrics(overview_resp.rows[1] if len(overview_resp.rows) > 1 else None)

        # Top pages by sessions (current period only), capped at 10.
        pages_req = RunReportRequest(
            property=f"properties/{GA4_PROPERTY_ID}",
            dimensions=[Dimension(name="pagePath")],
            metrics=[Metric(name="sessions"), Metric(name="conversions")],
            date_ranges=[DateRange(start_date=start_date, end_date=end_date)],
            order_bys=[OrderBy(metric=OrderBy.MetricOrderBy(metric_name="sessions"), desc=True)],
            limit=10,
        )
        pages_resp = client.run_report(pages_req)
        top_pages = [
            {
                "page": row.dimension_values[0].value,
                "sessions": int(row.metric_values[0].value),
                "conversions": int(row.metric_values[1].value),
            }
            for row in pages_resp.rows
        ]

        # Channel breakdown (current period only), ordered by sessions.
        channels_req = RunReportRequest(
            property=f"properties/{GA4_PROPERTY_ID}",
            dimensions=[Dimension(name="sessionDefaultChannelGroup")],
            metrics=[Metric(name="sessions"), Metric(name="totalUsers"), Metric(name="conversions")],
            date_ranges=[DateRange(start_date=start_date, end_date=end_date)],
            order_bys=[OrderBy(metric=OrderBy.MetricOrderBy(metric_name="sessions"), desc=True)],
        )
        channels_resp = client.run_report(channels_req)
        channels = [
            {
                "channel": row.dimension_values[0].value,
                "sessions": int(row.metric_values[0].value),
                "users": int(row.metric_values[1].value),
                "conversions": int(row.metric_values[2].value),
            }
            for row in channels_resp.rows
        ]

        return {
            "current": current,
            "previous": previous,
            "top_pages": top_pages,
            "channels": channels,
        }

    except Exception as e:
        # Deliberately broad: any GA4/auth/network failure degrades to sample
        # data rather than aborting the whole report run.
        print(f"WARNING: GA4 API error: {e}. Using sample data.", file=sys.stderr)
        return _sample_ga4_traffic()
 150  
 151  
 152  def _parse_ga4_metrics(row) -> Optional[dict]:
 153      if not row:
 154          return None
 155      return {
 156          "sessions": int(row.metric_values[0].value),
 157          "users": int(row.metric_values[1].value),
 158          "pageviews": int(row.metric_values[2].value),
 159          "bounce_rate": round(float(row.metric_values[3].value) * 100, 1),
 160          "avg_session_duration": round(float(row.metric_values[4].value), 1),
 161          "conversions": int(row.metric_values[5].value),
 162      }
 163  
 164  
 165  def _sample_ga4_traffic() -> dict:
 166      return {
 167          "current": {
 168              "sessions": 45200,
 169              "users": 38400,
 170              "pageviews": 112000,
 171              "bounce_rate": 52.3,
 172              "avg_session_duration": 185.4,
 173              "conversions": 342,
 174          },
 175          "previous": {
 176              "sessions": 41800,
 177              "users": 35600,
 178              "pageviews": 98000,
 179              "bounce_rate": 55.1,
 180              "avg_session_duration": 172.8,
 181              "conversions": 298,
 182          },
 183          "top_pages": [
 184              {"page": "/blog/seo-strategy-2025", "sessions": 4200, "conversions": 12},
 185              {"page": "/blog/ai-marketing-tools", "sessions": 3800, "conversions": 15},
 186              {"page": "/", "sessions": 3500, "conversions": 8},
 187              {"page": "/blog/content-marketing-roi", "sessions": 3100, "conversions": 8},
 188              {"page": "/pricing", "sessions": 2900, "conversions": 18},
 189              {"page": "/blog/b2b-lead-generation", "sessions": 2700, "conversions": 5},
 190              {"page": "/services", "sessions": 2400, "conversions": 14},
 191              {"page": "/case-studies", "sessions": 1800, "conversions": 9},
 192              {"page": "/blog/paid-media-benchmarks", "sessions": 1600, "conversions": 4},
 193              {"page": "/about", "sessions": 1200, "conversions": 2},
 194          ],
 195          "channels": [
 196              {"channel": "Organic Search", "sessions": 22100, "users": 19200, "conversions": 156},
 197              {"channel": "Direct", "sessions": 9800, "users": 8100, "conversions": 89},
 198              {"channel": "Paid Search", "sessions": 5400, "users": 5100, "conversions": 52},
 199              {"channel": "Social", "sessions": 4200, "users": 3800, "conversions": 18},
 200              {"channel": "Email", "sessions": 2100, "users": 1900, "conversions": 22},
 201              {"channel": "Referral", "sessions": 1600, "users": 1300, "conversions": 5},
 202          ],
 203      }
 204  
 205  
 206  def fetch_hubspot_pipeline(start_date: str, end_date: str) -> dict:
 207      """
 208      Fetch pipeline metrics from HubSpot.
 209  
 210      Returns: {
 211          "deals_created": N,
 212          "deals_closed_won": N,
 213          "deals_closed_lost": N,
 214          "revenue_closed": N,
 215          "pipeline_value": N,
 216          "avg_deal_size": N,
 217          "avg_sales_cycle_days": N,
 218          "top_deals": [...]
 219      }
 220      """
 221      if not HUBSPOT_API_KEY:
 222          print("INFO: HubSpot credentials not configured. Using sample data.", file=sys.stderr)
 223          return _sample_hubspot_pipeline()
 224  
 225      try:
 226          import requests
 227          headers = {"Authorization": f"Bearer {HUBSPOT_API_KEY}"}
 228  
 229          # Fetch deals created in period
 230          search_body = {
 231              "filterGroups": [{
 232                  "filters": [
 233                      {"propertyName": "createdate", "operator": "GTE", "value": f"{start_date}T00:00:00Z"},
 234                      {"propertyName": "createdate", "operator": "LTE", "value": f"{end_date}T23:59:59Z"},
 235                  ]
 236              }],
 237              "properties": ["dealname", "amount", "dealstage", "closedate", "createdate", "hs_date_entered_closedwon"],
 238              "limit": 100,
 239          }
 240  
 241          resp = requests.post(
 242              "https://api.hubapi.com/crm/v3/objects/deals/search",
 243              headers=headers,
 244              json=search_body,
 245          )
 246          resp.raise_for_status()
 247          deals = resp.json().get("results", [])
 248  
 249          created = len(deals)
 250          won = sum(1 for d in deals if d["properties"].get("dealstage") == "closedwon")
 251          lost = sum(1 for d in deals if d["properties"].get("dealstage") == "closedlost")
 252          revenue = sum(float(d["properties"].get("amount", 0) or 0) for d in deals if d["properties"].get("dealstage") == "closedwon")
 253          pipeline_value = sum(float(d["properties"].get("amount", 0) or 0) for d in deals if d["properties"].get("dealstage") not in ("closedwon", "closedlost"))
 254  
 255          # Calculate avg sales cycle for won deals
 256          cycle_days = []
 257          for d in deals:
 258              if d["properties"].get("dealstage") == "closedwon":
 259                  try:
 260                      created_dt = datetime.strptime(d["properties"]["createdate"][:10], "%Y-%m-%d")
 261                      closed_dt = datetime.strptime(d["properties"]["closedate"][:10], "%Y-%m-%d")
 262                      cycle_days.append((closed_dt - created_dt).days)
 263                  except (ValueError, KeyError, TypeError):
 264                      pass
 265  
 266          avg_cycle = round(sum(cycle_days) / len(cycle_days), 1) if cycle_days else 0
 267  
 268          top_deals = sorted(
 269              [{"name": d["properties"].get("dealname", ""), "amount": float(d["properties"].get("amount", 0) or 0), "stage": d["properties"].get("dealstage", "")}
 270               for d in deals],
 271              key=lambda x: x["amount"],
 272              reverse=True,
 273          )[:5]
 274  
 275          return {
 276              "deals_created": created,
 277              "deals_closed_won": won,
 278              "deals_closed_lost": lost,
 279              "revenue_closed": revenue,
 280              "pipeline_value": pipeline_value,
 281              "avg_deal_size": round(revenue / won, 0) if won else 0,
 282              "avg_sales_cycle_days": avg_cycle,
 283              "win_rate": round(won / (won + lost) * 100, 1) if (won + lost) else 0,
 284              "top_deals": top_deals,
 285          }
 286  
 287      except Exception as e:
 288          print(f"WARNING: HubSpot API error: {e}. Using sample data.", file=sys.stderr)
 289          return _sample_hubspot_pipeline()
 290  
 291  
 292  def _sample_hubspot_pipeline() -> dict:
 293      return {
 294          "deals_created": 47,
 295          "deals_closed_won": 12,
 296          "deals_closed_lost": 8,
 297          "revenue_closed": 1440000,
 298          "pipeline_value": 2850000,
 299          "avg_deal_size": 120000,
 300          "avg_sales_cycle_days": 38.5,
 301          "win_rate": 60.0,
 302          "top_deals": [
 303              {"name": "Enterprise Ltd - Full Service", "amount": 360000, "stage": "closedwon"},
 304              {"name": "TechStart Inc - SEO + Content", "amount": 240000, "stage": "closedwon"},
 305              {"name": "DataCo - Pipeline Build", "amount": 180000, "stage": "contractsent"},
 306              {"name": "ScaleUp - Paid Media", "amount": 156000, "stage": "closedwon"},
 307              {"name": "GrowthCo - Content Marketing", "amount": 120000, "stage": "qualifiedtobuy"},
 308          ],
 309      }
 310  
 311  
 312  def fetch_ahrefs_seo(domain: str) -> dict:
 313      """
 314      Fetch SEO metrics from Ahrefs.
 315  
 316      Returns: {
 317          "domain_rating": N,
 318          "referring_domains": N,
 319          "backlinks": N,
 320          "organic_keywords": N,
 321          "organic_traffic": N,
 322          "top_keywords": [...],
 323          "new_backlinks": N,
 324          "lost_backlinks": N
 325      }
 326      """
 327      if not AHREFS_TOKEN:
 328          print("INFO: Ahrefs credentials not configured. Using sample data.", file=sys.stderr)
 329          return _sample_ahrefs_data()
 330  
 331      try:
 332          import requests
 333  
 334          headers = {"Authorization": f"Bearer {AHREFS_TOKEN}"}
 335          base = "https://api.ahrefs.com/v3"
 336  
 337          # Domain overview
 338          overview_resp = requests.get(
 339              f"{base}/site-explorer/overview",
 340              headers=headers,
 341              params={"target": domain, "mode": "domain"},
 342          )
 343          overview_resp.raise_for_status()
 344          overview = overview_resp.json()
 345  
 346          # Top organic keywords
 347          keywords_resp = requests.get(
 348              f"{base}/site-explorer/organic-keywords",
 349              headers=headers,
 350              params={"target": domain, "mode": "domain", "limit": 10, "order_by": "traffic:desc"},
 351          )
 352          keywords_data = keywords_resp.json() if keywords_resp.ok else {}
 353  
 354          top_keywords = [
 355              {
 356                  "keyword": kw.get("keyword", ""),
 357                  "position": kw.get("position", 0),
 358                  "volume": kw.get("volume", 0),
 359                  "traffic": kw.get("traffic", 0),
 360              }
 361              for kw in keywords_data.get("keywords", [])
 362          ]
 363  
 364          return {
 365              "domain_rating": overview.get("domain_rating", 0),
 366              "referring_domains": overview.get("referring_domains", 0),
 367              "backlinks": overview.get("backlinks", 0),
 368              "organic_keywords": overview.get("organic_keywords", 0),
 369              "organic_traffic": overview.get("organic_traffic", 0),
 370              "top_keywords": top_keywords,
 371              "new_backlinks": overview.get("new_backlinks_30d", 0),
 372              "lost_backlinks": overview.get("lost_backlinks_30d", 0),
 373          }
 374  
 375      except Exception as e:
 376          print(f"WARNING: Ahrefs API error: {e}. Using sample data.", file=sys.stderr)
 377          return _sample_ahrefs_data()
 378  
 379  
 380  def _sample_ahrefs_data() -> dict:
 381      return {
 382          "domain_rating": 72,
 383          "referring_domains": 4850,
 384          "backlinks": 89200,
 385          "organic_keywords": 28400,
 386          "organic_traffic": 156000,
 387          "top_keywords": [
 388              {"keyword": "digital marketing agency", "position": 3, "volume": 18100, "traffic": 4200},
 389              {"keyword": "seo services", "position": 5, "volume": 14800, "traffic": 2100},
 390              {"keyword": "content marketing strategy", "position": 2, "volume": 9900, "traffic": 3800},
 391              {"keyword": "b2b marketing agency", "position": 4, "volume": 6600, "traffic": 1400},
 392              {"keyword": "marketing automation", "position": 7, "volume": 12100, "traffic": 980},
 393              {"keyword": "ppc management", "position": 6, "volume": 5400, "traffic": 890},
 394              {"keyword": "growth marketing", "position": 1, "volume": 4400, "traffic": 2900},
 395              {"keyword": "seo audit", "position": 8, "volume": 8800, "traffic": 640},
 396              {"keyword": "link building services", "position": 3, "volume": 3600, "traffic": 780},
 397              {"keyword": "saas marketing", "position": 2, "volume": 3200, "traffic": 1200},
 398          ],
 399          "new_backlinks": 342,
 400          "lost_backlinks": 128,
 401      }
 402  
 403  
 404  def fetch_gong_call_quality(start_date: str, end_date: str) -> dict:
 405      """
 406      Fetch call quality metrics from Gong.
 407  
 408      Returns: {
 409          "total_calls": N,
 410          "avg_talk_ratio": N (percent rep spoke),
 411          "avg_longest_monologue": N (seconds),
 412          "avg_patience": N (seconds before interrupting),
 413          "topics_discussed": [...],
 414          "win_rate_by_talk_ratio": {...}
 415      }
 416      """
 417      if not GONG_API_KEY:
 418          print("INFO: Gong credentials not configured. Using sample data.", file=sys.stderr)
 419          return _sample_gong_data()
 420  
 421      try:
 422          import requests
 423  
 424          headers = {
 425              "Authorization": f"Bearer {GONG_API_KEY}",
 426              "Content-Type": "application/json",
 427          }
 428  
 429          # Fetch call stats
 430          from_dt = f"{start_date}T00:00:00Z"
 431          to_dt = f"{end_date}T23:59:59Z"
 432  
 433          stats_resp = requests.post(
 434              f"{GONG_API_BASE_URL}/stats/activity/aggregate",
 435              headers=headers,
 436              json={
 437                  "filter": {"fromDateTime": from_dt, "toDateTime": to_dt},
 438                  "aggregation": {"aggregateBy": "user"},
 439              },
 440          )
 441  
 442          if stats_resp.ok:
 443              stats = stats_resp.json()
 444              # Process aggregate stats
 445              # (Gong's actual response format varies; adjust parsing as needed)
 446              return {
 447                  "total_calls": stats.get("totalCalls", 0),
 448                  "avg_talk_ratio": stats.get("avgTalkRatio", 0),
 449                  "source": "gong_api",
 450              }
 451          else:
 452              return _sample_gong_data()
 453  
 454      except Exception as e:
 455          print(f"WARNING: Gong API error: {e}. Using sample data.", file=sys.stderr)
 456          return _sample_gong_data()
 457  
 458  
 459  def _sample_gong_data() -> dict:
 460      return {
 461          "total_calls": 156,
 462          "avg_talk_ratio": 54.2,
 463          "avg_longest_monologue_sec": 142,
 464          "avg_patience_sec": 1.8,
 465          "avg_call_duration_min": 32.5,
 466          "calls_with_next_steps": 118,
 467          "next_steps_rate": 75.6,
 468          "top_topics": [
 469              {"topic": "Pricing", "frequency": 89, "pct": 57.1},
 470              {"topic": "Implementation", "frequency": 72, "pct": 46.2},
 471              {"topic": "ROI", "frequency": 68, "pct": 43.6},
 472              {"topic": "Timeline", "frequency": 54, "pct": 34.6},
 473              {"topic": "Competition", "frequency": 41, "pct": 26.3},
 474          ],
 475          "talk_ratio_vs_win_rate": {
 476              "40-50%": {"calls": 42, "win_rate": 38.1},
 477              "50-60%": {"calls": 58, "win_rate": 45.2},
 478              "60-70%": {"calls": 36, "win_rate": 31.4},
 479              "70%+": {"calls": 20, "win_rate": 15.0},
 480          },
 481      }
 482  
 483  
 484  # ---------------------------------------------------------------------------
 485  # Anomaly Detection
 486  # ---------------------------------------------------------------------------
 487  
 488  def detect_anomalies(current: dict, previous: dict, thresholds: Optional[dict] = None) -> list[dict]:
 489      """
 490      Compare current vs previous period metrics and flag anomalies.
 491  
 492      Default thresholds: >20% change = warning, >40% change = critical
 493      """
 494      if not thresholds:
 495          thresholds = {"warning": 0.20, "critical": 0.40}
 496  
 497      anomalies = []
 498  
 499      metric_labels = {
 500          "sessions": "Website Sessions",
 501          "users": "Unique Users",
 502          "pageviews": "Pageviews",
 503          "bounce_rate": "Bounce Rate",
 504          "conversions": "Conversions",
 505          "avg_session_duration": "Avg Session Duration",
 506      }
 507  
 508      for metric, label in metric_labels.items():
 509          curr_val = current.get(metric, 0)
 510          prev_val = previous.get(metric, 0)
 511  
 512          if prev_val == 0:
 513              continue
 514  
 515          pct_change = (curr_val - prev_val) / prev_val
 516          abs_change = abs(pct_change)
 517  
 518          if abs_change >= thresholds["critical"]:
 519              severity = "critical"
 520          elif abs_change >= thresholds["warning"]:
 521              severity = "warning"
 522          else:
 523              continue
 524  
 525          direction = "increase" if pct_change > 0 else "decrease"
 526          # For bounce rate, increase is bad; for others, decrease is bad
 527          is_positive = (direction == "increase") if metric != "bounce_rate" else (direction == "decrease")
 528  
 529          anomalies.append({
 530              "metric": metric,
 531              "label": label,
 532              "current_value": curr_val,
 533              "previous_value": prev_val,
 534              "pct_change": round(pct_change * 100, 1),
 535              "direction": direction,
 536              "severity": severity,
 537              "sentiment": "positive" if is_positive else "negative",
 538              "summary": f"{label} {'šŸ“ˆ' if is_positive else 'šŸ“‰'} {abs(round(pct_change * 100, 1))}% {direction} ({prev_val:,} → {curr_val:,})",
 539          })
 540  
 541      return anomalies
 542  
 543  
 544  # ---------------------------------------------------------------------------
 545  # Report Builder
 546  # ---------------------------------------------------------------------------
 547  
 548  def build_report(
 549      client_name: str,
 550      start_date: str,
 551      end_date: str,
 552      skip_sources: list[str] = None,
 553      enable_anomalies: bool = False,
 554      compare: Optional[str] = None,
 555      domain: str = "",
 556  ) -> dict:
 557      """Build the full client report from all sources."""
 558      skip = skip_sources or []
 559  
 560      # Calculate comparison period
 561      start_dt = datetime.strptime(start_date, "%Y-%m-%d")
 562      end_dt = datetime.strptime(end_date, "%Y-%m-%d")
 563      period_days = (end_dt - start_dt).days
 564  
 565      prev_end = (start_dt - timedelta(days=1)).strftime("%Y-%m-%d")
 566      prev_start = (start_dt - timedelta(days=period_days + 1)).strftime("%Y-%m-%d")
 567  
 568      report = {
 569          "client": client_name,
 570          "period": {"start": start_date, "end": end_date, "days": period_days},
 571          "generated_at": datetime.utcnow().isoformat() + "Z",
 572          "sections": {},
 573      }
 574  
 575      # --- GA4 Traffic ---
 576      if "ga4" not in skip:
 577          ga4 = fetch_ga4_traffic(start_date, end_date, prev_start, prev_end)
 578          report["sections"]["traffic"] = ga4
 579  
 580          if enable_anomalies and ga4.get("current") and ga4.get("previous"):
 581              anomalies = detect_anomalies(ga4["current"], ga4["previous"])
 582              report["sections"]["traffic"]["anomalies"] = anomalies
 583  
 584      # --- HubSpot Pipeline ---
 585      if "hubspot" not in skip:
 586          pipeline = fetch_hubspot_pipeline(start_date, end_date)
 587          report["sections"]["pipeline"] = pipeline
 588  
 589      # --- Ahrefs SEO ---
 590      if "ahrefs" not in skip:
 591          target_domain = domain or os.environ.get("YOUR_DOMAIN", "example.com")
 592          seo = fetch_ahrefs_seo(target_domain)
 593          report["sections"]["seo"] = seo
 594  
 595      # --- Gong Call Quality ---
 596      if "gong" not in skip:
 597          call_quality = fetch_gong_call_quality(start_date, end_date)
 598          report["sections"]["call_quality"] = call_quality
 599  
 600      # --- Executive Summary ---
 601      report["executive_summary"] = generate_executive_summary(report)
 602  
 603      return report
 604  
 605  
 606  def generate_executive_summary(report: dict) -> dict:
 607      """Auto-generate executive summary from report data."""
 608      highlights = []
 609      concerns = []
 610      recommendations = []
 611  
 612      sections = report.get("sections", {})
 613  
 614      # Traffic insights
 615      traffic = sections.get("traffic", {})
 616      current = traffic.get("current", {})
 617      previous = traffic.get("previous", {})
 618  
 619      if current and previous:
 620          sessions_change = ((current.get("sessions", 0) - previous.get("sessions", 0)) / previous.get("sessions", 1)) * 100
 621          conv_change = ((current.get("conversions", 0) - previous.get("conversions", 0)) / max(previous.get("conversions", 1), 1)) * 100
 622  
 623          if sessions_change > 10:
 624              highlights.append(f"Traffic up {sessions_change:.1f}% ({current['sessions']:,} sessions)")
 625          elif sessions_change < -10:
 626              concerns.append(f"Traffic down {abs(sessions_change):.1f}% ({current['sessions']:,} sessions)")
 627  
 628          if conv_change > 15:
 629              highlights.append(f"Conversions up {conv_change:.1f}% ({current['conversions']} total)")
 630          elif conv_change < -15:
 631              concerns.append(f"Conversions down {abs(conv_change):.1f}%")
 632  
 633      # Pipeline insights
 634      pipeline = sections.get("pipeline", {})
 635      if pipeline:
 636          if pipeline.get("win_rate", 0) >= 50:
 637              highlights.append(f"Win rate at {pipeline['win_rate']}% ({pipeline['deals_closed_won']} won)")
 638          elif pipeline.get("win_rate", 0) < 30:
 639              concerns.append(f"Win rate below 30% ({pipeline['win_rate']}%)")
 640  
 641          if pipeline.get("revenue_closed", 0) > 0:
 642              highlights.append(f"${pipeline['revenue_closed']:,.0f} revenue closed")
 643  
 644          if pipeline.get("pipeline_value", 0) > 0:
 645              highlights.append(f"${pipeline['pipeline_value']:,.0f} in active pipeline")
 646  
 647      # SEO insights
 648      seo = sections.get("seo", {})
 649      if seo:
 650          net_backlinks = seo.get("new_backlinks", 0) - seo.get("lost_backlinks", 0)
 651          if net_backlinks > 100:
 652              highlights.append(f"Net +{net_backlinks} backlinks this period")
 653          elif net_backlinks < -50:
 654              concerns.append(f"Net loss of {abs(net_backlinks)} backlinks")
 655  
 656          top_kws = seo.get("top_keywords", [])
 657          top3_count = sum(1 for kw in top_kws if kw.get("position", 99) <= 3)
 658          if top3_count >= 3:
 659              highlights.append(f"{top3_count} keywords in top 3 positions")
 660  
 661      # Call quality insights
 662      calls = sections.get("call_quality", {})
 663      if calls:
 664          talk_ratio = calls.get("avg_talk_ratio", 0)
 665          if talk_ratio > 65:
 666              concerns.append(f"Reps talking too much ({talk_ratio}% talk ratio). Best practice: 40-60%.")
 667              recommendations.append("Run talk-ratio coaching sessions. Reps at 40-60% have 2x win rate vs 70%+.")
 668          if calls.get("next_steps_rate", 0) < 70:
 669              concerns.append(f"Only {calls.get('next_steps_rate', 0)}% of calls end with clear next steps.")
 670              recommendations.append("Implement mandatory next-steps template for all discovery calls.")
 671  
 672      # Anomaly-based recommendations
 673      anomalies = traffic.get("anomalies", [])
 674      for a in anomalies:
 675          if a["severity"] == "critical" and a["sentiment"] == "negative":
 676              recommendations.append(f"Investigate {a['label']} drop ({a['pct_change']}%). Check for technical issues, algorithm updates, or campaign pauses.")
 677  
 678      return {
 679          "highlights": highlights,
 680          "concerns": concerns,
 681          "recommendations": recommendations,
 682          "overall_health": "strong" if len(highlights) > len(concerns) else "needs_attention" if concerns else "stable",
 683      }
 684  
 685  
 686  # ---------------------------------------------------------------------------
 687  # Output Formatters
 688  # ---------------------------------------------------------------------------
 689  
 690  def format_markdown(report: dict) -> str:
 691      """Format report as client-ready markdown."""
 692      lines = []
 693      lines.append(f"# {report['client']} - Performance Report")
 694      lines.append(f"**Period:** {report['period']['start']} to {report['period']['end']} ({report['period']['days']} days)")
 695      lines.append(f"**Generated:** {report['generated_at'][:10]}")
 696      lines.append("")
 697  
 698      # Executive Summary
 699      summary = report.get("executive_summary", {})
 700      lines.append("## Executive Summary")
 701      lines.append("")
 702  
 703      health = summary.get("overall_health", "stable")
 704      health_emoji = {"strong": "🟢", "stable": "🟔", "needs_attention": "šŸ”“"}.get(health, "⚪")
 705      lines.append(f"**Overall Health:** {health_emoji} {health.replace('_', ' ').title()}")
 706      lines.append("")
 707  
 708      if summary.get("highlights"):
 709          lines.append("### āœ… Highlights")
 710          for h in summary["highlights"]:
 711              lines.append(f"- {h}")
 712          lines.append("")
 713  
 714      if summary.get("concerns"):
 715          lines.append("### āš ļø Concerns")
 716          for c in summary["concerns"]:
 717              lines.append(f"- {c}")
 718          lines.append("")
 719  
 720      if summary.get("recommendations"):
 721          lines.append("### šŸ’” Recommendations")
 722          for r in summary["recommendations"]:
 723              lines.append(f"- {r}")
 724          lines.append("")
 725  
 726      # Traffic
 727      sections = report.get("sections", {})
 728      traffic = sections.get("traffic", {})
 729      if traffic:
 730          current = traffic.get("current", {})
 731          previous = traffic.get("previous", {})
 732  
 733          lines.append("---")
 734          lines.append("## šŸ“Š Traffic")
 735          lines.append("")
 736          lines.append("| Metric | Current | Previous | Change |")
 737          lines.append("|--------|---------|----------|--------|")
 738  
 739          for metric, label in [("sessions", "Sessions"), ("users", "Users"), ("pageviews", "Pageviews"), ("bounce_rate", "Bounce Rate"), ("conversions", "Conversions")]:
 740              curr = current.get(metric, 0)
 741              prev = previous.get(metric, 0) if previous else 0
 742              if prev:
 743                  change = ((curr - prev) / prev) * 100
 744                  change_str = f"{'↑' if change > 0 else '↓'} {abs(change):.1f}%"
 745              else:
 746                  change_str = "N/A"
 747              fmt = f"{curr:.1f}%" if metric == "bounce_rate" else f"{curr:,}"
 748              prev_fmt = f"{prev:.1f}%" if metric == "bounce_rate" else f"{prev:,}"
 749              lines.append(f"| {label} | {fmt} | {prev_fmt} | {change_str} |")
 750  
 751          lines.append("")
 752  
 753          if traffic.get("channels"):
 754              lines.append("### Channel Breakdown")
 755              lines.append("")
 756              lines.append("| Channel | Sessions | Conversions |")
 757              lines.append("|---------|----------|-------------|")
 758              for ch in traffic["channels"]:
 759                  lines.append(f"| {ch['channel']} | {ch['sessions']:,} | {ch['conversions']} |")
 760              lines.append("")
 761  
 762          if traffic.get("top_pages"):
 763              lines.append("### Top Pages")
 764              lines.append("")
 765              lines.append("| Page | Sessions | Conversions |")
 766              lines.append("|------|----------|-------------|")
 767              for p in traffic["top_pages"][:10]:
 768                  lines.append(f"| {p['page']} | {p['sessions']:,} | {p['conversions']} |")
 769              lines.append("")
 770  
 771          # Anomalies
 772          if traffic.get("anomalies"):
 773              lines.append("### 🚨 Anomalies Detected")
 774              lines.append("")
 775              for a in traffic["anomalies"]:
 776                  icon = "šŸ”“" if a["severity"] == "critical" else "🟔"
 777                  lines.append(f"- {icon} {a['summary']}")
 778              lines.append("")
 779  
 780      # Pipeline
 781      pipeline = sections.get("pipeline", {})
 782      if pipeline:
 783          lines.append("---")
 784          lines.append("## šŸŽÆ Pipeline")
 785          lines.append("")
 786          lines.append(f"| Metric | Value |")
 787          lines.append(f"|--------|-------|")
 788          lines.append(f"| Deals Created | {pipeline.get('deals_created', 0)} |")
 789          lines.append(f"| Deals Won | {pipeline.get('deals_closed_won', 0)} |")
 790          lines.append(f"| Deals Lost | {pipeline.get('deals_closed_lost', 0)} |")
 791          lines.append(f"| Win Rate | {pipeline.get('win_rate', 0)}% |")
 792          lines.append(f"| Revenue Closed | ${pipeline.get('revenue_closed', 0):,.0f} |")
 793          lines.append(f"| Pipeline Value | ${pipeline.get('pipeline_value', 0):,.0f} |")
 794          lines.append(f"| Avg Deal Size | ${pipeline.get('avg_deal_size', 0):,.0f} |")
 795          lines.append(f"| Avg Sales Cycle | {pipeline.get('avg_sales_cycle_days', 0)} days |")
 796          lines.append("")
 797  
 798          if pipeline.get("top_deals"):
 799              lines.append("### Top Deals")
 800              lines.append("")
 801              lines.append("| Deal | Amount | Stage |")
 802              lines.append("|------|--------|-------|")
 803              for d in pipeline["top_deals"]:
 804                  lines.append(f"| {d['name']} | ${d['amount']:,.0f} | {d['stage']} |")
 805              lines.append("")
 806  
 807      # SEO
 808      seo = sections.get("seo", {})
 809      if seo:
 810          lines.append("---")
 811          lines.append("## šŸ” SEO")
 812          lines.append("")
 813          lines.append(f"| Metric | Value |")
 814          lines.append(f"|--------|-------|")
 815          lines.append(f"| Domain Rating | {seo.get('domain_rating', 0)} |")
 816          lines.append(f"| Referring Domains | {seo.get('referring_domains', 0):,} |")
 817          lines.append(f"| Total Backlinks | {seo.get('backlinks', 0):,} |")
 818          lines.append(f"| Organic Keywords | {seo.get('organic_keywords', 0):,} |")
 819          lines.append(f"| Organic Traffic | {seo.get('organic_traffic', 0):,} |")
 820          lines.append(f"| New Backlinks (30d) | +{seo.get('new_backlinks', 0)} |")
 821          lines.append(f"| Lost Backlinks (30d) | -{seo.get('lost_backlinks', 0)} |")
 822          lines.append("")
 823  
 824          if seo.get("top_keywords"):
 825              lines.append("### Top Keywords")
 826              lines.append("")
 827              lines.append("| Keyword | Position | Volume | Traffic |")
 828              lines.append("|---------|----------|--------|---------|")
 829              for kw in seo["top_keywords"]:
 830                  lines.append(f"| {kw['keyword']} | {kw['position']} | {kw['volume']:,} | {kw['traffic']:,} |")
 831              lines.append("")
 832  
 833      # Call Quality
 834      calls = sections.get("call_quality", {})
 835      if calls:
 836          lines.append("---")
 837          lines.append("## šŸ“ž Call Quality")
 838          lines.append("")
 839          lines.append(f"| Metric | Value |")
 840          lines.append(f"|--------|-------|")
 841          lines.append(f"| Total Calls | {calls.get('total_calls', 0)} |")
 842          lines.append(f"| Avg Talk Ratio | {calls.get('avg_talk_ratio', 0)}% |")
 843          lines.append(f"| Avg Call Duration | {calls.get('avg_call_duration_min', 0)} min |")
 844          lines.append(f"| Longest Monologue | {calls.get('avg_longest_monologue_sec', 0)}s |")
 845          lines.append(f"| Next Steps Rate | {calls.get('next_steps_rate', 0)}% |")
 846          lines.append("")
 847  
 848          if calls.get("top_topics"):
 849              lines.append("### Top Discussion Topics")
 850              lines.append("")
 851              lines.append("| Topic | Frequency | % of Calls |")
 852              lines.append("|-------|-----------|------------|")
 853              for t in calls["top_topics"]:
 854                  lines.append(f"| {t['topic']} | {t['frequency']} | {t['pct']}% |")
 855              lines.append("")
 856  
 857          if calls.get("talk_ratio_vs_win_rate"):
 858              lines.append("### Talk Ratio vs Win Rate")
 859              lines.append("")
 860              lines.append("| Talk Ratio | Calls | Win Rate |")
 861              lines.append("|------------|-------|----------|")
 862              for ratio, data in calls["talk_ratio_vs_win_rate"].items():
 863                  lines.append(f"| {ratio} | {data['calls']} | {data['win_rate']}% |")
 864              lines.append("")
 865  
 866      lines.append("---")
 867      lines.append(f"*Report generated automatically on {report['generated_at'][:10]}*")
 868  
 869      return "\n".join(lines)
 870  
 871  
 872  def print_report_console(report: dict) -> None:
 873      """Print a condensed version to console."""
 874      summary = report.get("executive_summary", {})
 875  
 876      print(f"\n{'='*70}")
 877      print(f"  {report['client']} - Performance Report")
 878      print(f"  {report['period']['start']} to {report['period']['end']}")
 879      print(f"{'='*70}")
 880  
 881      health = summary.get("overall_health", "stable")
 882      health_emoji = {"strong": "🟢", "stable": "🟔", "needs_attention": "šŸ”“"}.get(health, "⚪")
 883      print(f"\n  {health_emoji} Overall: {health.replace('_', ' ').title()}")
 884  
 885      if summary.get("highlights"):
 886          print(f"\n  āœ… Highlights:")
 887          for h in summary["highlights"]:
 888              print(f"     • {h}")
 889  
 890      if summary.get("concerns"):
 891          print(f"\n  āš ļø  Concerns:")
 892          for c in summary["concerns"]:
 893              print(f"     • {c}")
 894  
 895      if summary.get("recommendations"):
 896          print(f"\n  šŸ’” Recommendations:")
 897          for r in summary["recommendations"]:
 898              print(f"     • {r}")
 899  
 900      # Key numbers
 901      sections = report.get("sections", {})
 902  
 903      traffic = sections.get("traffic", {}).get("current", {})
 904      if traffic:
 905          print(f"\n  šŸ“Š Traffic: {traffic.get('sessions', 0):,} sessions | {traffic.get('conversions', 0)} conversions")
 906  
 907      pipeline = sections.get("pipeline", {})
 908      if pipeline:
 909          print(f"  šŸŽÆ Pipeline: ${pipeline.get('revenue_closed', 0):,.0f} closed | ${pipeline.get('pipeline_value', 0):,.0f} active | {pipeline.get('win_rate', 0)}% win rate")
 910  
 911      seo = sections.get("seo", {})
 912      if seo:
 913          print(f"  šŸ” SEO: DR {seo.get('domain_rating', 0)} | {seo.get('organic_keywords', 0):,} keywords | {seo.get('organic_traffic', 0):,} organic traffic")
 914  
 915      calls = sections.get("call_quality", {})
 916      if calls:
 917          print(f"  šŸ“ž Calls: {calls.get('total_calls', 0)} calls | {calls.get('avg_talk_ratio', 0)}% talk ratio | {calls.get('next_steps_rate', 0)}% next steps")
 918  
 919      # Anomalies
 920      anomalies = sections.get("traffic", {}).get("anomalies", [])
 921      if anomalies:
 922          print(f"\n  🚨 Anomalies:")
 923          for a in anomalies:
 924              icon = "šŸ”“" if a["severity"] == "critical" else "🟔"
 925              print(f"     {icon} {a['summary']}")
 926  
 927      print()
 928  
 929  
 930  # ---------------------------------------------------------------------------
 931  # Main
 932  # ---------------------------------------------------------------------------
 933  
 934  def main():
 935      parser = argparse.ArgumentParser(
 936          description="Generate unified client BI reports from GA4 + HubSpot + Ahrefs + Gong.",
 937          formatter_class=argparse.RawDescriptionHelpFormatter,
 938          epilog="""
 939  Examples:
 940    %(prog)s --client "Acme Corp"
 941    %(prog)s --client "Acme Corp" --format markdown --output report.md
 942    %(prog)s --client "Acme Corp" --anomalies --compare previous-month
 943    %(prog)s --client "Acme Corp" --skip gong,ahrefs --format json
 944          """,
 945      )
 946  
 947      parser.add_argument("--client", required=True, help="Client name for the report header")
 948      parser.add_argument("--start", help="Start date YYYY-MM-DD (default: 30 days ago)")
 949      parser.add_argument("--end", help="End date YYYY-MM-DD (default: today)")
 950      parser.add_argument("--domain", help="Domain for Ahrefs data (default: YOUR_DOMAIN env var)")
 951  
 952      parser.add_argument("--format", choices=["markdown", "json", "console"], default="console",
 953                          help="Output format (default: console)")
 954      parser.add_argument("--output", "-o", help="Write output to file")
 955      parser.add_argument("--skip", help="Comma-separated sources to skip (ga4,hubspot,ahrefs,gong)")
 956      parser.add_argument("--anomalies", action="store_true", help="Enable anomaly detection")
 957      parser.add_argument("--compare", choices=["previous-month", "previous-quarter", "yoy"],
 958                          help="Comparison period (requires anomaly detection)")
 959  
 960      args = parser.parse_args()
 961  
 962      # Dates
 963      end_date = args.end or datetime.utcnow().strftime("%Y-%m-%d")
 964      start_date = args.start or (datetime.utcnow() - timedelta(days=30)).strftime("%Y-%m-%d")
 965  
 966      skip_sources = [s.strip() for s in args.skip.split(",")] if args.skip else []
 967  
 968      if args.compare:
 969          args.anomalies = True
 970  
 971      print(f"Building report for {args.client} ({start_date} to {end_date})...", file=sys.stderr)
 972  
 973      # Build report
 974      report = build_report(
 975          client_name=args.client,
 976          start_date=start_date,
 977          end_date=end_date,
 978          skip_sources=skip_sources,
 979          enable_anomalies=args.anomalies,
 980          compare=args.compare,
 981          domain=args.domain or "",
 982      )
 983  
 984      # Output
 985      if args.format == "json":
 986          output_text = json.dumps(report, indent=2, default=str)
 987          print(output_text)
 988      elif args.format == "markdown":
 989          output_text = format_markdown(report)
 990          if not args.output:
 991              print(output_text)
 992      else:
 993          print_report_console(report)
 994          output_text = None
 995  
 996      if args.output:
 997          out_path = Path(args.output)
 998          out_path.parent.mkdir(parents=True, exist_ok=True)
 999  
1000          if args.format == "json":
1001              out_path.write_text(json.dumps(report, indent=2, default=str))
1002          elif args.format == "markdown":
1003              out_path.write_text(format_markdown(report))
1004          else:
1005              out_path.write_text(json.dumps(report, indent=2, default=str))
1006  
1007          print(f"\nāœ… Report written to {args.output}", file=sys.stderr)
1008  
1009  
if __name__ == "__main__":
    # Script entry point: run the CLI only when executed directly,
    # not when imported as a module.
    main()