/ revenue-intelligence / client_report_generator.py
client_report_generator.py
1 #!/usr/bin/env python3 2 """ 3 Multi-Source Client Report Generator 4 5 Pulls from GA4 + HubSpot + Ahrefs + Gong to generate unified client-ready BI reports. 6 Includes executive summary, anomaly detection, and multi-format output. 7 8 Usage: 9 python client_report_generator.py --client "Acme Corp" 10 python client_report_generator.py --client "Acme Corp" --format markdown --output report.md 11 python client_report_generator.py --client "Acme Corp" --anomalies --compare previous-month 12 """ 13 14 import argparse 15 import json 16 import math 17 import os 18 import sys 19 from collections import defaultdict 20 from datetime import datetime, timedelta 21 from pathlib import Path 22 from typing import Optional 23 24 # --------------------------------------------------------------------------- 25 # API Configuration 26 # --------------------------------------------------------------------------- 27 28 # GA4: Google Analytics Data API 29 # Set GA4_PROPERTY_ID and GA4_CREDENTIALS_JSON (service account JSON path) 30 GA4_PROPERTY_ID = os.environ.get("GA4_PROPERTY_ID", "") 31 GA4_CREDENTIALS_JSON = os.environ.get("GA4_CREDENTIALS_JSON", "") 32 33 # HubSpot: Private App Token 34 # Required scopes: crm.objects.deals.read, crm.objects.contacts.read 35 HUBSPOT_API_KEY = os.environ.get("HUBSPOT_API_KEY", "") 36 37 # Ahrefs: API Token 38 # Get from: https://ahrefs.com/api 39 AHREFS_TOKEN = os.environ.get("AHREFS_TOKEN", "") 40 41 # Gong: API Access Key 42 # Get from: Gong > Settings > API 43 GONG_API_KEY = os.environ.get("GONG_API_KEY", "") 44 GONG_API_BASE_URL = os.environ.get("GONG_API_BASE_URL", "https://api.gong.io/v2") 45 46 OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "./output") 47 48 49 # --------------------------------------------------------------------------- 50 # Data Source Fetchers 51 # --------------------------------------------------------------------------- 52 53 def fetch_ga4_traffic(start_date: str, end_date: str, prev_start: Optional[str] = None, prev_end: Optional[str] = None) -> dict: 54 """ 55 Fetch traffic metrics from GA4. 56 57 Returns: { 58 "current": {"sessions": N, "users": N, "pageviews": N, "bounce_rate": N, ...}, 59 "previous": {...} or None, 60 "top_pages": [...], 61 "channels": [...] 62 } 63 """ 64 if not GA4_PROPERTY_ID or not GA4_CREDENTIALS_JSON: 65 print("INFO: GA4 credentials not configured. Using sample data.", file=sys.stderr) 66 return _sample_ga4_traffic() 67 68 try: 69 from google.analytics.data_v1beta import BetaAnalyticsDataClient 70 from google.analytics.data_v1beta.types import ( 71 DateRange, 72 Dimension, 73 Metric, 74 RunReportRequest, 75 OrderBy, 76 ) 77 78 client = BetaAnalyticsDataClient.from_service_account_json(GA4_CREDENTIALS_JSON) 79 80 # Overall metrics 81 date_ranges = [DateRange(start_date=start_date, end_date=end_date)] 82 if prev_start and prev_end: 83 date_ranges.append(DateRange(start_date=prev_start, end_date=prev_end)) 84 85 overview_req = RunReportRequest( 86 property=f"properties/{GA4_PROPERTY_ID}", 87 metrics=[ 88 Metric(name="sessions"), 89 Metric(name="totalUsers"), 90 Metric(name="screenPageViews"), 91 Metric(name="bounceRate"), 92 Metric(name="averageSessionDuration"), 93 Metric(name="conversions"), 94 ], 95 date_ranges=date_ranges, 96 ) 97 overview_resp = client.run_report(overview_req) 98 99 current = _parse_ga4_metrics(overview_resp.rows[0] if overview_resp.rows else None) 100 previous = _parse_ga4_metrics(overview_resp.rows[1] if len(overview_resp.rows) > 1 else None) 101 102 # Top pages 103 pages_req = RunReportRequest( 104 property=f"properties/{GA4_PROPERTY_ID}", 105 dimensions=[Dimension(name="pagePath")], 106 metrics=[Metric(name="sessions"), Metric(name="conversions")], 107 date_ranges=[DateRange(start_date=start_date, end_date=end_date)], 108 order_bys=[OrderBy(metric=OrderBy.MetricOrderBy(metric_name="sessions"), desc=True)], 109 limit=10, 110 ) 111 pages_resp = client.run_report(pages_req) 112 top_pages = [ 113 { 114 "page": row.dimension_values[0].value, 115 "sessions": int(row.metric_values[0].value), 116 "conversions": int(row.metric_values[1].value), 117 } 118 for row in pages_resp.rows 119 ] 120 121 # Channel breakdown 122 channels_req = RunReportRequest( 123 property=f"properties/{GA4_PROPERTY_ID}", 124 dimensions=[Dimension(name="sessionDefaultChannelGroup")], 125 metrics=[Metric(name="sessions"), Metric(name="totalUsers"), Metric(name="conversions")], 126 date_ranges=[DateRange(start_date=start_date, end_date=end_date)], 127 order_bys=[OrderBy(metric=OrderBy.MetricOrderBy(metric_name="sessions"), desc=True)], 128 ) 129 channels_resp = client.run_report(channels_req) 130 channels = [ 131 { 132 "channel": row.dimension_values[0].value, 133 "sessions": int(row.metric_values[0].value), 134 "users": int(row.metric_values[1].value), 135 "conversions": int(row.metric_values[2].value), 136 } 137 for row in channels_resp.rows 138 ] 139 140 return { 141 "current": current, 142 "previous": previous, 143 "top_pages": top_pages, 144 "channels": channels, 145 } 146 147 except Exception as e: 148 print(f"WARNING: GA4 API error: {e}. Using sample data.", file=sys.stderr) 149 return _sample_ga4_traffic() 150 151 152 def _parse_ga4_metrics(row) -> Optional[dict]: 153 if not row: 154 return None 155 return { 156 "sessions": int(row.metric_values[0].value), 157 "users": int(row.metric_values[1].value), 158 "pageviews": int(row.metric_values[2].value), 159 "bounce_rate": round(float(row.metric_values[3].value) * 100, 1), 160 "avg_session_duration": round(float(row.metric_values[4].value), 1), 161 "conversions": int(row.metric_values[5].value), 162 } 163 164 165 def _sample_ga4_traffic() -> dict: 166 return { 167 "current": { 168 "sessions": 45200, 169 "users": 38400, 170 "pageviews": 112000, 171 "bounce_rate": 52.3, 172 "avg_session_duration": 185.4, 173 "conversions": 342, 174 }, 175 "previous": { 176 "sessions": 41800, 177 "users": 35600, 178 "pageviews": 98000, 179 "bounce_rate": 55.1, 180 "avg_session_duration": 172.8, 181 "conversions": 298, 182 }, 183 "top_pages": [ 184 {"page": "/blog/seo-strategy-2025", "sessions": 4200, "conversions": 12}, 185 {"page": "/blog/ai-marketing-tools", "sessions": 3800, "conversions": 15}, 186 {"page": "/", "sessions": 3500, "conversions": 8}, 187 {"page": "/blog/content-marketing-roi", "sessions": 3100, "conversions": 8}, 188 {"page": "/pricing", "sessions": 2900, "conversions": 18}, 189 {"page": "/blog/b2b-lead-generation", "sessions": 2700, "conversions": 5}, 190 {"page": "/services", "sessions": 2400, "conversions": 14}, 191 {"page": "/case-studies", "sessions": 1800, "conversions": 9}, 192 {"page": "/blog/paid-media-benchmarks", "sessions": 1600, "conversions": 4}, 193 {"page": "/about", "sessions": 1200, "conversions": 2}, 194 ], 195 "channels": [ 196 {"channel": "Organic Search", "sessions": 22100, "users": 19200, "conversions": 156}, 197 {"channel": "Direct", "sessions": 9800, "users": 8100, "conversions": 89}, 198 {"channel": "Paid Search", "sessions": 5400, "users": 5100, "conversions": 52}, 199 {"channel": "Social", "sessions": 4200, "users": 3800, "conversions": 18}, 200 {"channel": "Email", "sessions": 2100, "users": 1900, "conversions": 22}, 201 {"channel": "Referral", "sessions": 1600, "users": 1300, "conversions": 5}, 202 ], 203 } 204 205 206 def fetch_hubspot_pipeline(start_date: str, end_date: str) -> dict: 207 """ 208 Fetch pipeline metrics from HubSpot. 209 210 Returns: { 211 "deals_created": N, 212 "deals_closed_won": N, 213 "deals_closed_lost": N, 214 "revenue_closed": N, 215 "pipeline_value": N, 216 "avg_deal_size": N, 217 "avg_sales_cycle_days": N, 218 "top_deals": [...] 219 } 220 """ 221 if not HUBSPOT_API_KEY: 222 print("INFO: HubSpot credentials not configured. Using sample data.", file=sys.stderr) 223 return _sample_hubspot_pipeline() 224 225 try: 226 import requests 227 headers = {"Authorization": f"Bearer {HUBSPOT_API_KEY}"} 228 229 # Fetch deals created in period 230 search_body = { 231 "filterGroups": [{ 232 "filters": [ 233 {"propertyName": "createdate", "operator": "GTE", "value": f"{start_date}T00:00:00Z"}, 234 {"propertyName": "createdate", "operator": "LTE", "value": f"{end_date}T23:59:59Z"}, 235 ] 236 }], 237 "properties": ["dealname", "amount", "dealstage", "closedate", "createdate", "hs_date_entered_closedwon"], 238 "limit": 100, 239 } 240 241 resp = requests.post( 242 "https://api.hubapi.com/crm/v3/objects/deals/search", 243 headers=headers, 244 json=search_body, 245 ) 246 resp.raise_for_status() 247 deals = resp.json().get("results", []) 248 249 created = len(deals) 250 won = sum(1 for d in deals if d["properties"].get("dealstage") == "closedwon") 251 lost = sum(1 for d in deals if d["properties"].get("dealstage") == "closedlost") 252 revenue = sum(float(d["properties"].get("amount", 0) or 0) for d in deals if d["properties"].get("dealstage") == "closedwon") 253 pipeline_value = sum(float(d["properties"].get("amount", 0) or 0) for d in deals if d["properties"].get("dealstage") not in ("closedwon", "closedlost")) 254 255 # Calculate avg sales cycle for won deals 256 cycle_days = [] 257 for d in deals: 258 if d["properties"].get("dealstage") == "closedwon": 259 try: 260 created_dt = datetime.strptime(d["properties"]["createdate"][:10], "%Y-%m-%d") 261 closed_dt = datetime.strptime(d["properties"]["closedate"][:10], "%Y-%m-%d") 262 cycle_days.append((closed_dt - created_dt).days) 263 except (ValueError, KeyError, TypeError): 264 pass 265 266 avg_cycle = round(sum(cycle_days) / len(cycle_days), 1) if cycle_days else 0 267 268 top_deals = sorted( 269 [{"name": d["properties"].get("dealname", ""), "amount": float(d["properties"].get("amount", 0) or 0), "stage": d["properties"].get("dealstage", "")} 270 for d in deals], 271 key=lambda x: x["amount"], 272 reverse=True, 273 )[:5] 274 275 return { 276 "deals_created": created, 277 "deals_closed_won": won, 278 "deals_closed_lost": lost, 279 "revenue_closed": revenue, 280 "pipeline_value": pipeline_value, 281 "avg_deal_size": round(revenue / won, 0) if won else 0, 282 "avg_sales_cycle_days": avg_cycle, 283 "win_rate": round(won / (won + lost) * 100, 1) if (won + lost) else 0, 284 "top_deals": top_deals, 285 } 286 287 except Exception as e: 288 print(f"WARNING: HubSpot API error: {e}. Using sample data.", file=sys.stderr) 289 return _sample_hubspot_pipeline() 290 291 292 def _sample_hubspot_pipeline() -> dict: 293 return { 294 "deals_created": 47, 295 "deals_closed_won": 12, 296 "deals_closed_lost": 8, 297 "revenue_closed": 1440000, 298 "pipeline_value": 2850000, 299 "avg_deal_size": 120000, 300 "avg_sales_cycle_days": 38.5, 301 "win_rate": 60.0, 302 "top_deals": [ 303 {"name": "Enterprise Ltd - Full Service", "amount": 360000, "stage": "closedwon"}, 304 {"name": "TechStart Inc - SEO + Content", "amount": 240000, "stage": "closedwon"}, 305 {"name": "DataCo - Pipeline Build", "amount": 180000, "stage": "contractsent"}, 306 {"name": "ScaleUp - Paid Media", "amount": 156000, "stage": "closedwon"}, 307 {"name": "GrowthCo - Content Marketing", "amount": 120000, "stage": "qualifiedtobuy"}, 308 ], 309 } 310 311 312 def fetch_ahrefs_seo(domain: str) -> dict: 313 """ 314 Fetch SEO metrics from Ahrefs. 315 316 Returns: { 317 "domain_rating": N, 318 "referring_domains": N, 319 "backlinks": N, 320 "organic_keywords": N, 321 "organic_traffic": N, 322 "top_keywords": [...], 323 "new_backlinks": N, 324 "lost_backlinks": N 325 } 326 """ 327 if not AHREFS_TOKEN: 328 print("INFO: Ahrefs credentials not configured. Using sample data.", file=sys.stderr) 329 return _sample_ahrefs_data() 330 331 try: 332 import requests 333 334 headers = {"Authorization": f"Bearer {AHREFS_TOKEN}"} 335 base = "https://api.ahrefs.com/v3" 336 337 # Domain overview 338 overview_resp = requests.get( 339 f"{base}/site-explorer/overview", 340 headers=headers, 341 params={"target": domain, "mode": "domain"}, 342 ) 343 overview_resp.raise_for_status() 344 overview = overview_resp.json() 345 346 # Top organic keywords 347 keywords_resp = requests.get( 348 f"{base}/site-explorer/organic-keywords", 349 headers=headers, 350 params={"target": domain, "mode": "domain", "limit": 10, "order_by": "traffic:desc"}, 351 ) 352 keywords_data = keywords_resp.json() if keywords_resp.ok else {} 353 354 top_keywords = [ 355 { 356 "keyword": kw.get("keyword", ""), 357 "position": kw.get("position", 0), 358 "volume": kw.get("volume", 0), 359 "traffic": kw.get("traffic", 0), 360 } 361 for kw in keywords_data.get("keywords", []) 362 ] 363 364 return { 365 "domain_rating": overview.get("domain_rating", 0), 366 "referring_domains": overview.get("referring_domains", 0), 367 "backlinks": overview.get("backlinks", 0), 368 "organic_keywords": overview.get("organic_keywords", 0), 369 "organic_traffic": overview.get("organic_traffic", 0), 370 "top_keywords": top_keywords, 371 "new_backlinks": overview.get("new_backlinks_30d", 0), 372 "lost_backlinks": overview.get("lost_backlinks_30d", 0), 373 } 374 375 except Exception as e: 376 print(f"WARNING: Ahrefs API error: {e}. Using sample data.", file=sys.stderr) 377 return _sample_ahrefs_data() 378 379 380 def _sample_ahrefs_data() -> dict: 381 return { 382 "domain_rating": 72, 383 "referring_domains": 4850, 384 "backlinks": 89200, 385 "organic_keywords": 28400, 386 "organic_traffic": 156000, 387 "top_keywords": [ 388 {"keyword": "digital marketing agency", "position": 3, "volume": 18100, "traffic": 4200}, 389 {"keyword": "seo services", "position": 5, "volume": 14800, "traffic": 2100}, 390 {"keyword": "content marketing strategy", "position": 2, "volume": 9900, "traffic": 3800}, 391 {"keyword": "b2b marketing agency", "position": 4, "volume": 6600, "traffic": 1400}, 392 {"keyword": "marketing automation", "position": 7, "volume": 12100, "traffic": 980}, 393 {"keyword": "ppc management", "position": 6, "volume": 5400, "traffic": 890}, 394 {"keyword": "growth marketing", "position": 1, "volume": 4400, "traffic": 2900}, 395 {"keyword": "seo audit", "position": 8, "volume": 8800, "traffic": 640}, 396 {"keyword": "link building services", "position": 3, "volume": 3600, "traffic": 780}, 397 {"keyword": "saas marketing", "position": 2, "volume": 3200, "traffic": 1200}, 398 ], 399 "new_backlinks": 342, 400 "lost_backlinks": 128, 401 } 402 403 404 def fetch_gong_call_quality(start_date: str, end_date: str) -> dict: 405 """ 406 Fetch call quality metrics from Gong. 407 408 Returns: { 409 "total_calls": N, 410 "avg_talk_ratio": N (percent rep spoke), 411 "avg_longest_monologue": N (seconds), 412 "avg_patience": N (seconds before interrupting), 413 "topics_discussed": [...], 414 "win_rate_by_talk_ratio": {...} 415 } 416 """ 417 if not GONG_API_KEY: 418 print("INFO: Gong credentials not configured. Using sample data.", file=sys.stderr) 419 return _sample_gong_data() 420 421 try: 422 import requests 423 424 headers = { 425 "Authorization": f"Bearer {GONG_API_KEY}", 426 "Content-Type": "application/json", 427 } 428 429 # Fetch call stats 430 from_dt = f"{start_date}T00:00:00Z" 431 to_dt = f"{end_date}T23:59:59Z" 432 433 stats_resp = requests.post( 434 f"{GONG_API_BASE_URL}/stats/activity/aggregate", 435 headers=headers, 436 json={ 437 "filter": {"fromDateTime": from_dt, "toDateTime": to_dt}, 438 "aggregation": {"aggregateBy": "user"}, 439 }, 440 ) 441 442 if stats_resp.ok: 443 stats = stats_resp.json() 444 # Process aggregate stats 445 # (Gong's actual response format varies; adjust parsing as needed) 446 return { 447 "total_calls": stats.get("totalCalls", 0), 448 "avg_talk_ratio": stats.get("avgTalkRatio", 0), 449 "source": "gong_api", 450 } 451 else: 452 return _sample_gong_data() 453 454 except Exception as e: 455 print(f"WARNING: Gong API error: {e}. Using sample data.", file=sys.stderr) 456 return _sample_gong_data() 457 458 459 def _sample_gong_data() -> dict: 460 return { 461 "total_calls": 156, 462 "avg_talk_ratio": 54.2, 463 "avg_longest_monologue_sec": 142, 464 "avg_patience_sec": 1.8, 465 "avg_call_duration_min": 32.5, 466 "calls_with_next_steps": 118, 467 "next_steps_rate": 75.6, 468 "top_topics": [ 469 {"topic": "Pricing", "frequency": 89, "pct": 57.1}, 470 {"topic": "Implementation", "frequency": 72, "pct": 46.2}, 471 {"topic": "ROI", "frequency": 68, "pct": 43.6}, 472 {"topic": "Timeline", "frequency": 54, "pct": 34.6}, 473 {"topic": "Competition", "frequency": 41, "pct": 26.3}, 474 ], 475 "talk_ratio_vs_win_rate": { 476 "40-50%": {"calls": 42, "win_rate": 38.1}, 477 "50-60%": {"calls": 58, "win_rate": 45.2}, 478 "60-70%": {"calls": 36, "win_rate": 31.4}, 479 "70%+": {"calls": 20, "win_rate": 15.0}, 480 }, 481 } 482 483 484 # --------------------------------------------------------------------------- 485 # Anomaly Detection 486 # --------------------------------------------------------------------------- 487 488 def detect_anomalies(current: dict, previous: dict, thresholds: Optional[dict] = None) -> list[dict]: 489 """ 490 Compare current vs previous period metrics and flag anomalies. 491 492 Default thresholds: >20% change = warning, >40% change = critical 493 """ 494 if not thresholds: 495 thresholds = {"warning": 0.20, "critical": 0.40} 496 497 anomalies = [] 498 499 metric_labels = { 500 "sessions": "Website Sessions", 501 "users": "Unique Users", 502 "pageviews": "Pageviews", 503 "bounce_rate": "Bounce Rate", 504 "conversions": "Conversions", 505 "avg_session_duration": "Avg Session Duration", 506 } 507 508 for metric, label in metric_labels.items(): 509 curr_val = current.get(metric, 0) 510 prev_val = previous.get(metric, 0) 511 512 if prev_val == 0: 513 continue 514 515 pct_change = (curr_val - prev_val) / prev_val 516 abs_change = abs(pct_change) 517 518 if abs_change >= thresholds["critical"]: 519 severity = "critical" 520 elif abs_change >= thresholds["warning"]: 521 severity = "warning" 522 else: 523 continue 524 525 direction = "increase" if pct_change > 0 else "decrease" 526 # For bounce rate, increase is bad; for others, decrease is bad 527 is_positive = (direction == "increase") if metric != "bounce_rate" else (direction == "decrease") 528 529 anomalies.append({ 530 "metric": metric, 531 "label": label, 532 "current_value": curr_val, 533 "previous_value": prev_val, 534 "pct_change": round(pct_change * 100, 1), 535 "direction": direction, 536 "severity": severity, 537 "sentiment": "positive" if is_positive else "negative", 538 "summary": f"{label} {'š' if is_positive else 'š'} {abs(round(pct_change * 100, 1))}% {direction} ({prev_val:,} ā {curr_val:,})", 539 }) 540 541 return anomalies 542 543 544 # --------------------------------------------------------------------------- 545 # Report Builder 546 # --------------------------------------------------------------------------- 547 548 def build_report( 549 client_name: str, 550 start_date: str, 551 end_date: str, 552 skip_sources: list[str] = None, 553 enable_anomalies: bool = False, 554 compare: Optional[str] = None, 555 domain: str = "", 556 ) -> dict: 557 """Build the full client report from all sources.""" 558 skip = skip_sources or [] 559 560 # Calculate comparison period 561 start_dt = datetime.strptime(start_date, "%Y-%m-%d") 562 end_dt = datetime.strptime(end_date, "%Y-%m-%d") 563 period_days = (end_dt - start_dt).days 564 565 prev_end = (start_dt - timedelta(days=1)).strftime("%Y-%m-%d") 566 prev_start = (start_dt - timedelta(days=period_days + 1)).strftime("%Y-%m-%d") 567 568 report = { 569 "client": client_name, 570 "period": {"start": start_date, "end": end_date, "days": period_days}, 571 "generated_at": datetime.utcnow().isoformat() + "Z", 572 "sections": {}, 573 } 574 575 # --- GA4 Traffic --- 576 if "ga4" not in skip: 577 ga4 = fetch_ga4_traffic(start_date, end_date, prev_start, prev_end) 578 report["sections"]["traffic"] = ga4 579 580 if enable_anomalies and ga4.get("current") and ga4.get("previous"): 581 anomalies = detect_anomalies(ga4["current"], ga4["previous"]) 582 report["sections"]["traffic"]["anomalies"] = anomalies 583 584 # --- HubSpot Pipeline --- 585 if "hubspot" not in skip: 586 pipeline = fetch_hubspot_pipeline(start_date, end_date) 587 report["sections"]["pipeline"] = pipeline 588 589 # --- Ahrefs SEO --- 590 if "ahrefs" not in skip: 591 target_domain = domain or os.environ.get("YOUR_DOMAIN", "example.com") 592 seo = fetch_ahrefs_seo(target_domain) 593 report["sections"]["seo"] = seo 594 595 # --- Gong Call Quality --- 596 if "gong" not in skip: 597 call_quality = fetch_gong_call_quality(start_date, end_date) 598 report["sections"]["call_quality"] = call_quality 599 600 # --- Executive Summary --- 601 report["executive_summary"] = generate_executive_summary(report) 602 603 return report 604 605 606 def generate_executive_summary(report: dict) -> dict: 607 """Auto-generate executive summary from report data.""" 608 highlights = [] 609 concerns = [] 610 recommendations = [] 611 612 sections = report.get("sections", {}) 613 614 # Traffic insights 615 traffic = sections.get("traffic", {}) 616 current = traffic.get("current", {}) 617 previous = traffic.get("previous", {}) 618 619 if current and previous: 620 sessions_change = ((current.get("sessions", 0) - previous.get("sessions", 0)) / previous.get("sessions", 1)) * 100 621 conv_change = ((current.get("conversions", 0) - previous.get("conversions", 0)) / max(previous.get("conversions", 1), 1)) * 100 622 623 if sessions_change > 10: 624 highlights.append(f"Traffic up {sessions_change:.1f}% ({current['sessions']:,} sessions)") 625 elif sessions_change < -10: 626 concerns.append(f"Traffic down {abs(sessions_change):.1f}% ({current['sessions']:,} sessions)") 627 628 if conv_change > 15: 629 highlights.append(f"Conversions up {conv_change:.1f}% ({current['conversions']} total)") 630 elif conv_change < -15: 631 concerns.append(f"Conversions down {abs(conv_change):.1f}%") 632 633 # Pipeline insights 634 pipeline = sections.get("pipeline", {}) 635 if pipeline: 636 if pipeline.get("win_rate", 0) >= 50: 637 highlights.append(f"Win rate at {pipeline['win_rate']}% ({pipeline['deals_closed_won']} won)") 638 elif pipeline.get("win_rate", 0) < 30: 639 concerns.append(f"Win rate below 30% ({pipeline['win_rate']}%)") 640 641 if pipeline.get("revenue_closed", 0) > 0: 642 highlights.append(f"${pipeline['revenue_closed']:,.0f} revenue closed") 643 644 if pipeline.get("pipeline_value", 0) > 0: 645 highlights.append(f"${pipeline['pipeline_value']:,.0f} in active pipeline") 646 647 # SEO insights 648 seo = sections.get("seo", {}) 649 if seo: 650 net_backlinks = seo.get("new_backlinks", 0) - seo.get("lost_backlinks", 0) 651 if net_backlinks > 100: 652 highlights.append(f"Net +{net_backlinks} backlinks this period") 653 elif net_backlinks < -50: 654 concerns.append(f"Net loss of {abs(net_backlinks)} backlinks") 655 656 top_kws = seo.get("top_keywords", []) 657 top3_count = sum(1 for kw in top_kws if kw.get("position", 99) <= 3) 658 if top3_count >= 3: 659 highlights.append(f"{top3_count} keywords in top 3 positions") 660 661 # Call quality insights 662 calls = sections.get("call_quality", {}) 663 if calls: 664 talk_ratio = calls.get("avg_talk_ratio", 0) 665 if talk_ratio > 65: 666 concerns.append(f"Reps talking too much ({talk_ratio}% talk ratio). Best practice: 40-60%.") 667 recommendations.append("Run talk-ratio coaching sessions. Reps at 40-60% have 2x win rate vs 70%+.") 668 if calls.get("next_steps_rate", 0) < 70: 669 concerns.append(f"Only {calls.get('next_steps_rate', 0)}% of calls end with clear next steps.") 670 recommendations.append("Implement mandatory next-steps template for all discovery calls.") 671 672 # Anomaly-based recommendations 673 anomalies = traffic.get("anomalies", []) 674 for a in anomalies: 675 if a["severity"] == "critical" and a["sentiment"] == "negative": 676 recommendations.append(f"Investigate {a['label']} drop ({a['pct_change']}%). Check for technical issues, algorithm updates, or campaign pauses.") 677 678 return { 679 "highlights": highlights, 680 "concerns": concerns, 681 "recommendations": recommendations, 682 "overall_health": "strong" if len(highlights) > len(concerns) else "needs_attention" if concerns else "stable", 683 } 684 685 686 # --------------------------------------------------------------------------- 687 # Output Formatters 688 # --------------------------------------------------------------------------- 689 690 def format_markdown(report: dict) -> str: 691 """Format report as client-ready markdown.""" 692 lines = [] 693 lines.append(f"# {report['client']} - Performance Report") 694 lines.append(f"**Period:** {report['period']['start']} to {report['period']['end']} ({report['period']['days']} days)") 695 lines.append(f"**Generated:** {report['generated_at'][:10]}") 696 lines.append("") 697 698 # Executive Summary 699 summary = report.get("executive_summary", {}) 700 lines.append("## Executive Summary") 701 lines.append("") 702 703 health = summary.get("overall_health", "stable") 704 health_emoji = {"strong": "š¢", "stable": "š”", "needs_attention": "š“"}.get(health, "āŖ") 705 lines.append(f"**Overall Health:** {health_emoji} {health.replace('_', ' ').title()}") 706 lines.append("") 707 708 if summary.get("highlights"): 709 lines.append("### ā Highlights") 710 for h in summary["highlights"]: 711 lines.append(f"- {h}") 712 lines.append("") 713 714 if summary.get("concerns"): 715 lines.append("### ā ļø Concerns") 716 for c in summary["concerns"]: 717 lines.append(f"- {c}") 718 lines.append("") 719 720 if summary.get("recommendations"): 721 lines.append("### š” Recommendations") 722 for r in summary["recommendations"]: 723 lines.append(f"- {r}") 724 lines.append("") 725 726 # Traffic 727 sections = report.get("sections", {}) 728 traffic = sections.get("traffic", {}) 729 if traffic: 730 current = traffic.get("current", {}) 731 previous = traffic.get("previous", {}) 732 733 lines.append("---") 734 lines.append("## š Traffic") 735 lines.append("") 736 lines.append("| Metric | Current | Previous | Change |") 737 lines.append("|--------|---------|----------|--------|") 738 739 for metric, label in [("sessions", "Sessions"), ("users", "Users"), ("pageviews", "Pageviews"), ("bounce_rate", "Bounce Rate"), ("conversions", "Conversions")]: 740 curr = current.get(metric, 0) 741 prev = previous.get(metric, 0) if previous else 0 742 if prev: 743 change = ((curr - prev) / prev) * 100 744 change_str = f"{'ā' if change > 0 else 'ā'} {abs(change):.1f}%" 745 else: 746 change_str = "N/A" 747 fmt = f"{curr:.1f}%" if metric == "bounce_rate" else f"{curr:,}" 748 prev_fmt = f"{prev:.1f}%" if metric == "bounce_rate" else f"{prev:,}" 749 lines.append(f"| {label} | {fmt} | {prev_fmt} | {change_str} |") 750 751 lines.append("") 752 753 if traffic.get("channels"): 754 lines.append("### Channel Breakdown") 755 lines.append("") 756 lines.append("| Channel | Sessions | Conversions |") 757 lines.append("|---------|----------|-------------|") 758 for ch in traffic["channels"]: 759 lines.append(f"| {ch['channel']} | {ch['sessions']:,} | {ch['conversions']} |") 760 lines.append("") 761 762 if traffic.get("top_pages"): 763 lines.append("### Top Pages") 764 lines.append("") 765 lines.append("| Page | Sessions | Conversions |") 766 lines.append("|------|----------|-------------|") 767 for p in traffic["top_pages"][:10]: 768 lines.append(f"| {p['page']} | {p['sessions']:,} | {p['conversions']} |") 769 lines.append("") 770 771 # Anomalies 772 if traffic.get("anomalies"): 773 lines.append("### šØ Anomalies Detected") 774 lines.append("") 775 for a in traffic["anomalies"]: 776 icon = "š“" if a["severity"] == "critical" else "š”" 777 lines.append(f"- {icon} {a['summary']}") 778 lines.append("") 779 780 # Pipeline 781 pipeline = sections.get("pipeline", {}) 782 if pipeline: 783 lines.append("---") 784 lines.append("## šÆ Pipeline") 785 lines.append("") 786 lines.append(f"| Metric | Value |") 787 lines.append(f"|--------|-------|") 788 lines.append(f"| Deals Created | {pipeline.get('deals_created', 0)} |") 789 lines.append(f"| Deals Won | {pipeline.get('deals_closed_won', 0)} |") 790 lines.append(f"| Deals Lost | {pipeline.get('deals_closed_lost', 0)} |") 791 lines.append(f"| Win Rate | {pipeline.get('win_rate', 0)}% |") 792 lines.append(f"| Revenue Closed | ${pipeline.get('revenue_closed', 0):,.0f} |") 793 lines.append(f"| Pipeline Value | ${pipeline.get('pipeline_value', 0):,.0f} |") 794 lines.append(f"| Avg Deal Size | ${pipeline.get('avg_deal_size', 0):,.0f} |") 795 lines.append(f"| Avg Sales Cycle | {pipeline.get('avg_sales_cycle_days', 0)} days |") 796 lines.append("") 797 798 if pipeline.get("top_deals"): 799 lines.append("### Top Deals") 800 lines.append("") 801 lines.append("| Deal | Amount | Stage |") 802 lines.append("|------|--------|-------|") 803 for d in pipeline["top_deals"]: 804 lines.append(f"| {d['name']} | ${d['amount']:,.0f} | {d['stage']} |") 805 lines.append("") 806 807 # SEO 808 seo = sections.get("seo", {}) 809 if seo: 810 lines.append("---") 811 lines.append("## š SEO") 812 lines.append("") 813 lines.append(f"| Metric | Value |") 814 lines.append(f"|--------|-------|") 815 lines.append(f"| Domain Rating | {seo.get('domain_rating', 0)} |") 816 lines.append(f"| Referring Domains | {seo.get('referring_domains', 0):,} |") 817 lines.append(f"| Total Backlinks | {seo.get('backlinks', 0):,} |") 818 lines.append(f"| Organic Keywords | {seo.get('organic_keywords', 0):,} |") 819 lines.append(f"| Organic Traffic | {seo.get('organic_traffic', 0):,} |") 820 lines.append(f"| New Backlinks (30d) | +{seo.get('new_backlinks', 0)} |") 821 lines.append(f"| Lost Backlinks (30d) | -{seo.get('lost_backlinks', 0)} |") 822 lines.append("") 823 824 if seo.get("top_keywords"): 825 lines.append("### Top Keywords") 826 lines.append("") 827 lines.append("| Keyword | Position | Volume | Traffic |") 828 lines.append("|---------|----------|--------|---------|") 829 for kw in seo["top_keywords"]: 830 lines.append(f"| {kw['keyword']} | {kw['position']} | {kw['volume']:,} | {kw['traffic']:,} |") 831 lines.append("") 832 833 # Call Quality 834 calls = sections.get("call_quality", {}) 835 if calls: 836 lines.append("---") 837 lines.append("## š Call Quality") 838 lines.append("") 839 lines.append(f"| Metric | Value |") 840 lines.append(f"|--------|-------|") 841 lines.append(f"| Total Calls | {calls.get('total_calls', 0)} |") 842 lines.append(f"| Avg Talk Ratio | {calls.get('avg_talk_ratio', 0)}% |") 843 lines.append(f"| Avg Call Duration | {calls.get('avg_call_duration_min', 0)} min |") 844 lines.append(f"| Longest Monologue | {calls.get('avg_longest_monologue_sec', 0)}s |") 845 lines.append(f"| Next Steps Rate | {calls.get('next_steps_rate', 0)}% |") 846 lines.append("") 847 848 if calls.get("top_topics"): 849 lines.append("### Top Discussion Topics") 850 lines.append("") 851 lines.append("| Topic | Frequency | % of Calls |") 852 lines.append("|-------|-----------|------------|") 853 for t in calls["top_topics"]: 854 lines.append(f"| {t['topic']} | {t['frequency']} | {t['pct']}% |") 855 lines.append("") 856 857 if calls.get("talk_ratio_vs_win_rate"): 858 lines.append("### Talk Ratio vs Win Rate") 859 lines.append("") 860 lines.append("| Talk Ratio | Calls | Win Rate |") 861 lines.append("|------------|-------|----------|") 862 for ratio, data in calls["talk_ratio_vs_win_rate"].items(): 863 lines.append(f"| {ratio} | {data['calls']} | {data['win_rate']}% |") 864 lines.append("") 865 866 lines.append("---") 867 lines.append(f"*Report generated automatically on {report['generated_at'][:10]}*") 868 869 return "\n".join(lines) 870 871 872 def print_report_console(report: dict) -> None: 873 """Print a condensed version to console.""" 874 summary = report.get("executive_summary", {}) 875 876 print(f"\n{'='*70}") 877 print(f" {report['client']} - Performance Report") 878 print(f" {report['period']['start']} to {report['period']['end']}") 879 print(f"{'='*70}") 880 881 health = summary.get("overall_health", "stable") 882 health_emoji = {"strong": "š¢", "stable": "š”", "needs_attention": "š“"}.get(health, "āŖ") 883 print(f"\n {health_emoji} Overall: {health.replace('_', ' ').title()}") 884 885 if summary.get("highlights"): 886 print(f"\n ā Highlights:") 887 for h in summary["highlights"]: 888 print(f" ⢠{h}") 889 890 if summary.get("concerns"): 891 print(f"\n ā ļø Concerns:") 892 for c in summary["concerns"]: 893 print(f" ⢠{c}") 894 895 if summary.get("recommendations"): 896 print(f"\n š” Recommendations:") 897 for r in summary["recommendations"]: 898 print(f" ⢠{r}") 899 900 # Key numbers 901 sections = report.get("sections", {}) 902 903 traffic = sections.get("traffic", {}).get("current", {}) 904 if traffic: 905 print(f"\n š Traffic: {traffic.get('sessions', 0):,} sessions | {traffic.get('conversions', 0)} conversions") 906 907 pipeline = sections.get("pipeline", {}) 908 if pipeline: 909 print(f" šÆ Pipeline: ${pipeline.get('revenue_closed', 0):,.0f} closed | ${pipeline.get('pipeline_value', 0):,.0f} active | {pipeline.get('win_rate', 0)}% win rate") 910 911 seo = sections.get("seo", {}) 912 if seo: 913 print(f" š SEO: DR {seo.get('domain_rating', 0)} | {seo.get('organic_keywords', 0):,} keywords | {seo.get('organic_traffic', 0):,} organic traffic") 914 915 calls = sections.get("call_quality", {}) 916 if calls: 917 print(f" š Calls: {calls.get('total_calls', 0)} calls | {calls.get('avg_talk_ratio', 0)}% talk ratio | {calls.get('next_steps_rate', 0)}% next steps") 918 919 # Anomalies 920 anomalies = sections.get("traffic", {}).get("anomalies", []) 921 if anomalies: 922 print(f"\n šØ Anomalies:") 923 for a in anomalies: 924 icon = "š“" if a["severity"] == "critical" else "š”" 925 print(f" {icon} {a['summary']}") 926 927 print() 928 929 930 # --------------------------------------------------------------------------- 931 # Main 932 # --------------------------------------------------------------------------- 933 934 def main(): 935 parser = argparse.ArgumentParser( 936 description="Generate unified client BI reports from GA4 + HubSpot + Ahrefs + Gong.", 937 formatter_class=argparse.RawDescriptionHelpFormatter, 938 epilog=""" 939 Examples: 940 %(prog)s --client "Acme Corp" 941 %(prog)s --client "Acme Corp" --format markdown --output report.md 942 %(prog)s --client "Acme Corp" --anomalies --compare previous-month 943 %(prog)s --client "Acme Corp" --skip gong,ahrefs --format json 944 """, 945 ) 946 947 parser.add_argument("--client", required=True, help="Client name for the report header") 948 parser.add_argument("--start", help="Start date YYYY-MM-DD (default: 30 days ago)") 949 parser.add_argument("--end", help="End date YYYY-MM-DD (default: today)") 950 parser.add_argument("--domain", help="Domain for Ahrefs data (default: YOUR_DOMAIN env var)") 951 952 parser.add_argument("--format", choices=["markdown", "json", "console"], default="console", 953 help="Output format (default: console)") 954 parser.add_argument("--output", "-o", help="Write output to file") 955 parser.add_argument("--skip", help="Comma-separated sources to skip (ga4,hubspot,ahrefs,gong)") 956 parser.add_argument("--anomalies", action="store_true", help="Enable anomaly detection") 957 parser.add_argument("--compare", choices=["previous-month", "previous-quarter", "yoy"], 958 help="Comparison period (requires anomaly detection)") 959 960 args = parser.parse_args() 961 962 # Dates 963 end_date = args.end or datetime.utcnow().strftime("%Y-%m-%d") 964 start_date = args.start or (datetime.utcnow() - timedelta(days=30)).strftime("%Y-%m-%d") 965 966 skip_sources = [s.strip() for s in args.skip.split(",")] if args.skip else [] 967 968 if args.compare: 969 args.anomalies = True 970 971 print(f"Building report for {args.client} ({start_date} to {end_date})...", file=sys.stderr) 972 973 # Build report 974 report = build_report( 975 client_name=args.client, 976 start_date=start_date, 977 end_date=end_date, 978 skip_sources=skip_sources, 979 enable_anomalies=args.anomalies, 980 compare=args.compare, 981 domain=args.domain or "", 982 ) 983 984 # Output 985 if args.format == "json": 986 output_text = json.dumps(report, indent=2, default=str) 987 print(output_text) 988 elif args.format == "markdown": 989 output_text = format_markdown(report) 990 if not args.output: 991 print(output_text) 992 else: 993 print_report_console(report) 994 output_text = None 995 996 if args.output: 997 out_path = Path(args.output) 998 out_path.parent.mkdir(parents=True, exist_ok=True) 999 1000 if args.format == "json": 1001 out_path.write_text(json.dumps(report, indent=2, default=str)) 1002 elif args.format == "markdown": 1003 out_path.write_text(format_markdown(report)) 1004 else: 1005 out_path.write_text(json.dumps(report, indent=2, default=str)) 1006 1007 print(f"\nā Report written to {args.output}", file=sys.stderr) 1008 1009 1010 if __name__ == "__main__": 1011 main()