pipeline_widgets.py
"""
Pipeline Progress Widgets

Production-ready visualizations for pipeline processing progress.
All widgets handle the 'ignore' status separately from the main flow.
"""

import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from typing import Dict, Optional, Tuple
import streamlit as st


# Main pipeline stages (in order)
MAIN_STAGES = [
    'found',
    'assets_captured',
    'scored',
    'rescored',
    'enriched',
    'proposals_drafted',
    'outreach_sent'
]

# Stage display names
STAGE_NAMES = {
    'found': 'Found',
    'assets_captured': 'Assets Captured',
    'scored': 'Scored',
    'rescored': 'Rescored',
    'enriched': 'Enriched',
    'proposals_drafted': 'Proposals Drafted',
    'outreach_sent': 'Outreach Sent',
    'ignore': 'Ignored'
}

# Color scheme
STAGE_COLORS = {
    'found': '#3498DB',              # Blue
    'assets_captured': '#9B59B6',    # Purple
    'scored': '#E67E22',             # Orange
    'rescored': '#E74C3C',           # Red
    'enriched': '#1ABC9C',           # Turquoise
    'proposals_drafted': '#2ECC71',  # Green
    'outreach_sent': '#27AE60',      # Dark Green
    'ignore': '#95A5A6'              # Gray
}


def prepare_data(data: pd.DataFrame) -> Dict:
    """
    Split raw status counts into the main pipeline flow and the ignored bucket.

    Rows that share a status are summed, so an un-aggregated frame does not
    silently lose counts. Statuses other than the MAIN_STAGES entries and
    'ignore' do not contribute to any of the returned numbers.

    Args:
        data: DataFrame with 'status' and 'count' columns. ``None`` or an
            empty frame is treated as "no sites".

    Returns:
        Dict with:
            'main_flow': {stage: count} for every stage in MAIN_STAGES
                (0 for stages absent from ``data``)
            'ignored': count for the 'ignore' status (0 if absent)
            'total': sum of all main-flow counts plus 'ignored'

    Raises:
        KeyError: if a non-empty frame lacks the 'status' or 'count' column.
    """
    status_counts = {}
    if data is not None and not data.empty:
        # Accumulate instead of dict(zip(...)) so duplicate status rows add up
        # rather than the last row overwriting the earlier ones.
        for status, count in zip(data['status'], data['count']):
            status_counts[status] = status_counts.get(status, 0) + count

    # Separate main flow from ignored
    main_flow = {stage: status_counts.get(stage, 0) for stage in MAIN_STAGES}
    ignored = status_counts.get('ignore', 0)

    return {
        'main_flow': main_flow,
        'ignored': ignored,
        'total': sum(main_flow.values()) + ignored
    }


def create_sankey_flow(data: pd.DataFrame) -> go.Figure:
    """
    Sankey Flow Diagram with stages ordered left to right.

    One node is created per entry of MAIN_STAGES plus a final 'Ignored' node.
    Each consecutive pair of stages is joined by a link whose value is the
    count recorded for the TARGET stage; links with a non-positive value are
    omitted. The ignored count, when positive, branches off the first stage.

    Args:
        data: DataFrame with 'status' and 'count' columns

    Returns:
        Plotly Figure containing the Sankey trace
    """
    prepared = prepare_data(data)
    main_flow = prepared['main_flow']
    ignored = prepared['ignored']

    # Node order defines the node indices used by the links below:
    # 0..len(MAIN_STAGES)-1 are the stages, the last index is 'Ignored'.
    ignored_index = len(MAIN_STAGES)
    labels = [STAGE_NAMES[stage] for stage in MAIN_STAGES] + [STAGE_NAMES['ignore']]
    colors = [STAGE_COLORS[stage] for stage in MAIN_STAGES] + [STAGE_COLORS['ignore']]

    sources = []
    targets = []
    values = []
    link_colors = []

    # Main flow: stage i -> stage i + 1, sized by the count at stage i + 1.
    for source_index, next_stage in enumerate(MAIN_STAGES[1:]):
        flow_value = main_flow[next_stage]
        if flow_value > 0:
            sources.append(source_index)
            targets.append(source_index + 1)
            values.append(flow_value)
            link_colors.append('rgba(52, 152, 219, 0.3)')  # Blue with transparency

    # Ignored branch (sites filtered out early, typically at 'found' stage)
    if ignored > 0:
        sources.append(0)  # From 'found'
        targets.append(ignored_index)
        values.append(ignored)
        link_colors.append('rgba(149, 165, 166, 0.3)')  # Gray with transparency

    # Node hover shows the actual count at each stage, independent of link sizes
    node_values = [main_flow[stage] for stage in MAIN_STAGES] + [ignored]

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=20,
            thickness=25,
            line=dict(color="rgba(0,0,0,0)", width=0),  # Transparent border
            label=labels,
            color=colors,
            customdata=node_values,
            hovertemplate='%{label}<br>Count: %{customdata:,}<extra></extra>'
        ),
        link=dict(
            source=sources,
            target=targets,
            value=values,
            color=link_colors,
            hovertemplate='Flow: %{value:,} sites<extra></extra>'
        ),
        textfont=dict(
            color='black',
            size=12,
            family='Arial, sans-serif'
        )
    )])

    fig.update_layout(
        title="Pipeline Flow - Sankey Diagram",
        height=500,
        font=dict(size=12, color='black', family='Arial, sans-serif'),
        margin=dict(l=20, r=20, t=50, b=20),
        paper_bgcolor='white',
        plot_bgcolor='white'
    )

    return fig


def create_pie_chart(data: pd.DataFrame, show_legend: bool = True) -> go.Figure:
    """
    Pie Chart with stages ordered clockwise in pipeline sequence.

    Only stages with a positive count get a slice. The ignored bucket, when
    positive, is appended after the main stages as 'Ignored (Filtered)'.

    Args:
        data: DataFrame with 'status' and 'count' columns
        show_legend: Whether to display the legend (default: True)

    Returns:
        Plotly Figure with pie chart
    """
    prepared = prepare_data(data)
    main_flow = prepared['main_flow']
    ignored = prepared['ignored']

    # (label, value, color) per slice, in pipeline order with ignore last
    slices = [
        (STAGE_NAMES[stage], main_flow[stage], STAGE_COLORS[stage])
        for stage in MAIN_STAGES
        if main_flow[stage] > 0
    ]
    if ignored > 0:
        slices.append(('Ignored (Filtered)', ignored, STAGE_COLORS['ignore']))

    labels = [label for label, _, _ in slices]
    values = [value for _, value, _ in slices]
    colors = [color for _, _, color in slices]

    fig = go.Figure(data=[go.Pie(
        labels=labels,
        values=values,
        marker=dict(colors=colors),
        textinfo='label+percent',
        textposition='auto',
        hovertemplate='%{label}<br>Count: %{value:,}<br>Percent: %{percent}<extra></extra>',
        direction='clockwise',  # Clockwise progression
        sort=False,             # Don't sort - keep our order
        rotation=180            # Rotate 180 degrees to position smaller slices at bottom/left
    )])

    layout_config = {
        "title": "Pipeline Distribution - Stage Breakdown",
        "height": 500,
        "margin": dict(l=80, r=80, t=50, b=80),  # Increased margins for labels
        "showlegend": show_legend
    }

    if show_legend:
        # Vertical legend placed just outside the right edge of the plot area
        layout_config["legend"] = dict(
            orientation="v",
            yanchor="middle",
            y=0.5,
            xanchor="left",
            x=1.05
        )

    fig.update_layout(**layout_config)

    return fig


def _percent(part, whole):
    """Return ``part`` as a percentage of ``whole`` (1 decimal), or 0 if ``whole`` is falsy."""
    return round(100 * part / whole, 1) if whole else 0


def create_funnel_tree(breakdown: dict) -> go.Figure:
    """
    Treemap showing the full site funnel with sub-breakdowns.

    Top level: Ignored / Failing / Active
    Second level: sub-categories within each group

    Every node gets an explicit id and parents refer to those ids, so two
    nodes that happen to render the same label text (same name, count and
    share in different groups) stay distinct instead of colliding.

    Args:
        breakdown: dict returned by database.get_funnel_breakdown(). The keys
            read here are 'total', 'ignored', 'failing', 'active' (counts),
            'ignored_breakdown' / 'failing_breakdown' (rows with 'category'
            and 'count') and 'active_breakdown' (rows with 'status' and
            'count').

    Returns:
        Plotly Figure (treemap)
    """
    total = breakdown['total']

    ids = []
    labels = []
    parents = []
    values = []
    colors_list = []

    def add_node(node_id, label, parent_id, value, color):
        """Append one treemap node to the parallel trace lists."""
        ids.append(node_id)
        labels.append(label)
        parents.append(parent_id)
        values.append(value)
        colors_list.append(color)

    root_id = 'root'
    add_node(root_id, f"All Sites\n{total:,}", "", total, "#2C3E50")

    # --- Ignored / Failing --- (same shape: group node + one node per category)
    category_groups = (
        ('ignored', 'Ignored', "#95A5A6", "#BDC3C7"),
        ('failing', 'Failing', "#E74C3C", "#F1948A"),
    )
    for key, title, group_color, child_color in category_groups:
        group_count = breakdown[key]
        group_pct = _percent(group_count, total)
        add_node(key, f"{title}\n{group_count:,} ({group_pct}%)", root_id, group_count, group_color)

        for index, row in enumerate(breakdown[f'{key}_breakdown']):
            cat_pct = _percent(row['count'], group_count)
            add_node(
                f"{key}/{index}",
                f"{row['category']}\n{row['count']:,} ({cat_pct}%)",
                key,
                row['count'],
                child_color,
            )

    # --- Active ---
    active_count = breakdown['active']
    active_pct = _percent(active_count, total)
    add_node(
        'active',
        f"Active Pipeline\n{active_count:,} ({active_pct}%)",
        root_id,
        active_count,
        "#27AE60",
    )

    # Treemap-specific wording and colors for the active stages; these are
    # intentionally separate from STAGE_NAMES / STAGE_COLORS.
    stage_display = {
        'found': 'Awaiting Assets',
        'assets_captured': 'Awaiting Scoring',
        'scored': 'Scored',
        'rescored': 'Awaiting Enrichment',
        'enriched': 'Enriched',
        'proposals_drafted': 'Awaiting Outreach',
        'outreach_sent': 'Outreach Sent',
    }
    stage_colors_active = {
        'found': '#3498DB',
        'assets_captured': '#9B59B6',
        'scored': '#E67E22',
        'rescored': '#E67E22',
        'enriched': '#1ABC9C',
        'proposals_drafted': '#2ECC71',
        'outreach_sent': '#27AE60',
    }
    for index, row in enumerate(breakdown['active_breakdown']):
        status = row['status']
        # Unknown statuses fall back to the raw status text and a default green
        stage_name = stage_display.get(status, status)
        stage_pct = _percent(row['count'], active_count)
        add_node(
            f"active/{index}",
            f"{stage_name}\n{row['count']:,} ({stage_pct}%)",
            'active',
            row['count'],
            stage_colors_active.get(status, '#2ECC71'),
        )

    fig = go.Figure(go.Treemap(
        ids=ids,
        labels=labels,
        parents=parents,
        values=values,
        marker=dict(colors=colors_list, line=dict(width=2, color='white')),
        textfont=dict(size=12),
        hovertemplate='<b>%{label}</b><extra></extra>',
        branchvalues='total',
        maxdepth=2,
    ))

    fig.update_layout(
        title="Site Funnel Breakdown",
        height=500,
        margin=dict(l=10, r=10, t=50, b=10),
    )

    return fig


def create_funnel_with_sidebar(data: pd.DataFrame) -> Tuple[go.Figure, Dict]:
    """
    Enhanced Funnel with Sidebar metrics.

    The funnel shows only main-flow stages that have a positive count, in
    pipeline order. The ignored bucket is reported through the metrics dict
    instead of the chart.

    Args:
        data: DataFrame with 'status' and 'count' columns

    Returns:
        tuple: (Figure, Dict with sidebar metrics). The metrics dict has
        'ignored', 'ignored_percent', 'active', 'active_percent', 'total'
        and 'conversion_rate' ('outreach_sent' count as a percentage of the
        'found' count). Every percentage is rounded to one decimal and is 0
        when its denominator is not positive.
    """
    prepared = prepare_data(data)
    main_flow = prepared['main_flow']
    ignored = prepared['ignored']
    total = prepared['total']

    # Only show stages with data
    visible_stages = [stage for stage in MAIN_STAGES if main_flow[stage] > 0]

    fig = go.Figure(go.Funnel(
        y=[STAGE_NAMES[stage] for stage in visible_stages],
        x=[main_flow[stage] for stage in visible_stages],
        textinfo="value+percent initial",
        marker=dict(color=[STAGE_COLORS[stage] for stage in visible_stages]),
        connector=dict(line=dict(color="royalblue", width=3))
    ))

    fig.update_layout(
        title="Main Pipeline Flow",
        height=500,
        margin=dict(l=20, r=20, t=50, b=20)
    )

    # Calculate sidebar metrics
    active_total = sum(main_flow.values())
    found = main_flow['found']
    sidebar_metrics = {
        'ignored': ignored,
        'ignored_percent': round(100 * ignored / total, 1) if total > 0 else 0,
        'active': active_total,
        'active_percent': round(100 * active_total / total, 1) if total > 0 else 0,
        'total': total,
        'conversion_rate': round(100 * main_flow['outreach_sent'] / found, 1) if found > 0 else 0
    }

    return fig, sidebar_metrics