/ dashboard / components / pipeline_widgets.py
pipeline_widgets.py
  1  """
  2  Pipeline Progress Widgets
  3  
  4  Production-ready visualizations for pipeline processing progress.
  5  All widgets handle the 'ignore' status separately from the main flow.
  6  """
  7  
  8  import plotly.graph_objects as go
  9  import plotly.express as px
 10  import pandas as pd
 11  from typing import Dict, Optional, Tuple
 12  import streamlit as st
 13  
 14  
 15  # Main pipeline stages (in order)
 16  MAIN_STAGES = [
 17      'found',
 18      'assets_captured',
 19      'scored',
 20      'rescored',
 21      'enriched',
 22      'proposals_drafted',
 23      'outreach_sent'
 24  ]
 25  
 26  # Stage display names
 27  STAGE_NAMES = {
 28      'found': 'Found',
 29      'assets_captured': 'Assets Captured',
 30      'scored': 'Scored',
 31      'rescored': 'Rescored',
 32      'enriched': 'Enriched',
 33      'proposals_drafted': 'Proposals Drafted',
 34      'outreach_sent': 'Outreach Sent',
 35      'ignore': 'Ignored'
 36  }
 37  
 38  # Color scheme
 39  STAGE_COLORS = {
 40      'found': '#3498DB',  # Blue
 41      'assets_captured': '#9B59B6',  # Purple
 42      'scored': '#E67E22',  # Orange
 43      'rescored': '#E74C3C',  # Red
 44      'enriched': '#1ABC9C',  # Turquoise
 45      'proposals_drafted': '#2ECC71',  # Green
 46      'outreach_sent': '#27AE60',  # Dark Green
 47      'ignore': '#95A5A6'  # Gray
 48  }
 49  
 50  
 51  def prepare_data(data: pd.DataFrame) -> Dict:
 52      """
 53      Prepare data for visualization.
 54  
 55      Args:
 56          data: DataFrame with 'status' and 'count' columns
 57  
 58      Returns:
 59          Dict with main_flow and ignored counts
 60      """
 61      # Convert to dict for easy lookup
 62      status_counts = dict(zip(data['status'], data['count']))
 63  
 64      # Separate main flow from ignored
 65      main_flow = {stage: status_counts.get(stage, 0) for stage in MAIN_STAGES}
 66      ignored = status_counts.get('ignore', 0)
 67  
 68      return {
 69          'main_flow': main_flow,
 70          'ignored': ignored,
 71          'total': sum(main_flow.values()) + ignored
 72      }
 73  
 74  
 75  def create_sankey_flow(data: pd.DataFrame) -> go.Figure:
 76      """
 77      Sankey Flow Diagram with corrected stage ordering.
 78  
 79      Shows progression through pipeline stages with 'ignore' branching off early.
 80      Stages are ordered correctly from left to right.
 81      """
 82      prepared = prepare_data(data)
 83      main_flow = prepared['main_flow']
 84      ignored = prepared['ignored']
 85  
 86      # Build nodes in correct order
 87      labels = [STAGE_NAMES[stage] for stage in MAIN_STAGES] + ['Ignored']
 88      colors = [STAGE_COLORS[stage] for stage in MAIN_STAGES] + [STAGE_COLORS['ignore']]
 89  
 90      # Build links - each link goes from current stage to next stage
 91      # The value is the count at the TARGET stage (how many made it through)
 92      sources = []
 93      targets = []
 94      values = []
 95      link_colors = []
 96  
 97      # Main flow: each stage flows to the next
 98      # Value = number of sites that reached the target stage
 99      for i in range(len(MAIN_STAGES) - 1):
100          current_stage = MAIN_STAGES[i]
101          next_stage = MAIN_STAGES[i + 1]
102  
103          # Flow from current to next = count at next stage
104          flow_value = main_flow[next_stage]
105  
106          if flow_value > 0:
107              sources.append(i)
108              targets.append(i + 1)
109              values.append(flow_value)
110              link_colors.append('rgba(52, 152, 219, 0.3)')  # Blue with transparency
111  
112      # Ignored branch (sites filtered out early, typically at 'found' stage)
113      if ignored > 0:
114          sources.append(0)  # From 'found'
115          targets.append(len(MAIN_STAGES))  # To 'ignored'
116          values.append(ignored)
117          link_colors.append('rgba(149, 165, 166, 0.3)')  # Gray with transparency
118  
119      # Create node values for hover (actual count at each stage)
120      node_values = [main_flow[stage] for stage in MAIN_STAGES] + [ignored]
121  
122      fig = go.Figure(data=[go.Sankey(
123          node=dict(
124              pad=20,
125              thickness=25,
126              line=dict(color="rgba(0,0,0,0)", width=0),  # Transparent border
127              label=labels,
128              color=colors,
129              customdata=node_values,
130              hovertemplate='%{label}<br>Count: %{customdata:,}<extra></extra>'
131          ),
132          link=dict(
133              source=sources,
134              target=targets,
135              value=values,
136              color=link_colors,
137              hovertemplate='Flow: %{value:,} sites<extra></extra>'
138          ),
139          textfont=dict(
140              color='black',
141              size=12,
142              family='Arial, sans-serif'
143          )
144      )])
145  
146      fig.update_layout(
147          title="Pipeline Flow - Sankey Diagram",
148          height=500,
149          font=dict(size=12, color='black', family='Arial, sans-serif'),
150          margin=dict(l=20, r=20, t=50, b=20),
151          paper_bgcolor='white',
152          plot_bgcolor='white'
153      )
154  
155      return fig
156  
157  
def create_pie_chart(data: pd.DataFrame, show_legend: bool = True) -> go.Figure:
    """
    Pie chart of the per-status counts, slices in pipeline order.

    Main stages come first in MAIN_STAGES order, followed by the ignored
    slice. Statuses with a zero count get no slice.

    Args:
        data: DataFrame with 'status' and 'count' columns
        show_legend: Whether to display the legend (default: True)

    Returns:
        Plotly Figure with pie chart
    """
    summary = prepare_data(data)
    stage_counts = summary['main_flow']
    ignored_count = summary['ignored']

    # One (label, value, color) tuple per slice, already in display order.
    slices = [
        (STAGE_NAMES[stage], stage_counts[stage], STAGE_COLORS[stage])
        for stage in MAIN_STAGES
        if stage_counts[stage] > 0
    ]
    if ignored_count > 0:
        slices.append(('Ignored (Filtered)', ignored_count, STAGE_COLORS['ignore']))

    pie = go.Pie(
        labels=[label for label, _value, _color in slices],
        values=[value for _label, value, _color in slices],
        marker=dict(colors=[color for _label, _value, color in slices]),
        textinfo='label+percent',
        textposition='auto',
        hovertemplate='%{label}<br>Count: %{value:,}<br>Percent: %{percent}<extra></extra>',
        direction='clockwise',
        sort=False,  # keep the pipeline order built above
        rotation=180
    )
    fig = go.Figure(data=[pie])

    # The legend placement is only supplied when the legend is shown.
    legend_options = {}
    if show_legend:
        legend_options["legend"] = dict(
            orientation="v",
            yanchor="middle",
            y=0.5,
            xanchor="left",
            x=1.05
        )

    fig.update_layout(
        title="Pipeline Distribution - Stage Breakdown",
        height=500,
        margin=dict(l=80, r=80, t=50, b=80),  # wide margins leave room for labels
        showlegend=show_legend,
        **legend_options
    )

    return fig
223  
224  
def create_funnel_tree(breakdown: dict) -> go.Figure:
    """
    Treemap showing the full site funnel with sub-breakdowns.

    Top level: Ignored / Failing / Active
    Second level: sub-categories within each group

    Every node gets an explicit, unique id. Without ids Plotly identifies
    nodes by their label text, so two sub-categories that happen to render
    the same label (same name, count and percentage in different groups)
    would make the hierarchy ambiguous.

    Args:
        breakdown: dict returned by database.get_funnel_breakdown().
            Keys read here: 'total', 'ignored', 'failing', 'active'
            (counts); 'ignored_breakdown' and 'failing_breakdown'
            (iterables of rows with 'category' and 'count');
            'active_breakdown' (iterable of rows with 'status' and 'count').

    Returns:
        Plotly Figure (treemap)

    Raises:
        KeyError: if any of the keys listed above is missing.
    """
    total = breakdown['total']

    node_ids = []
    labels = []
    parents = []
    values = []
    colors_list = []

    def add_node(node_id, parent_id, label, value, color):
        """Append one treemap node to the parallel lists."""
        node_ids.append(node_id)
        parents.append(parent_id)
        labels.append(label)
        values.append(value)
        colors_list.append(color)

    def percent_of(part, whole):
        """Percentage of ``whole`` rounded to 1 decimal; 0 when ``whole`` is falsy."""
        return round(100 * part / whole, 1) if whole else 0

    # NOTE(review): labels use '\n' as a separator; Plotly text normally uses
    # '<br>' for line breaks - confirm these render as intended.
    root_id = 'root'
    add_node(root_id, "", f"All Sites\n{total:,}", total, "#2C3E50")

    # Display names / colors for the active sub-breakdown, keyed by status.
    stage_display = {
        'found': 'Awaiting Assets',
        'assets_captured': 'Awaiting Scoring',
        'scored': 'Scored',
        'rescored': 'Awaiting Enrichment',
        'enriched': 'Enriched',
        'proposals_drafted': 'Awaiting Outreach',
        'outreach_sent': 'Outreach Sent',
    }
    stage_colors_active = {
        'found': '#3498DB',
        'assets_captured': '#9B59B6',
        'scored': '#E67E22',
        'rescored': '#E67E22',
        'enriched': '#1ABC9C',
        'proposals_drafted': '#2ECC71',
        'outreach_sent': '#27AE60',
    }

    # (breakdown key, title, group color, row name field,
    #  name overrides, per-row color overrides, default row color)
    groups = (
        ('ignored', 'Ignored', '#95A5A6', 'category', {}, {}, '#BDC3C7'),
        ('failing', 'Failing', '#E74C3C', 'category', {}, {}, '#F1948A'),
        ('active', 'Active Pipeline', '#27AE60', 'status',
         stage_display, stage_colors_active, '#2ECC71'),
    )

    for key, title, group_color, name_field, names, row_colors, default_color in groups:
        group_count = breakdown[key]
        add_node(
            key,
            root_id,
            f"{title}\n{group_count:,} ({percent_of(group_count, total)}%)",
            group_count,
            group_color,
        )

        for index, row in enumerate(breakdown[f'{key}_breakdown']):
            raw_name = row[name_field]
            row_count = row['count']
            add_node(
                # Index-based id stays unique even if names repeat in a group.
                f"{key}/{index}",
                key,
                f"{names.get(raw_name, raw_name)}\n{row_count:,} "
                f"({percent_of(row_count, group_count)}%)",
                row_count,
                row_colors.get(raw_name, default_color),
            )

    # NOTE(review): branchvalues='total' treats each parent's value as
    # already including its children - confirm the sub-breakdown counts
    # never add up to more than their group count.
    fig = go.Figure(go.Treemap(
        ids=node_ids,
        labels=labels,
        parents=parents,  # reference node ids because `ids` is supplied
        values=values,
        marker=dict(colors=colors_list, line=dict(width=2, color='white')),
        textfont=dict(size=12),
        hovertemplate='<b>%{label}</b><extra></extra>',
        branchvalues='total',
        maxdepth=2,
    ))

    fig.update_layout(
        title="Site Funnel Breakdown",
        height=500,
        margin=dict(l=10, r=10, t=50, b=10),
    )

    return fig
331  
332  
def create_funnel_with_sidebar(data: pd.DataFrame) -> Tuple[go.Figure, Dict]:
    """
    Funnel chart of the main pipeline plus summary metrics for a sidebar.

    The funnel only contains main stages with a non-zero count, in
    MAIN_STAGES order; ignored sites are reported through the metrics only.

    Args:
        data: DataFrame with 'status' and 'count' columns

    Returns:
        tuple: (Figure, Dict with sidebar metrics). The metrics dict holds
        'ignored', 'ignored_percent', 'active', 'active_percent', 'total'
        and 'conversion_rate' (outreach_sent as a percentage of found).
    """
    summary = prepare_data(data)
    stage_counts = summary['main_flow']
    ignored_count = summary['ignored']
    grand_total = summary['total']

    def percentage(part, whole):
        # Percentage rounded to 1 decimal; 0 when there is nothing to divide by.
        return round(100 * part / whole, 1) if whole > 0 else 0

    # Only stages that actually hold sites get a funnel band.
    visible_stages = [stage for stage in MAIN_STAGES if stage_counts[stage] > 0]

    funnel = go.Funnel(
        y=[STAGE_NAMES[stage] for stage in visible_stages],
        x=[stage_counts[stage] for stage in visible_stages],
        textinfo="value+percent initial",
        marker=dict(color=[STAGE_COLORS[stage] for stage in visible_stages]),
        connector=dict(line=dict(color="royalblue", width=3))
    )
    fig = go.Figure(funnel)

    fig.update_layout(
        title="Main Pipeline Flow",
        height=500,
        margin=dict(l=20, r=20, t=50, b=20)
    )

    # Sidebar metrics: "active" means every site in a main stage.
    active_count = sum(stage_counts.values())
    sidebar_metrics = {
        'ignored': ignored_count,
        'ignored_percent': percentage(ignored_count, grand_total),
        'active': active_count,
        'active_percent': percentage(active_count, grand_total),
        'total': grand_total,
        'conversion_rate': percentage(stage_counts['outreach_sent'], stage_counts['found'])
    }

    return fig, sidebar_metrics