Cradicle Explorer

/ services / sync_notion.py
sync_notion.py
  1  """Notion sync service — classify scored jobs and build Notion page data.
  2  
  3  Pure domain logic, no I/O, no Notion client reference. Produces NotionJobPage
  4  objects for the pipeline to pass to the integration layer.
  5  """
  6  
# Public API of this module; the underscore-prefixed helpers are not exported.
__all__ = ["classify_jobs", "build_page", "build_page_properties", "build_page_blocks"]
  8  
  9  import logging
 10  from typing import Any
 11  
 12  import pandas as pd  # type: ignore[import-untyped]
 13  
 14  from config import NotionPropertyMapping, NotionStatusMapping
 15  from integrations.html_to_notion import html_to_notion_blocks
 16  from models.notion import NotionJobPage, SyncCategory
 17  from services.notion_blocks import RICH_TEXT_MAX_LENGTH, heading
 18  
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
 20  
# Networking-opportunity score a job must have to be considered for outreach.
# Exact-equality match — only jobs with opportunity score of exactly 3 qualify.
# Using >= would widen the outreach net; keep == until that's explicitly desired.
# Independent from _NETWORKING_MAX_OPPORTUNITY in models/job.py.
_OUTREACH_OPPORTUNITY_THRESHOLD = 3

# Fit categories that disqualify a job from outreach (already strong fits).
# Members are in the lowercase form produced by _normalize_fit_category, which
# is what classify_jobs compares against.
_OUTREACH_EXCLUDED_FIT = {"good_fit", "perfect_match"}
 28  
 29  
 30  def _normalize_fit_category(value: object) -> str:
 31      """Normalize a fit category value from Excel to a lowercase identifier.
 32  
 33      Strips the ``FitCategory.`` prefix that pandas may preserve from the enum
 34      serialization, then lowercases and trims whitespace.
 35      """
 36      return str(value).replace("FitCategory.", "").strip().lower()
 37  
 38  
 39  def classify_jobs(
 40      df: pd.DataFrame,
 41  ) -> tuple[list[dict[str, Any]], list[dict[str, Any]], int]:
 42      """Classify jobs into outreach and manual categories.
 43  
 44      Args:
 45          df: DataFrame with scored job rows.
 46  
 47      Returns:
 48          Tuple of (outreach_rows, manual_rows, overlap_count) where each row
 49          is a dict from DataFrame.to_dict(orient="records").
 50      """
 51      # Normalize columns
 52      df = df.copy()
 53      df["OK_clean"] = df["OK"].fillna("").astype(str).str.strip().str.lower()
 54      df["fit_clean"] = df["Fit Category"].apply(_normalize_fit_category)
 55      df["net_opp"] = pd.to_numeric(df["Net Opportunity"], errors="coerce").fillna(0).astype(int)
 56  
 57      # Manual: user marked OK=x
 58      manual_mask = df["OK_clean"] == "x"
 59  
 60      # Outreach: networking_opportunity == 3 AND fit not in excluded set AND NOT manual
 61      outreach_mask = (
 62          (df["net_opp"] == _OUTREACH_OPPORTUNITY_THRESHOLD)
 63          & (~df["fit_clean"].isin(_OUTREACH_EXCLUDED_FIT))
 64          & (~manual_mask)
 65      )
 66  
 67      # Track overlap: jobs that qualify for both (classified as manual)
 68      overlap_mask = manual_mask & (
 69          (df["net_opp"] == _OUTREACH_OPPORTUNITY_THRESHOLD)
 70          & (~df["fit_clean"].isin(_OUTREACH_EXCLUDED_FIT))
 71      )
 72      overlap_count = int(overlap_mask.sum())
 73  
 74      manual_rows: list[dict[str, Any]] = df[manual_mask].to_dict(orient="records")
 75  
 76      # Outreach: deduplicate by Company, keep highest Score
 77      outreach_df = df[outreach_mask].copy()
 78      if not outreach_df.empty:
 79          outreach_df = outreach_df.sort_values("Score", ascending=False)
 80          outreach_df = outreach_df.drop_duplicates(subset="Company", keep="first")
 81      outreach_rows: list[dict[str, Any]] = outreach_df.to_dict(orient="records")
 82  
 83      return outreach_rows, manual_rows, overlap_count
 84  
 85  
 86  def build_page(
 87      row: dict[str, Any],
 88      category: SyncCategory,
 89      status_mapping: NotionStatusMapping,
 90  ) -> NotionJobPage:
 91      """Map an Excel row dict to a NotionJobPage.
 92  
 93      Args:
 94          row: Dict from DataFrame.to_dict(orient="records").
 95          category: Sync category for this page.
 96          status_mapping: Status text and icon per category.
 97  
 98      Returns:
 99          NotionJobPage value object.
100      """
101      status, icon = _get_status_and_icon(category, status_mapping)
102      # Assumption: if LinkedIn did not provide a job_posting_id, the Notion page's Job ID
103      # property will be written as "". get_uncoached_pages skips pages with an empty Job ID,
104      # so such pages are invisible to the coach pipeline and cannot be coached via --sync-notion.
105      return NotionJobPage(
106          job_posting_id=str(row.get("job_posting_id", "")),
107          job_title=str(row.get("Title", "")),
108          company_name=str(row.get("Company", "")),
109          job_location=str(row.get("Job Location", "")),
110          url=str(row.get("URL", "")),
111          company_url=str(row.get("Company LinkedIn", "")),
112          score=int(row.get("Score", 0)),
113          fit_category=_normalize_fit_category(row.get("Fit Category", "")),
114          networking_opportunity=int(row.get("Net Opportunity", 0) or 0),
115          networking_rationale=str(row.get("Net Rationale", "")),
116          reasoning=str(row.get("AI Reasoning", "")),
117          job_description_html=str(row.get("job_description_html", "")),
118          category=category,
119          status=status,
120          icon=icon,
121      )
122  
123  
def _get_status_and_icon(
    category: SyncCategory,
    status_mapping: NotionStatusMapping,
) -> tuple[str, str]:
    """Look up the ``(status, icon)`` pair configured for a sync category.

    Args:
        category: Sync category of the page being built.
        status_mapping: Configured status text and icon for each category.

    Returns:
        The outreach pair when ``category`` is ``SyncCategory.OUTREACH``;
        the manual pair for every other category.
    """
    is_outreach = category == SyncCategory.OUTREACH
    if not is_outreach:
        # Every non-outreach category falls back to the manual settings.
        return status_mapping.manual_status, status_mapping.manual_icon
    return status_mapping.outreach_status, status_mapping.outreach_icon
132  
133  
134  # ── Property and block builders for Notion page creation ─────────────────────
135  
# Label of the toggle heading that wraps the job description body — closed by
# default in Notion. The leading \U0001f4cb escape is the clipboard emoji.
_DESCRIPTION_TOGGLE_LABEL = "\U0001f4cb Job Description"
138  
139  
def _rich_text_property(content: str) -> dict[str, Any]:
    """Wrap ``content`` in a ``rich_text`` property, capped at RICH_TEXT_MAX_LENGTH."""
    return {"rich_text": [{"text": {"content": content[:RICH_TEXT_MAX_LENGTH]}}]}


def build_page_properties(
    page: NotionJobPage,
    pm: NotionPropertyMapping,
) -> dict[str, Any]:
    """Build the Notion properties payload for a job page.

    Every text value (the title and all ``rich_text`` properties) is capped at
    ``RICH_TEXT_MAX_LENGTH`` characters. Previously only the AI reasoning was
    capped, so another over-long free-form field such as the networking
    rationale was sent in full.

    The URL properties are included only when the page has a value for them,
    and the fit-category select is omitted when the category is blank.

    Args:
        page: Job page data.
        pm: Property name mapping.

    Returns:
        Dict of Notion property definitions ready for create_page().
    """
    # NOTE(review): RICH_TEXT_MAX_LENGTH is assumed to mirror Notion's limit on
    # a single text object's content — confirm in services.notion_blocks.
    title_text = f"{page.job_title} @ {page.company_name}"[:RICH_TEXT_MAX_LENGTH]

    props: dict[str, Any] = {
        pm.title: {"title": [{"text": {"content": title_text}}]},
        pm.position: _rich_text_property(page.job_title),
        pm.status: {"select": {"name": page.status}},
        pm.job_id: _rich_text_property(page.job_posting_id),
        pm.ai_reasoning: _rich_text_property(page.reasoning),
        pm.company: _rich_text_property(page.company_name),
        pm.location: _rich_text_property(page.job_location),
        # Placeholder {} keeps the key order stable; it is filtered out below.
        pm.fit_category: (
            {"select": {"name": page.fit_category}} if page.fit_category else {}
        ),
        pm.networking_signal: {"number": page.networking_opportunity},
        pm.networking_rationale: _rich_text_property(page.networking_rationale),
        pm.score: {"number": page.score},
        pm.coached: {"checkbox": False},
    }

    if page.url:
        props[pm.job_url] = {"url": page.url}
    if page.company_url:
        props[pm.company_linkedin] = {"url": page.company_url}

    # Remove empty property dicts (e.g. fit_category when blank).
    return {k: v for k, v in props.items() if v}
185  
186  
def build_page_blocks(page: NotionJobPage) -> list[dict[str, Any]]:
    """Build the initial content blocks for a job page from its HTML description.

    The description is converted with ``html_to_notion_blocks`` and the result
    is passed as the children of a single level-1 toggleable heading labelled
    with ``_DESCRIPTION_TOGGLE_LABEL``.

    Args:
        page: Job page data.

    Returns:
        A one-element list holding the toggle heading, ready for
        create_page(blocks=...). The list is empty when the description is
        empty or converts to no blocks.
    """
    description_html = page.job_description_html
    # The converter is only invoked when there is something to convert.
    body_blocks = html_to_notion_blocks(description_html) if description_html else None
    if not body_blocks:
        return []

    toggle = heading(
        1,
        _DESCRIPTION_TOGGLE_LABEL,
        is_toggleable=True,
        children=body_blocks,
    )
    return [toggle]