# scripts/daily_digest.py
  1  #!/usr/bin/env python3
  2  """
  3  Daily Digest Generator - Summarize graph activity
  4  
  5  Creates a daily report of:
  6  - New pages created
  7  - Links added
  8  - Orphan status changes
  9  - Gravity shifts
 10  
 11  Usage:
 12      python scripts/daily_digest.py [--output FILE]
 13  """
 14  
 15  import os
 16  import re
 17  import sys
 18  import json
 19  from pathlib import Path
 20  from datetime import datetime, timedelta
 21  from collections import defaultdict
 22  import argparse
 23  
 24  SCAN_DIRS = ['docs', 'patterns', 'sessions', 'dashboards']
 25  SKIP_DIRS = {'.git', 'node_modules', '__pycache__', 'templates'}
 26  STATE_FILE = '.graph_state.json'
 27  
 28  
 29  def normalize_link(link: str) -> str:
 30      """Normalize wiki-link."""
 31      return link.split('/')[-1].lower().replace(' ', '-').replace('.md', '')
 32  
 33  
 34  def extract_wiki_links(content: str) -> set:
 35      """Extract all [[wiki-links]] from content."""
 36      pattern = r'\[\[([^\]|]+)(?:\|[^\]]+)?\]\]'
 37      return {normalize_link(m) for m in re.findall(pattern, content)}
 38  
 39  
 40  def extract_title(content: str) -> str:
 41      """Extract title."""
 42      match = re.search(r'^# (.+)$', content, re.MULTILINE)
 43      return match.group(1) if match else "Untitled"
 44  
 45  
 46  def scan_current_state(base_path: Path) -> dict:
 47      """Scan current graph state."""
 48      pages = {}
 49      for scan_dir in SCAN_DIRS:
 50          dir_path = base_path / scan_dir
 51          if not dir_path.exists():
 52              continue
 53          for filepath in dir_path.rglob('*.md'):
 54              if any(skip in filepath.parts for skip in SKIP_DIRS):
 55                  continue
 56              try:
 57                  stat = filepath.stat()
 58                  content = filepath.read_text(encoding='utf-8')
 59              except Exception:
 60                  continue
 61              name = normalize_link(filepath.stem)
 62              pages[name] = {
 63                  'path': str(filepath),
 64                  'title': extract_title(content),
 65                  'outbound': list(extract_wiki_links(content)),
 66                  'mtime': stat.st_mtime,
 67              }
 68      return pages
 69  
 70  
 71  def load_previous_state(base_path: Path) -> dict:
 72      """Load previous state if exists."""
 73      state_path = base_path / STATE_FILE
 74      if state_path.exists():
 75          try:
 76              return json.loads(state_path.read_text())
 77          except Exception:
 78              pass
 79      return {}
 80  
 81  
 82  def save_current_state(base_path: Path, state: dict):
 83      """Save current state."""
 84      state_path = base_path / STATE_FILE
 85      state_path.write_text(json.dumps(state, indent=2))
 86  
 87  
 88  def calculate_inbound(pages: dict) -> dict:
 89      """Calculate inbound links for each page."""
 90      inbound = defaultdict(set)
 91      for name, data in pages.items():
 92          for target in data.get('outbound', []):
 93              inbound[target].add(name)
 94      return {k: list(v) for k, v in inbound.items()}
 95  
 96  
 97  def generate_digest(current: dict, previous: dict, base_path: Path) -> str:
 98      """Generate digest comparing current to previous state."""
 99      today = datetime.now().strftime('%Y-%m-%d')
100      lines = [
101          f"# Daily Digest - {today}",
102          "",
103          "*Auto-generated graph activity summary*",
104          "",
105          "---",
106          "",
107      ]
108  
109      # Stats
110      curr_pages = set(current.keys())
111      prev_pages = set(previous.keys())
112  
113      new_pages = curr_pages - prev_pages
114      deleted_pages = prev_pages - curr_pages
115  
116      total_links_curr = sum(len(p.get('outbound', [])) for p in current.values())
117      total_links_prev = sum(len(p.get('outbound', [])) for p in previous.values())
118  
119      # Calculate orphans
120      curr_inbound = calculate_inbound(current)
121      orphans = {name for name in current if not curr_inbound.get(name)}
122  
123      lines.extend([
124          "## Summary",
125          "",
126          f"| Metric | Value | Change |",
127          f"|--------|-------|--------|",
128          f"| Total pages | {len(current)} | {len(current) - len(previous):+d} |",
129          f"| Total links | {total_links_curr} | {total_links_curr - total_links_prev:+d} |",
130          f"| Orphan pages | {len(orphans)} | - |",
131          f"| New pages | {len(new_pages)} | - |",
132          "",
133      ])
134  
135      # New pages
136      if new_pages:
137          lines.extend([
138              "## New Pages",
139              "",
140          ])
141          for name in sorted(new_pages):
142              title = current[name].get('title', name)
143              links = len(current[name].get('outbound', []))
144              lines.append(f"- [[{name}|{title}]] ({links} links)")
145          lines.append("")
146  
147      # Modified pages (by mtime)
148      today_ts = datetime.now().replace(hour=0, minute=0, second=0).timestamp()
149      modified_today = [
150          name for name, data in current.items()
151          if data.get('mtime', 0) > today_ts and name not in new_pages
152      ]
153  
154      if modified_today:
155          lines.extend([
156              "## Modified Today",
157              "",
158          ])
159          for name in sorted(modified_today)[:20]:
160              title = current[name].get('title', name)
161              lines.append(f"- [[{name}|{title}]]")
162          if len(modified_today) > 20:
163              lines.append(f"- ... and {len(modified_today) - 20} more")
164          lines.append("")
165  
166      # Orphans (if any)
167      if orphans:
168          lines.extend([
169              "## Current Orphans",
170              "",
171              f"*{len(orphans)} pages with no inbound links*",
172              "",
173          ])
174          for name in sorted(orphans)[:10]:
175              title = current[name].get('title', name)
176              lines.append(f"- [[{name}|{title}]]")
177          if len(orphans) > 10:
178              lines.append(f"- ... and {len(orphans) - 10} more")
179          lines.append("")
180  
181      # Top gravity (simple version)
182      gravity = {}
183      for name in current:
184          in_count = len(curr_inbound.get(name, []))
185          out_count = len(current[name].get('outbound', []))
186          gravity[name] = (in_count * 2) + (out_count * 0.5)
187  
188      top10 = sorted(gravity.items(), key=lambda x: x[1], reverse=True)[:10]
189  
190      lines.extend([
191          "## Top 10 by Gravity",
192          "",
193          "| Rank | Page | Gravity |",
194          "|------|------|---------|",
195      ])
196      for i, (name, grav) in enumerate(top10, 1):
197          title = current[name].get('title', name)[:30]
198          lines.append(f"| {i} | [[{name}\\|{title}]] | {grav:.0f} |")
199  
200      lines.extend([
201          "",
202          "---",
203          "",
204          f"*Generated: {datetime.now().isoformat()}*",
205      ])
206  
207      return '\n'.join(lines)
208  
209  
def main():
    """CLI entry point: scan the graph, diff against saved state, emit digest."""
    parser = argparse.ArgumentParser(description='Daily Digest Generator')
    parser.add_argument('--output', '-o', type=Path, help='Output file')
    parser.add_argument(
        '--path', '-p',
        type=Path,
        default=Path(__file__).parent.parent,
        help='Path to Sovereign_OS repo',
    )
    parser.add_argument(
        '--no-save',
        action='store_true',
        help='Do not save current state',
    )
    args = parser.parse_args()

    print("Scanning current state...")
    current = scan_current_state(args.path)

    print("Loading previous state...")
    previous = load_previous_state(args.path)

    print(f"Generating digest ({len(current)} pages)...")
    digest = generate_digest(current, previous, args.path)

    # Write to the requested file, or dump to stdout when none was given.
    if args.output:
        args.output.write_text(digest, encoding='utf-8')
        print(f"Saved to {args.output}")
    else:
        print()
        print(digest)

    # Persist the new snapshot unless explicitly suppressed.
    if not args.no_save:
        print("Saving current state...")
        save_current_state(args.path, current)


if __name__ == '__main__':
    main()