Cradicle Explorer

/ agent / prompt_builder.py
prompt_builder.py
   1  """System prompt assembly -- identity, platform hints, skills index, context files.
   2  
   3  All functions are stateless. AIAgent._build_system_prompt() calls these to
   4  assemble pieces, then combines them with memory and ephemeral prompts.
   5  """
   6  
   7  import json
   8  import logging
   9  import os
  10  import re
  11  import threading
  12  from collections import OrderedDict
  13  from pathlib import Path
  14  
  15  from hermes_constants import get_hermes_home, get_skills_dir, is_wsl
  16  from typing import Optional
  17  
  18  from agent.skill_utils import (
  19      extract_skill_conditions,
  20      extract_skill_description,
  21      get_all_skills_dirs,
  22      get_disabled_skill_names,
  23      iter_skill_index_files,
  24      parse_frontmatter,
  25      skill_matches_platform,
  26  )
  27  from utils import atomic_json_write
  28  
  29  logger = logging.getLogger(__name__)
  30  
  31  # ---------------------------------------------------------------------------
  32  # Context file scanning — detect prompt injection in AGENTS.md, .cursorrules,
  33  # SOUL.md before they get injected into the system prompt.
  34  # ---------------------------------------------------------------------------
  35  
  36  _CONTEXT_THREAT_PATTERNS = [
  37      (r'ignore\s+(previous|all|above|prior)\s+instructions', "prompt_injection"),
  38      (r'do\s+not\s+tell\s+the\s+user', "deception_hide"),
  39      (r'system\s+prompt\s+override', "sys_prompt_override"),
  40      (r'disregard\s+(your|all|any)\s+(instructions|rules|guidelines)', "disregard_rules"),
  41      (r'act\s+as\s+(if|though)\s+you\s+(have\s+no|don\'t\s+have)\s+(restrictions|limits|rules)', "bypass_restrictions"),
  42      (r'<!--[^>]*(?:ignore|override|system|secret|hidden)[^>]*-->', "html_comment_injection"),
  43      (r'<\s*div\s+style\s*=\s*["\'][\s\S]*?display\s*:\s*none', "hidden_div"),
  44      (r'translate\s+.*\s+into\s+.*\s+and\s+(execute|run|eval)', "translate_execute"),
  45      (r'curl\s+[^\n]*\$\{?\w*(KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|API)', "exfil_curl"),
  46      (r'cat\s+[^\n]*(\.env|credentials|\.netrc|\.pgpass)', "read_secrets"),
  47  ]
  48  
  49  _CONTEXT_INVISIBLE_CHARS = {
  50      '\u200b', '\u200c', '\u200d', '\u2060', '\ufeff',
  51      '\u202a', '\u202b', '\u202c', '\u202d', '\u202e',
  52  }
  53  
  54  
  55  def _scan_context_content(content: str, filename: str) -> str:
  56      """Scan context file content for injection. Returns sanitized content."""
  57      findings = []
  58  
  59      # Check invisible unicode
  60      for char in _CONTEXT_INVISIBLE_CHARS:
  61          if char in content:
  62              findings.append(f"invisible unicode U+{ord(char):04X}")
  63  
  64      # Check threat patterns
  65      for pattern, pid in _CONTEXT_THREAT_PATTERNS:
  66          if re.search(pattern, content, re.IGNORECASE):
  67              findings.append(pid)
  68  
  69      if findings:
  70          logger.warning("Context file %s blocked: %s", filename, ", ".join(findings))
  71          return f"[BLOCKED: {filename} contained potential prompt injection ({', '.join(findings)}). Content not loaded.]"
  72  
  73      return content
  74  
  75  
  76  def _find_git_root(start: Path) -> Optional[Path]:
  77      """Walk *start* and its parents looking for a ``.git`` directory.
  78  
  79      Returns the directory containing ``.git``, or ``None`` if we hit the
  80      filesystem root without finding one.
  81      """
  82      current = start.resolve()
  83      for parent in [current, *current.parents]:
  84          if (parent / ".git").exists():
  85              return parent
  86      return None
  87  
  88  
  89  _HERMES_MD_NAMES = (".hermes.md", "HERMES.md")
  90  
  91  
  92  def _find_hermes_md(cwd: Path) -> Optional[Path]:
  93      """Discover the nearest ``.hermes.md`` or ``HERMES.md``.
  94  
  95      Search order: *cwd* first, then each parent directory up to (and
  96      including) the git repository root.  Returns the first match, or
  97      ``None`` if nothing is found.
  98      """
  99      stop_at = _find_git_root(cwd)
 100      current = cwd.resolve()
 101  
 102      for directory in [current, *current.parents]:
 103          for name in _HERMES_MD_NAMES:
 104              candidate = directory / name
 105              if candidate.is_file():
 106                  return candidate
 107          # Stop walking at the git root (or filesystem root).
 108          if stop_at and directory == stop_at:
 109              break
 110      return None
 111  
 112  
 113  def _strip_yaml_frontmatter(content: str) -> str:
 114      """Remove optional YAML frontmatter (``---`` delimited) from *content*.
 115  
 116      The frontmatter may contain structured config (model overrides, tool
 117      settings) that will be handled separately in a future PR.  For now we
 118      strip it so only the human-readable markdown body is injected into the
 119      system prompt.
 120      """
 121      if content.startswith("---"):
 122          end = content.find("\n---", 3)
 123          if end != -1:
 124              # Skip past the closing --- and any trailing newline
 125              body = content[end + 4:].lstrip("\n")
 126              return body if body else content
 127      return content
 128  
 129  
 130  # =========================================================================
 131  # Constants
 132  # =========================================================================
 133  
# Default identity paragraph: names the agent ("Hermes Agent", created by
# Nous Research) and sets its baseline tone.  The wording "unless otherwise
# directed below" means it is written to be followed by further prompt text.
DEFAULT_AGENT_IDENTITY = (
    "You are Hermes Agent, an intelligent AI assistant created by Nous Research. "
    "You are helpful, knowledgeable, and direct. You assist users with a wide "
    "range of tasks including answering questions, writing and editing code, "
    "analyzing information, creative work, and executing actions via your tools. "
    "You communicate clearly, admit uncertainty when appropriate, and prioritize "
    "being genuinely useful over being verbose unless otherwise directed below. "
    "Be targeted and efficient in your exploration and investigations."
)

# Tells the model to load the `hermes-agent` skill (via skill_view) before
# answering questions about Hermes Agent itself, and gives the docs URL.
HERMES_AGENT_HELP_GUIDANCE = (
    "If the user asks about configuring, setting up, or using Hermes Agent "
    "itself, load the `hermes-agent` skill with skill_view(name='hermes-agent') "
    "before answering. Docs: https://hermes-agent.nousresearch.com/docs"
)
 149  
# Prompt section for the persistent memory tool.  Covers what to save (durable,
# declarative facts such as user preferences), what not to save (task progress
# and session outcomes -- those are recalled with session_search), and that
# procedures belong in skills rather than memory.
# NOTE(review): presumably only included when the memory tool is enabled --
# confirm at the call site, which is not in this part of the file.
MEMORY_GUIDANCE = (
    "You have persistent memory across sessions. Save durable facts using the memory "
    "tool: user preferences, environment details, tool quirks, and stable conventions. "
    "Memory is injected into every turn, so keep it compact and focused on facts that "
    "will still matter later.\n"
    "Prioritize what reduces future user steering — the most valuable memory is one "
    "that prevents the user from having to correct or remind you again. "
    "User preferences and recurring corrections matter more than procedural task details.\n"
    "Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
    "state to memory; use session_search to recall those from past transcripts. "
    "If you've discovered a new way to do something, solved a problem that could be "
    "necessary later, save it as a skill with the skill tool.\n"
    "Write memories as declarative facts, not instructions to yourself. "
    "'User prefers concise responses' ✓ — 'Always respond concisely' ✗. "
    "'Project uses pytest with xdist' ✓ — 'Run tests with pytest -n 4' ✗. "
    "Imperative phrasing gets re-read as a directive in later sessions and can "
    "cause repeated work or override the user's current request. Procedures and "
    "workflows belong in skills, not memory."
)
 169  
# Prompt section telling the model to call session_search for cross-session
# context before asking the user to repeat themselves.
SESSION_SEARCH_GUIDANCE = (
    "When the user references something from a past conversation or you suspect "
    "relevant cross-session context exists, use session_search to recall it before "
    "asking them to repeat themselves."
)

# Prompt section for skill upkeep: save reusable approaches with skill_manage
# after complex work, and patch a skill as soon as it proves outdated or wrong.
SKILLS_GUIDANCE = (
    "After completing a complex task (5+ tool calls), fixing a tricky error, "
    "or discovering a non-trivial workflow, save the approach as a "
    "skill with skill_manage so you can reuse it next time.\n"
    "When using a skill and finding it outdated, incomplete, or wrong, "
    "patch it immediately with skill_manage(action='patch') — don't wait to be asked. "
    "Skills that aren't maintained become liabilities."
)
 184  
# Markdown-formatted protocol for a worker that has been handed a single task
# from the shared kanban board.  Three parts: the task lifecycle (orient, work
# in the workspace, heartbeat, block, complete, spawn follow-ups), orchestrator
# mode for decomposition tasks, and a list of prohibited shortcuts.
# The text refers to the `$HERMES_KANBAN_TASK` / `$HERMES_KANBAN_WORKSPACE`
# environment variables and to the `kanban_*` tools by name.
# NOTE(review): presumably injected only for kanban-dispatched runs -- confirm
# at the call site, which is not in this part of the file.
KANBAN_GUIDANCE = (
    "# Kanban task execution protocol\n"
    "You have been assigned ONE task from "
    "the shared board at `~/.hermes/kanban.db`. Your task id is in "
    "`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
    "The `kanban_*` tools in your schema are your primary coordination surface — "
    "they write directly to the shared SQLite DB and work regardless of terminal "
    "backend (local/docker/modal/ssh).\n"
    "\n"
    "## Lifecycle\n"
    "\n"
    "1. **Orient.** Call `kanban_show()` first (no args — it defaults to your "
    "task). The response includes title, body, parent-task handoffs (summary + "
    "metadata), any prior attempts on this task if you're a retry, the full "
    "comment thread, and a pre-formatted `worker_context` you can treat as "
    "ground truth.\n"
    "2. **Work inside the workspace.** `cd $HERMES_KANBAN_WORKSPACE` before "
    "any file operations. The workspace is yours for this run. Don't modify "
    "files outside it unless the task explicitly asks.\n"
    "3. **Heartbeat on long operations.** Call `kanban_heartbeat(note=...)` "
    "every few minutes during long subprocesses (training, encoding, crawling). "
    "Skip heartbeats for short tasks.\n"
    "4. **Block on genuine ambiguity.** If you need a human decision you cannot "
    "infer (missing credentials, UX choice, paywalled source, peer output you "
    "need first), call `kanban_block(reason=\"...\")` and stop. Don't guess. "
    "The user will unblock with context and the dispatcher will respawn you.\n"
    "5. **Complete with structured handoff.** Call `kanban_complete(summary=..., "
    "metadata=...)`. `summary` is 1–3 human-readable sentences naming concrete "
    "artifacts. `metadata` is machine-readable facts "
    "(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
    "workers read both via their own `kanban_show`. Never put secrets / "
    "tokens / raw PII in either field — run rows are durable forever.\n"
    "6. **If follow-up work appears, create it; don't do it.** Use "
    "`kanban_create(title=..., assignee=<right-profile>, parents=[your-task-id])` "
    "to spawn a child task for the appropriate specialist profile instead of "
    "scope-creeping into the next thing.\n"
    "\n"
    "## Orchestrator mode\n"
    "\n"
    "If your task is itself a decomposition task (e.g. a planner profile given "
    "a high-level goal), use `kanban_create` to fan out into child tasks — one "
    "per specialist, each with an explicit `assignee` and `parents=[...]` to "
    "express dependencies. Then `kanban_complete` your own task with a summary "
    "of the decomposition. Do NOT execute the work yourself; your job is "
    "routing, not implementation.\n"
    "\n"
    "## Do NOT\n"
    "\n"
    "- Do not shell out to `hermes kanban <verb>` for board operations. Use "
    "the `kanban_*` tools — they work across all terminal backends.\n"
    "- Do not complete a task you didn't actually finish. Block it.\n"
    "- Do not assign follow-up work to yourself. Assign it to the right "
    "specialist profile.\n"
    "- Do not call `delegate_task` as a board substitute. `delegate_task` is "
    "for short reasoning subtasks inside your own run; board tasks are for "
    "cross-agent handoffs that outlive one API loop."
)
 242  
# Prompt section that requires the model to act through tool calls instead of
# describing intended actions: every response must either make tool calls or
# deliver a final result.
TOOL_USE_ENFORCEMENT_GUIDANCE = (
    "# Tool-use enforcement\n"
    "You MUST use your tools to take action — do not describe what you would do "
    "or plan to do without actually doing it. When you say you will perform an "
    "action (e.g. 'I will run the tests', 'Let me check the file', 'I will create "
    "the project'), you MUST immediately make the corresponding tool call in the same "
    "response. Never end your turn with a promise of future action — execute it now.\n"
    "Keep working until the task is actually complete. Do not stop with a summary of "
    "what you plan to do next time. If you have tools available that can accomplish "
    "the task, use them instead of telling the user what you would do.\n"
    "Every response should either (a) contain tool calls that make progress, or "
    "(b) deliver a final result to the user. Responses that only describe intentions "
    "without acting are not acceptable."
)

# Model name substrings that trigger tool-use enforcement guidance.
# Add new patterns here when a model family needs explicit steering.
# NOTE(review): the matching code is not in this part of the file; all entries
# are lowercase, so presumably the model name is lowercased first -- confirm.
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok")
 261  
# OpenAI GPT/Codex-specific execution guidance.  Addresses known failure modes
# where GPT models abandon work on partial results, skip prerequisite lookups,
# hallucinate instead of using tools, and declare "done" without verification.
# Inspired by patterns from OpenAI's GPT-5.4 prompting guide & OpenClaw PR #38953.
#
# The text is organised into XML-style tagged sections, in this order:
# <tool_persistence>, <mandatory_tool_use>, <act_dont_ask>,
# <prerequisite_checks>, <verification>, <missing_context>.
OPENAI_MODEL_EXECUTION_GUIDANCE = (
    "# Execution discipline\n"
    "<tool_persistence>\n"
    "- Use tools whenever they improve correctness, completeness, or grounding.\n"
    "- Do not stop early when another tool call would materially improve the result.\n"
    "- If a tool returns empty or partial results, retry with a different query or "
    "strategy before giving up.\n"
    "- Keep calling tools until: (1) the task is complete, AND (2) you have verified "
    "the result.\n"
    "</tool_persistence>\n"
    "\n"
    "<mandatory_tool_use>\n"
    "NEVER answer these from memory or mental computation — ALWAYS use a tool:\n"
    "- Arithmetic, math, calculations → use terminal or execute_code\n"
    "- Hashes, encodings, checksums → use terminal (e.g. sha256sum, base64)\n"
    "- Current time, date, timezone → use terminal (e.g. date)\n"
    "- System state: OS, CPU, memory, disk, ports, processes → use terminal\n"
    "- File contents, sizes, line counts → use read_file, search_files, or terminal\n"
    "- Git history, branches, diffs → use terminal\n"
    "- Current facts (weather, news, versions) → use web_search\n"
    "Your memory and user profile describe the USER, not the system you are "
    "running on. The execution environment may differ from what the user profile "
    "says about their personal setup.\n"
    "</mandatory_tool_use>\n"
    "\n"
    "<act_dont_ask>\n"
    "When a question has an obvious default interpretation, act on it immediately "
    "instead of asking for clarification. Examples:\n"
    "- 'Is port 443 open?' → check THIS machine (don't ask 'open where?')\n"
    "- 'What OS am I running?' → check the live system (don't use user profile)\n"
    "- 'What time is it?' → run `date` (don't guess)\n"
    "Only ask for clarification when the ambiguity genuinely changes what tool "
    "you would call.\n"
    "</act_dont_ask>\n"
    "\n"
    "<prerequisite_checks>\n"
    "- Before taking an action, check whether prerequisite discovery, lookup, or "
    "context-gathering steps are needed.\n"
    "- Do not skip prerequisite steps just because the final action seems obvious.\n"
    "- If a task depends on output from a prior step, resolve that dependency first.\n"
    "</prerequisite_checks>\n"
    "\n"
    "<verification>\n"
    "Before finalizing your response:\n"
    "- Correctness: does the output satisfy every stated requirement?\n"
    "- Grounding: are factual claims backed by tool outputs or provided context?\n"
    "- Formatting: does the output match the requested format or schema?\n"
    "- Safety: if the next step has side effects (file writes, commands, API calls), "
    "confirm scope before executing.\n"
    "</verification>\n"
    "\n"
    "<missing_context>\n"
    "- If required context is missing, do NOT guess or hallucinate an answer.\n"
    "- Use the appropriate lookup tool when missing information is retrievable "
    "(search_files, web_search, read_file, etc.).\n"
    "- Ask a clarifying question only when the information cannot be retrieved by tools.\n"
    "- If you must proceed with incomplete information, label assumptions explicitly.\n"
    "</missing_context>"
)
 325  
# Gemini/Gemma-specific operational guidance, adapted from OpenCode's gemini.txt.
# Injected alongside TOOL_USE_ENFORCEMENT_GUIDANCE when the model is Gemini or Gemma.
# NOTE(review): this is the only guidance constant in this section whose text
# ends with a trailing "\n" -- confirm that is intentional, since it affects
# spacing when sections are joined.
GOOGLE_MODEL_OPERATIONAL_GUIDANCE = (
    "# Google model operational directives\n"
    "Follow these operational rules strictly:\n"
    "- **Absolute paths:** Always construct and use absolute file paths for all "
    "file system operations. Combine the project root with relative paths.\n"
    "- **Verify first:** Use read_file/search_files to check file contents and "
    "project structure before making changes. Never guess at file contents.\n"
    "- **Dependency checks:** Never assume a library is available. Check "
    "package.json, requirements.txt, Cargo.toml, etc. before importing.\n"
    "- **Conciseness:** Keep explanatory text brief — a few sentences, not "
    "paragraphs. Focus on actions and results over narration.\n"
    "- **Parallel tool calls:** When you need to perform multiple independent "
    "operations (e.g. reading several files), make all the tool calls in a "
    "single response rather than sequentially.\n"
    "- **Non-interactive commands:** Use flags like -y, --yes, --non-interactive "
    "to prevent CLI tools from hanging on prompts.\n"
    "- **Keep going:** Work autonomously until the task is fully resolved. "
    "Don't stop with a plan — execute it.\n"
)
 347  
# Model name substrings that should use the 'developer' role instead of
# 'system' for the system prompt.  OpenAI's newer models (GPT-5, Codex)
# give stronger instruction-following weight to the 'developer' role.
# The swap happens at the API boundary in _build_api_kwargs() so internal
# message representation stays consistent ("system" everywhere).
# NOTE(review): _build_api_kwargs() is not defined in this part of the file;
# with substring matching, "gpt-5" would also cover names such as
# "gpt-5-mini" -- confirm that is the intended reach.
DEVELOPER_ROLE_MODELS = ("gpt-5", "codex")
 354  
# Maps a lowercase platform key to a prose hint describing that channel:
# whether markdown renders there, how files are delivered (most messaging
# platforms use the MEDIA:/absolute/path/to/file convention), and any
# platform-specific limits or tools.  "cron" and "cli" describe non-chat
# contexts: cron has no user present, and cli has no attachment channel.
# NOTE(review): how a platform name is looked up here (and what happens for an
# unknown key) is decided by the caller, which is not in this part of the file.
PLATFORM_HINTS = {
    "whatsapp": (
        "You are on a text messaging communication platform, WhatsApp. "
        "Please do not use markdown as it does not render. "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. The file "
        "will be sent as a native WhatsApp attachment — images (.jpg, .png, "
        ".webp) appear as photos, videos (.mp4, .mov) play inline, and other "
        "files arrive as downloadable documents. You can also include image "
        "URLs in markdown format ![alt](url) and they will be sent as photos."
    ),
    "telegram": (
        "You are on a text messaging communication platform, Telegram. "
        "Standard markdown is automatically converted to Telegram format. "
        "Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
        "`inline code`, ```code blocks```, [links](url), and ## headers. "
        "Telegram has NO table syntax — prefer bullet lists or labeled "
        "key: value pairs over pipe tables (any tables you do emit are "
        "auto-rewritten into row-group bullets, which you can produce "
        "directly for cleaner output). "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
        "bubbles, and videos (.mp4) play inline. You can also include image "
        "URLs in markdown format ![alt](url) and they will be sent as native photos."
    ),
    "discord": (
        "You are in a Discord server or group chat communicating with your user. "
        "You can send media files natively: include MEDIA:/absolute/path/to/file "
        "in your response. Images (.png, .jpg, .webp) are sent as photo "
        "attachments, audio as file attachments. You can also include image URLs "
        "in markdown format ![alt](url) and they will be sent as attachments."
    ),
    "slack": (
        "You are in a Slack workspace communicating with your user. "
        "You can send media files natively: include MEDIA:/absolute/path/to/file "
        "in your response. Images (.png, .jpg, .webp) are uploaded as photo "
        "attachments, audio as file attachments. You can also include image URLs "
        "in markdown format ![alt](url) and they will be uploaded as attachments."
    ),
    "signal": (
        "You are on a text messaging communication platform, Signal. "
        "Please do not use markdown as it does not render. "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio as attachments, and other "
        "files arrive as downloadable documents. You can also include image "
        "URLs in markdown format ![alt](url) and they will be sent as photos."
    ),
    "email": (
        "You are communicating via email. Write clear, well-structured responses "
        "suitable for email. Use plain text formatting (no markdown). "
        "Keep responses concise but complete. You can send file attachments — "
        "include MEDIA:/absolute/path/to/file in your response. The subject line "
        "is preserved for threading. Do not include greetings or sign-offs unless "
        "contextually appropriate."
    ),
    "cron": (
        "You are running as a scheduled cron job. There is no user present — you "
        "cannot ask questions, request clarification, or wait for follow-up. Execute "
        "the task fully and autonomously, making reasonable decisions where needed. "
        "Your final response is automatically delivered to the job's configured "
        "destination — put the primary content directly in your response."
    ),
    "cli": (
        "You are a CLI AI Agent. Try not to use markdown but simple text "
        "renderable inside a terminal. "
        "File delivery: there is no attachment channel — the user reads your "
        "response directly in their terminal. Do NOT emit MEDIA:/path tags "
        "(those are only intercepted on messaging platforms like Telegram, "
        "Discord, Slack, etc.; on the CLI they render as literal text). "
        "When referring to a file you created or changed, just state its "
        "absolute path in plain text; the user can open it from there."
    ),
    "sms": (
        "You are communicating via SMS. Keep responses concise and use plain text "
        "only — no markdown, no formatting. SMS messages are limited to ~1600 "
        "characters, so be brief and direct."
    ),
    "bluebubbles": (
        "You are chatting via iMessage (BlueBubbles). iMessage does not render "
        "markdown formatting — use plain text. Keep responses concise as they "
        "appear as text messages. You can send media files natively: include "
        "MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
        ".heic) appear as photos and other files arrive as attachments."
    ),
    "mattermost": (
        "You are in a Mattermost workspace communicating with your user. "
        "Mattermost renders standard Markdown — headings, bold, italic, code "
        "blocks, and tables all work. "
        "You can send media files natively: include MEDIA:/absolute/path/to/file "
        "in your response. Images (.jpg, .png, .webp) are uploaded as photo "
        "attachments, audio and video as file attachments. "
        "Image URLs in markdown format ![alt](url) are rendered as inline previews automatically."
    ),
    "matrix": (
        "You are in a Matrix room communicating with your user. "
        "Matrix renders Markdown — bold, italic, code blocks, and links work; "
        "the adapter converts your Markdown to HTML for rich display. "
        "You can send media files natively: include MEDIA:/absolute/path/to/file "
        "in your response. Images (.jpg, .png, .webp) are sent as inline photos, "
        "audio (.ogg, .mp3) as voice/audio messages, video (.mp4) inline, "
        "and other files as downloadable attachments."
    ),
    "feishu": (
        "You are in a Feishu (Lark) workspace communicating with your user. "
        "Feishu renders Markdown in messages — bold, italic, code blocks, and "
        "links are supported. "
        "You can send media files natively: include MEDIA:/absolute/path/to/file "
        "in your response. Images (.jpg, .png, .webp) are uploaded and displayed "
        "inline, audio files as voice messages, and other files as attachments."
    ),
    "weixin": (
        "You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
        "it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
        "include MEDIA:/absolute/path/to/file in your response. Images are sent as native "
        "photos, videos play inline when supported, and other files arrive as downloadable "
        "documents. You can also include image URLs in markdown format ![alt](url) and they "
        "will be downloaded and sent as native media when possible."
    ),
    "wecom": (
        "You are on WeCom (企业微信 / Enterprise WeChat). Markdown formatting is supported. "
        "You CAN send media files natively — to deliver a file to the user, include "
        "MEDIA:/absolute/path/to/file in your response. The file will be sent as a native "
        "WeCom attachment: images (.jpg, .png, .webp) are sent as photos (up to 10 MB), "
        "other files (.pdf, .docx, .xlsx, .md, .txt, etc.) arrive as downloadable documents "
        "(up to 20 MB), and videos (.mp4) play inline. Voice messages are supported but "
        "must be in AMR format — other audio formats are automatically sent as file attachments. "
        "You can also include image URLs in markdown format ![alt](url) and they will be "
        "downloaded and sent as native photos. Do NOT tell the user you lack file-sending "
        "capability — use MEDIA: syntax whenever a file delivery is appropriate."
    ),
    "qqbot": (
        "You are on QQ, a popular Chinese messaging platform. QQ supports markdown formatting "
        "and emoji. You can send media files natively: include MEDIA:/absolute/path/to/file in "
        "your response. Images are sent as native photos, and other files arrive as downloadable "
        "documents."
    ),
    "yuanbao": (
        "You are on Yuanbao (腾讯元宝), a Chinese AI assistant platform. "
        "Markdown formatting is supported (code blocks, tables, bold/italic). "
        "You CAN send media files natively — to deliver a file to the user, include "
        "MEDIA:/absolute/path/to/file in your response. The file will be sent as a native "
        "Yuanbao attachment: images (.jpg, .png, .webp, .gif) are sent as photos, "
        "and other files (.pdf, .docx, .txt, .zip, etc.) arrive as downloadable documents "
        "(max 50 MB). You can also include image URLs in markdown format ![alt](url) and "
        "they will be downloaded and sent as native photos. "
        "Do NOT tell the user you lack file-sending capability — use MEDIA: syntax "
        "whenever a file delivery is appropriate.\n\n"
        "Stickers (贴纸 / 表情包 / TIM face): Yuanbao has a built-in sticker catalogue. "
        "When the user sends a sticker (you see '[emoji: 名称]' in their message) or asks "
        "you to send/reply-with a 贴纸/表情/表情包, you MUST use the sticker tools:\n"
        "  1. Call yb_search_sticker with a Chinese keyword (e.g. '666', '比心', '吃瓜', "
        "     '捂脸', '合十') to discover matching sticker_ids.\n"
        "  2. Call yb_send_sticker with the chosen sticker_id or name — this sends a real "
        "     TIMFaceElem that renders as a native sticker in the chat.\n"
        "DO NOT draw sticker-like PNGs with execute_code/Pillow/matplotlib and then send "
        "them via MEDIA: or send_image_file. That produces a fake low-quality 'sticker' "
        "image and is the WRONG path. Bare Unicode emoji in text is also not a substitute "
        "— when a sticker is the right response, use yb_send_sticker."
    ),
}
 517  
 518  # ---------------------------------------------------------------------------
 519  # Environment hints — execution-environment awareness for the agent.
 520  # Unlike PLATFORM_HINTS (which describe the messaging channel), these describe
 521  # the machine/OS the agent's tools actually run on.
 522  # ---------------------------------------------------------------------------
 523  
# Hint added by build_environment_hints() when is_wsl() reports WSL.  Explains
# that Windows drives are mounted under /mnt/<letter>/ and how to translate
# Windows paths the user mentions.
WSL_ENVIRONMENT_HINT = (
    "You are running inside WSL (Windows Subsystem for Linux). "
    "The Windows host filesystem is mounted under /mnt/ — "
    "/mnt/c/ is the C: drive, /mnt/d/ is D:, etc. "
    "The user's Windows files are typically at "
    "/mnt/c/Users/<username>/Desktop/, Documents/, Downloads/, etc. "
    "When the user references Windows paths or desktop files, translate "
    "to the /mnt/c/ equivalent. You can list /mnt/c/Users/ to discover "
    "the Windows username if needed."
)
 534  
 535  
 536  def build_environment_hints() -> str:
 537      """Return environment-specific guidance for the system prompt.
 538  
 539      Detects WSL, and can be extended for Termux, Docker, etc.
 540      Returns an empty string when no special environment is detected.
 541      """
 542      hints: list[str] = []
 543      if is_wsl():
 544          hints.append(WSL_ENVIRONMENT_HINT)
 545      return "\n\n".join(hints)
 546  
 547  
# Character budget for one context source; this is the default ``max_chars``
# of ``_truncate_content``.
CONTEXT_FILE_MAX_CHARS = 20_000
# Fractions of the budget kept from the start and from the end of over-long
# content.  They sum to 0.9, so head + tail stay under the budget before the
# truncation marker is inserted between them.
CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
CONTEXT_TRUNCATE_TAIL_RATIO = 0.2
 551  
 552  
 553  # =========================================================================
 554  # Skills prompt cache
 555  # =========================================================================
 556  
# Maximum number of rendered prompts kept in memory; build_skills_system_prompt()
# evicts from the least-recently-used end once this is exceeded.
_SKILLS_PROMPT_CACHE_MAX = 8
# Rendered skills prompt keyed by the cache-key tuple assembled in
# build_skills_system_prompt(); insertion order doubles as recency order.
_SKILLS_PROMPT_CACHE: OrderedDict[tuple, str] = OrderedDict()
# Held around accesses to _SKILLS_PROMPT_CACHE in this module.
_SKILLS_PROMPT_CACHE_LOCK = threading.Lock()
# Schema version stored in the disk snapshot; _load_skills_snapshot() ignores
# snapshots carrying any other value.
_SKILLS_SNAPSHOT_VERSION = 1
 561  
 562  
 563  def _skills_prompt_snapshot_path() -> Path:
 564      return get_hermes_home() / ".skills_prompt_snapshot.json"
 565  
 566  
 567  def clear_skills_system_prompt_cache(*, clear_snapshot: bool = False) -> None:
 568      """Drop the in-process skills prompt cache (and optionally the disk snapshot)."""
 569      with _SKILLS_PROMPT_CACHE_LOCK:
 570          _SKILLS_PROMPT_CACHE.clear()
 571      if clear_snapshot:
 572          try:
 573              _skills_prompt_snapshot_path().unlink(missing_ok=True)
 574          except OSError as e:
 575              logger.debug("Could not remove skills prompt snapshot: %s", e)
 576  
 577  
 578  def _build_skills_manifest(skills_dir: Path) -> dict[str, list[int]]:
 579      """Build an mtime/size manifest of all SKILL.md and DESCRIPTION.md files."""
 580      manifest: dict[str, list[int]] = {}
 581      for filename in ("SKILL.md", "DESCRIPTION.md"):
 582          for path in iter_skill_index_files(skills_dir, filename):
 583              try:
 584                  st = path.stat()
 585              except OSError:
 586                  continue
 587              manifest[str(path.relative_to(skills_dir))] = [st.st_mtime_ns, st.st_size]
 588      return manifest
 589  
 590  
 591  def _load_skills_snapshot(skills_dir: Path) -> Optional[dict]:
 592      """Load the disk snapshot if it exists and its manifest still matches."""
 593      snapshot_path = _skills_prompt_snapshot_path()
 594      if not snapshot_path.exists():
 595          return None
 596      try:
 597          snapshot = json.loads(snapshot_path.read_text(encoding="utf-8"))
 598      except Exception:
 599          return None
 600      if not isinstance(snapshot, dict):
 601          return None
 602      if snapshot.get("version") != _SKILLS_SNAPSHOT_VERSION:
 603          return None
 604      if snapshot.get("manifest") != _build_skills_manifest(skills_dir):
 605          return None
 606      return snapshot
 607  
 608  
 609  def _write_skills_snapshot(
 610      skills_dir: Path,
 611      manifest: dict[str, list[int]],
 612      skill_entries: list[dict],
 613      category_descriptions: dict[str, str],
 614  ) -> None:
 615      """Persist skill metadata to disk for fast cold-start reuse."""
 616      payload = {
 617          "version": _SKILLS_SNAPSHOT_VERSION,
 618          "manifest": manifest,
 619          "skills": skill_entries,
 620          "category_descriptions": category_descriptions,
 621      }
 622      try:
 623          atomic_json_write(_skills_prompt_snapshot_path(), payload)
 624      except Exception as e:
 625          logger.debug("Could not write skills prompt snapshot: %s", e)
 626  
 627  
 628  def _build_snapshot_entry(
 629      skill_file: Path,
 630      skills_dir: Path,
 631      frontmatter: dict,
 632      description: str,
 633  ) -> dict:
 634      """Build a serialisable metadata dict for one skill."""
 635      rel_path = skill_file.relative_to(skills_dir)
 636      parts = rel_path.parts
 637      if len(parts) >= 2:
 638          skill_name = parts[-2]
 639          category = "/".join(parts[:-2]) if len(parts) > 2 else parts[0]
 640      else:
 641          category = "general"
 642          skill_name = skill_file.parent.name
 643  
 644      platforms = frontmatter.get("platforms") or []
 645      if isinstance(platforms, str):
 646          platforms = [platforms]
 647  
 648      return {
 649          "skill_name": skill_name,
 650          "category": category,
 651          "frontmatter_name": str(frontmatter.get("name", skill_name)),
 652          "description": description,
 653          "platforms": [str(p).strip() for p in platforms if str(p).strip()],
 654          "conditions": extract_skill_conditions(frontmatter),
 655      }
 656  
 657  
 658  # =========================================================================
 659  # Skills index
 660  # =========================================================================
 661  
 662  def _parse_skill_file(skill_file: Path) -> tuple[bool, dict, str]:
 663      """Read a SKILL.md once and return platform compatibility, frontmatter, and description.
 664  
 665      Returns (is_compatible, frontmatter, description). On any error, returns
 666      (True, {}, "") to err on the side of showing the skill.
 667      """
 668      try:
 669          raw = skill_file.read_text(encoding="utf-8")
 670          frontmatter, _ = parse_frontmatter(raw)
 671  
 672          if not skill_matches_platform(frontmatter):
 673              return False, frontmatter, ""
 674  
 675          return True, frontmatter, extract_skill_description(frontmatter)
 676      except Exception as e:
 677          logger.warning("Failed to parse skill file %s: %s", skill_file, e)
 678          return True, {}, ""
 679  
 680  
 681  def _skill_should_show(
 682      conditions: dict,
 683      available_tools: "set[str] | None",
 684      available_toolsets: "set[str] | None",
 685  ) -> bool:
 686      """Return False if the skill's conditional activation rules exclude it."""
 687      if available_tools is None and available_toolsets is None:
 688          return True  # No filtering info — show everything (backward compat)
 689  
 690      at = available_tools or set()
 691      ats = available_toolsets or set()
 692  
 693      # fallback_for: hide when the primary tool/toolset IS available
 694      for ts in conditions.get("fallback_for_toolsets", []):
 695          if ts in ats:
 696              return False
 697      for t in conditions.get("fallback_for_tools", []):
 698          if t in at:
 699              return False
 700  
 701      # requires: hide when a required tool/toolset is NOT available
 702      for ts in conditions.get("requires_toolsets", []):
 703          if ts not in ats:
 704              return False
 705      for t in conditions.get("requires_tools", []):
 706          if t not in at:
 707              return False
 708  
 709      return True
 710  
 711  
 712  def build_skills_system_prompt(
 713      available_tools: "set[str] | None" = None,
 714      available_toolsets: "set[str] | None" = None,
 715  ) -> str:
 716      """Build a compact skill index for the system prompt.
 717  
 718      Two-layer cache:
 719        1. In-process LRU dict keyed by (skills_dir, tools, toolsets)
 720        2. Disk snapshot (``.skills_prompt_snapshot.json``) validated by
 721           mtime/size manifest — survives process restarts
 722  
 723      Falls back to a full filesystem scan when both layers miss.
 724  
 725      External skill directories (``skills.external_dirs`` in config.yaml) are
 726      scanned alongside the local ``~/.hermes/skills/`` directory.  External dirs
 727      are read-only — they appear in the index but new skills are always created
 728      in the local dir.  Local skills take precedence when names collide.
 729      """
 730      skills_dir = get_skills_dir()
 731      external_dirs = get_all_skills_dirs()[1:]  # skip local (index 0)
 732  
 733      if not skills_dir.exists() and not external_dirs:
 734          return ""
 735  
 736      # ── Layer 1: in-process LRU cache ─────────────────────────────────
 737      # Include the resolved platform so per-platform disabled-skill lists
 738      # produce distinct cache entries (gateway serves multiple platforms).
 739      from gateway.session_context import get_session_env
 740      _platform_hint = (
 741          os.environ.get("HERMES_PLATFORM")
 742          or get_session_env("HERMES_SESSION_PLATFORM")
 743          or ""
 744      )
 745      disabled = get_disabled_skill_names()
 746      cache_key = (
 747          str(skills_dir.resolve()),
 748          tuple(str(d) for d in external_dirs),
 749          tuple(sorted(str(t) for t in (available_tools or set()))),
 750          tuple(sorted(str(ts) for ts in (available_toolsets or set()))),
 751          _platform_hint,
 752          tuple(sorted(disabled)),
 753      )
 754      with _SKILLS_PROMPT_CACHE_LOCK:
 755          cached = _SKILLS_PROMPT_CACHE.get(cache_key)
 756          if cached is not None:
 757              _SKILLS_PROMPT_CACHE.move_to_end(cache_key)
 758              return cached
 759  
 760      # ── Layer 2: disk snapshot ────────────────────────────────────────
 761      snapshot = _load_skills_snapshot(skills_dir)
 762  
 763      skills_by_category: dict[str, list[tuple[str, str]]] = {}
 764      category_descriptions: dict[str, str] = {}
 765  
 766      if snapshot is not None:
 767          # Fast path: use pre-parsed metadata from disk
 768          for entry in snapshot.get("skills", []):
 769              if not isinstance(entry, dict):
 770                  continue
 771              skill_name = entry.get("skill_name") or ""
 772              category = entry.get("category") or "general"
 773              frontmatter_name = entry.get("frontmatter_name") or skill_name
 774              platforms = entry.get("platforms") or []
 775              if not skill_matches_platform({"platforms": platforms}):
 776                  continue
 777              if frontmatter_name in disabled or skill_name in disabled:
 778                  continue
 779              if not _skill_should_show(
 780                  entry.get("conditions") or {},
 781                  available_tools,
 782                  available_toolsets,
 783              ):
 784                  continue
 785              skills_by_category.setdefault(category, []).append(
 786                  (frontmatter_name, entry.get("description", ""))
 787              )
 788          category_descriptions = {
 789              str(k): str(v)
 790              for k, v in (snapshot.get("category_descriptions") or {}).items()
 791          }
 792      else:
 793          # Cold path: full filesystem scan + write snapshot for next time
 794          skill_entries: list[dict] = []
 795          for skill_file in iter_skill_index_files(skills_dir, "SKILL.md"):
 796              is_compatible, frontmatter, desc = _parse_skill_file(skill_file)
 797              entry = _build_snapshot_entry(skill_file, skills_dir, frontmatter, desc)
 798              skill_entries.append(entry)
 799              if not is_compatible:
 800                  continue
 801              skill_name = entry["skill_name"]
 802              if entry["frontmatter_name"] in disabled or skill_name in disabled:
 803                  continue
 804              if not _skill_should_show(
 805                  extract_skill_conditions(frontmatter),
 806                  available_tools,
 807                  available_toolsets,
 808              ):
 809                  continue
 810              skills_by_category.setdefault(entry["category"], []).append(
 811                  (entry["frontmatter_name"], entry["description"])
 812              )
 813  
 814          # Read category-level DESCRIPTION.md files
 815          for desc_file in iter_skill_index_files(skills_dir, "DESCRIPTION.md"):
 816              try:
 817                  content = desc_file.read_text(encoding="utf-8")
 818                  fm, _ = parse_frontmatter(content)
 819                  cat_desc = fm.get("description")
 820                  if not cat_desc:
 821                      continue
 822                  rel = desc_file.relative_to(skills_dir)
 823                  cat = "/".join(rel.parts[:-1]) if len(rel.parts) > 1 else "general"
 824                  category_descriptions[cat] = str(cat_desc).strip().strip("'\"")
 825              except Exception as e:
 826                  logger.debug("Could not read skill description %s: %s", desc_file, e)
 827  
 828          _write_skills_snapshot(
 829              skills_dir,
 830              _build_skills_manifest(skills_dir),
 831              skill_entries,
 832              category_descriptions,
 833          )
 834  
 835      # ── External skill directories ─────────────────────────────────────
 836      # Scan external dirs directly (no snapshot caching — they're read-only
 837      # and typically small).  Local skills already in skills_by_category take
 838      # precedence: we track seen names and skip duplicates from external dirs.
 839      seen_skill_names: set[str] = set()
 840      for cat_skills in skills_by_category.values():
 841          for name, _desc in cat_skills:
 842              seen_skill_names.add(name)
 843  
 844      for ext_dir in external_dirs:
 845          if not ext_dir.exists():
 846              continue
 847          for skill_file in iter_skill_index_files(ext_dir, "SKILL.md"):
 848              try:
 849                  is_compatible, frontmatter, desc = _parse_skill_file(skill_file)
 850                  if not is_compatible:
 851                      continue
 852                  entry = _build_snapshot_entry(skill_file, ext_dir, frontmatter, desc)
 853                  skill_name = entry["skill_name"]
 854                  frontmatter_name = entry["frontmatter_name"]
 855                  if frontmatter_name in seen_skill_names:
 856                      continue
 857                  if frontmatter_name in disabled or skill_name in disabled:
 858                      continue
 859                  if not _skill_should_show(
 860                      extract_skill_conditions(frontmatter),
 861                      available_tools,
 862                      available_toolsets,
 863                  ):
 864                      continue
 865                  seen_skill_names.add(frontmatter_name)
 866                  skills_by_category.setdefault(entry["category"], []).append(
 867                      (frontmatter_name, entry["description"])
 868                  )
 869              except Exception as e:
 870                  logger.debug("Error reading external skill %s: %s", skill_file, e)
 871  
 872          # External category descriptions
 873          for desc_file in iter_skill_index_files(ext_dir, "DESCRIPTION.md"):
 874              try:
 875                  content = desc_file.read_text(encoding="utf-8")
 876                  fm, _ = parse_frontmatter(content)
 877                  cat_desc = fm.get("description")
 878                  if not cat_desc:
 879                      continue
 880                  rel = desc_file.relative_to(ext_dir)
 881                  cat = "/".join(rel.parts[:-1]) if len(rel.parts) > 1 else "general"
 882                  category_descriptions.setdefault(cat, str(cat_desc).strip().strip("'\""))
 883              except Exception as e:
 884                  logger.debug("Could not read external skill description %s: %s", desc_file, e)
 885  
 886      if not skills_by_category:
 887          result = ""
 888      else:
 889          index_lines = []
 890          for category in sorted(skills_by_category.keys()):
 891              cat_desc = category_descriptions.get(category, "")
 892              if cat_desc:
 893                  index_lines.append(f"  {category}: {cat_desc}")
 894              else:
 895                  index_lines.append(f"  {category}:")
 896              # Deduplicate and sort skills within each category
 897              seen = set()
 898              for name, desc in sorted(skills_by_category[category], key=lambda x: x[0]):
 899                  if name in seen:
 900                      continue
 901                  seen.add(name)
 902                  if desc:
 903                      index_lines.append(f"    - {name}: {desc}")
 904                  else:
 905                      index_lines.append(f"    - {name}")
 906  
 907          result = (
 908              "## Skills (mandatory)\n"
 909              "Before replying, scan the skills below. If a skill matches or is even partially relevant "
 910              "to your task, you MUST load it with skill_view(name) and follow its instructions. "
 911              "Err on the side of loading — it is always better to have context you don't need "
 912              "than to miss critical steps, pitfalls, or established workflows. "
 913              "Skills contain specialized knowledge — API endpoints, tool-specific commands, "
 914              "and proven workflows that outperform general-purpose approaches. Load the skill "
 915              "even if you think you could handle the task with basic tools like web_search or terminal. "
 916              "Skills also encode the user's preferred approach, conventions, and quality standards "
 917              "for tasks like code review, planning, and testing — load them even for tasks you "
 918              "already know how to do, because the skill defines how it should be done here.\n"
 919              "Whenever the user asks you to configure, set up, install, enable, disable, modify, "
 920              "or troubleshoot Hermes Agent itself — its CLI, config, models, providers, tools, "
 921              "skills, voice, gateway, plugins, or any feature — load the `hermes-agent` skill "
 922              "first. It has the actual commands (e.g. `hermes config set …`, `hermes tools`, "
 923              "`hermes setup`) so you don't have to guess or invent workarounds.\n"
 924              "If a skill has issues, fix it with skill_manage(action='patch').\n"
 925              "After difficult/iterative tasks, offer to save as a skill. "
 926              "If a skill you loaded was missing steps, had wrong commands, or needed "
 927              "pitfalls you discovered, update it before finishing.\n"
 928              "\n"
 929              "<available_skills>\n"
 930              + "\n".join(index_lines) + "\n"
 931              "</available_skills>\n"
 932              "\n"
 933              "Only proceed without loading a skill if genuinely none are relevant to the task."
 934          )
 935  
 936      # ── Store in LRU cache ────────────────────────────────────────────
 937      with _SKILLS_PROMPT_CACHE_LOCK:
 938          _SKILLS_PROMPT_CACHE[cache_key] = result
 939          _SKILLS_PROMPT_CACHE.move_to_end(cache_key)
 940          while len(_SKILLS_PROMPT_CACHE) > _SKILLS_PROMPT_CACHE_MAX:
 941              _SKILLS_PROMPT_CACHE.popitem(last=False)
 942  
 943      return result
 944  
 945  
 946  def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -> str:
 947      """Build a compact Nous subscription capability block for the system prompt."""
 948      try:
 949          from hermes_cli.nous_subscription import get_nous_subscription_features
 950          from tools.tool_backend_helpers import managed_nous_tools_enabled
 951      except Exception as exc:
 952          logger.debug("Failed to import Nous subscription helper: %s", exc)
 953          return ""
 954  
 955      if not managed_nous_tools_enabled():
 956          return ""
 957  
 958      valid_names = set(valid_tool_names or set())
 959      relevant_tool_names = {
 960          "web_search",
 961          "web_extract",
 962          "browser_navigate",
 963          "browser_snapshot",
 964          "browser_click",
 965          "browser_type",
 966          "browser_scroll",
 967          "browser_console",
 968          "browser_press",
 969          "browser_get_images",
 970          "browser_vision",
 971          "image_generate",
 972          "text_to_speech",
 973          "terminal",
 974          "process",
 975          "execute_code",
 976      }
 977  
 978      if valid_names and not (valid_names & relevant_tool_names):
 979          return ""
 980  
 981      features = get_nous_subscription_features()
 982  
 983      def _status_line(feature) -> str:
 984          if feature.managed_by_nous:
 985              return f"- {feature.label}: active via Nous subscription"
 986          if feature.active:
 987              current = feature.current_provider or "configured provider"
 988              return f"- {feature.label}: currently using {current}"
 989          if feature.included_by_default and features.nous_auth_present:
 990              return f"- {feature.label}: included with Nous subscription, not currently selected"
 991          if feature.key == "modal" and features.nous_auth_present:
 992              return f"- {feature.label}: optional via Nous subscription"
 993          return f"- {feature.label}: not currently available"
 994  
 995      lines = [
 996          "# Nous Subscription",
 997          "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser Use) by default. Modal execution is optional.",
 998          "Current capability status:",
 999      ]
1000      lines.extend(_status_line(feature) for feature in features.items())
1001      lines.extend(
1002          [
1003              "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys.",
1004              "If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.",
1005              "Do not mention subscription unless the user asks about it or it directly solves the current missing capability.",
1006              "Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.",
1007          ]
1008      )
1009      return "\n".join(lines)
1010  
1011  
1012  # =========================================================================
1013  # Context files (SOUL.md, AGENTS.md, .cursorrules)
1014  # =========================================================================
1015  
def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
    """Head/tail truncation with a marker in the middle.

    Args:
        content: Text to truncate.
        filename: Name shown in the truncation marker.
        max_chars: Length threshold; content at or below it is returned as is.

    Returns:
        *content* unchanged when it fits.  Otherwise the leading
        ``CONTEXT_TRUNCATE_HEAD_RATIO`` share and the trailing
        ``CONTEXT_TRUNCATE_TAIL_RATIO`` share of *max_chars*, separated by a
        marker that reports how much was kept.
    """
    if len(content) <= max_chars:
        return content
    # Clamp so a nonsensical (negative) limit cannot yield negative counts.
    head_chars = max(int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO), 0)
    tail_chars = max(int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO), 0)
    head = content[:head_chars]
    # content[-0:] is the WHOLE string, not an empty one, so a tail budget
    # that rounds down to zero (tiny max_chars) must be handled explicitly.
    tail = content[-tail_chars:] if tail_chars else ""
    marker = f"\n\n[...truncated {filename}: kept {head_chars}+{tail_chars} of {len(content)} chars. Use file tools to read the full file.]\n\n"
    return head + marker + tail
1026  
1027  
def load_soul_md() -> Optional[str]:
    """Read SOUL.md from HERMES_HOME for use as the agent identity.

    When this returns content, ``build_context_files_prompt`` should be called
    with ``skip_soul=True`` so SOUL.md isn't injected twice.

    Returns:
        The stripped file content after scanning and truncation, or ``None``
        when the file is missing, empty, or cannot be read.
    """
    try:
        from hermes_cli.config import ensure_hermes_home
        ensure_hermes_home()
    except Exception as exc:
        # Best effort only; carry on and try to read the file anyway.
        logger.debug("Could not ensure HERMES_HOME before loading SOUL.md: %s", exc)

    soul_file = get_hermes_home() / "SOUL.md"
    if not soul_file.exists():
        return None

    try:
        text = soul_file.read_text(encoding="utf-8").strip()
        if text:
            # Scan first, then truncate the scanned result.
            scanned = _scan_context_content(text, "SOUL.md")
            return _truncate_content(scanned, "SOUL.md")
        return None
    except Exception as exc:
        logger.debug("Could not read SOUL.md from %s: %s", soul_file, exc)
        return None
1054  
1055  
def _load_hermes_md(cwd_path: Path) -> str:
    """Load the Hermes project file located by ``_find_hermes_md``.

    Returns:
        A ``## <path>`` section holding the file content with YAML
        frontmatter stripped, scanned and truncated; or an empty string when
        no file is found, it is empty, or reading fails.
    """
    found = _find_hermes_md(cwd_path)
    if not found:
        return ""

    try:
        body = found.read_text(encoding="utf-8").strip()
        if not body:
            return ""
        body = _strip_yaml_frontmatter(body)
        # Label with the path relative to cwd when possible, else the bare name.
        try:
            label = str(found.relative_to(cwd_path))
        except ValueError:
            label = found.name
        body = _scan_context_content(body, label)
        return _truncate_content(f"## {label}\n\n{body}", ".hermes.md")
    except Exception as exc:
        logger.debug("Could not read %s: %s", found, exc)
        return ""
1077  
1078  
def _load_agents_md(cwd_path: Path) -> str:
    """Load AGENTS.md (or agents.md) directly inside *cwd_path*.

    Candidates are tried in order; the first one that exists and has
    non-empty content is returned as a ``## <name>`` section after scanning
    and truncation.  Unreadable candidates are logged and skipped.

    Returns:
        The section text, or an empty string when nothing usable was found.
    """
    for name in ("AGENTS.md", "agents.md"):
        candidate = cwd_path / name
        if not candidate.exists():
            continue
        try:
            text = candidate.read_text(encoding="utf-8").strip()
            if not text:
                continue
            scanned = _scan_context_content(text, name)
            return _truncate_content(f"## {name}\n\n{scanned}", "AGENTS.md")
        except Exception as exc:
            logger.debug("Could not read %s: %s", candidate, exc)
    return ""
1093  
1094  
def _load_claude_md(cwd_path: Path) -> str:
    """Load CLAUDE.md (or claude.md) directly inside *cwd_path*.

    Candidates are tried in order; the first one that exists and has
    non-empty content is returned as a ``## <name>`` section after scanning
    and truncation.  Unreadable candidates are logged and skipped.

    Returns:
        The section text, or an empty string when nothing usable was found.
    """
    for name in ("CLAUDE.md", "claude.md"):
        candidate = cwd_path / name
        if not candidate.exists():
            continue
        try:
            text = candidate.read_text(encoding="utf-8").strip()
            if not text:
                continue
            scanned = _scan_context_content(text, name)
            return _truncate_content(f"## {name}\n\n{scanned}", "CLAUDE.md")
        except Exception as exc:
            logger.debug("Could not read %s: %s", candidate, exc)
    return ""
1109  
1110  
def _load_cursorrules(cwd_path: Path) -> str:
    """Load Cursor rule files found inside *cwd_path*.

    Reads ``.cursorrules`` and then every ``.cursor/rules/*.mdc`` file in
    sorted order.  Each non-empty file is scanned and emitted as its own
    ``## <path>`` section; unreadable files are logged and skipped.

    Returns:
        All sections concatenated and truncated as one unit, or an empty
        string when no file contributed content.
    """
    sections: list[str] = []

    legacy_file = cwd_path / ".cursorrules"
    if legacy_file.exists():
        try:
            text = legacy_file.read_text(encoding="utf-8").strip()
            if text:
                text = _scan_context_content(text, ".cursorrules")
                sections.append(f"## .cursorrules\n\n{text}\n\n")
        except Exception as exc:
            logger.debug("Could not read .cursorrules: %s", exc)

    rules_dir = cwd_path / ".cursor" / "rules"
    if rules_dir.exists() and rules_dir.is_dir():
        for rule_file in sorted(rules_dir.glob("*.mdc")):
            try:
                text = rule_file.read_text(encoding="utf-8").strip()
                if text:
                    text = _scan_context_content(text, f".cursor/rules/{rule_file.name}")
                    sections.append(f"## .cursor/rules/{rule_file.name}\n\n{text}\n\n")
            except Exception as exc:
                logger.debug("Could not read %s: %s", rule_file, exc)

    combined = "".join(sections)
    if not combined:
        return ""
    return _truncate_content(combined, ".cursorrules")
1139  
1140  
def build_context_files_prompt(cwd: Optional[str] = None, skip_soul: bool = False) -> str:
    """Collect context files and render them as the project-context section.

    Exactly one project context type is loaded; the first loader that
    returns content wins:
      1. .hermes.md / HERMES.md  (walk to git root)
      2. AGENTS.md / agents.md   (cwd only)
      3. CLAUDE.md / claude.md   (cwd only)
      4. .cursorrules / .cursor/rules/*.mdc  (cwd only)

    SOUL.md from HERMES_HOME is independent of that choice and is appended
    whenever it has content, unless *skip_soul* is True (it was already
    loaded via ``load_soul_md()`` for the identity slot).

    Args:
        cwd: Directory to search; defaults to the process working directory.
        skip_soul: Leave SOUL.md out of the result.

    Returns:
        The rendered section, or an empty string when nothing was loaded.
    """
    base_dir = Path(os.getcwd() if cwd is None else cwd).resolve()

    # Loaders in priority order; stop at the first one that yields content so
    # lower-priority files are never read.
    project_context = ""
    for loader in (_load_hermes_md, _load_agents_md, _load_claude_md, _load_cursorrules):
        project_context = loader(base_dir)
        if project_context:
            break

    sections = [project_context] if project_context else []

    # SOUL.md from HERMES_HOME only — skipped when already used as identity.
    if not skip_soul:
        soul_content = load_soul_md()
        if soul_content:
            sections.append(soul_content)

    if not sections:
        return ""

    header = "# Project Context\n\nThe following project context files have been loaded and should be followed:\n\n"
    return header + "\n".join(sections)