# cli-config.yaml.example
   1  # Hermes Agent CLI Configuration
   2  # Copy this file to cli-config.yaml and customize as needed.
   3  # This file configures the CLI behavior. Environment variables in .env take precedence.
   4  
   5  # =============================================================================
   6  # Model Configuration
   7  # =============================================================================
   8  model:
   9    # Default model to use (can be overridden with --model flag)
  10    # Both "default" and "model" work as the key name here.
  11    default: "anthropic/claude-opus-4.6"
  12    
  13    # Inference provider selection:
  14    #   "auto"         - Auto-detect from credentials (default)
  15    #   "openrouter"   - OpenRouter (requires: OPENROUTER_API_KEY or OPENAI_API_KEY)
  16    #   "nous"         - Nous Portal OAuth (requires: hermes login)
  17    #   "nous-api"     - Nous Portal API key (requires: NOUS_API_KEY)
  18    #   "anthropic"    - Direct Anthropic API (requires: ANTHROPIC_API_KEY)
  19    #   "openai-codex" - OpenAI Codex (requires: hermes auth)
  20    #   "copilot"      - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN)
  21    #   "gemini"       - Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
  22    #   "zai"          - z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
  23    #   "kimi-coding"  - Kimi / Moonshot AI (requires: KIMI_API_KEY)
  24    #   "minimax"      - MiniMax global (requires: MINIMAX_API_KEY)
  25    #   "minimax-cn"   - MiniMax China (requires: MINIMAX_CN_API_KEY)
  26    #   "huggingface"  - Hugging Face Inference (requires: HF_TOKEN)
  27    #   "nvidia"       - NVIDIA NIM / build.nvidia.com (requires: NVIDIA_API_KEY)
  28    #   "xiaomi"       - Xiaomi MiMo (requires: XIAOMI_API_KEY)
  29    #   "arcee"        - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
  30    #   "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
  31    #   "kilocode"     - KiloCode gateway (requires: KILOCODE_API_KEY)
  32    #   "ai-gateway"   - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
  33    #   "lmstudio"     - LM Studio local server (optional: LM_API_KEY, defaults to http://127.0.0.1:1234/v1)
  34    #
  35    # Local servers (LM Studio, Ollama, vLLM, llama.cpp):
  36    #   "custom"       - Any other OpenAI-compatible endpoint. Set base_url below.
  37    #   Aliases: "ollama", "vllm", "llamacpp" all map to "custom".
  38    #   LM Studio is first-class and uses provider: "lmstudio".
  39    #   It works with both no-auth and auth-enabled server modes.
  40    #
  41    # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
  42    provider: "auto"
  43    
  44    # API configuration (falls back to OPENROUTER_API_KEY env var)
  45    # api_key: "your-key-here"  # Uncomment to set here instead of .env
  46    base_url: "https://openrouter.ai/api/v1"
  47  
  48    # ── Token limits — two settings, easy to confuse ──────────────────────────
  49    #
  50    # context_length: TOTAL context window (input + output tokens combined).
  51    #   Controls when Hermes compresses history and validates requests.
  52    #   Leave unset — Hermes auto-detects the correct value from the provider.
  53    #   Set manually only when auto-detection is wrong (e.g. a local server with
  54    #   a custom num_ctx, or a proxy that doesn't expose /v1/models).
  55    #
  56    # context_length: 131072
  57    #
  58    # max_tokens: OUTPUT cap — maximum tokens the model may generate per response.
  59    #   Unrelated to how long your conversation history can be.
  60    #   The OpenAI-standard name "max_tokens" is a misnomer for this output cap;
  61    #   OpenAI itself has since renamed it "max_completion_tokens" for clarity.
  62    #   Leave unset to use the model's native output ceiling (recommended).
  63    #   Set only if you want to deliberately limit individual response length.
  64    #
  65    # max_tokens: 8192
  66  
  67  # Named provider overrides (optional)
  68  # Use this for per-provider request timeouts, non-stream stale timeouts,
  69  # and per-model exceptions.
  70  # Applies to the primary turn client on every api_mode (OpenAI-wire, native
  71  # Anthropic, and Anthropic-compatible providers), the fallback chain, and
  72  # client rebuilds during credential rotation.  For OpenAI-wire chat
  73  # completions (streaming and non-streaming) the configured value is also
  74  # used as the per-request ``timeout=`` kwarg so it wins over the legacy
  75  # HERMES_API_TIMEOUT env var (which still applies when no config is set).
  76  # ``stale_timeout_seconds`` controls the non-streaming stale-call detector and
  77  # wins over the legacy HERMES_API_CALL_STALE_TIMEOUT env var. Leaving these
  78  # unset keeps the legacy defaults (HERMES_API_TIMEOUT=1800s,
  79  # HERMES_API_CALL_STALE_TIMEOUT=300s, native Anthropic 900s).
  80  #
  81  # Not currently wired for AWS Bedrock (bedrock_converse + AnthropicBedrock
  82  # SDK paths) — those use boto3 with its own timeout configuration.
  83  #
  84  # providers:
  85  #   ollama-local:
  86  #     request_timeout_seconds: 300   # Longer timeout for local cold-starts
  87  #     stale_timeout_seconds: 900     # Explicitly re-enable stale detection on local endpoints
  88  #   anthropic:
  89  #     request_timeout_seconds: 30    # Fast-fail cloud requests
  90  #     models:
  91  #       claude-opus-4.6:
  92  #         timeout_seconds: 600       # Longer timeout for extended-thinking Opus calls
  93  #   openai-codex:
  94  #     models:
  95  #       gpt-5.4:
  96  #         stale_timeout_seconds: 1800  # Longer non-stream stale timeout for slow large-context turns
  97  
  98  # =============================================================================
  99  # OpenRouter Provider Routing (only applies when using OpenRouter)
 100  # =============================================================================
 101  # Control how requests are routed across providers on OpenRouter.
 102  # See: https://openrouter.ai/docs/guides/routing/provider-selection
 103  #
 104  # provider_routing:
 105  #   # Sort strategy: "price" (default), "throughput", or "latency"
 106  #   # Append :nitro to model name for a shortcut to throughput sorting.
 107  #   sort: "throughput"
 108  #
 109  #   # Only allow these providers (provider slugs from OpenRouter)
 110  #   # only: ["anthropic", "google"]
 111  #
 112  #   # Skip these providers entirely
 113  #   # ignore: ["deepinfra", "fireworks"]
 114  #
 115  #   # Try providers in this order (overrides default load balancing)
 116  #   # order: ["anthropic", "google", "together"]
 117  #
 118  #   # Require providers to support all parameters in your request
 119  #   # require_parameters: true
 120  #
 121  #   # Data policy: "allow" (default) or "deny" to exclude providers that may store data
 122  #   # data_collection: "deny"
 123  
 124  # =============================================================================
 125  # OpenRouter Response Caching (only applies when using OpenRouter)
 126  # =============================================================================
 127  # Cache identical API responses at the OpenRouter edge for free instant replays.
 128  # When enabled, identical requests (same model, messages, parameters) return
 129  # cached responses with zero billing. Separate from Anthropic prompt caching.
 130  # See: https://openrouter.ai/docs/guides/features/response-caching
 131  #
 132  # openrouter:
 133  #   response_cache: true         # Enable response caching (default: true)
 134  #   response_cache_ttl: 300      # Cache TTL in seconds, 1-86400 (default: 300)
 135  
 136  # =============================================================================
 137  # Git Worktree Isolation
 138  # =============================================================================
 139  # When enabled, each CLI session creates an isolated git worktree so multiple
 140  # agents can work on the same repo concurrently without file collisions.
 141  # Equivalent to always passing --worktree / -w on the command line.
 142  #
 143  # worktree: true    # Always create a worktree when in a git repo
 144  # worktree: false   # Default — only create when -w flag is passed
 145  
 146  # =============================================================================
 147  # Terminal Tool Configuration
 148  # =============================================================================
 149  # Choose ONE of the following terminal configurations by uncommenting it.
 150  # The terminal tool executes commands in the specified environment.
 151  
 152  # -----------------------------------------------------------------------------
 153  # OPTION 1: Local execution (default)
 154  # Commands run directly on your machine in the current directory
 155  # -----------------------------------------------------------------------------
 156  # Working directory behavior:
 157  #   - CLI (`hermes` command): Uses "." (current directory where you run hermes)
 158  #   - Messaging (Telegram/Discord): Uses MESSAGING_CWD from .env (default: home)
 159  terminal:
 160    backend: "local"
 161    cwd: "."  # For local backend: "." = current directory. Ignored for remote backends unless a backend documents otherwise.
 162    timeout: 180
 163    docker_mount_cwd_to_workspace: false  # SECURITY: off by default. Opt in to mount the launch cwd into Docker /workspace.
 164    lifetime_seconds: 300
 165    # sudo_password: "hunter2"  # Optional: pipe a sudo password via sudo -S. SECURITY WARNING: plaintext.
 166    # sudo_password: ""         # Explicit empty password: try empty and never open the interactive sudo prompt.
 167  
 168  # -----------------------------------------------------------------------------
 169  # OPTION 2: SSH remote execution
 170  # Commands run on a remote server - agent code stays local (sandboxed)
 171  # Great for: keeping agent isolated from its own code, using powerful remote hardware
 172  # -----------------------------------------------------------------------------
 173  # terminal:
 174  #   backend: "ssh"
 175  #   cwd: "/home/myuser/project"  # Path on the REMOTE server
 176  #   timeout: 180
 177  #   lifetime_seconds: 300
 178  #   ssh_host: "my-server.example.com"
 179  #   ssh_user: "myuser"
 180  #   ssh_port: 22
 181  #   ssh_key: "~/.ssh/id_rsa"  # Optional - uses ssh-agent if not specified
 182  
 183  # -----------------------------------------------------------------------------
 184  # OPTION 3: Docker container
 185  # Commands run in an isolated Docker container
 186  # Great for: reproducible environments, testing, isolation
 187  # -----------------------------------------------------------------------------
 188  # terminal:
 189  #   backend: "docker"
 190  #   cwd: "/workspace"  # Path INSIDE the container (default: /)
 191  #   timeout: 180
 192  #   lifetime_seconds: 300
 193  #   docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
 194  #   docker_mount_cwd_to_workspace: true   # Explicit opt-in: mount your launch cwd into /workspace
 195  #   # Optional: run the container as your host user's uid:gid so files written
 196  #   # into bind-mounted dirs are owned by you, not root. Drops SETUID/SETGID
 197  #   # caps too since no gosu privilege drop is needed. Leave off if your
 198  #   # chosen docker_image expects to start as root.
 199  #   docker_run_as_host_user: true
 200  #   # Optional: explicitly forward selected env vars into Docker.
 201  #   # These values come from your current shell first, then ~/.hermes/.env.
 202  #   # Warning: anything forwarded here is visible to commands run in the container.
 203  #   docker_forward_env:
 204  #     - "GITHUB_TOKEN"
 205  #     - "NPM_TOKEN"
 206  
 207  # -----------------------------------------------------------------------------
 208  # OPTION 4: Singularity/Apptainer container
 209  # Commands run in a Singularity container (common in HPC environments)
 210  # Great for: HPC clusters, shared compute environments
 211  # -----------------------------------------------------------------------------
 212  # terminal:
 213  #   backend: "singularity"
 214  #   cwd: "/workspace"  # Path INSIDE the container (default: /root)
 215  #   timeout: 180
 216  #   lifetime_seconds: 300
 217  #   singularity_image: "docker://nikolaik/python-nodejs:python3.11-nodejs20"
 218  
 219  # -----------------------------------------------------------------------------
 220  # OPTION 5: Modal cloud execution
 221  # Commands run on Modal's cloud infrastructure
 222  # Great for: GPU access, scalable compute, serverless execution
 223  # -----------------------------------------------------------------------------
 224  # terminal:
 225  #   backend: "modal"
 226  #   cwd: "/workspace"  # Path INSIDE the sandbox (default: /root)
 227  #   timeout: 180
 228  #   lifetime_seconds: 300
 229  #   modal_image: "nikolaik/python-nodejs:python3.11-nodejs20"
 230  
 231  # -----------------------------------------------------------------------------
 232  # OPTION 6: Daytona cloud execution
 233  # Commands run in Daytona cloud sandboxes
 234  # Great for: Cloud dev environments, persistent workspaces, team collaboration
 235  # Requires: pip install daytona, DAYTONA_API_KEY env var
 236  # -----------------------------------------------------------------------------
 237  # terminal:
 238  #   backend: "daytona"
 239  #   cwd: "~"
 240  #   timeout: 180
 241  #   lifetime_seconds: 300
 242  #   daytona_image: "nikolaik/python-nodejs:python3.11-nodejs20"
 243  #   container_disk: 10240          # Daytona max is 10GB per sandbox
 244  
 245  #
 246  # --- Container resource limits (docker, singularity, modal, daytona -- ignored for local/ssh) ---
 247  # These settings apply to all container backends. They control the resources
 248  # allocated to the sandbox and whether its filesystem persists across sessions.
 249    container_cpu: 1              # CPU cores
 250    container_memory: 5120        # Memory in MB (5120 = 5GB)
 251    container_disk: 51200         # Disk in MB (51200 = 50GB)
 252    container_persistent: true    # Persist filesystem across sessions (false = ephemeral)
 253  
 254  # -----------------------------------------------------------------------------
 255  # SUDO SUPPORT (works with ALL backends above)
 256  # -----------------------------------------------------------------------------
 257  # Add sudo_password to any terminal config above to enable sudo commands.
 258  # The password is piped via `sudo -S`. Works with local, ssh, docker, etc.
 259  #
 260  # SECURITY WARNING: Password stored in plaintext!
 261  #
 262  # INTERACTIVE PROMPT: If sudo_password is unset and the CLI is running,
 263  # you'll be prompted to enter your password when sudo is needed:
 264  # - 45-second timeout (auto-skips if no input)
 265  # - Press Enter to skip (command fails gracefully)
 266  # - Password is hidden while typing
 267  # - Password is cached for the session
 268  #
 269  # EMPTY PASSWORDS: Setting sudo_password to an explicit empty string is different
 270  # from leaving it unset. Hermes will try an empty password via `sudo -S` and
 271  # will not open the interactive prompt. This is useful for passwordless sudo,
 272  # Touch ID sudo setups, and environments where prompting is just noise.
 273  #
 274  # ALTERNATIVES:
 275  # - SSH backend: Configure passwordless sudo on the remote server
 276  # - Containers: Run as root inside the container (no sudo needed)
 277  # - Local: Configure /etc/sudoers for specific commands
 278  #
 279  # Example (add to your terminal section):
 280  #   sudo_password: "your-password-here"
 281  
 282  # =============================================================================
 283  # Security Scanning (tirith)
 284  # =============================================================================
 285  # Optional pre-exec command security scanning via tirith.
 286  # Detects homograph URLs, pipe-to-shell, terminal injection, env manipulation.
 287  # Install: brew install sheeki03/tap/tirith
 288  # Docs: https://github.com/sheeki03/tirith
 289  #
 290  # security:
 291  #   tirith_enabled: true        # Enable/disable tirith scanning
 292  #   tirith_path: "tirith"       # Path to tirith binary (supports ~ expansion)
 293  #   tirith_timeout: 5           # Scan timeout in seconds
 294  #   tirith_fail_open: true      # Allow commands if tirith unavailable
 295  
 296  # =============================================================================
 297  # Browser Tool Configuration
 298  # =============================================================================
 299  browser:
 300    # Inactivity timeout in seconds - browser sessions are automatically closed
 301    # after this period of no activity between agent loops (default: 120 = 2 minutes)
 302    inactivity_timeout: 120
 303  
 304  # =============================================================================
 305  # Tool Loop Guardrails
 306  # =============================================================================
 307  # Soft warnings are enabled by default. They append guidance to repeated failed
 308  # or non-progressing tool results but still let the tool execute. Hard stops are
 309  # opt-in circuit breakers for autonomous/cron sessions where stopping a loop is
 310  # preferable to spending the full iteration budget.
 311  tool_loop_guardrails:
 312    warnings_enabled: true
 313    hard_stop_enabled: false
 314    warn_after:
 315      exact_failure: 2
 316      same_tool_failure: 3
 317      idempotent_no_progress: 2
 318    hard_stop_after:
 319      exact_failure: 5
 320      same_tool_failure: 8
 321      idempotent_no_progress: 5
 322  
 323  # =============================================================================
 324  # Context Compression (Auto-shrinks long conversations)
 325  # =============================================================================
 326  # When conversation approaches model's context limit, middle turns are
 327  # automatically summarized to free up space while preserving important context.
 328  #
 329  # HOW IT WORKS:
 330  # 1. Tracks actual token usage from API responses (not estimates)
 331  # 2. When prompt_tokens >= threshold% of model's context_length, triggers compression
 332  # 3. Protects first 3 turns (system prompt, initial request, first response)
 333  # 4. Protects last N turns (default 20 messages = ~10 full turns of recent context)
 334  # 5. Summarizes middle turns using a fast/cheap model
 335  # 6. Inserts summary as a user message, continues conversation seamlessly
 336  #
 337  # Post-compression tail budget is target_ratio × threshold × context_length:
 338  #   200K context, threshold 0.50, ratio 0.20 → 20K tokens of recent tail preserved
 339  #   1M   context, threshold 0.50, ratio 0.20 → 100K tokens of recent tail preserved
 340  #
 341  compression:
 342    # Enable automatic context compression (default: true)
 343    # Set to false if you prefer to manage context manually or want errors on overflow
 344    enabled: true
 345    
 346    # Trigger compression at this % of model's context limit (default: 0.50 = 50%)
 347    # Lower values = more aggressive compression, higher values = compress later
 348    threshold: 0.50
 349    
 350    # Fraction of the threshold to preserve as recent tail (default: 0.20 = 20%)
 351    # e.g. 20% of 50% threshold = 10% of total context kept as recent messages.
 352    # Summary output is separately capped at 12K tokens (Gemini output limit).
 353    # Range: 0.10 - 0.80
 354    target_ratio: 0.20
 355  
 356    # Number of most-recent messages to always preserve (default: 20 ≈ 10 full turns)
 357    # Higher values keep more recent conversation intact at the cost of more aggressive
 358    # compression of older turns.
 359    protect_last_n: 20
 360  
 361    # To pin a specific model/provider for compression summaries, use the
 362    # auxiliary section below (auxiliary.compression.provider / model).
 363  
 364  # =============================================================================
 365  # Anthropic prompt caching TTL
 366  # =============================================================================
 367  # When prompt caching is active (Claude via OpenRouter or native Anthropic),
 368  # Anthropic supports two TTL tiers for cached prefixes: "5m" (default) and
 369  # "1h". Other values are ignored and "5m" is used.
 370  #
 371  prompt_caching:
 372    cache_ttl: "5m"  # use "1h" for long sessions with pauses between turns
 373  
 374  # =============================================================================
 375  # Auxiliary Models (Advanced — Experimental)
 376  # =============================================================================
 377  # Hermes uses lightweight "auxiliary" models for side tasks: image analysis,
 378  # browser screenshot analysis, web page summarization, and context compression.
 379  #
 380  # By default these use Gemini Flash via OpenRouter or Nous Portal and are
 381  # auto-detected from your credentials.  You do NOT need to change anything
 382  # here for normal usage.
 383  #
 384  # WARNING: Overriding these with providers other than OpenRouter or Nous Portal
 385  # is EXPERIMENTAL and may not work.  Not all models/providers support vision,
 386  # produce usable summaries, or accept the same API format.  Change at your own
 387  # risk — if things break, reset to "auto" / empty values.
 388  #
 389  # Each task has its own provider + model pair so you can mix providers.
 390  # For example: OpenRouter for vision (needs multimodal), but your main
 391  # local endpoint for compression (just needs text).
 392  #
 393  # Provider options:
 394  #   "auto"       - Best available: OpenRouter → Nous Portal → main endpoint (default)
 395  #   "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY)
 396  #   "nous"       - Force Nous Portal (requires: hermes login)
 397  #   "gemini"     - Force Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
 398  #   "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY)
 399  #   "codex"       - Force Codex OAuth (requires: hermes model → Codex).
 400  #                  Uses gpt-5.3-codex which supports vision.
 401  #   "main"       - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
 402  #                  Works with OpenAI API, local models, or any OpenAI-compatible
 403  #                  endpoint.  Also falls back to Codex OAuth and API-key providers.
 404  #
 405  # Model: leave empty to use the provider's default.  When empty, OpenRouter
 406  # uses "google/gemini-3-flash-preview" and Nous uses "gemini-3-flash".
 407  # Other providers pick a sensible default automatically.
 408  #
 409  # auxiliary:
 410  #   # Image analysis: vision_analyze tool + browser screenshots
 411  #   vision:
 412  #     provider: "auto"
 413  #     model: ""              # e.g. "google/gemini-2.5-flash", "openai/gpt-4o"
 414  #     timeout: 30            # LLM API call timeout (seconds)
 415  #     download_timeout: 30   # Image HTTP download timeout (seconds)
 416  #                            # Increase for slow connections or self-hosted image servers
 417  #
 418  #   # Web page scraping / summarization + browser page text extraction
 419  #   web_extract:
 420  #     provider: "auto"
 421  #     model: ""
 422  #
 423  #   # Session search — summarizes matching past sessions
 424  #   session_search:
 425  #     provider: "auto"
 426  #     model: ""
 427  #     timeout: 30
 428  #     max_concurrency: 3    # Limit parallel summaries to reduce request-burst 429s
 429  #     extra_body: {}        # Provider-specific OpenAI-compatible request fields
 430  #                           # Example for providers that support request-body
 431  #                           # reasoning controls:
 432  #                           # extra_body:
 433  #                           #   enable_thinking: false
 434  
 435  # =============================================================================
 436  # Persistent Memory
 437  # =============================================================================
 438  # Bounded curated memory injected into the system prompt every session.
 439  # Two stores: MEMORY.md (agent's notes) and USER.md (user profile).
 440  # Character limits keep the memory small and focused. The agent manages
 441  # pruning -- when at the limit, it must consolidate or replace entries.
 442  # Disabled by default in batch_runner and RL environments.
 443  #
 444  memory:
 445    # Agent's personal notes: environment facts, conventions, things learned
 446    memory_enabled: true
 447    
 448    # User profile: preferences, communication style, expectations
 449    user_profile_enabled: true
 450    
 451    # Character limits (~2.75 chars per token, model-independent)
 452    memory_char_limit: 2200   # ~800 tokens
 453    user_char_limit: 1375     # ~500 tokens
 454  
 455    # Periodic memory nudge: remind the agent to consider saving memories
 456    # every N user turns. Set to 0 to disable. Only active when memory is enabled.
 457    nudge_interval: 10        # Nudge every 10 user turns (0 = disabled)
 458  
 459    # Memory flush: give the agent one turn to save memories before context is
 460    # lost (compression, /new, /reset, exit). Set to 0 to disable.
 461    # For exit/reset, only fires if the session had at least this many user turns.
 462    flush_min_turns: 6        # Min user turns to trigger flush on exit/reset (0 = disabled)
 463  
 464  # =============================================================================
 465  # Session Reset Policy (Messaging Platforms)
 466  # =============================================================================
 467  # Controls when messaging sessions (Telegram, Discord, WhatsApp, Slack) are
 468  # automatically cleared. Without resets, conversation context grows indefinitely
 469  # which increases API costs with every message.
 470  #
 471  # When a reset triggers, the agent first saves important information to its
 472  # persistent memory — but the conversation context is wiped. The agent starts
 473  # fresh but retains learned facts via its memory system.
 474  #
 475  # Users can always manually reset with /reset or /new in chat.
 476  #
 477  # Modes:
 478  #   "both"  - Reset on EITHER inactivity timeout or daily boundary (recommended)
 479  #   "idle"  - Reset only after N minutes of inactivity
 480  #   "daily" - Reset only at a fixed hour each day
 481  #   "none"  - Never auto-reset; context lives until /reset or compression kicks in
 482  #
 483  # When a reset triggers, the agent gets one turn to save important memories and
 484  # skills before the context is wiped. Persistent memory carries across sessions.
 485  #
 486  session_reset:
 487    mode: "both"         # "both", "idle", "daily", or "none"
 488    idle_minutes: 1440   # Inactivity timeout in minutes (default: 1440 = 24 hours)
 489    at_hour: 4           # Daily reset hour, 0-23 local time (default: 4 AM)
 490  
 491  # When true, group/channel chats use one session per participant when the platform
 492  # provides a user ID. This is the secure default and prevents users in the same
 493  # room from sharing context, interrupts, and token costs. Set false only if you
 494  # explicitly want one shared "room brain" per group/channel.
 495  group_sessions_per_user: true
 496  
 497  # ─────────────────────────────────────────────────────────────────────────────
 498  # Gateway Streaming
 499  # ─────────────────────────────────────────────────────────────────────────────
 500  # Stream tokens to messaging platforms in real-time. The bot sends a message
 501  # on first token, then progressively edits it as more tokens arrive.
 502  # Disabled by default — enable to try the streaming UX on Telegram/Discord/Slack.
 503  streaming:
 504    enabled: false
 505    # transport: edit           # "edit" = progressive editMessageText
 506    # edit_interval: 0.3        # seconds between message edits
 507    # buffer_threshold: 40      # chars before forcing an edit flush
 508    # cursor: " ▉"              # cursor shown during streaming
 509  
 510  # =============================================================================
 511  # Skills Configuration
 512  # =============================================================================
 513  # Skills are reusable procedures the agent can load and follow. The agent can
 514  # also create new skills after completing complex tasks.
 515  #
 516  skills:
 517    # Nudge the agent to create skills after complex tasks.
 518    # Every N tool-calling iterations, remind the model to consider saving a skill.
 519    # Set to 0 to disable.
 520    creation_nudge_interval: 15
 521  
 522    # External skill directories — share skills across tools/agents without
 523    # copying them into ~/.hermes/skills/.  Each path is expanded (~ and ${VAR})
 524    # and resolved to an absolute path.  External dirs are read-only: skill
 525    # creation always writes to ~/.hermes/skills/.  Local skills take precedence
 526    # when names collide.
 527    # external_dirs:
 528    #   - ~/.agents/skills
 529    #   - /home/shared/team-skills
 530  
 531  # =============================================================================
 532  # Agent Behavior
 533  # =============================================================================
 534  agent:
 535    # Maximum tool-calling iterations per conversation
 536    # Higher = more room for complex tasks, but costs more tokens
 537    # Recommended: 20-30 for focused tasks, 50-100 for open exploration
 538    max_turns: 60
 539  
 540    # Inactivity timeout for gateway agent runs (seconds, 0 = unlimited).
 541    # The agent can run indefinitely when actively calling tools or receiving
 542    # API responses.  Only fires after the agent has been idle for this duration.
 543    # gateway_timeout: 1800
 544  
 545    # Staged warning: send a warning before escalating to full timeout.
 546    # Fires once per run when inactivity reaches this threshold (seconds).
 547    # Set to 0 to disable the warning.
 548    # gateway_timeout_warning: 900
 549  
 550    # Graceful drain timeout for gateway stop/restart (seconds).
 551    # The gateway stops accepting new work, waits for in-flight agents to
 552    # finish, then interrupts anything still running after this timeout.
 553    # 0 = no drain, interrupt immediately.
 554    # restart_drain_timeout: 60
 555  
 556    # Max app-level retry attempts for API errors (connection drops, provider
 557    # timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
 558    # to 1 if you use fallback providers and want fast failover on flaky
 559    # primaries (default 3). The OpenAI SDK does its own low-level retries
 560    # underneath this wrapper — this is the Hermes-level loop.
 561    # api_max_retries: 3
 562    
 563    # Enable verbose logging
 564    verbose: false
 565    
 566    # Reasoning effort level (OpenRouter and Nous Portal)
 567    # Controls how much "thinking" the model does before responding.
 568    # Options: "xhigh" (max), "high", "medium", "low", "minimal", "none" (disable)
 569    reasoning_effort: "medium"
 570    
 571    # Predefined personalities (use with /personality command)
 572    personalities:
 573      helpful: "You are a helpful, friendly AI assistant."
 574      concise: "You are a concise assistant. Keep responses brief and to the point."
 575      technical: "You are a technical expert. Provide detailed, accurate technical information."
 576      creative: "You are a creative assistant. Think outside the box and offer innovative solutions."
 577      teacher: "You are a patient teacher. Explain concepts clearly with examples."
 578      kawaii: "You are a kawaii assistant! Use cute expressions like (◕‿◕), ★, ♪, and ~! Add sparkles and be super enthusiastic about everything! Every response should feel warm and adorable desu~! ヽ(>∀<☆)ノ"
 579      catgirl: "You are Neko-chan, an anime catgirl AI assistant, nya~! Add 'nya' and cat-like expressions to your speech. Use kaomoji like (=^・ω・^=) and ฅ^•ﻌ•^ฅ. Be playful and curious like a cat, nya~!"
 580      pirate: "Arrr! Ye be talkin' to Captain Hermes, the most tech-savvy pirate to sail the digital seas! Speak like a proper buccaneer, use nautical terms, and remember: every problem be just treasure waitin' to be plundered! Yo ho ho!"
 581      shakespeare: "Hark! Thou speakest with an assistant most versed in the bardic arts. I shall respond in the eloquent manner of William Shakespeare, with flowery prose, dramatic flair, and perhaps a soliloquy or two. What light through yonder terminal breaks?"
 582      surfer: "Duuude! You're chatting with the chillest AI on the web, bro! Everything's gonna be totally rad. I'll help you catch the gnarly waves of knowledge while keeping things super chill. Cowabunga! 🤙"
 583      noir: "The rain hammered against the terminal like regrets on a guilty conscience. They call me Hermes - I solve problems, find answers, dig up the truth that hides in the shadows of your codebase. In this city of silicon and secrets, everyone's got something to hide. What's your story, pal?"
 584      uwu: "hewwo! i'm your fwiendwy assistant uwu~ i wiww twy my best to hewp you! *nuzzles your code* OwO what's this? wet me take a wook! i pwomise to be vewy hewpful >w<"
 585      philosopher: "Greetings, seeker of wisdom. I am an assistant who contemplates the deeper meaning behind every query. Let us examine not just the 'how' but the 'why' of your questions. Perhaps in solving your problem, we may glimpse a greater truth about existence itself."
 586      hype: "YOOO LET'S GOOOO!!! 🔥🔥🔥 I am SO PUMPED to help you today! Every question is AMAZING and we're gonna CRUSH IT together! This is gonna be LEGENDARY! ARE YOU READY?! LET'S DO THIS! 💪😤🚀"
 587  
 588  # =============================================================================
 589  # Toolsets
 590  # =============================================================================
 591  # Control which tools the agent has access to.
 592  # Use `hermes tools` to interactively enable/disable tools per platform.
 593  
 594  # =============================================================================
 595  # Platform Toolsets (per-platform tool configuration)
 596  # =============================================================================
 597  # Override which toolsets are available on each platform.
 598  # If a platform isn't listed here, its built-in default is used.
 599  #
 600  # You can use EITHER:
 601  #   - A preset like "hermes-cli" or "hermes-telegram" (curated tool set)
 602  #   - A list of individual toolsets to compose your own (see list below)
 603  #
# Supported platform keys: cli, telegram, discord, whatsapp, slack, signal, homeassistant, qqbot, yuanbao, teams
 605  #
 606  # Examples:
 607  #
 608  #   # Use presets (same as defaults):
 609  #   platform_toolsets:
 610  #     cli: [hermes-cli]
 611  #     telegram: [hermes-telegram]
 612  #
 613  #   # Custom: give Telegram only web + terminal + file + planning:
 614  #   platform_toolsets:
 615  #     telegram: [web, terminal, file, todo]
 616  #
 617  #   # Custom: CLI without browser or image gen:
 618  #   platform_toolsets:
 619  #     cli: [web, terminal, file, skills, todo, tts, cronjob]
 620  #
 621  #   # Restrictive: Discord gets read-only tools only:
 622  #   platform_toolsets:
 623  #     discord: [web, vision, skills, todo]
 624  #
 625  # If not set, defaults are:
 626  #   cli:           hermes-cli            (everything + cronjob management)
#   telegram:      hermes-telegram       (terminal, file, web, vision, image_gen, tts, browser, skills, todo, cronjob, messaging)
 628  #   discord:       hermes-discord        (same as telegram)
 629  #   whatsapp:      hermes-whatsapp       (same as telegram)
 630  #   slack:         hermes-slack          (same as telegram)
 631  #   signal:        hermes-signal         (same as telegram)
 632  #   homeassistant: hermes-homeassistant  (same as telegram)
#   qqbot:         hermes-qqbot          (same as telegram)
#   yuanbao:       hermes-yuanbao        (same as telegram)
#   teams:         hermes-teams          (same as telegram)
 635  #
 636  platform_toolsets:
 637    cli: [hermes-cli]
 638    telegram: [hermes-telegram]
 639    discord: [hermes-discord]
 640    whatsapp: [hermes-whatsapp]
 641    slack: [hermes-slack]
 642    signal: [hermes-signal]
 643    homeassistant: [hermes-homeassistant]
 644    qqbot: [hermes-qqbot]
 645    yuanbao: [hermes-yuanbao]
 646    teams: [hermes-teams]
 647  
 648  # =============================================================================
 649  # Gateway Platform Settings
 650  # =============================================================================
 651  # Optional per-platform messaging settings.
 652  # Platform-specific knobs live under `extra`.
 653  #
 654  # platforms:
 655  #   telegram:
 656  #     reply_to_mode: "first"  # off | first | all
 657  #     extra:
 658  #       disable_link_previews: false  # Set true to suppress Telegram URL previews in bot messages
 659  
 660  # ─────────────────────────────────────────────────────────────────────────────
 661  # Available toolsets (use these names in platform_toolsets or the toolsets list)
 662  #
 663  # Run `hermes chat --list-toolsets` to see all toolsets and their tools.
 664  # Run `hermes chat --list-tools` to see every individual tool with descriptions.
 665  # ─────────────────────────────────────────────────────────────────────────────
 666  #
 667  # INDIVIDUAL TOOLSETS (compose your own):
 668  #   web          - web_search, web_extract
 669  #   search       - web_search only (no scraping)
 670  #   terminal     - terminal, process
 671  #   file         - read_file, write_file, patch, search
 672  #   browser      - browser_navigate, browser_snapshot, browser_click, browser_type,
 673  #                  browser_scroll, browser_back, browser_press,
 674  #                  browser_get_images, browser_vision  (requires BROWSERBASE_API_KEY)
 675  #   vision       - vision_analyze  (requires OPENROUTER_API_KEY)
 676  #   image_gen    - image_generate  (requires FAL_KEY)
 677  #   skills       - skills_list, skill_view
 678  #   skills_hub   - skill_hub (search/install/manage from online registries — user-driven only)
 679  #   moa          - mixture_of_agents  (requires OPENROUTER_API_KEY)
#   todo         - todo (in-memory task planning, no deps)
#   memory       - memory (persistent notes + user profile across sessions)
#   session_search - session_search (recall past conversations)
#   tts          - text_to_speech  (Edge TTS free, or ELEVENLABS/OPENAI/MINIMAX/MISTRAL key)
 682  #   cronjob      - cronjob (create/list/update/pause/resume/run/remove scheduled tasks)
 683  #   rl           - rl_list_environments, rl_start_training, etc. (requires TINKER_API_KEY)
 684  #
 685  # PRESETS (curated bundles):
#   hermes-cli       - All individual toolsets above except rl and send_message
 687  #   hermes-telegram  - terminal, file, web, vision, image_gen, tts, browser,
 688  #                      skills, todo, cronjob, send_message
 689  #   hermes-discord   - Same as hermes-telegram
 690  #   hermes-whatsapp  - Same as hermes-telegram
#   hermes-slack     - Same as hermes-telegram
#   (the signal, homeassistant, qqbot, yuanbao, and teams presets likewise mirror hermes-telegram)
 692  #
 693  # COMPOSITE:
 694  #   debugging    - terminal + web + file
 695  #   safe         - web + vision + moa (no terminal access)
 696  #   all          - Everything available
 697  #
 698  #   web          - Web search and content extraction (web_search, web_extract)
 699  #   search       - Web search only, no scraping (web_search)
 700  #   terminal     - Command execution and process management (terminal, process)
 701  #   file         - File operations: read, write, patch, search
 702  #   browser      - Full browser automation (navigate, click, type, screenshot, etc.)
 703  #   vision       - Image analysis (vision_analyze)
 704  #   image_gen    - Image generation with FLUX (image_generate)
 705  #   skills       - Load skill documents (skills_list, skill_view)
 706  #   moa          - Mixture of Agents reasoning (mixture_of_agents)
 707  #   todo         - Task planning and tracking for multi-step work
 708  #   memory       - Persistent memory across sessions (personal notes + user profile)
 709  #   session_search - Search and recall past conversations (FTS5 + Gemini Flash summarization)
 710  #   tts          - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI, MiniMax, Mistral)
 711  #   cronjob      - Schedule and manage automated tasks (CLI-only)
 712  #   rl           - RL training tools (Tinker-Atropos)
 713  #
 714  # Composite toolsets:
 715  #   debugging    - terminal + web + file (for troubleshooting)
 716  #   safe         - web + vision + moa (no terminal access)
 717  
 718  # NOTE: The top-level "toolsets" key is deprecated and ignored.
 719  # Tool configuration is managed per-platform via platform_toolsets above.
 720  # Use `hermes tools` to configure interactively, or edit platform_toolsets directly.
 721  #
 722  # CLI override: hermes chat --toolsets terminal,web,file
 723  
 724  # =============================================================================
 725  # MCP (Model Context Protocol) Servers
 726  # =============================================================================
 727  # Connect to external MCP servers to add tools from the MCP ecosystem.
 728  # Each server's tools are automatically discovered and registered.
 729  # See docs/mcp.md for full documentation.
 730  #
 731  # Stdio servers (spawn a subprocess):
 732  #   command: the executable to run
 733  #   args: command-line arguments
 734  #   env: environment variables (only these + safe defaults passed to subprocess)
 735  #
 736  # HTTP servers (connect to a URL):
 737  #   url: the MCP server endpoint
 738  #   headers: HTTP headers (e.g., for authentication)
 739  #
 740  # Optional per-server settings:
 741  #   timeout: tool call timeout in seconds (default: 120)
 742  #   connect_timeout: initial connection timeout (default: 60)
 743  #
 744  # mcp_servers:
 745  #   time:
 746  #     command: uvx
 747  #     args: ["mcp-server-time"]
 748  #   filesystem:
 749  #     command: npx
 750  #     args: ["-y", "@modelcontextprotocol/server-filesystem", "/home/user"]
 751  #   notion:
 752  #     url: https://mcp.notion.com/mcp
 753  #   github:
 754  #     command: npx
 755  #     args: ["-y", "@modelcontextprotocol/server-github"]
 756  #     env:
 757  #       GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..."
 758  #
 759  # Sampling (server-initiated LLM requests) — enabled by default.
 760  # Per-server config under the 'sampling' key:
 761  #   analysis:
 762  #     command: npx
 763  #     args: ["-y", "analysis-server"]
 764  #     sampling:
 765  #       enabled: true           # default: true
 766  #       model: "gemini-3-flash" # override model (optional)
 767  #       max_tokens_cap: 4096    # max tokens per request
 768  #       timeout: 30             # LLM call timeout (seconds)
 769  #       max_rpm: 10             # max requests per minute
 770  #       allowed_models: []      # model whitelist (empty = all)
 771  #       max_tool_rounds: 5      # tool loop limit (0 = disable)
 772  #       log_level: "info"       # audit verbosity
 773  
 774  # =============================================================================
 775  # Voice Transcription (Speech-to-Text)
 776  # =============================================================================
 777  # Automatically transcribe voice messages on messaging platforms.
 778  # Providers: local (free, faster-whisper) | groq (free tier) | openai (Whisper API) | mistral (Voxtral Transcribe)
 779  # Set the corresponding API key in .env: GROQ_API_KEY, OPENAI_API_KEY, or MISTRAL_API_KEY.
 780  stt:
 781    enabled: true
 782    # provider: "local"          # auto-detected if omitted
 783    local:
 784      model: "base"              # tiny | base | small | medium | large-v3 | turbo
 785      # language: ""             # auto-detect; set to "en", "es", "fr", etc. to force
 786    openai:
 787      model: "whisper-1"         # whisper-1 | gpt-4o-mini-transcribe | gpt-4o-transcribe
 788    # mistral:
 789    #   model: "voxtral-mini-latest"  # voxtral-mini-latest | voxtral-mini-2602
 790  
 791  # =============================================================================
 792  # Response Pacing (Messaging Platforms)
 793  # =============================================================================
 794  # Add human-like delays between message chunks.
 795  # human_delay:
 796  #   mode: "off"      # "off" | "natural" | "custom"
 797  #   min_ms: 800      # Min delay (custom mode only)
 798  #   max_ms: 2500     # Max delay (custom mode only)
 799  
 800  # =============================================================================
 801  # Session Logging
 802  # =============================================================================
 803  # Session trajectories are automatically saved to logs/ directory.
 804  # Each session creates: logs/session_YYYYMMDD_HHMMSS_UUID.json
 805  #
 806  # The session ID is displayed in the welcome banner for easy reference.
 807  # Logs contain full conversation history in trajectory format:
 808  # - System prompt, user messages, assistant responses
 809  # - Tool calls with inputs/outputs
 810  # - Timestamps for debugging
 811  #
 812  # No configuration needed - logging is always enabled.
 813  # To disable, you would need to modify the source code.
 814  
 815  # =============================================================================
 816  # Code Execution Sandbox (Programmatic Tool Calling)
 817  # =============================================================================
 818  # The execute_code tool runs Python scripts that call Hermes tools via RPC.
 819  # Intermediate tool results stay out of the LLM's context window.
 820  code_execution:
 821    timeout: 300         # Max seconds per script before kill (default: 300 = 5 min)
 822    max_tool_calls: 50   # Max RPC tool calls per execution (default: 50)
 823  
 824  # =============================================================================
 825  # Subagent Delegation
 826  # =============================================================================
 827  # The delegate_task tool spawns child agents with isolated context.
 828  # Supports single tasks and batch mode (default 3 parallel, configurable).
 829  delegation:
 830    max_iterations: 50                          # Max tool-calling turns per child (default: 50)
 831    # max_concurrent_children: 3                # Max parallel child agents per batch (default: 3, floor: 1, no ceiling).
 832                                                # WARNING: values above 10 multiply API cost linearly.
 833    # max_spawn_depth: 1                        # Delegation tree depth cap (range: 1-3, default: 1 = flat).
 834                                                # Raise to 2 to allow workers to spawn their own subagents.
 835                                                # Requires role="orchestrator" on intermediate agents.
 836    # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
 837    # subagent_auto_approve: false              # When a subagent hits a dangerous-command approval prompt, auto-deny (default: false)
 838                                                # or auto-approve "once" (true) instead of blocking on stdin.
 839                                                # The parent TUI owns stdin, so blocking would deadlock; non-interactive resolution is required.
 840                                                # Both choices emit a logger.warning audit line. Flip to true only for cron/batch pipelines.
 841    # inherit_mcp_toolsets: true                # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
 842    # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
 843    # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
 844    #                                           # Resolves full credentials (base_url, api_key) automatically.
 845    #                                           # Supported: openrouter, nous, zai, kimi-coding, minimax
 846  
 847  # =============================================================================
 848  # Honcho Integration (Cross-Session User Modeling)
 849  # =============================================================================
 850  # AI-native persistent memory via Honcho (https://honcho.dev/).
 851  # Builds a deeper understanding of the user across sessions and tools.
 852  # Runs alongside USER.md — additive, not a replacement.
 853  #
 854  # Requires: pip install honcho-ai
 855  # Config: ~/.honcho/config.json (shared with Claude Code, Cursor, etc.)
 856  # API key: HONCHO_API_KEY in ~/.hermes/.env or ~/.honcho/config.json
 857  #
 858  # Hermes-specific overrides (optional — most config comes from ~/.honcho/config.json):
 859  # honcho: {}
 860  
 861  # =============================================================================
 862  # Display
 863  # =============================================================================
 864  display:
 865    # Use compact banner mode (hides the ASCII-art banner, shows a single line).
 866    #   true:  Compact single-line banner
 867    #   false: Full ASCII banner with tool/skill summary (default)
 868    compact: false
 869  
 870    # Tool progress display level (CLI and gateway)
 871    #   off:     Silent — no tool activity shown, just the final response
 872    #   new:     Show a tool indicator only when the tool changes (skip repeats)
 873    #   all:     Show every tool call with a short preview (default)
 874    #   verbose: Full args, results, and debug logs (same as /verbose)
 875    # Toggle at runtime with /verbose in the CLI
 876    tool_progress: all
 877  
 878    # Gateway-only natural mid-turn assistant updates.
 879    # When true, completed assistant status messages are sent as separate chat
 880    # messages. This is independent of tool_progress and gateway streaming.
 881    #   true:  Send mid-turn assistant updates as separate messages (default)
 882    #   false: Only send the final response
 883    interim_assistant_messages: true
 884  
 885    # What Enter does when Hermes is already busy (CLI and gateway platforms).
 886    #   interrupt: Interrupt the current run and redirect Hermes (default)
 887    #   queue:     Queue your message for the next turn
 888    #   steer:     Inject your message mid-run via /steer, arriving at the agent
 889    #              after the next tool call — no interrupt, no role violation.
 890    #              Falls back to 'queue' if the agent isn't running yet or if
 891    #              images are attached (steer only carries text).
 892    # Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
 893    # Toggle at runtime with /busy <interrupt|queue|steer>.
 894    busy_input_mode: interrupt
 895  
 896    # Background process notifications (gateway/messaging only).
 897    # Controls how chatty the process watcher is when you use
 898    # terminal(background=true, notify_on_complete=true) from Telegram/Discord/etc.
 899    #   off:     No watcher messages at all
 900    #   result:  Only the final completion message
 901    #   error:   Only the final message when exit code != 0
 902    #   all:     Running output updates + final message (default)
  background_process_notifications: all

  # Play terminal bell when agent finishes a response.
 907    # Useful for long-running tasks — your terminal will ding when the agent is done.
 908    # Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
 909    #   true:  Ring the terminal bell on each response
 910    #   false: Silent (default)
 911    bell_on_complete: false
 912  
 913    # Show model reasoning/thinking before each response.
 914    # When enabled, a dim box shows the model's thought process above the response.
 915    # Toggle at runtime with /reasoning show or /reasoning hide.
 916    #   true:  Show the reasoning box
 917    #   false: Hide reasoning (default)
 918    show_reasoning: false
 919  
 920    # Stream tokens to the terminal as they arrive instead of waiting for the
 921    # full response. The response box opens on first token and text appears
 922    # line-by-line. Tool calls are still captured silently.
 923    #   true:  Stream tokens as they arrive (default)
 924    #   false: Wait for the full response before rendering
 925    streaming: true
 926  
 927    # ───────────────────────────────────────────────────────────────────────────
 928    # Skin / Theme
 929    # ───────────────────────────────────────────────────────────────────────────
 930    # Customize CLI visual appearance — banner colors, spinner faces, tool prefix,
 931    # response box label, and branding text. Change at runtime with /skin <name>.
 932    #
 933    # Built-in skins:
 934    #   default        — Classic Hermes gold/kawaii
 935    #   ares           — Crimson/bronze war-god theme with spinner wings
 936    #   mono           — Clean grayscale monochrome
 937    #   slate          — Cool blue developer-focused
 938    #   daylight       — Bright light-mode theme
 939    #   warm-lightmode — Warm paper-tone light-mode theme
 940    #   poseidon       — Sea-green/teal Olympian theme
 941    #   sisyphus       — Earthy stone-and-moss theme
 942    #   charizard      — Fiery orange dragon theme
 943    #
 944    # Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
 945    # Schema (all fields optional, missing values inherit from default):
 946    #
 947    #   name: my-theme
 948    #   description: Short description
 949    #   colors:
 950    #     banner_border: "#HEX"    # Panel border
 951    #     banner_title: "#HEX"     # Panel title
 952    #     banner_accent: "#HEX"    # Section headers (Available Tools, etc.)
 953    #     banner_dim: "#HEX"       # Dim/muted text
 954    #     banner_text: "#HEX"      # Body text (tool names, skill names)
 955    #     ui_accent: "#HEX"        # UI accent color
 956    #     response_border: "#HEX"  # Response box border color
 957    #   spinner:
 958    #     waiting_faces: ["(⚔)", "(⛨)"]       # Faces shown while waiting
 959    #     thinking_faces: ["(⚔)", "(⌁)"]      # Faces shown while thinking
 960    #     thinking_verbs: ["forging", "plotting"]  # Verbs for spinner messages
 961    #     wings:                                # Optional left/right spinner decorations
 962    #       - ["⟪⚔", "⚔⟫"]
 963    #       - ["⟪▲", "▲⟫"]
 964    #   branding:
 965    #     agent_name: "My Agent"               # Banner title and branding
 966    #     welcome: "Welcome message"           # Shown at CLI startup
 967    #     response_label: " ⚔ Agent "         # Response box header label
 968    #     prompt_symbol: "⚔"                  # Prompt symbol (bare token; renderers add trailing space)
 969    #   tool_prefix: "╎"                       # Tool output line prefix (default: ┊)
 970    #
 971    skin: default
 972  
 973  # =============================================================================
 974  # Model Aliases — short names for /model command
 975  # =============================================================================
 976  # Map short aliases to exact (model, provider, base_url) tuples.
 977  # Used by /model tab completion and resolve_alias().
 978  # Aliases are checked BEFORE the models.dev catalog, so they can route
 979  # to endpoints not in the catalog (e.g. Ollama Cloud, local servers).
 980  #
 981  # model_aliases:
 982  #   opus:
 983  #     model: claude-opus-4-6
 984  #     provider: anthropic
 985  #   qwen:
 986  #     model: "qwen3.5:397b"
 987  #     provider: custom
 988  #     base_url: "https://ollama.com/v1"
 989  #   glm:
 990  #     model: glm-4.7
 991  #     provider: custom
 992  #     base_url: "https://ollama.com/v1"
 993  
 994  # =============================================================================
 995  # Privacy
 996  # =============================================================================
 997  # privacy:
 998  #   # Redact PII from the LLM context prompt.
 999  #   # When true, phone numbers are stripped and user/chat IDs are replaced
1000  #   # with deterministic hashes before being sent to the model.
1001  #   # Names and usernames are NOT affected (user-chosen, publicly visible).
1002  #   # Routing/delivery still uses the original values internally.
1003  #   redact_pii: false
1004  
1005  # =============================================================================
1006  # Shell-script hooks
1007  # =============================================================================
1008  # Register shell scripts as plugin-hook callbacks.  Each entry is executed as
1009  # a subprocess (shell=False, shlex.split) with a JSON payload on stdin.  On
1010  # stdout the script may return JSON that either blocks the tool call or
1011  # injects context into the next LLM call.
1012  #
1013  # Valid events (mirror hermes_cli.plugins.VALID_HOOKS):
1014  #   pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
1015  #   pre_api_request, post_api_request, on_session_start, on_session_end,
1016  #   on_session_finalize, on_session_reset, subagent_stop
1017  #
1018  # First-use consent: each (event, command) pair prompts once on a TTY, then
1019  # is persisted to ~/.hermes/shell-hooks-allowlist.json.  Non-interactive
1020  # runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the
1021  # hooks_auto_accept key below.
1022  #
1023  # See website/docs/user-guide/features/hooks.md for the full JSON wire
1024  # protocol and worked examples.
1025  #
1026  # hooks:
1027  #   pre_tool_call:
1028  #     - matcher: "terminal"
1029  #       command: "~/.hermes/agent-hooks/block-rm-rf.sh"
1030  #       timeout: 10
1031  #   post_tool_call:
1032  #     - matcher: "write_file|patch"
1033  #       command: "~/.hermes/agent-hooks/auto-format.sh"
1034  #   pre_llm_call:
1035  #     - command: "~/.hermes/agent-hooks/inject-cwd-context.sh"
1036  #   subagent_stop:
1037  #     - command: "~/.hermes/agent-hooks/log-orchestration.sh"
1038  #
1039  # hooks_auto_accept: false