/ hermes_logging.py
hermes_logging.py
  1  """Centralized logging setup for Hermes Agent.
  2  
  3  Provides a single ``setup_logging()`` entry point that both the CLI and
  4  gateway call early in their startup path.  All log files live under
  5  ``~/.hermes/logs/`` (profile-aware via ``get_hermes_home()``).
  6  
  7  Log files produced:
  8      agent.log   — INFO+, all agent/tool/session activity (the main log)
  9      errors.log  — WARNING+, errors and warnings only (quick triage)
 10      gateway.log — INFO+, gateway-only events (created when mode="gateway")
 11  
 12  All files use ``RotatingFileHandler`` with ``RedactingFormatter`` so
 13  secrets are never written to disk.
 14  
 15  Component separation:
 16      gateway.log only receives records from ``gateway.*`` loggers —
 17      platform adapters, session management, slash commands, delivery.
 18      agent.log remains the catch-all (everything goes there).
 19  
 20  Session context:
 21      Call ``set_session_context(session_id)`` at the start of a conversation
 22      and ``clear_session_context()`` when done.  All log lines emitted on
 23      that thread will include ``[session_id]`` for filtering/correlation.
 24  """
 25  
 26  import logging
 27  import os
 28  import threading
 29  from logging.handlers import RotatingFileHandler
 30  from pathlib import Path
 31  from typing import Optional, Sequence
 32  
 33  from hermes_constants import get_config_path, get_hermes_home
 34  
 35  # Sentinel to track whether setup_logging() has already run.  The function
 36  # is idempotent — calling it twice is safe but the second call is a no-op
 37  # unless ``force=True``.
 38  _logging_initialized = False
 39  
 40  # Thread-local storage for per-conversation session context.
 41  _session_context = threading.local()
 42  
 43  # Default log format — includes timestamp, level, optional session tag,
 44  # logger name, and message.  The ``%(session_tag)s`` field is guaranteed to
 45  # exist on every LogRecord via _install_session_record_factory() below.
 46  _LOG_FORMAT = "%(asctime)s %(levelname)s%(session_tag)s %(name)s: %(message)s"
 47  _LOG_FORMAT_VERBOSE = "%(asctime)s - %(name)s - %(levelname)s%(session_tag)s - %(message)s"
 48  
 49  # Third-party loggers that are noisy at DEBUG/INFO level.
 50  _NOISY_LOGGERS = (
 51      "openai",
 52      "openai._base_client",
 53      "httpx",
 54      "httpcore",
 55      "asyncio",
 56      "hpack",
 57      "hpack.hpack",
 58      "grpc",
 59      "modal",
 60      "urllib3",
 61      "urllib3.connectionpool",
 62      "websockets",
 63      "charset_normalizer",
 64      "markdown_it",
 65  )
 66  
 67  
 68  # ---------------------------------------------------------------------------
 69  # Public session context API
 70  # ---------------------------------------------------------------------------
 71  
 72  def set_session_context(session_id: str) -> None:
 73      """Set the session ID for the current thread.
 74  
 75      All subsequent log records on this thread will include ``[session_id]``
 76      in the formatted output.  Call at the start of ``run_conversation()``.
 77      """
 78      _session_context.session_id = session_id
 79  
 80  
 81  def clear_session_context() -> None:
 82      """Clear the session ID for the current thread."""
 83      _session_context.session_id = None
 84  
 85  
 86  # ---------------------------------------------------------------------------
 87  # Record factory — injects session_tag into every LogRecord at creation
 88  # ---------------------------------------------------------------------------
 89  
 90  def _install_session_record_factory() -> None:
 91      """Replace the global LogRecord factory with one that adds ``session_tag``.
 92  
 93      Unlike a ``logging.Filter`` on a handler or logger, the record factory
 94      runs for EVERY record in the process — including records that propagate
 95      from child loggers and records handled by third-party handlers.  This
 96      guarantees ``%(session_tag)s`` is always available in format strings,
 97      eliminating the KeyError that would occur if a handler used our format
 98      without having a ``_SessionFilter`` attached.
 99  
100      Idempotent — checks for a marker attribute to avoid double-wrapping if
101      the module is reloaded.
102      """
103      current_factory = logging.getLogRecordFactory()
104      if getattr(current_factory, "_hermes_session_injector", False):
105          return  # already installed
106  
107      def _session_record_factory(*args, **kwargs):
108          record = current_factory(*args, **kwargs)
109          sid = getattr(_session_context, "session_id", None)
110          record.session_tag = f" [{sid}]" if sid else ""  # type: ignore[attr-defined]
111          return record
112  
113      _session_record_factory._hermes_session_injector = True  # type: ignore[attr-defined]
114      logging.setLogRecordFactory(_session_record_factory)
115  
116  
117  # Install immediately on import — session_tag is available on all records
118  # from this point forward, even before setup_logging() is called.
119  _install_session_record_factory()
120  
121  
122  # ---------------------------------------------------------------------------
123  # Filters
124  # ---------------------------------------------------------------------------
125  
126  class _ComponentFilter(logging.Filter):
127      """Only pass records whose logger name starts with one of *prefixes*.
128  
129      Used to route gateway-specific records to ``gateway.log`` while
130      keeping ``agent.log`` as the catch-all.
131      """
132  
133      def __init__(self, prefixes: Sequence[str]) -> None:
134          super().__init__()
135          self._prefixes = tuple(prefixes)
136  
137      def filter(self, record: logging.LogRecord) -> bool:
138          return record.name.startswith(self._prefixes)
139  
140  
141  # Logger name prefixes that belong to each component.
142  # Used by _ComponentFilter and exposed for ``hermes logs --component``.
143  COMPONENT_PREFIXES = {
144      "gateway": ("gateway",),
145      "agent": ("agent", "run_agent", "model_tools", "batch_runner"),
146      "tools": ("tools",),
147      "cli": ("hermes_cli", "cli"),
148      "cron": ("cron",),
149  }
150  
151  
152  # ---------------------------------------------------------------------------
153  # Main setup
154  # ---------------------------------------------------------------------------
155  
156  def setup_logging(
157      *,
158      hermes_home: Optional[Path] = None,
159      log_level: Optional[str] = None,
160      max_size_mb: Optional[int] = None,
161      backup_count: Optional[int] = None,
162      mode: Optional[str] = None,
163      force: bool = False,
164  ) -> Path:
165      """Configure the Hermes logging subsystem.
166  
167      Safe to call multiple times — the second call is a no-op unless
168      *force* is ``True``.
169  
170      Parameters
171      ----------
172      hermes_home
173          Override for the Hermes home directory.  Falls back to
174          ``get_hermes_home()`` (profile-aware).
175      log_level
176          Minimum level for the ``agent.log`` file handler.  Accepts any
177          standard Python level name (``"DEBUG"``, ``"INFO"``, ``"WARNING"``).
178          Defaults to ``"INFO"`` or the value from config.yaml ``logging.level``.
179      max_size_mb
180          Maximum size of each log file in megabytes before rotation.
181          Defaults to 5 or the value from config.yaml ``logging.max_size_mb``.
182      backup_count
183          Number of rotated backup files to keep.
184          Defaults to 3 or the value from config.yaml ``logging.backup_count``.
185      mode
186          Caller context: ``"cli"``, ``"gateway"``, ``"cron"``.
187          When ``"gateway"``, an additional ``gateway.log`` file is created
188          that receives only gateway-component records.
189      force
190          Re-run setup even if it has already been called.
191  
192      Returns
193      -------
194      Path
195          The ``logs/`` directory where files are written.
196      """
197      global _logging_initialized
198      home = hermes_home or get_hermes_home()
199      log_dir = home / "logs"
200      log_dir.mkdir(parents=True, exist_ok=True)
201  
202      # Read config defaults (best-effort — config may not be loaded yet).
203      cfg_level, cfg_max_size, cfg_backup = _read_logging_config()
204  
205      level_name = (log_level or cfg_level or "INFO").upper()
206      level = getattr(logging, level_name, logging.INFO)
207      max_bytes = (max_size_mb or cfg_max_size or 5) * 1024 * 1024
208      backups = backup_count or cfg_backup or 3
209  
210      # Lazy import to avoid circular dependency at module load time.
211      from agent.redact import RedactingFormatter
212  
213      root = logging.getLogger()
214  
215      # --- agent.log (INFO+) — the main activity log -------------------------
216      _add_rotating_handler(
217          root,
218          log_dir / "agent.log",
219          level=level,
220          max_bytes=max_bytes,
221          backup_count=backups,
222          formatter=RedactingFormatter(_LOG_FORMAT),
223      )
224  
225      # --- errors.log (WARNING+) — quick triage log --------------------------
226      _add_rotating_handler(
227          root,
228          log_dir / "errors.log",
229          level=logging.WARNING,
230          max_bytes=2 * 1024 * 1024,
231          backup_count=2,
232          formatter=RedactingFormatter(_LOG_FORMAT),
233      )
234  
235      # --- gateway.log (INFO+, gateway component only) ------------------------
236      if mode == "gateway":
237          _add_rotating_handler(
238              root,
239              log_dir / "gateway.log",
240              level=logging.INFO,
241              max_bytes=5 * 1024 * 1024,
242              backup_count=3,
243              formatter=RedactingFormatter(_LOG_FORMAT),
244              log_filter=_ComponentFilter(COMPONENT_PREFIXES["gateway"]),
245          )
246  
247      if _logging_initialized and not force:
248          return log_dir
249  
250      # Ensure root logger level is low enough for the handlers to fire.
251      if root.level == logging.NOTSET or root.level > level:
252          root.setLevel(level)
253  
254      # Suppress noisy third-party loggers.
255      for name in _NOISY_LOGGERS:
256          logging.getLogger(name).setLevel(logging.WARNING)
257  
258      _logging_initialized = True
259      return log_dir
260  
261  
262  def setup_verbose_logging() -> None:
263      """Enable DEBUG-level console logging for ``--verbose`` / ``-v`` mode.
264  
265      Called by ``AIAgent.__init__()`` when ``verbose_logging=True``.
266      """
267      from agent.redact import RedactingFormatter
268  
269      root = logging.getLogger()
270  
271      # Avoid adding duplicate stream handlers.
272      for h in root.handlers:
273          if isinstance(h, logging.StreamHandler) and not isinstance(h, RotatingFileHandler):
274              if getattr(h, "_hermes_verbose", False):
275                  return
276  
277      handler = logging.StreamHandler()
278      handler.setLevel(logging.DEBUG)
279      handler.setFormatter(RedactingFormatter(_LOG_FORMAT_VERBOSE, datefmt="%H:%M:%S"))
280      handler._hermes_verbose = True  # type: ignore[attr-defined]
281      root.addHandler(handler)
282  
283      # Lower root logger level so DEBUG records reach all handlers.
284      if root.level > logging.DEBUG:
285          root.setLevel(logging.DEBUG)
286  
287      # Keep third-party libraries at WARNING to reduce noise.
288      for name in _NOISY_LOGGERS:
289          logging.getLogger(name).setLevel(logging.WARNING)
290      # rex-deploy at INFO for sandbox status.
291      logging.getLogger("rex-deploy").setLevel(logging.INFO)
292  
293  
294  # ---------------------------------------------------------------------------
295  # Internal helpers
296  # ---------------------------------------------------------------------------
297  
298  class _ManagedRotatingFileHandler(RotatingFileHandler):
299      """RotatingFileHandler that ensures group-writable perms in managed mode.
300  
301      In managed mode (NixOS), the stateDir uses setgid (2770) so new files
302      inherit the hermes group. However, both _open() (initial creation) and
303      doRollover() create files via open(), which uses the process umask —
304      typically 0022, producing 0644. This subclass applies chmod 0660 after
305      both operations so the gateway and interactive users can share log files.
306      """
307  
308      def __init__(self, *args, **kwargs):
309          from hermes_cli.config import is_managed
310          self._managed = is_managed()
311          super().__init__(*args, **kwargs)
312  
313      def _chmod_if_managed(self):
314          if self._managed:
315              try:
316                  os.chmod(self.baseFilename, 0o660)
317              except OSError:
318                  pass
319  
320      def _open(self):
321          stream = super()._open()
322          self._chmod_if_managed()
323          return stream
324  
325      def doRollover(self):
326          super().doRollover()
327          self._chmod_if_managed()
328  
329  
330  def _add_rotating_handler(
331      logger: logging.Logger,
332      path: Path,
333      *,
334      level: int,
335      max_bytes: int,
336      backup_count: int,
337      formatter: logging.Formatter,
338      log_filter: Optional[logging.Filter] = None,
339  ) -> None:
340      """Add a ``RotatingFileHandler`` to *logger*, skipping if one already
341      exists for the same resolved file path (idempotent).
342  
343      Parameters
344      ----------
345      log_filter
346          Optional filter to attach to the handler (e.g. ``_ComponentFilter``
347          for gateway.log).
348      """
349      resolved = path.resolve()
350      for existing in logger.handlers:
351          if (
352              isinstance(existing, RotatingFileHandler)
353              and Path(getattr(existing, "baseFilename", "")).resolve() == resolved
354          ):
355              return  # already attached
356  
357      path.parent.mkdir(parents=True, exist_ok=True)
358      handler = _ManagedRotatingFileHandler(
359          str(path), maxBytes=max_bytes, backupCount=backup_count,
360          encoding="utf-8",
361      )
362      handler.setLevel(level)
363      handler.setFormatter(formatter)
364      if log_filter is not None:
365          handler.addFilter(log_filter)
366      logger.addHandler(handler)
367  
368  
369  def _read_logging_config():
370      """Best-effort read of ``logging.*`` from config.yaml.
371  
372      Returns ``(level, max_size_mb, backup_count)`` — any may be ``None``.
373      """
374      try:
375          import yaml
376          config_path = get_config_path()
377          if config_path.exists():
378              with open(config_path, "r", encoding="utf-8") as f:
379                  cfg = yaml.safe_load(f) or {}
380              log_cfg = cfg.get("logging", {})
381              if isinstance(log_cfg, dict):
382                  return (
383                      log_cfg.get("level"),
384                      log_cfg.get("max_size_mb"),
385                      log_cfg.get("backup_count"),
386                  )
387      except Exception:
388          pass
389      return (None, None, None)