/ haystack / tracing / tracer.py
tracer.py
  1  # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
  2  #
  3  # SPDX-License-Identifier: Apache-2.0
  4  
  5  import abc
  6  import contextlib
  7  import os
  8  from collections.abc import Iterator
  9  from typing import Any
 10  
 11  from haystack import logging
 12  
# Name of the environment variable consulted by `auto_enable_tracing`; the value `false` turns auto-enabling off.
HAYSTACK_AUTO_TRACE_ENABLED_ENV_VAR = "HAYSTACK_AUTO_TRACE_ENABLED"
# Name of the environment variable consulted by `ProxyTracer.__init__`; the value `true` turns content tracing on.
HAYSTACK_CONTENT_TRACING_ENABLED_ENV_VAR = "HAYSTACK_CONTENT_TRACING_ENABLED"

# Module-level logger used for the tracing status messages emitted below.
logger = logging.getLogger(__name__)
 17  
 18  
 19  class Span(abc.ABC):
 20      """Interface for an instrumented operation."""
 21  
 22      @abc.abstractmethod
 23      def set_tag(self, key: str, value: Any) -> None:
 24          """
 25          Set a single tag on the span.
 26  
 27          Note that the value will be serialized to a string, so it's best to use simple types like strings, numbers, or
 28          booleans.
 29  
 30          :param key: the name of the tag.
 31          :param value: the value of the tag.
 32          """
 33          pass
 34  
 35      def set_tags(self, tags: dict[str, Any]) -> None:
 36          """
 37          Set multiple tags on the span.
 38  
 39          :param tags: a mapping of tag names to tag values.
 40          """
 41          for key, value in tags.items():
 42              self.set_tag(key, value)
 43  
 44      def raw_span(self) -> Any:
 45          """
 46          Provides access to the underlying span object of the tracer.
 47  
 48          Use this if you need full access to the underlying span object.
 49  
 50          :return: The underlying span object.
 51          """
 52          return self
 53  
 54      def set_content_tag(self, key: str, value: Any) -> None:
 55          """
 56          Set a single tag containing content information.
 57  
 58          Content is sensitive information such as
 59          - the content of a query
 60          - the content of a document
 61          - the content of an answer
 62  
 63          By default, this behavior is disabled. To enable it
 64          - set the environment variable `HAYSTACK_CONTENT_TRACING_ENABLED` to `true` or
 65          - override the `set_content_tag` method in a custom tracer implementation.
 66  
 67          :param key: the name of the tag.
 68          :param value: the value of the tag.
 69          """
 70          if tracer.is_content_tracing_enabled:
 71              self.set_tag(key, value)
 72  
 73      def get_correlation_data_for_logs(self) -> dict[str, Any]:
 74          """
 75          Return a dictionary with correlation data for logs.
 76  
 77          This is useful if you want to correlate logs with traces.
 78          """
 79          return {}
 80  
 81  
 82  class Tracer(abc.ABC):
 83      """Interface for instrumenting code by creating and submitting spans."""
 84  
 85      @abc.abstractmethod
 86      @contextlib.contextmanager
 87      def trace(
 88          self, operation_name: str, tags: dict[str, Any] | None = None, parent_span: Span | None = None
 89      ) -> Iterator[Span]:
 90          """
 91          Trace the execution of a block of code.
 92  
 93          :param operation_name: the name of the operation being traced.
 94          :param tags: tags to apply to the newly created span.
 95          :param parent_span: the parent span to use for the newly created span.
 96              If `None`, the newly created span will be a root span.
 97          :return: the newly created span.
 98          """
 99          pass
100  
101      @abc.abstractmethod
102      def current_span(self) -> Span | None:
103          """
104          Returns the currently active span. If no span is active, returns `None`.
105  
106          :return: Currently active span or `None` if no span is active.
107          """
108          pass
109  
110  
class ProxyTracer(Tracer):
    """
    Thin wrapper that holds the tracer actually in use and forwards every call to it.

    Having this indirection makes it easy to
    - swap the actual tracer instance without having to change the global tracer instance
    - implement default behavior for the tracer
    """

    def __init__(self, provided_tracer: Tracer) -> None:
        """
        Create a proxy around `provided_tracer`.

        :param provided_tracer: the tracer that calls are forwarded to.
        """
        self.actual_tracer: Tracer = provided_tracer
        # Content tracing is opt-in: the flag is read from the environment once, when the proxy is constructed.
        content_tracing_setting = os.getenv(HAYSTACK_CONTENT_TRACING_ENABLED_ENV_VAR, "false")
        self.is_content_tracing_enabled = content_tracing_setting.lower() == "true"

    @contextlib.contextmanager
    def trace(
        self, operation_name: str, tags: dict[str, Any] | None = None, parent_span: Span | None = None
    ) -> Iterator[Span]:
        """Open a span on the wrapped tracer and yield it for the duration of the `with` block."""
        delegated_trace = self.actual_tracer.trace(operation_name, tags=tags, parent_span=parent_span)
        with delegated_trace as span:
            yield span

    def current_span(self) -> Span | None:
        """Return whatever the wrapped tracer reports as its current span."""
        return self.actual_tracer.current_span()
136  
137  
class NullSpan(Span):
    """No-op `Span` implementation, used when tracing is disabled."""

    def set_tag(self, key: str, value: Any) -> None:
        """Accept a tag and discard it."""
        return None
144  
145  
class NullTracer(Tracer):
    """No-op `Tracer` implementation, used when tracing is disabled."""

    @contextlib.contextmanager
    def trace(
        self,
        operation_name: str,  # noqa: ARG002
        tags: dict[str, Any] | None = None,  # noqa: ARG002
        parent_span: Span | None = None,  # noqa: ARG002
    ) -> Iterator[Span]:
        """Yield a fresh `NullSpan`; all arguments are ignored."""
        noop_span = NullSpan()
        yield noop_span

    def current_span(self) -> Span | None:
        """Return a new no-op `NullSpan` instance on every call."""
        noop_span = NullSpan()
        return noop_span
162  
163  
# Global tracer instance. It starts out wrapping a `NullTracer`, i.e. tracing is disabled.
# We use the proxy pattern to allow for easy enabling and disabling of tracing without having to change the global
# tracer instance. That's especially convenient if users import the object directly
# (in that case we'd have to monkey-patch it in all of these modules).
tracer: ProxyTracer = ProxyTracer(provided_tracer=NullTracer())
168  
169  
def enable_tracing(provided_tracer: Tracer) -> None:
    """
    Turn tracing on by pointing the global proxy tracer at `provided_tracer`.

    :param provided_tracer: the tracer the module-level `tracer` proxy forwards to from now on.
    """
    tracer.actual_tracer = provided_tracer
173  
174  
def disable_tracing() -> None:
    """Turn tracing off by pointing the global proxy tracer at a fresh no-op `NullTracer`."""
    noop_tracer = NullTracer()
    tracer.actual_tracer = noop_tracer
178  
179  
def is_tracing_enabled() -> bool:
    """
    Tell whether tracing is enabled.

    :return: `False` if the global proxy currently wraps a `NullTracer`, `True` otherwise.
    """
    active_tracer = tracer.actual_tracer
    return not isinstance(active_tracer, NullTracer)
183  
184  
def auto_enable_tracing() -> None:
    """
    Detect a configured tracing backend and enable it.

    OpenTelemetry is tried first, then Datadog. Nothing happens if tracing is already enabled or no backend is
    detected.

    Set the environment variable `HAYSTACK_AUTO_TRACE_ENABLED` to `false` to switch this behavior off.
    Note that it will only work correctly if tracing was configured _before_ Haystack is imported.
    """
    auto_trace_setting = os.getenv(HAYSTACK_AUTO_TRACE_ENABLED_ENV_VAR, "true")
    if auto_trace_setting.lower() == "false":
        logger.info(
            "Tracing disabled via environment variable '{env_key}'", env_key=HAYSTACK_AUTO_TRACE_ENABLED_ENV_VAR
        )
        return

    if is_tracing_enabled():
        # A tracer is already installed; leave it alone.
        return

    # Named `detected_tracer` so the module-level `tracer` proxy is not shadowed inside this function.
    detected_tracer = _auto_configured_opentelemetry_tracer() or _auto_configured_datadog_tracer()
    if not detected_tracer:
        return

    enable_tracing(detected_tracer)
    logger.info("Auto-enabled tracing for '{tracer}'", tracer=detected_tracer.__class__.__name__)
205  
206  
207  def _auto_configured_opentelemetry_tracer() -> Tracer | None:
208      # we implement this here and not in the `opentelemetry` module to avoid import warnings when OpenTelemetry is not
209      # installed
210      try:
211          import opentelemetry.trace
212  
213          # the safest way to check if tracing is enabled is to try to start a span and see if it's a no-op span
214          # alternatively we could of course check `opentelemetry.trace._TRACER_PROVIDER`
215          # but that's not part of the public API and could change in the future
216          with opentelemetry.trace.get_tracer("haystack").start_as_current_span("haystack.tracing.auto_enable") as span:
217              if isinstance(span, opentelemetry.trace.NonRecordingSpan):
218                  return None
219  
220              from haystack.tracing.opentelemetry import OpenTelemetryTracer
221  
222              return OpenTelemetryTracer(opentelemetry.trace.get_tracer("haystack"))
223      except ImportError:
224          pass
225  
226      return None
227  
228  
229  def _auto_configured_datadog_tracer() -> Tracer | None:
230      # we implement this here and not in the `datadog` module to avoid import warnings when Datadog is not installed
231      try:
232          from ddtrace.trace import tracer
233  
234          from haystack.tracing.datadog import DatadogTracer
235  
236          if tracer.enabled:
237              return DatadogTracer(tracer=tracer)
238      except ImportError:
239          pass
240  
241      return None
242  
243  
# Runs when this module is imported: tries to pick up a tracing backend unless auto-enabling is switched off.
auto_enable_tracing()