tracer.py
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import abc
import contextlib
import os
from collections.abc import Iterator
from typing import Any

from haystack import logging

HAYSTACK_AUTO_TRACE_ENABLED_ENV_VAR = "HAYSTACK_AUTO_TRACE_ENABLED"
HAYSTACK_CONTENT_TRACING_ENABLED_ENV_VAR = "HAYSTACK_CONTENT_TRACING_ENABLED"

logger = logging.getLogger(__name__)


class Span(abc.ABC):
    """Interface for an instrumented operation."""

    @abc.abstractmethod
    def set_tag(self, key: str, value: Any) -> None:
        """
        Attach one tag to the span.

        The value will be serialized to a string, so simple types such as strings, numbers, or booleans work best.

        :param key: the name of the tag.
        :param value: the value of the tag.
        """

    def set_tags(self, tags: dict[str, Any]) -> None:
        """
        Attach several tags to the span by calling `set_tag` once per entry.

        :param tags: a mapping of tag names to tag values.
        """
        for tag_name, tag_value in tags.items():
            self.set_tag(tag_name, tag_value)

    def raw_span(self) -> Any:
        """
        Give access to the underlying span object of the tracer.

        Use this if you need full access to the underlying span object. This default implementation returns the
        span itself.

        :return: The underlying span object.
        """
        return self

    def set_content_tag(self, key: str, value: Any) -> None:
        """
        Attach one tag that carries content information.

        Content is sensitive information such as
        - the content of a query
        - the content of a document
        - the content of an answer

        By default, this behavior is disabled. To enable it
        - set the environment variable `HAYSTACK_CONTENT_TRACING_ENABLED` to `true` or
        - override the `set_content_tag` method in a custom tracer implementation.

        :param key: the name of the tag.
        :param value: the value of the tag.
        """
        # The flag lives on the module-level proxy tracer; when it is off the tag is dropped silently.
        if not tracer.is_content_tracing_enabled:
            return
        self.set_tag(key, value)

    def get_correlation_data_for_logs(self) -> dict[str, Any]:
        """
        Return a dictionary with correlation data for logs.

        This is useful if you want to correlate logs with traces. This default implementation returns an empty
        dictionary.
        """
        return {}


class Tracer(abc.ABC):
    """Interface for instrumenting code by creating and submitting spans."""

    @abc.abstractmethod
    @contextlib.contextmanager
    def trace(
        self, operation_name: str, tags: dict[str, Any] | None = None, parent_span: Span | None = None
    ) -> Iterator[Span]:
        """
        Trace the execution of a block of code.

        :param operation_name: the name of the operation being traced.
        :param tags: tags to apply to the newly created span.
        :param parent_span: the parent span to use for the newly created span.
            If `None`, the newly created span will be a root span.
        :return: the newly created span.
        """

    @abc.abstractmethod
    def current_span(self) -> Span | None:
        """
        Return the currently active span, or `None` if no span is active.

        :return: Currently active span or `None` if no span is active.
        """


class ProxyTracer(Tracer):
    """
    Container for the actual tracer instance.

    This eases
    - replacing the actual tracer instance without having to change the global tracer instance
    - implementing default behavior for the tracer
    """

    def __init__(self, provided_tracer: Tracer) -> None:
        """
        Create a ProxyTracer that delegates to `provided_tracer`.

        :param provided_tracer: the tracer that `trace` and `current_span` calls are forwarded to.
        """
        self.actual_tracer: Tracer = provided_tracer
        # The content-tracing flag is read from the environment once, when the proxy is constructed.
        raw_flag = os.getenv(HAYSTACK_CONTENT_TRACING_ENABLED_ENV_VAR, "false")
        self.is_content_tracing_enabled = raw_flag.lower() == "true"

    @contextlib.contextmanager
    def trace(
        self, operation_name: str, tags: dict[str, Any] | None = None, parent_span: Span | None = None
    ) -> Iterator[Span]:
        """Open a span on the wrapped tracer and yield it for the duration of the `with` block."""
        with self.actual_tracer.trace(operation_name, tags=tags, parent_span=parent_span) as active_span:
            yield active_span

    def current_span(self) -> Span | None:
        """Return whatever the wrapped tracer reports as its current span."""
        return self.actual_tracer.current_span()


class NullSpan(Span):
    """A no-op implementation of the `Span` interface. This is used when tracing is disabled."""

    def set_tag(self, key: str, value: Any) -> None:
        """Accept a tag and discard it."""


class NullTracer(Tracer):
    """A no-op implementation of the `Tracer` interface. This is used when tracing is disabled."""

    @contextlib.contextmanager
    def trace(
        self,
        operation_name: str,  # noqa: ARG002
        tags: dict[str, Any] | None = None,  # noqa: ARG002
        parent_span: Span | None = None,  # noqa: ARG002
    ) -> Iterator[Span]:
        """Yield a fresh `NullSpan`; all arguments are ignored."""
        yield NullSpan()

    def current_span(self) -> Span | None:
        """Return a fresh `NullSpan`."""
        return NullSpan()


# We use the proxy pattern to allow for easy enabling and disabling of tracing without having to change the global
# tracer instance.
# That's especially convenient if users import the object directly
# (in that case we'd have to monkey-patch it in all of these modules).
tracer: ProxyTracer = ProxyTracer(provided_tracer=NullTracer())


def enable_tracing(provided_tracer: Tracer) -> None:
    """
    Enable tracing by setting the tracer that the global proxy delegates to.

    :param provided_tracer: the tracer to install behind the global `tracer` proxy.
    """
    tracer.actual_tracer = provided_tracer


def disable_tracing() -> None:
    """Disable tracing by pointing the global proxy at a no-op tracer."""
    tracer.actual_tracer = NullTracer()


def is_tracing_enabled() -> bool:
    """
    Return whether tracing is enabled.

    :return: `True` unless the global proxy currently delegates to a `NullTracer`.
    """
    return not isinstance(tracer.actual_tracer, NullTracer)


def auto_enable_tracing() -> None:
    """
    Auto-enable the right tracing backend.

    OpenTelemetry is tried first, then Datadog; the first backend that yields a tracer is enabled.

    This behavior can be disabled by setting the environment variable `HAYSTACK_AUTO_TRACE_ENABLED` to `false`.
    Note that it will only work correctly if tracing was configured _before_ Haystack is imported.
    """
    if os.getenv(HAYSTACK_AUTO_TRACE_ENABLED_ENV_VAR, "true").lower() == "false":
        logger.info(
            "Tracing disabled via environment variable '{env_key}'", env_key=HAYSTACK_AUTO_TRACE_ENABLED_ENV_VAR
        )
        return

    if is_tracing_enabled():
        return  # tracing already enabled

    # Deliberately not named `tracer`: that would shadow the module-level proxy inside this function.
    detected_tracer = _auto_configured_opentelemetry_tracer() or _auto_configured_datadog_tracer()
    if detected_tracer:
        enable_tracing(detected_tracer)
        logger.info("Auto-enabled tracing for '{tracer}'", tracer=detected_tracer.__class__.__name__)


def _auto_configured_opentelemetry_tracer() -> Tracer | None:
    """
    Build an OpenTelemetry-backed tracer if OpenTelemetry is importable and hands out recording spans.

    :return: an `OpenTelemetryTracer`, or `None` if an import fails or the probe span is non-recording.
    """
    # we implement this here and not in the `opentelemetry` module to avoid import warnings when OpenTelemetry is not
    # installed
    try:
        import opentelemetry.trace

        # the safest way to check if tracing is enabled is to try to start a span and see if it's a no-op span
        # alternatively we could of course check `opentelemetry.trace._TRACER_PROVIDER`
        # but that's not part of the public API and could change in the future
        with opentelemetry.trace.get_tracer("haystack").start_as_current_span("haystack.tracing.auto_enable") as span:
            if isinstance(span, opentelemetry.trace.NonRecordingSpan):
                return None

            from haystack.tracing.opentelemetry import OpenTelemetryTracer

            return OpenTelemetryTracer(opentelemetry.trace.get_tracer("haystack"))
    except ImportError:
        # Best effort: a missing dependency simply means this backend is not available.
        pass

    return None


def _auto_configured_datadog_tracer() -> Tracer | None:
    """
    Build a Datadog-backed tracer if `ddtrace` is importable and its tracer is enabled.

    :return: a `DatadogTracer`, or `None` if an import fails or the ddtrace tracer is not enabled.
    """
    # we implement this here and not in the `datadog` module to avoid import warnings when Datadog is not installed
    try:
        # Imported under an alias so the module-level `tracer` proxy is not shadowed in this function.
        from ddtrace.trace import tracer as ddtrace_tracer

        from haystack.tracing.datadog import DatadogTracer

        if ddtrace_tracer.enabled:
            return DatadogTracer(tracer=ddtrace_tracer)
    except ImportError:
        # Best effort: a missing dependency simply means this backend is not available.
        pass

    return None


auto_enable_tracing()