/ test / test_logging.py
test_logging.py
  1  # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
  2  #
  3  # SPDX-License-Identifier: Apache-2.0
  4  
  5  import builtins
  6  import json
  7  import logging
  8  import os
  9  import sys
 10  from collections.abc import Generator
 11  from datetime import datetime, timezone
 12  from pathlib import Path
 13  from unittest.mock import ANY
 14  
 15  import pytest
 16  import structlog
 17  from _pytest.capture import CaptureFixture
 18  from _pytest.logging import LogCaptureFixture
 19  from _pytest.monkeypatch import MonkeyPatch
 20  
 21  import haystack.utils.jupyter
 22  from haystack import logging as haystack_logging
 23  from test.tracing.utils import SpyingTracer
 24  
 25  
 26  @pytest.fixture(autouse=True)
 27  def reset_logging_config() -> None:
 28      old_handlers = logging.root.handlers.copy()
 29      yield
 30      # Reset the logging configuration after each test to avoid impacting other tests
 31      logging.root.handlers = old_handlers
 32  
 33  
 34  @pytest.fixture()
 35  def set_context_var_key() -> Generator[str, None, None]:
 36      structlog.contextvars.bind_contextvars(context_var="value")
 37  
 38      yield "context_var"
 39  
 40      structlog.contextvars.unbind_contextvars("context_var")
 41  
 42  
 43  class TestSkipLoggingConfiguration:
 44      def test_skip_logging_configuration(
 45          self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture
 46      ) -> None:
 47          monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true")
 48          haystack_logging.configure_logging()
 49  
 50          logger = logging.getLogger(__name__)
 51          logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
 52  
 53          # the pytest fixture caplog only captures logs being rendered from the stdlib logging module
 54          assert caplog.messages == ["Hello, structured logging!"]
 55  
 56          # Nothing should be captured by capfd since structlog is not configured
 57          assert capfd.readouterr().err == ""
 58  
 59  
 60  class TestStructuredLoggingConsoleRendering:
 61      def test_log_filtering_when_using_debug(self, capfd: CaptureFixture) -> None:
 62          haystack_logging.configure_logging(use_json=False)
 63  
 64          logger = logging.getLogger(__name__)
 65          logger.setLevel(logging.INFO)
 66          logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
 67  
 68          # Use `capfd` to capture the output of the final structlog rendering result
 69          output = capfd.readouterr().err
 70          assert output == ""
 71  
 72      def test_log_filtering_when_using_debug_and_log_level_is_debug(self, capfd: CaptureFixture) -> None:
 73          haystack_logging.configure_logging(use_json=False)
 74  
 75          logger = logging.getLogger(__name__)
 76          logger.setLevel(logging.DEBUG)
 77  
 78          logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
 79  
 80          # Use `capfd` to capture the output of the final structlog rendering result
 81          output = capfd.readouterr().err
 82          assert "Hello, structured logging" in output
 83          assert "{" not in output, "Seems JSON rendering is enabled when it should not be"
 84  
 85      def test_console_rendered_structured_log_even_if_no_tty_but_python_config(
 86          self, capfd: CaptureFixture, monkeypatch: MonkeyPatch
 87      ) -> None:
 88          monkeypatch.setattr(sys.stderr, "isatty", lambda: False)
 89  
 90          haystack_logging.configure_logging(use_json=False)
 91  
 92          logger = logging.getLogger(__name__)
 93          logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
 94  
 95          # Use `capfd` to capture the output of the final structlog rendering result
 96          output = capfd.readouterr().err
 97  
 98          assert "Hello, structured logging!" in output
 99          assert "{" not in output, "Seems JSON rendering is enabled when it should not be"
100  
101      def test_console_rendered_structured_log_if_in_ipython(
102          self, capfd: CaptureFixture, monkeypatch: MonkeyPatch
103      ) -> None:
104          monkeypatch.setattr(builtins, "__IPYTHON__", "true", raising=False)
105  
106          haystack_logging.configure_logging()
107  
108          logger = logging.getLogger(__name__)
109          logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
110  
111          # Use `capfd` to capture the output of the final structlog rendering result
112          output = capfd.readouterr().err
113  
114          assert "Hello, structured logging!" in output
115          assert "{" not in output, "Seems JSON rendering is enabled when it should not be"
116  
117      def test_console_rendered_structured_log_even_in_jupyter(
118          self, capfd: CaptureFixture, monkeypatch: MonkeyPatch
119      ) -> None:
120          monkeypatch.setattr(haystack.utils.jupyter, haystack.utils.jupyter.is_in_jupyter.__name__, lambda: True)
121  
122          haystack_logging.configure_logging()
123  
124          logger = logging.getLogger(__name__)
125          logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
126  
127          # Use `capfd` to capture the output of the final structlog rendering result
128          output = capfd.readouterr().err
129  
130          assert "Hello, structured logging!" in output
131          assert "{" not in output, "Seems JSON rendering is enabled when it should not be"
132  
133      def test_console_rendered_structured_log_even_if_no_tty_but_forced_through_env(
134          self, capfd: CaptureFixture, monkeypatch: MonkeyPatch
135      ) -> None:
136          monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "false")
137  
138          haystack_logging.configure_logging()
139  
140          logger = logging.getLogger(__name__)
141          logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
142  
143          # Use `capfd` to capture the output of the final structlog rendering result
144          output = capfd.readouterr().err
145  
146          assert "Hello, structured logging!" in output
147          assert "{" not in output, "Seems JSON rendering is enabled when it should not be"
148  
149      def test_console_rendered_structured_log(self, capfd: CaptureFixture) -> None:
150          haystack_logging.configure_logging()
151  
152          logger = logging.getLogger(__name__)
153          logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
154  
155          # Use `capfd` to capture the output of the final structlog rendering result
156          output = capfd.readouterr().err
157  
158          # Only check for the minute to be a bit more robust
159          today = datetime.now(tz=timezone.utc).isoformat(timespec="minutes").replace("+00:00", "")
160          assert today in output
161  
162          log_level = "warning"
163          assert log_level in output
164  
165          assert "Hello, structured logging!" in output
166  
167          assert "key1" in output
168          assert "value1" in output
169  
170      def test_logging_exceptions(self, capfd: CaptureFixture) -> None:
171          haystack_logging.configure_logging()
172  
173          logger = logging.getLogger(__name__)
174  
175          def function_that_raises_and_adds_to_stack_trace():
176              raise ValueError("This is an error")
177  
178          try:
179              function_that_raises_and_adds_to_stack_trace()
180          except ValueError:
181              logger.exception("An error happened")
182  
183          # Use `capfd` to capture the output of the final structlog rendering result
184          output = capfd.readouterr().err
185  
186          assert "An error happened" in output
187  
188      def test_logging_of_contextvars(self, capfd: CaptureFixture, set_context_var_key: str) -> None:
189          haystack_logging.configure_logging()
190  
191          logger = logging.getLogger(__name__)
192          logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
193  
194          # Use `capfd` to capture the output of the final structlog rendering result
195          output = capfd.readouterr().err
196  
197          assert set_context_var_key in output
198  
199  
200  class TestStructuredLoggingJSONRendering:
201      def test_logging_as_json_if_not_atty(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None:
202          monkeypatch.setattr(sys.stderr, "isatty", lambda: False)
203          haystack_logging.configure_logging()
204  
205          logger = logging.getLogger(__name__)
206          logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
207  
208          # Use `capfd` to capture the output of the final structlog rendering result
209          output = capfd.readouterr().err
210          parsed_output = json.loads(output)  # should not raise an error
211  
212          assert parsed_output == {
213              "event": "Hello, structured logging!",
214              "key1": "value1",
215              "key2": "value2",
216              "level": "warning",
217              "timestamp": ANY,
218              "lineno": ANY,
219              "module": "test.test_logging",
220          }
221  
222      def test_logging_as_json(self, capfd: CaptureFixture) -> None:
223          haystack_logging.configure_logging(use_json=True)
224  
225          logger = logging.getLogger(__name__)
226          logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
227  
228          # Use `capfd` to capture the output of the final structlog rendering result
229          output = capfd.readouterr().err
230          parsed_output = json.loads(output)  # should not raise an error
231  
232          assert parsed_output == {
233              "event": "Hello, structured logging!",
234              "key1": "value1",
235              "key2": "value2",
236              "level": "warning",
237              "timestamp": ANY,
238              "lineno": ANY,
239              "module": "test.test_logging",
240          }
241  
242      def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None:
243          monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true")
244          haystack_logging.configure_logging()
245  
246          logger = logging.getLogger(__name__)
247          logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
248  
249          # Use `capfd` to capture the output of the final structlog rendering result
250          output = capfd.readouterr().err
251          parsed_output = json.loads(output)  # should not raise an error
252  
253          assert parsed_output == {
254              "event": "Hello, structured logging!",
255              "key1": "value1",
256              "key2": "value2",
257              "level": "warning",
258              "timestamp": ANY,
259              "lineno": ANY,
260              "module": "test.test_logging",
261          }
262  
263      def test_logging_of_contextvars(
264          self, capfd: CaptureFixture, monkeypatch: MonkeyPatch, set_context_var_key: str
265      ) -> None:
266          monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true")
267          haystack_logging.configure_logging()
268  
269          logger = logging.getLogger(__name__)
270          logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
271  
272          # Use `capfd` to capture the output of the final structlog rendering result
273          output = capfd.readouterr().err
274          parsed_output = json.loads(output)  # should not raise an error
275  
276          assert parsed_output == {
277              "event": "Hello, structured logging!",
278              "key1": "value1",
279              "key2": "value2",
280              set_context_var_key: "value",
281              "level": "warning",
282              "timestamp": ANY,
283              "lineno": ANY,
284              "module": "test.test_logging",
285          }
286  
287      def test_logging_exceptions_json(self, capfd: CaptureFixture) -> None:
288          haystack_logging.configure_logging(use_json=True)
289  
290          logger = logging.getLogger(__name__)
291  
292          def function_that_raises_and_adds_to_stack_trace():
293              my_local_variable = "my_local_variable"  # noqa: F841
294              raise ValueError("This is an error")
295  
296          try:
297              function_that_raises_and_adds_to_stack_trace()
298          except ValueError:
299              logger.exception("An error happened ")
300  
301          # Use `capfd` to capture the output of the final structlog rendering result
302          output = capfd.readouterr().err
303          parsed_output = json.loads(output)
304          assert parsed_output == {
305              "event": "An error happened ",
306              "level": "error",
307              "timestamp": ANY,
308              "lineno": ANY,
309              "module": "test.test_logging",
310              "exception": [
311                  {
312                      "exc_notes": [],
313                      "exc_type": "ValueError",
314                      "exc_value": "This is an error",
315                      "exceptions": [],
316                      "syntax_error": None,
317                      "is_cause": False,
318                      "is_group": False,
319                      "frames": [
320                          {
321                              "filename": str(Path.cwd() / "test" / "test_logging.py"),
322                              "lineno": ANY,  # otherwise the test breaks if you add a line :-)
323                              "name": "test_logging_exceptions_json",
324                          },
325                          {
326                              "filename": str(Path.cwd() / "test" / "test_logging.py"),
327                              "lineno": ANY,  # otherwise the test breaks if you add a line :-)
328                              "name": "function_that_raises_and_adds_to_stack_trace",
329                          },
330                      ],
331                  }
332              ],
333          }
334  
335  
336  class TestLogTraceCorrelation:
337      def test_trace_log_correlation_python_logs_with_console_rendering(
338          self, spying_tracer: SpyingTracer, capfd: CaptureFixture
339      ) -> None:
340          haystack_logging.configure_logging(use_json=False)
341  
342          with spying_tracer.trace("test-operation"):
343              logger = logging.getLogger(__name__)
344              logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
345  
346          output = capfd.readouterr().err
347          assert "trace_id" not in output
348  
349      def test_trace_log_correlation_python_logs(self, spying_tracer: SpyingTracer, capfd: CaptureFixture) -> None:
350          haystack_logging.configure_logging(use_json=True)
351  
352          with spying_tracer.trace("test-operation") as span:
353              logger = logging.getLogger(__name__)
354              logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
355  
356          output = capfd.readouterr().err
357          parsed_output = json.loads(output)
358  
359          assert parsed_output == {
360              "event": "Hello, structured logging!",
361              "key1": "value1",
362              "key2": "value2",
363              "level": "warning",
364              "timestamp": ANY,
365              "trace_id": span.trace_id,
366              "span_id": span.span_id,
367              "lineno": ANY,
368              "module": "test.test_logging",
369          }
370  
371      def test_trace_log_correlation_no_span(self, spying_tracer: SpyingTracer, capfd: CaptureFixture) -> None:
372          haystack_logging.configure_logging(use_json=True)
373  
374          logger = logging.getLogger(__name__)
375  
376          logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
377  
378          output = capfd.readouterr().err
379          parsed_output = json.loads(output)
380  
381          assert parsed_output == {
382              "event": "Hello, structured logging!",
383              "key1": "value1",
384              "key2": "value2",
385              "level": "warning",
386              "timestamp": ANY,
387              "lineno": ANY,
388              "module": "test.test_logging",
389          }
390  
391      def test_trace_log_correlation_no_tracer(self, capfd: CaptureFixture) -> None:
392          haystack_logging.configure_logging(use_json=True)
393  
394          logger = logging.getLogger(__name__)
395  
396          logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
397  
398          output = capfd.readouterr().err
399          parsed_output = json.loads(output)
400  
401          assert parsed_output == {
402              "event": "Hello, structured logging!",
403              "key1": "value1",
404              "key2": "value2",
405              "level": "warning",
406              "timestamp": ANY,
407              "lineno": ANY,
408              "module": "test.test_logging",
409          }
410  
411  
412  class TestCompositeLogger:
413      def test_correct_stack_level_with_stdlib_rendering(
414          self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture
415      ) -> None:
416          monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true")
417          haystack_logging.configure_logging()
418  
419          logger = logging.getLogger(__name__)
420          logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
421  
422          # the pytest fixture caplog only captures logs being rendered from the stdlib logging module
423          assert caplog.messages == ["Hello, structured logging!"]
424          assert caplog.records[0].name == "test.test_logging"
425  
426          # Nothing should be captured by capfd since structlog is not configured
427          assert capfd.readouterr().err == ""
428  
429      def test_correct_stack_level_with_consoler_rendering(self, capfd: CaptureFixture) -> None:
430          haystack_logging.configure_logging(use_json=False)
431  
432          logger = haystack_logging.getLogger(__name__)
433          logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
434  
435          output = capfd.readouterr().err
436          assert "test.test_logging" in output
437  
438      @pytest.mark.parametrize(
439          "method, expected_level",
440          [
441              ("debug", "debug"),
442              ("info", "info"),
443              ("warning", "warning"),
444              ("error", "error"),
445              ("fatal", "critical"),
446              ("exception", "error"),
447              ("critical", "critical"),
448          ],
449      )
450      def test_various_levels(self, capfd: LogCaptureFixture, method: str, expected_level: str) -> None:
451          haystack_logging.configure_logging(use_json=True)
452  
453          logger = haystack_logging.getLogger(__name__)
454  
455          logger.setLevel(logging.DEBUG)
456  
457          getattr(logger, method)("Hello, structured {key}!", key="logging", key1="value1", key2="value2")
458  
459          output = capfd.readouterr().err
460          parsed_output = json.loads(output)  # should not raise an error
461  
462          assert parsed_output == {
463              "event": "Hello, structured logging!",
464              "key": "logging",
465              "key1": "value1",
466              "key2": "value2",
467              "level": expected_level,
468              "timestamp": ANY,
469              "lineno": ANY,
470              "module": "test.test_logging",
471          }
472  
473      def test_log(self, capfd: LogCaptureFixture) -> None:
474          haystack_logging.configure_logging(use_json=True)
475  
476          logger = haystack_logging.getLogger(__name__)
477          logger.setLevel(logging.DEBUG)
478  
479          logger.log(logging.DEBUG, "Hello, structured '{key}'!", key="logging", key1="value1", key2="value2")
480  
481          output = capfd.readouterr().err
482          parsed_output = json.loads(output)
483  
484          assert parsed_output == {
485              "event": "Hello, structured 'logging'!",
486              "key": "logging",
487              "key1": "value1",
488              "key2": "value2",
489              "level": "debug",
490              "timestamp": ANY,
491              "lineno": ANY,
492              "module": "test.test_logging",
493          }
494  
495      def test_log_json_content(self, capfd: LogCaptureFixture) -> None:
496          haystack_logging.configure_logging(use_json=True)
497  
498          logger = haystack_logging.getLogger(__name__)
499          logger.setLevel(logging.DEBUG)
500  
501          logger.log(logging.DEBUG, 'Hello, structured: {"key": "value"}', key="logging", key1="value1", key2="value2")
502  
503          output = capfd.readouterr().err
504          parsed_output = json.loads(output)
505  
506          assert parsed_output == {
507              "event": 'Hello, structured: {"key": "value"}',
508              "key": "logging",
509              "key1": "value1",
510              "key2": "value2",
511              "level": "debug",
512              "timestamp": ANY,
513              "lineno": ANY,
514              "module": "test.test_logging",
515          }
516  
517      def test_log_with_string_cast(self, capfd: LogCaptureFixture) -> None:
518          haystack_logging.configure_logging(use_json=True)
519  
520          logger = haystack_logging.getLogger(__name__)
521          logger.setLevel(logging.DEBUG)
522  
523          logger.log(logging.DEBUG, "Hello, structured '{key}'!", key=LogCaptureFixture, key1="value1", key2="value2")
524  
525          output = capfd.readouterr().err
526          parsed_output = json.loads(output)
527  
528          assert parsed_output == {
529              "event": "Hello, structured '<class '_pytest.logging.LogCaptureFixture'>'!",
530              "key": "<class '_pytest.logging.LogCaptureFixture'>",
531              "key1": "value1",
532              "key2": "value2",
533              "level": "debug",
534              "timestamp": ANY,
535              "lineno": ANY,
536              "module": "test.test_logging",
537          }
538  
539      @pytest.mark.parametrize(
540          "method, expected_level",
541          [
542              ("debug", "debug"),
543              ("info", "info"),
544              ("warning", "warning"),
545              ("error", "error"),
546              ("fatal", "critical"),
547              ("exception", "exception"),
548              ("critical", "critical"),
549          ],
550      )
551      def test_haystack_logger_with_positional_args(self, method: str, expected_level: str) -> None:
552          haystack_logging.configure_logging(use_json=True)
553  
554          logger = haystack_logging.getLogger(__name__)
555          logger.setLevel(logging.DEBUG)
556  
557          with pytest.raises(TypeError):
558              getattr(logger, method)("Hello, structured logging %s!", "logging")
559  
560      @pytest.mark.parametrize(
561          "method, expected_level",
562          [
563              ("debug", "debug"),
564              ("info", "info"),
565              ("warning", "warning"),
566              ("error", "error"),
567              ("fatal", "critical"),
568              ("exception", "exception"),
569              ("critical", "critical"),
570          ],
571      )
572      def test_haystack_logger_with_old_interpolation(self, method: str, expected_level: str) -> None:
573          haystack_logging.configure_logging(use_json=True)
574  
575          logger = haystack_logging.getLogger(__name__)
576          logger.setLevel(logging.DEBUG)
577  
578          # does not raise - hence we need to check this separately
579          getattr(logger, method)("Hello, structured logging %s!", key="logging")
580  
581      def test_that_haystack_logger_is_used(self) -> None:
582          """Forces the usage of the Haystack logger instead of the standard library logger."""
583          allowed_list = [Path("haystack") / "logging.py"]
584          for root, _, files in os.walk("haystack"):
585              for file in files:
586                  path = Path(root) / file
587  
588                  if not path.suffix.endswith(".py"):
589                      continue
590  
591                  if path in allowed_list:
592                      continue
593  
594                  content = path.read_text(encoding="utf-8")
595  
596                  # that looks like somebody is using our standard logger
597                  if " logging.getLogger" in content:
598                      haystack_logger_in_content = " haystack import logging" in content or ", logging" in content
599                      assert haystack_logger_in_content, (
600                          f"{path} doesn't use the Haystack logger. Please use the Haystack logger instead of the "
601                          f"standard library logger and add plenty of keyword arguments."
602                      )