test_logging.py
1 # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai> 2 # 3 # SPDX-License-Identifier: Apache-2.0 4 5 import builtins 6 import json 7 import logging 8 import os 9 import sys 10 from collections.abc import Generator 11 from datetime import datetime, timezone 12 from pathlib import Path 13 from unittest.mock import ANY 14 15 import pytest 16 import structlog 17 from _pytest.capture import CaptureFixture 18 from _pytest.logging import LogCaptureFixture 19 from _pytest.monkeypatch import MonkeyPatch 20 21 import haystack.utils.jupyter 22 from haystack import logging as haystack_logging 23 from test.tracing.utils import SpyingTracer 24 25 26 @pytest.fixture(autouse=True) 27 def reset_logging_config() -> None: 28 old_handlers = logging.root.handlers.copy() 29 yield 30 # Reset the logging configuration after each test to avoid impacting other tests 31 logging.root.handlers = old_handlers 32 33 34 @pytest.fixture() 35 def set_context_var_key() -> Generator[str, None, None]: 36 structlog.contextvars.bind_contextvars(context_var="value") 37 38 yield "context_var" 39 40 structlog.contextvars.unbind_contextvars("context_var") 41 42 43 class TestSkipLoggingConfiguration: 44 def test_skip_logging_configuration( 45 self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture 46 ) -> None: 47 monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true") 48 haystack_logging.configure_logging() 49 50 logger = logging.getLogger(__name__) 51 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 52 53 # the pytest fixture caplog only captures logs being rendered from the stdlib logging module 54 assert caplog.messages == ["Hello, structured logging!"] 55 56 # Nothing should be captured by capfd since structlog is not configured 57 assert capfd.readouterr().err == "" 58 59 60 class TestStructuredLoggingConsoleRendering: 61 def test_log_filtering_when_using_debug(self, capfd: CaptureFixture) -> None: 62 haystack_logging.configure_logging(use_json=False) 63 64 logger = logging.getLogger(__name__) 65 logger.setLevel(logging.INFO) 66 logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 67 68 # Use `capfd` to capture the output of the final structlog rendering result 69 output = capfd.readouterr().err 70 assert output == "" 71 72 def test_log_filtering_when_using_debug_and_log_level_is_debug(self, capfd: CaptureFixture) -> None: 73 haystack_logging.configure_logging(use_json=False) 74 75 logger = logging.getLogger(__name__) 76 logger.setLevel(logging.DEBUG) 77 78 logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 79 80 # Use `capfd` to capture the output of the final structlog rendering result 81 output = capfd.readouterr().err 82 assert "Hello, structured logging" in output 83 assert "{" not in output, "Seems JSON rendering is enabled when it should not be" 84 85 def test_console_rendered_structured_log_even_if_no_tty_but_python_config( 86 self, capfd: CaptureFixture, monkeypatch: MonkeyPatch 87 ) -> None: 88 monkeypatch.setattr(sys.stderr, "isatty", lambda: False) 89 90 haystack_logging.configure_logging(use_json=False) 91 92 logger = logging.getLogger(__name__) 93 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 94 95 # Use `capfd` to capture the output of the final structlog rendering result 96 output = capfd.readouterr().err 97 98 assert "Hello, structured logging!" in output 99 assert "{" not in output, "Seems JSON rendering is enabled when it should not be" 100 101 def test_console_rendered_structured_log_if_in_ipython( 102 self, capfd: CaptureFixture, monkeypatch: MonkeyPatch 103 ) -> None: 104 monkeypatch.setattr(builtins, "__IPYTHON__", "true", raising=False) 105 106 haystack_logging.configure_logging() 107 108 logger = logging.getLogger(__name__) 109 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 110 111 # Use `capfd` to capture the output of the final structlog rendering result 112 output = capfd.readouterr().err 113 114 assert "Hello, structured logging!" in output 115 assert "{" not in output, "Seems JSON rendering is enabled when it should not be" 116 117 def test_console_rendered_structured_log_even_in_jupyter( 118 self, capfd: CaptureFixture, monkeypatch: MonkeyPatch 119 ) -> None: 120 monkeypatch.setattr(haystack.utils.jupyter, haystack.utils.jupyter.is_in_jupyter.__name__, lambda: True) 121 122 haystack_logging.configure_logging() 123 124 logger = logging.getLogger(__name__) 125 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 126 127 # Use `capfd` to capture the output of the final structlog rendering result 128 output = capfd.readouterr().err 129 130 assert "Hello, structured logging!" in output 131 assert "{" not in output, "Seems JSON rendering is enabled when it should not be" 132 133 def test_console_rendered_structured_log_even_if_no_tty_but_forced_through_env( 134 self, capfd: CaptureFixture, monkeypatch: MonkeyPatch 135 ) -> None: 136 monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "false") 137 138 haystack_logging.configure_logging() 139 140 logger = logging.getLogger(__name__) 141 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 142 143 # Use `capfd` to capture the output of the final structlog rendering result 144 output = capfd.readouterr().err 145 146 assert "Hello, structured logging!" in output 147 assert "{" not in output, "Seems JSON rendering is enabled when it should not be" 148 149 def test_console_rendered_structured_log(self, capfd: CaptureFixture) -> None: 150 haystack_logging.configure_logging() 151 152 logger = logging.getLogger(__name__) 153 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 154 155 # Use `capfd` to capture the output of the final structlog rendering result 156 output = capfd.readouterr().err 157 158 # Only check for the minute to be a bit more robust 159 today = datetime.now(tz=timezone.utc).isoformat(timespec="minutes").replace("+00:00", "") 160 assert today in output 161 162 log_level = "warning" 163 assert log_level in output 164 165 assert "Hello, structured logging!" in output 166 167 assert "key1" in output 168 assert "value1" in output 169 170 def test_logging_exceptions(self, capfd: CaptureFixture) -> None: 171 haystack_logging.configure_logging() 172 173 logger = logging.getLogger(__name__) 174 175 def function_that_raises_and_adds_to_stack_trace(): 176 raise ValueError("This is an error") 177 178 try: 179 function_that_raises_and_adds_to_stack_trace() 180 except ValueError: 181 logger.exception("An error happened") 182 183 # Use `capfd` to capture the output of the final structlog rendering result 184 output = capfd.readouterr().err 185 186 assert "An error happened" in output 187 188 def test_logging_of_contextvars(self, capfd: CaptureFixture, set_context_var_key: str) -> None: 189 haystack_logging.configure_logging() 190 191 logger = logging.getLogger(__name__) 192 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 193 194 # Use `capfd` to capture the output of the final structlog rendering result 195 output = capfd.readouterr().err 196 197 assert set_context_var_key in output 198 199 200 class TestStructuredLoggingJSONRendering: 201 def test_logging_as_json_if_not_atty(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None: 202 monkeypatch.setattr(sys.stderr, "isatty", lambda: False) 203 haystack_logging.configure_logging() 204 205 logger = logging.getLogger(__name__) 206 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 207 208 # Use `capfd` to capture the output of the final structlog rendering result 209 output = capfd.readouterr().err 210 parsed_output = json.loads(output) # should not raise an error 211 212 assert parsed_output == { 213 "event": "Hello, structured logging!", 214 "key1": "value1", 215 "key2": "value2", 216 "level": "warning", 217 "timestamp": ANY, 218 "lineno": ANY, 219 "module": "test.test_logging", 220 } 221 222 def test_logging_as_json(self, capfd: CaptureFixture) -> None: 223 haystack_logging.configure_logging(use_json=True) 224 225 logger = logging.getLogger(__name__) 226 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 227 228 # Use `capfd` to capture the output of the final structlog rendering result 229 output = capfd.readouterr().err 230 parsed_output = json.loads(output) # should not raise an error 231 232 assert parsed_output == { 233 "event": "Hello, structured logging!", 234 "key1": "value1", 235 "key2": "value2", 236 "level": "warning", 237 "timestamp": ANY, 238 "lineno": ANY, 239 "module": "test.test_logging", 240 } 241 242 def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None: 243 monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true") 244 haystack_logging.configure_logging() 245 246 logger = logging.getLogger(__name__) 247 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 248 249 # Use `capfd` to capture the output of the final structlog rendering result 250 output = capfd.readouterr().err 251 parsed_output = json.loads(output) # should not raise an error 252 253 assert parsed_output == { 254 "event": "Hello, structured logging!", 255 "key1": "value1", 256 "key2": "value2", 257 "level": "warning", 258 "timestamp": ANY, 259 "lineno": ANY, 260 "module": "test.test_logging", 261 } 262 263 def test_logging_of_contextvars( 264 self, capfd: CaptureFixture, monkeypatch: MonkeyPatch, set_context_var_key: str 265 ) -> None: 266 monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true") 267 haystack_logging.configure_logging() 268 269 logger = logging.getLogger(__name__) 270 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 271 272 # Use `capfd` to capture the output of the final structlog rendering result 273 output = capfd.readouterr().err 274 parsed_output = json.loads(output) # should not raise an error 275 276 assert parsed_output == { 277 "event": "Hello, structured logging!", 278 "key1": "value1", 279 "key2": "value2", 280 set_context_var_key: "value", 281 "level": "warning", 282 "timestamp": ANY, 283 "lineno": ANY, 284 "module": "test.test_logging", 285 } 286 287 def test_logging_exceptions_json(self, capfd: CaptureFixture) -> None: 288 haystack_logging.configure_logging(use_json=True) 289 290 logger = logging.getLogger(__name__) 291 292 def function_that_raises_and_adds_to_stack_trace(): 293 my_local_variable = "my_local_variable" # noqa: F841 294 raise ValueError("This is an error") 295 296 try: 297 function_that_raises_and_adds_to_stack_trace() 298 except ValueError: 299 logger.exception("An error happened ") 300 301 # Use `capfd` to capture the output of the final structlog rendering result 302 output = capfd.readouterr().err 303 parsed_output = json.loads(output) 304 assert parsed_output == { 305 "event": "An error happened ", 306 "level": "error", 307 "timestamp": ANY, 308 "lineno": ANY, 309 "module": "test.test_logging", 310 "exception": [ 311 { 312 "exc_notes": [], 313 "exc_type": "ValueError", 314 "exc_value": "This is an error", 315 "exceptions": [], 316 "syntax_error": None, 317 "is_cause": False, 318 "is_group": False, 319 "frames": [ 320 { 321 "filename": str(Path.cwd() / "test" / "test_logging.py"), 322 "lineno": ANY, # otherwise the test breaks if you add a line :-) 323 "name": "test_logging_exceptions_json", 324 }, 325 { 326 "filename": str(Path.cwd() / "test" / "test_logging.py"), 327 "lineno": ANY, # otherwise the test breaks if you add a line :-) 328 "name": "function_that_raises_and_adds_to_stack_trace", 329 }, 330 ], 331 } 332 ], 333 } 334 335 336 class TestLogTraceCorrelation: 337 def test_trace_log_correlation_python_logs_with_console_rendering( 338 self, spying_tracer: SpyingTracer, capfd: CaptureFixture 339 ) -> None: 340 haystack_logging.configure_logging(use_json=False) 341 342 with spying_tracer.trace("test-operation"): 343 logger = logging.getLogger(__name__) 344 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 345 346 output = capfd.readouterr().err 347 assert "trace_id" not in output 348 349 def test_trace_log_correlation_python_logs(self, spying_tracer: SpyingTracer, capfd: CaptureFixture) -> None: 350 haystack_logging.configure_logging(use_json=True) 351 352 with spying_tracer.trace("test-operation") as span: 353 logger = logging.getLogger(__name__) 354 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 355 356 output = capfd.readouterr().err 357 parsed_output = json.loads(output) 358 359 assert parsed_output == { 360 "event": "Hello, structured logging!", 361 "key1": "value1", 362 "key2": "value2", 363 "level": "warning", 364 "timestamp": ANY, 365 "trace_id": span.trace_id, 366 "span_id": span.span_id, 367 "lineno": ANY, 368 "module": "test.test_logging", 369 } 370 371 def test_trace_log_correlation_no_span(self, spying_tracer: SpyingTracer, capfd: CaptureFixture) -> None: 372 haystack_logging.configure_logging(use_json=True) 373 374 logger = logging.getLogger(__name__) 375 376 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 377 378 output = capfd.readouterr().err 379 parsed_output = json.loads(output) 380 381 assert parsed_output == { 382 "event": "Hello, structured logging!", 383 "key1": "value1", 384 "key2": "value2", 385 "level": "warning", 386 "timestamp": ANY, 387 "lineno": ANY, 388 "module": "test.test_logging", 389 } 390 391 def test_trace_log_correlation_no_tracer(self, capfd: CaptureFixture) -> None: 392 haystack_logging.configure_logging(use_json=True) 393 394 logger = logging.getLogger(__name__) 395 396 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 397 398 output = capfd.readouterr().err 399 parsed_output = json.loads(output) 400 401 assert parsed_output == { 402 "event": "Hello, structured logging!", 403 "key1": "value1", 404 "key2": "value2", 405 "level": "warning", 406 "timestamp": ANY, 407 "lineno": ANY, 408 "module": "test.test_logging", 409 } 410 411 412 class TestCompositeLogger: 413 def test_correct_stack_level_with_stdlib_rendering( 414 self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture 415 ) -> None: 416 monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true") 417 haystack_logging.configure_logging() 418 419 logger = logging.getLogger(__name__) 420 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 421 422 # the pytest fixture caplog only captures logs being rendered from the stdlib logging module 423 assert caplog.messages == ["Hello, structured logging!"] 424 assert caplog.records[0].name == "test.test_logging" 425 426 # Nothing should be captured by capfd since structlog is not configured 427 assert capfd.readouterr().err == "" 428 429 def test_correct_stack_level_with_consoler_rendering(self, capfd: CaptureFixture) -> None: 430 haystack_logging.configure_logging(use_json=False) 431 432 logger = haystack_logging.getLogger(__name__) 433 logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) 434 435 output = capfd.readouterr().err 436 assert "test.test_logging" in output 437 438 @pytest.mark.parametrize( 439 "method, expected_level", 440 [ 441 ("debug", "debug"), 442 ("info", "info"), 443 ("warning", "warning"), 444 ("error", "error"), 445 ("fatal", "critical"), 446 ("exception", "error"), 447 ("critical", "critical"), 448 ], 449 ) 450 def test_various_levels(self, capfd: LogCaptureFixture, method: str, expected_level: str) -> None: 451 haystack_logging.configure_logging(use_json=True) 452 453 logger = haystack_logging.getLogger(__name__) 454 455 logger.setLevel(logging.DEBUG) 456 457 getattr(logger, method)("Hello, structured {key}!", key="logging", key1="value1", key2="value2") 458 459 output = capfd.readouterr().err 460 parsed_output = json.loads(output) # should not raise an error 461 462 assert parsed_output == { 463 "event": "Hello, structured logging!", 464 "key": "logging", 465 "key1": "value1", 466 "key2": "value2", 467 "level": expected_level, 468 "timestamp": ANY, 469 "lineno": ANY, 470 "module": "test.test_logging", 471 } 472 473 def test_log(self, capfd: LogCaptureFixture) -> None: 474 haystack_logging.configure_logging(use_json=True) 475 476 logger = haystack_logging.getLogger(__name__) 477 logger.setLevel(logging.DEBUG) 478 479 logger.log(logging.DEBUG, "Hello, structured '{key}'!", key="logging", key1="value1", key2="value2") 480 481 output = capfd.readouterr().err 482 parsed_output = json.loads(output) 483 484 assert parsed_output == { 485 "event": "Hello, structured 'logging'!", 486 "key": "logging", 487 "key1": "value1", 488 "key2": "value2", 489 "level": "debug", 490 "timestamp": ANY, 491 "lineno": ANY, 492 "module": "test.test_logging", 493 } 494 495 def test_log_json_content(self, capfd: LogCaptureFixture) -> None: 496 haystack_logging.configure_logging(use_json=True) 497 498 logger = haystack_logging.getLogger(__name__) 499 logger.setLevel(logging.DEBUG) 500 501 logger.log(logging.DEBUG, 'Hello, structured: {"key": "value"}', key="logging", key1="value1", key2="value2") 502 503 output = capfd.readouterr().err 504 parsed_output = json.loads(output) 505 506 assert parsed_output == { 507 "event": 'Hello, structured: {"key": "value"}', 508 "key": "logging", 509 "key1": "value1", 510 "key2": "value2", 511 "level": "debug", 512 "timestamp": ANY, 513 "lineno": ANY, 514 "module": "test.test_logging", 515 } 516 517 def test_log_with_string_cast(self, capfd: LogCaptureFixture) -> None: 518 haystack_logging.configure_logging(use_json=True) 519 520 logger = haystack_logging.getLogger(__name__) 521 logger.setLevel(logging.DEBUG) 522 523 logger.log(logging.DEBUG, "Hello, structured '{key}'!", key=LogCaptureFixture, key1="value1", key2="value2") 524 525 output = capfd.readouterr().err 526 parsed_output = json.loads(output) 527 528 assert parsed_output == { 529 "event": "Hello, structured '<class '_pytest.logging.LogCaptureFixture'>'!", 530 "key": "<class '_pytest.logging.LogCaptureFixture'>", 531 "key1": "value1", 532 "key2": "value2", 533 "level": "debug", 534 "timestamp": ANY, 535 "lineno": ANY, 536 "module": "test.test_logging", 537 } 538 539 @pytest.mark.parametrize( 540 "method, expected_level", 541 [ 542 ("debug", "debug"), 543 ("info", "info"), 544 ("warning", "warning"), 545 ("error", "error"), 546 ("fatal", "critical"), 547 ("exception", "exception"), 548 ("critical", "critical"), 549 ], 550 ) 551 def test_haystack_logger_with_positional_args(self, method: str, expected_level: str) -> None: 552 haystack_logging.configure_logging(use_json=True) 553 554 logger = haystack_logging.getLogger(__name__) 555 logger.setLevel(logging.DEBUG) 556 557 with pytest.raises(TypeError): 558 getattr(logger, method)("Hello, structured logging %s!", "logging") 559 560 @pytest.mark.parametrize( 561 "method, expected_level", 562 [ 563 ("debug", "debug"), 564 ("info", "info"), 565 ("warning", "warning"), 566 ("error", "error"), 567 ("fatal", "critical"), 568 ("exception", "exception"), 569 ("critical", "critical"), 570 ], 571 ) 572 def test_haystack_logger_with_old_interpolation(self, method: str, expected_level: str) -> None: 573 haystack_logging.configure_logging(use_json=True) 574 575 logger = haystack_logging.getLogger(__name__) 576 logger.setLevel(logging.DEBUG) 577 578 # does not raise - hence we need to check this separately 579 getattr(logger, method)("Hello, structured logging %s!", key="logging") 580 581 def test_that_haystack_logger_is_used(self) -> None: 582 """Forces the usage of the Haystack logger instead of the standard library logger.""" 583 allowed_list = [Path("haystack") / "logging.py"] 584 for root, _, files in os.walk("haystack"): 585 for file in files: 586 path = Path(root) / file 587 588 if not path.suffix.endswith(".py"): 589 continue 590 591 if path in allowed_list: 592 continue 593 594 content = path.read_text(encoding="utf-8") 595 596 # that looks like somebody is using our standard logger 597 if " logging.getLogger" in content: 598 haystack_logger_in_content = " haystack import logging" in content or ", logging" in content 599 assert haystack_logger_in_content, ( 600 f"{path} doesn't use the Haystack logger. Please use the Haystack logger instead of the " 601 f"standard library logger and add plenty of keyword arguments." 602 )