/ haystack / telemetry / _environment.py
_environment.py
 1  # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
 2  #
 3  # SPDX-License-Identifier: Apache-2.0
 4  
 5  import os
 6  import platform
 7  import sys
 8  from typing import Any
 9  
10  from haystack.version import __version__
11  
# Process-wide memo for the container detection done by `_is_containerized`.
# `None` means "not computed yet". Once set, the value is reused because it
# cannot change during the lifetime of the process.
_IS_DOCKER_CACHE: bool | None = None
14  
15  
def _str_in_any_line_of_file(s: str, path: str) -> bool:
    """
    Check whether a string occurs in at least one line of a text file.

    The file is decoded as UTF-8 with undecodable bytes replaced, so the result does not depend on the
    process locale and a stray non-UTF-8 byte (e.g. in a mount path) cannot raise `UnicodeDecodeError`.

    :param s: The substring to look for.
    :param path: Path of the file to scan.
    :returns: `True` if `s` is contained in any line of the file, `False` otherwise.
    :raises OSError: If the file cannot be opened or read.
    """
    with open(path, encoding="utf-8", errors="replace") as f:
        # Lines are read lazily and the scan stops at the first match.
        return any(s in line for line in f)
19  
20  
def _in_podman() -> bool:
    """
    Tell whether the current process runs inside a Podman container.

    `podman run` creates the marker file /run/.containerenv inside the container, see:
    https://github.com/containers/podman/blob/main/docs/source/markdown/podman-run.1.md.in#L31

    :returns: `True` if the marker file exists, `False` otherwise.
    """
    marker = "/run/.containerenv"
    return os.path.exists(marker)
29  
30  
def _has_dockerenv() -> bool:
    """
    Tell whether the Docker marker file /.dockerenv is present.

    Relying on this file might stop working at some point (even if it's been a while now), see:
    https://github.com/moby/moby/issues/18355#issuecomment-220484748

    :returns: `True` if /.dockerenv exists, `False` otherwise.
    """
    marker = "/.dockerenv"
    return os.path.exists(marker)
39  
40  
def _has_docker_cgroup_v1() -> bool:
    """
    Look for the string "docker" in the cgroup listing of the current process.

    This only works with cgroups v1.

    :returns: `True` if /proc/self/cgroup is a regular file and one of its lines contains "docker",
        `False` otherwise.
    """
    # 'self' should be always symlinked to the actual PID
    cgroup_file = "/proc/self/cgroup"
    if not os.path.isfile(cgroup_file):
        return False
    return _str_in_any_line_of_file("docker", cgroup_file)
47  
48  
def _has_docker_cgroup_v2() -> bool:
    """
    Check if the code is running in a Docker container when cgroups v2 is in use.

    The check looks for "/docker/containers/" in the mount table of the current process. Inspired from:
    https://github.com/jenkinsci/docker-workflow-plugin/blob/master/src/main/java/org/jenkinsci/plugins/docker/workflow/client/DockerClient.java

    :returns: `True` if /proc/self/mountinfo is a regular file and one of its lines contains
        "/docker/containers/", `False` otherwise.
    """
    # 'self' should be always symlinked to the actual PID
    mountinfo_file = "/proc/self/mountinfo"
    if not os.path.isfile(mountinfo_file):
        return False
    return _str_in_any_line_of_file("/docker/containers/", mountinfo_file)
57  
58  
def _is_containerized() -> bool | None:
    """
    Report whether the process appears to run inside a Podman or Docker container.

    This code is based on the popular 'is-docker' package for node.js. The checks run at most once per
    process: their outcome is stored in the module-level `_IS_DOCKER_CACHE` and returned on later calls.

    :returns: `True` if any of the container checks succeeds, `False` otherwise.
    """
    global _IS_DOCKER_CACHE

    if _IS_DOCKER_CACHE is not None:
        return _IS_DOCKER_CACHE

    # Evaluated in order; `any` stops at the first check that succeeds.
    checks = (_in_podman, _has_dockerenv, _has_docker_cgroup_v1, _has_docker_cgroup_v2)
    _IS_DOCKER_CACHE = any(check() for check in checks)
    return _IS_DOCKER_CACHE
69  
70  
def _loaded_module_version(module_name: str) -> Any:
    """
    Return the `__version__` of a module that is already imported, or `False` if it is not loaded.

    Only `sys.modules` is consulted, so nothing gets imported as a side effect.

    :param module_name: Exact, case-sensitive name of the module as registered in `sys.modules`.
    :returns: The module's `__version__` attribute, or `False` when the module is absent from
        `sys.modules` (or registered there as `None`).
    """
    module = sys.modules.get(module_name)
    if module is None:
        return False
    return module.__version__


def collect_system_specs() -> dict[str, Any]:
    """
    Collects meta-data about the setup that is used with Haystack.

    Data collected includes: Haystack version, whether the process runs in a container, operating system
    release/family/machine, Python version, number of CPUs, and the versions of pytest, IPython and
    google.colab if they are already loaded in the current process.

    The GPU, transformers, torch and cuda entries are fixed placeholders that are never populated.

    These values are highly unlikely to change during the runtime of the pipeline.

    :returns: A dictionary mapping telemetry keys to the collected values.
    """
    return {
        "libraries.haystack": __version__,
        "os.containerized": _is_containerized(),
        "os.version": platform.release(),
        "os.family": platform.system(),
        "os.machine": platform.machine(),
        "python.version": platform.python_version(),
        "hardware.cpus": os.cpu_count(),
        "libraries.pytest": _loaded_module_version("pytest"),
        # The package is imported as `IPython`; `sys.modules` keys are case-sensitive, so a lowercase
        # "ipython" lookup can never match.
        "libraries.ipython": _loaded_module_version("IPython"),
        "libraries.colab": _loaded_module_version("google.colab"),
        # NOTE: The following items are set to default values and never populated.
        # We keep them just to make sure we don't break telemetry.
        "hardware.gpus": 0,
        "libraries.transformers": False,
        "libraries.torch": False,
        "libraries.cuda": False,
    }