_environment.py
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import os
import platform
import sys
from typing import Any

from haystack.version import __version__

# This value cannot change during the lifetime of the process
_IS_DOCKER_CACHE = None


def _str_in_any_line_of_file(s: str, path: str) -> bool:
    """
    Check whether the substring `s` occurs in at least one line of the file at `path`.

    The file is read as UTF-8 with undecodable bytes replaced, so unexpected bytes cannot abort the scan.
    A file that cannot be opened or read is treated as not containing the marker.

    :param s: The substring to look for.
    :param path: The path of the file to scan.
    :returns: `True` if any line contains `s`, `False` otherwise (including when the file is unreadable).
    """
    try:
        with open(path, encoding="utf-8", errors="replace") as f:
            return any(s in line for line in f)
    except OSError:
        # The file may disappear or be unreadable between the caller's existence check and this open;
        # this is a best-effort probe, so report "not found" instead of raising.
        return False


def _in_podman() -> bool:
    """
    Check if the code is running in a Podman container.

    Podman run would create the file /run/.containerenv, see:
    https://github.com/containers/podman/blob/main/docs/source/markdown/podman-run.1.md.in#L31
    """
    return os.path.exists("/run/.containerenv")


def _has_dockerenv() -> bool:
    """
    Check if the code is running in a Docker container.

    This might not work anymore at some point (even if it's been a while now), see:
    https://github.com/moby/moby/issues/18355#issuecomment-220484748
    """
    return os.path.exists("/.dockerenv")


def _has_docker_cgroup_v1() -> bool:
    """
    Check if any line of /proc/self/cgroup mentions "docker".

    This only works with cgroups v1.
    """
    path = "/proc/self/cgroup"  # 'self' should be always symlinked to the actual PID
    return os.path.isfile(path) and _str_in_any_line_of_file("docker", path)


def _has_docker_cgroup_v2() -> bool:
    """
    Check if the code is running in a Docker container using the cgroups v2 version.

    inspired from: https://github.com/jenkinsci/docker-workflow-plugin/blob/master/src/main/java/org/jenkinsci/plugins/docker/workflow/client/DockerClient.java
    """
    path = "/proc/self/mountinfo"  # 'self' should be always symlinked to the actual PID
    return os.path.isfile(path) and _str_in_any_line_of_file("/docker/containers/", path)


def _is_containerized() -> bool | None:
    """
    Tell whether the process appears to run inside a Podman or Docker container.

    The probes run only on the first call; the result is stored in `_IS_DOCKER_CACHE` and reused afterwards.

    This code is based on the popular 'is-docker' package for node.js
    """
    global _IS_DOCKER_CACHE

    if _IS_DOCKER_CACHE is None:
        _IS_DOCKER_CACHE = _in_podman() or _has_dockerenv() or _has_docker_cgroup_v1() or _has_docker_cgroup_v2()

    return _IS_DOCKER_CACHE


def _loaded_module_version(module_name: str) -> Any:
    """
    Return the `__version__` of a module that has already been imported, without importing it.

    :param module_name: The exact (case-sensitive) key of the module in `sys.modules`.
    :returns: The module's `__version__`, or `False` if the module is not loaded, its `sys.modules` entry
        is `None`, or it does not define `__version__`.
    """
    # `sys.modules.get` yields None for a missing key, and getattr on None falls through to the default too.
    return getattr(sys.modules.get(module_name), "__version__", False)


def collect_system_specs() -> dict[str, Any]:
    """
    Collects meta-data about the setup that is used with Haystack.

    Data collected includes: operating system, machine type, python version, Haystack version, number of CPUs,
    whether the process runs in a container, and the versions of pytest, IPython and google.colab if they
    are already imported.

    The GPU, transformers, torch and CUDA entries are fixed placeholder values kept for telemetry compatibility.

    These values are highly unlikely to change during the runtime of the pipeline. Only the container
    detection is cached; the other values are read again on every call.

    :returns: A dictionary mapping telemetry keys to the collected values.
    """
    return {
        "libraries.haystack": __version__,
        "os.containerized": _is_containerized(),
        "os.version": platform.release(),
        "os.family": platform.system(),
        "os.machine": platform.machine(),
        "python.version": platform.python_version(),
        "hardware.cpus": os.cpu_count(),
        "libraries.pytest": _loaded_module_version("pytest"),
        # The package is registered in sys.modules as "IPython"; keys are case-sensitive.
        "libraries.ipython": _loaded_module_version("IPython"),
        "libraries.colab": _loaded_module_version("google.colab"),
        # NOTE: The following items are set to default values and never populated.
        # We keep them just to make sure we don't break telemetry.
        "hardware.gpus": 0,
        "libraries.transformers": False,
        "libraries.torch": False,
        "libraries.cuda": False,
    }