/ tests / system_metrics / test_collect_metrics.py
test_collect_metrics.py
 1  from mlflow.system_metrics.metrics.cpu_monitor import CPUMonitor
 2  from mlflow.system_metrics.metrics.disk_monitor import DiskMonitor
 3  from mlflow.system_metrics.metrics.gpu_monitor import GPUMonitor
 4  from mlflow.system_metrics.metrics.network_monitor import NetworkMonitor
 5  
 6  
 7  def test_cpu_monitor():
 8      cpu_monitor = CPUMonitor()
 9      cpu_monitor.collect_metrics()
10  
11      assert isinstance(cpu_monitor.metrics["cpu_utilization_percentage"], list)
12      assert isinstance(cpu_monitor.metrics["system_memory_usage_megabytes"], list)
13  
14      cpu_monitor.collect_metrics()
15      aggregated_metrics = cpu_monitor.aggregate_metrics()
16      assert isinstance(aggregated_metrics["cpu_utilization_percentage"], float)
17      assert isinstance(aggregated_metrics["system_memory_usage_megabytes"], float)
18  
19      cpu_monitor.clear_metrics()
20      assert cpu_monitor.metrics == {}
21  
22  
def test_gpu_monitor():
    """Run ``GPUMonitor`` through one collect / aggregate / clear cycle for GPU 0.

    Checks that collected values are stored as lists, that aggregated values are floats,
    and that clearing leaves ``metrics`` with no keys. The test returns early, without
    asserting anything, when ``GPUMonitor`` cannot be constructed.
    """
    try:
        gpu_monitor = GPUMonitor()
    except Exception:
        # If nvidia-ml-py is not installed, or there is no GPU, then `gpu_monitor` creation
        # will fail. In this case we skip the test.
        return

    metric_names = (
        "gpu_0_memory_usage_percentage",
        "gpu_0_memory_usage_megabytes",
        "gpu_0_utilization_percentage",
        "gpu_0_power_usage_watts",
        "gpu_0_power_usage_percentage",
    )

    gpu_monitor.collect_metrics()

    # After a collection, each metric is expected to be held as a list.
    for metric_name in metric_names:
        assert isinstance(gpu_monitor.metrics[metric_name], list)

    # Collect once more before aggregating.
    gpu_monitor.collect_metrics()
    aggregated_metrics = gpu_monitor.aggregate_metrics()
    for metric_name in metric_names:
        assert isinstance(aggregated_metrics[metric_name], float)

    gpu_monitor.clear_metrics()
    # `keys` has to be called: passing the bound method itself to `len()` raises TypeError.
    assert len(gpu_monitor.metrics.keys()) == 0
49  
50  
def test_disk_monitor():
    """Run ``DiskMonitor`` through one collect / aggregate / clear cycle.

    Checks that collected values are stored as lists, that aggregated values are floats,
    and that clearing leaves ``metrics`` with no keys.
    """
    metric_names = (
        "disk_usage_percentage",
        "disk_usage_megabytes",
        "disk_available_megabytes",
    )

    monitor = DiskMonitor()
    monitor.collect_metrics()

    # Something must have been recorded, and each metric is expected to be held as a list.
    assert len(monitor.metrics.keys()) > 0
    for metric_name in metric_names:
        assert isinstance(monitor.metrics[metric_name], list)

    # Collect once more before aggregating.
    monitor.collect_metrics()
    aggregated = monitor.aggregate_metrics()
    assert len(aggregated.keys()) > 0

    for metric_name in metric_names:
        assert isinstance(aggregated[metric_name], float)

    monitor.clear_metrics()
    assert len(monitor.metrics.keys()) == 0
70  
71  
def test_network_monitor():
    """Run ``NetworkMonitor`` through one collect / aggregate / clear cycle.

    Checks that both the collected and the aggregated values are floats, and that
    clearing leaves ``metrics`` with no keys.
    """
    metric_names = ("network_receive_megabytes", "network_transmit_megabytes")

    monitor = NetworkMonitor()
    monitor.collect_metrics()

    assert len(monitor.metrics.keys()) > 0
    # NOTE(review): the collected values are asserted to be floats here, not lists as in
    # the other monitor tests -- presumably network metrics hold a single value; confirm
    # against NetworkMonitor.
    for metric_name in metric_names:
        assert isinstance(monitor.metrics[metric_name], float)

    # Collect once more before aggregating.
    monitor.collect_metrics()
    aggregated = monitor.aggregate_metrics()
    assert len(aggregated.keys()) > 0

    for metric_name in metric_names:
        assert isinstance(aggregated[metric_name], float)

    monitor.clear_metrics()
    assert len(monitor.metrics.keys()) == 0