1#!/usr/bin/env python
2# vim: set fileencoding=utf-8 :
3
4"""Tools for interacting with the running computer or GPU"""
5
6import os
7import subprocess
8import shutil
9
10import psutil
11
12import logging
13
# module-level logger, named after this module
logger = logging.getLogger(__name__)
15
# Resolved once, when this module is imported.  ``shutil.which`` returns
# ``None`` when no ``nvidia-smi`` executable is found on the search path, which
# is the condition ``run_nvidia_smi`` checks before trying to execute it.
_nvidia_smi = shutil.which("nvidia-smi")
"""Location of the nvidia-smi program, if one exists"""
18
19
# 1 << 30 == 2 ** 30 == 1073741824 bytes (strictly speaking, one gibibyte);
# kept as a float so that dividing a byte count by it yields a float
GB = float(1 << 30)
"""The number of bytes in a gigabyte"""
22
23
def run_nvidia_smi(query, rename=None):
    """Returns GPU information from query

    For a comprehensive list of options and help, execute ``nvidia-smi
    --help-query-gpu`` on a host with a GPU

    .. note::

       ``nvidia-smi`` prints one CSV line per GPU.  Only the **first** line of
       its output is parsed, so on multi-GPU hosts the values returned refer
       to the first GPU listed.


    Parameters
    ----------

    query : list
        A list of query strings as defined by ``nvidia-smi --help-query-gpu``

    rename : :py:class:`list`, Optional
        A list of keys to yield in the return value for each entry above.  It
        gives you the opportunity to rewrite some key names for convenience.
        This list, if provided, must be of the same length as ``query``.


    Returns
    -------

    data : :py:class:`tuple`, None
        An ordered dictionary (organized as 2-tuples) containing the queried
        parameters (``rename`` versions).  Returns ``None`` if ``nvidia-smi``
        is not available, cannot be executed, exits with a non-zero status or
        prints no data.  Percentage information is converted to
        :py:class:`float` (the ``%`` sign is dropped), memory information
        (``MiB``) is divided by 1024 (floating-point), anything else is
        returned as a (stripped) string.

    """

    if _nvidia_smi is None:
        return None

    if rename is None:
        rename = query
    else:
        assert len(rename) == len(query)

    # An argument list (no shell involved): the executable path and the query
    # strings reach the program verbatim, whatever characters they contain.
    command = [
        _nvidia_smi,
        "--query-gpu=" + ",".join(query),
        "--format=csv,noheader",
    ]

    try:
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,  # kept apart so messages are never parsed
            universal_newlines=True,
        )
    except OSError as e:
        logger.warning("Cannot execute `%s': %s", _nvidia_smi, e)
        return None

    if result.returncode != 0:
        # e.g. driver not loaded: report as "no GPU information available"
        # instead of parsing the error message as if it were data
        logger.warning(
            "`%s' exited with status %d: %s",
            " ".join(command),
            result.returncode,
            (result.stderr or result.stdout).strip(),
        )
        return None

    lines = [k for k in result.stdout.splitlines() if k.strip()]
    if not lines:
        return None

    # one line per GPU: parse only the first, so that fields belonging to
    # different GPUs are never mixed together
    t_values = []
    for k in lines[0].split(","):
        k = k.strip()
        if k.endswith("%"):
            t_values.append(float(k[:-1].strip()))
        elif k.endswith("MiB"):
            t_values.append(float(k[:-3].strip()) / 1024)
        else:
            t_values.append(k)  # unchanged
    return tuple(zip(rename, t_values))
75
76
def gpu_constants():
    """Returns GPU (static) information using nvidia-smi

    This is a thin wrapper around :py:func:`run_nvidia_smi`, which see for
    operational details.


    Returns
    -------

    data : :py:class:`tuple`, None
        ``None`` if ``nvidia-smi`` is not available.  Otherwise, an ordered
        dictionary (organized as 2-tuples) with these ``nvidia-smi`` query
        results:

        * ``gpu_name``, as ``gpu_name`` (:py:class:`str`)
        * ``driver_version``, as ``gpu_driver_version`` (:py:class:`str`)
        * ``memory.total``, as ``gpu_memory_total`` (transformed to gigabytes,
          :py:class:`float`)

    """

    # what is asked to nvidia-smi ...
    queries = ("gpu_name", "driver_version", "memory.total")
    # ... and the key under which each answer is reported
    keys = ("gpu_name", "gpu_driver_version", "gpu_memory_total")

    return run_nvidia_smi(queries, keys)
101
102
def gpu_log():
    """Returns GPU information about current non-static status using nvidia-smi

    This is a thin wrapper around :py:func:`run_nvidia_smi`, which see for
    operational details.


    Returns
    -------

    data : :py:class:`tuple`, None
        ``None`` if ``nvidia-smi`` is not available.  Otherwise, an ordered
        dictionary (organized as 2-tuples) with these ``nvidia-smi`` query
        results:

        * ``memory.used``, as ``gpu_memory_used`` (transformed to gigabytes,
          :py:class:`float`)
        * ``memory.free``, as ``gpu_memory_free`` (transformed to gigabytes,
          :py:class:`float`)
        * ``utilization.memory``, as ``gpu_memory_percent``,
          (:py:class:`float`, in percent)
        * ``utilization.gpu``, as ``gpu_percent``,
          (:py:class:`float`, in percent)

    """

    # (nvidia-smi query, key used in the returned value), in output order
    fields = (
        ("memory.used", "gpu_memory_used"),
        ("memory.free", "gpu_memory_free"),
        ("utilization.memory", "gpu_memory_percent"),
        ("utilization.gpu", "gpu_percent"),
    )

    queries = tuple(query for query, _ in fields)
    keys = tuple(key for _, key in fields)
    return run_nvidia_smi(queries, keys)
136
137
# Filled in (and rebound) by ``cpu_log()``: the first entry is the
# ``psutil.Process`` of the current process, the remaining entries are its
# children.  The objects are kept between calls so that their
# ``cpu_percent()`` initialization is preserved.
_CLUSTER = []
"""List of processes currently being monitored"""
140
141
def cpu_constants():
    """Returns static CPU information about the current system.

    Both values are obtained through ``psutil``.


    Returns
    -------

    data : tuple
        An ordered dictionary (organized as 2-tuples) containing these entries:

        0. ``cpu_memory_total`` (:py:class:`float`): total memory available,
           in gigabytes
        1. ``cpu_count`` (:py:class:`int`): number of logical CPUs available

    """

    total_memory = psutil.virtual_memory().total / GB  # bytes -> gigabytes
    logical_cpus = psutil.cpu_count(logical=True)

    return (
        ("cpu_memory_total", total_memory),
        ("cpu_count", logical_cpus),
    )
162
163
def cpu_log():
    """Returns process (+child) information using ``psutil``.

    This call examines the current process plus any spawn child and returns the
    combined resource usage summary for the process group.

    The list of monitored processes is kept in the module-level ``_CLUSTER``
    variable between calls, so that the ``cpu_percent()`` reference point of
    each process survives from one call to the next.  The list is rebuilt from
    scratch if it is empty or if the current process is not the one stored in
    its first position.


    Returns
    -------

    data : tuple
        An ordered dictionary (organized as 2-tuples) containing these entries:

        0. ``cpu_memory_used`` (:py:class:`float`): total memory used from
           the system, in gigabytes
        1. ``cpu_rss`` (:py:class:`float`): RAM currently used by
           process and children, in gigabytes
        2. ``cpu_vms`` (:py:class:`float`): total memory (RAM + swap) currently
           used by process and children, in gigabytes
        3. ``cpu_percent`` (:py:class:`float`): percentage of the total CPU
           used by this process and children (recursively) since last call
           (first time called should be ignored). This number depends on the
           number of CPUs in the system and can be greater than 100%
        4. ``cpu_processes`` (:py:class:`int`): total number of processes
           including self and children (recursively).  Processes that
           disappeared while being inspected are not counted
        5. ``cpu_open_files`` (:py:class:`int`): total number of open files by
           self and children

    """

    global _CLUSTER

    # raised by psutil when a process has exited (or became a zombie) between
    # the moment it was listed and the moment it is queried
    gone = (psutil.NoSuchProcess, psutil.ZombieProcess)

    this = psutil.Process()
    if (not _CLUSTER) or (_CLUSTER[0] != this):  # initialization
        _CLUSTER = [this] + this.children(recursive=True)
        fresh = _CLUSTER
    else:
        # refresh the list of children, re-using the objects already stored
        # (they carry the cpu_percent() initialization) for every child that
        # is still running, and dropping those that are gone
        current = set(_CLUSTER[0].children(recursive=True))
        stored = set(_CLUSTER[1:])
        kept = [k for k in _CLUSTER[1:] if k in current]
        fresh = [k for k in current if k not in stored]
        _CLUSTER = _CLUSTER[:1] + kept + fresh

    # touch cpu_percent() at least once for every newly monitored process
    for k in fresh:
        try:
            k.cpu_percent(interval=None)
        except gone:
            pass  # skipped by the sampling loop below as well

    rss = vms = open_files = processes = 0
    percent = 0.0
    for k in _CLUSTER:
        try:
            memory = k.memory_info()
            usage = k.cpu_percent(interval=None)
            files = len(k.open_files())
        except gone:
            # a vanished child must not abort the whole sample; it is removed
            # from the list at the next call
            continue
        rss += memory.rss
        vms += memory.vms
        percent += usage
        open_files += files
        processes += 1

    return (
        ("cpu_memory_used", psutil.virtual_memory().used / GB),
        ("cpu_rss", rss / GB),
        ("cpu_vms", vms / GB),
        ("cpu_percent", percent),
        ("cpu_processes", processes),
        ("cpu_open_files", open_files),
    )