Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
2# vim: set fileencoding=utf-8 :
4"""Tools for interacting with the running computer or GPU"""
6import logging
7import shutil
8import subprocess
10import psutil
logger = logging.getLogger(__name__)

# full path to the nvidia-smi binary, or None when not installed — every GPU
# query below is gated on this value
_nvidia_smi = shutil.which("nvidia-smi")
"""Location of the nvidia-smi program, if one exists"""

# NOTE(review): 2**30 is strictly a gibibyte (GiB); the module uses it as the
# divisor for all "gigabyte" figures it reports
GB = float(2 ** 30)
"""The number of bytes in a gigabyte"""
def run_nvidia_smi(query, rename=None):
    """Returns GPU information from query

    For a comprehensive list of options and help, execute ``nvidia-smi
    --help-query-gpu`` on a host with a GPU

    Parameters
    ----------

    query : list
        A list of query strings as defined by ``nvidia-smi --help-query-gpu``

    rename : :py:class:`list`, Optional
        A list of keys to yield in the return value for each entry above.  It
        gives you the opportunity to rewrite some key names for convenience.
        This list, if provided, must be of the same length as ``query``.

    Returns
    -------

    data : :py:class:`tuple`, None
        An ordered dictionary (organized as 2-tuples) containing the queried
        parameters (``rename`` versions).  If ``nvidia-smi`` is not available,
        returns ``None``.  Percentage information is left alone,
        memory information is transformed to gigabytes (floating-point).

    Raises
    ------

    ValueError
        If ``rename`` is provided with a length different from ``query``

    """

    if _nvidia_smi is None:
        return None

    if rename is None:
        rename = query
    elif len(rename) != len(query):
        # a silent mismatch would mislabel values through zip() below;
        # raise instead of assert so the check survives ``python -O``
        raise ValueError(
            "'rename' must have the same length as 'query' (%d != %d)"
            % (len(rename), len(query))
        )

    # run nvidia-smi with an argument list (no shell), which avoids any
    # shell-quoting issues with the interpolated query string
    output = subprocess.run(
        [
            _nvidia_smi,
            "--query-gpu=%s" % ",".join(query),
            "--format=csv,noheader",
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    ).stdout

    # nvidia-smi prints one CSV row per GPU; parse only the first row (a
    # naive comma-split of the whole output would merge rows on multi-GPU
    # hosts and mislabel every value after the first row)
    first_row = output.strip().split("\n")[0]
    values = [k.strip() for k in first_row.split(",")]
    t_values = []
    for k in values:
        if k.endswith("%"):
            # e.g. "87 %" -> 87.0 (kept as a percentage)
            t_values.append(float(k[:-1].strip()))
        elif k.endswith("MiB"):
            # e.g. "1024 MiB" -> 1.0 (converted to gigabytes)
            t_values.append(float(k[:-3].strip()) / 1024)
        else:
            t_values.append(k)  # unchanged
    return tuple(zip(rename, t_values))
def gpu_constants():
    """Returns GPU (static) information using nvidia-smi

    See :py:func:`run_nvidia_smi` for operational details.

    Returns
    -------

    data : :py:class:`tuple`, None
        If ``nvidia-smi`` is not available, returns ``None``, otherwise, we
        return an ordered dictionary (organized as 2-tuples) containing the
        following ``nvidia-smi`` query information:

        * ``gpu_name``, as ``gpu_name`` (:py:class:`str`)
        * ``driver_version``, as ``gpu_driver_version`` (:py:class:`str`)
        * ``memory.total``, as ``gpu_memory_total`` (transformed to gigabytes,
          :py:class:`float`)

    """

    # static attributes to query, and the labels they are reported under
    queries = ("gpu_name", "driver_version", "memory.total")
    labels = ("gpu_name", "gpu_driver_version", "gpu_memory_total")
    return run_nvidia_smi(queries, labels)
def gpu_log():
    """Returns GPU information about current non-static status using nvidia-smi

    See :py:func:`run_nvidia_smi` for operational details.

    Returns
    -------

    data : :py:class:`tuple`, None
        If ``nvidia-smi`` is not available, returns ``None``, otherwise, we
        return an ordered dictionary (organized as 2-tuples) containing the
        following ``nvidia-smi`` query information:

        * ``memory.used``, as ``gpu_memory_used`` (transformed to gigabytes,
          :py:class:`float`)
        * ``memory.free``, as ``gpu_memory_free`` (transformed to gigabytes,
          :py:class:`float`)
        * ``100*memory.used/memory.total``, as ``gpu_memory_percent``,
          (:py:class:`float`, in percent)
        * ``utilization.gpu``, as ``gpu_percent``,
          (:py:class:`float`, in percent)

    """

    retval = run_nvidia_smi(
        (
            "memory.total",
            "memory.used",
            "memory.free",
            "utilization.gpu",
        ),
        (
            "gpu_memory_total",
            "gpu_memory_used",
            "gpu_memory_free",
            "gpu_percent",
        ),
    )

    # honour the documented contract: when nvidia-smi is not installed,
    # run_nvidia_smi() returns None and indexing it below would raise
    # TypeError -- propagate the None instead
    if retval is None:
        return None

    # re-compose the output to generate expected values
    return (
        retval[1],  # gpu_memory_used
        retval[2],  # gpu_memory_free
        ("gpu_memory_percent", 100 * (retval[1][1] / retval[0][1])),
        retval[3],  # gpu_percent
    )
# module-level cache used by cpu_log() to remember the monitored process
# group between calls, so that per-process cpu_percent() readings measure
# usage since the previous call instead of starting from scratch each time
_CLUSTER = []
"""List of processes currently being monitored"""
def cpu_constants():
    """Returns static CPU information about the current system.

    Returns
    -------

    data : tuple
        An ordered dictionary (organized as 2-tuples) containing these entries:

        0. ``cpu_memory_total`` (:py:class:`float`): total memory available,
           in gigabytes
        1. ``cpu_count`` (:py:class:`int`): number of logical CPUs available

    """

    total_ram_bytes = psutil.virtual_memory().total
    logical_cpus = psutil.cpu_count(logical=True)
    return (
        ("cpu_memory_total", total_ram_bytes / GB),
        ("cpu_count", logical_cpus),
    )
def cpu_log():
    """Returns process (+child) information using ``psutil``.

    This call examines the current process plus any spawn child and returns the
    combined resource usage summary for the process group.

    Returns
    -------

    data : tuple
        An ordered dictionary (organized as 2-tuples) containing these entries:

        0. ``cpu_memory_used`` (:py:class:`float`): total memory used from
           the system, in gigabytes
        1. ``cpu_rss`` (:py:class:`float`): RAM currently used by
           process and children, in gigabytes
        2. ``cpu_vms`` (:py:class:`float`): total memory (RAM + swap) currently
           used by process and children, in gigabytes
        3. ``cpu_percent`` (:py:class:`float`): percentage of the total CPU
           used by this process and children (recursively) since last call
           (first time called should be ignored).  This number depends on the
           number of CPUs in the system and can be greater than 100%
        4. ``cpu_processes`` (:py:class:`int`): total number of processes
           including self and children (recursively)
        5. ``cpu_open_files`` (:py:class:`int`): total number of open files by
           self and children

    """

    global _CLUSTER
    if (not _CLUSTER) or (_CLUSTER[0] != psutil.Process()):  # initialization
        this = psutil.Process()
        _CLUSTER = [this] + this.children(recursive=True)
        # touch cpu_percent() at least once for every process, so the next
        # call measures utilization accumulated since this point
        for proc in _CLUSTER:
            proc.cpu_percent(interval=None)
    else:
        # refresh the monitored process list while preserving the
        # cpu_percent() initialization of processes we already track:
        # * children still alive are the INTERSECTION of stored and current
        #   (the previous difference-based logic kept dead children and
        #   dropped live ones, corrupting the statistics)
        # * newly-spawned children get their cpu_percent() touched once
        # recursive=True matches the initialization branch above, so
        # grandchildren are tracked consistently on both paths
        stored_children = set(_CLUSTER[1:])
        current_children = set(_CLUSTER[0].children(recursive=True))
        keep_children = stored_children & current_children
        new_children = current_children - stored_children
        for proc in new_children:
            proc.cpu_percent(interval=None)
        _CLUSTER = _CLUSTER[:1] + list(keep_children) + list(new_children)

    # NOTE(review): the calls below may raise psutil.NoSuchProcess if a child
    # exits between the refresh above and the queries here -- confirm whether
    # callers expect best-effort behavior before adding a guard
    memory_info = [proc.memory_info() for proc in _CLUSTER]

    return (
        ("cpu_memory_used", psutil.virtual_memory().used / GB),
        ("cpu_rss", sum(k.rss for k in memory_info) / GB),
        ("cpu_vms", sum(k.vms for k in memory_info) / GB),
        ("cpu_percent", sum(k.cpu_percent(interval=None) for k in _CLUSTER)),
        ("cpu_processes", len(_CLUSTER)),
        ("cpu_open_files", sum(len(k.open_files()) for k in _CLUSTER)),
    )