Coverage for bob/med/tb/utils/resources.py: 64% (45 statements)


#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

"""Tools for interacting with the running computer or GPU"""

import os
import subprocess
import shutil

import psutil

import logging

logger = logging.getLogger(__name__)

_nvidia_smi = shutil.which("nvidia-smi")
"""Location of the nvidia-smi program, if one exists"""


GB = float(2 ** 30)
"""The number of bytes in a gigabyte"""

def run_nvidia_smi(query, rename=None):
    """Returns GPU information from a query

    For a comprehensive list of options and help, execute ``nvidia-smi
    --help-query-gpu`` on a host with a GPU


    Parameters
    ----------

    query : list
        A list of query strings as defined by ``nvidia-smi --help-query-gpu``

    rename : :py:class:`list`, optional
        A list of keys to yield in the return value for each entry above.  It
        gives you the opportunity to rewrite some key names for convenience.
        This list, if provided, must be of the same length as ``query``.


    Returns
    -------

    data : :py:class:`tuple`, None
        A tuple of ``(name, value)`` pairs containing the queried parameters
        (under the ``rename`` keys, if provided).  If ``nvidia-smi`` is not
        available, returns ``None``.  Percentage values are converted to
        floats (the trailing ``%`` is stripped); memory values are converted
        to gigabytes (floating-point).

    """

    if _nvidia_smi is not None:

        if rename is None:
            rename = query
        else:
            assert len(rename) == len(query)

        values = subprocess.getoutput(
            "%s --query-gpu=%s --format=csv,noheader"
            % (_nvidia_smi, ",".join(query))
        )
        # note: parsing assumes a single CSV output line (a single GPU)
        values = [k.strip() for k in values.split(",")]
        t_values = []
        for k in values:
            if k.endswith("%"):
                t_values.append(float(k[:-1].strip()))
            elif k.endswith("MiB"):
                t_values.append(float(k[:-3].strip()) / 1024)
            else:
                t_values.append(k)  # unchanged (e.g. the GPU name)
        return tuple(zip(rename, t_values))
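
# A minimal usage sketch (illustrative only): querying a single metric with
# run_nvidia_smi() and falling back gracefully when nvidia-smi is absent.
# The helper name below is hypothetical, not part of the original module.
def _example_gpu_memory_total():
    # returns the total GPU memory in gigabytes, or None on a GPU-less host
    data = run_nvidia_smi(("memory.total",), ("gpu_memory_total",))
    if data is None:
        return None
    return dict(data)["gpu_memory_total"]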

def gpu_constants():
    """Returns GPU (static) information using nvidia-smi

    See :py:func:`run_nvidia_smi` for operational details.

    Returns
    -------

    data : :py:class:`tuple`, None
        If ``nvidia-smi`` is not available, returns ``None``; otherwise,
        returns a tuple of ``(name, value)`` pairs containing the following
        ``nvidia-smi`` query information:

        * ``gpu_name``, as ``gpu_name`` (:py:class:`str`)
        * ``driver_version``, as ``gpu_driver_version`` (:py:class:`str`)
        * ``memory.total``, as ``gpu_memory_total`` (transformed to gigabytes,
          :py:class:`float`)

    """

    return run_nvidia_smi(
        ("gpu_name", "driver_version", "memory.total"),
        ("gpu_name", "gpu_driver_version", "gpu_memory_total"),
    )
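
# Illustrative sketch (hypothetical helper, not in the original API): logging
# the static GPU constants once, e.g. at the start of a training job.
def _example_log_gpu_constants():
    constants = gpu_constants()
    if constants is None:
        logger.info("nvidia-smi not found: no GPU constants to report")
    else:
        for key, value in constants:
            logger.info("%s: %s", key, value)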

def gpu_log():
    """Returns GPU information about the current (non-static) status using nvidia-smi

    See :py:func:`run_nvidia_smi` for operational details.

    Returns
    -------

    data : :py:class:`tuple`, None
        If ``nvidia-smi`` is not available, returns ``None``; otherwise,
        returns a tuple of ``(name, value)`` pairs containing the following
        ``nvidia-smi`` query information:

        * ``memory.used``, as ``gpu_memory_used`` (transformed to gigabytes,
          :py:class:`float`)
        * ``memory.free``, as ``gpu_memory_free`` (transformed to gigabytes,
          :py:class:`float`)
        * ``utilization.memory``, as ``gpu_memory_percent``
          (:py:class:`float`, in percent)
        * ``utilization.gpu``, as ``gpu_percent``
          (:py:class:`float`, in percent)

    """

    return run_nvidia_smi(
        ("memory.used", "memory.free", "utilization.memory", "utilization.gpu"),
        (
            "gpu_memory_used",
            "gpu_memory_free",
            "gpu_memory_percent",
            "gpu_percent",
        ),
    )
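
# Illustrative sketch (hypothetical helper, not in the original API): sampling
# the dynamic GPU status a few times, e.g. to watch memory pressure during a
# job.  The sample count and interval are arbitrary example values.
def _example_watch_gpu(samples=3, interval=1.0):
    import time  # local import to keep this sketch self-contained

    for _ in range(samples):
        status = gpu_log()
        if status is None:
            logger.info("nvidia-smi not found: skipping GPU monitoring")
            return
        logger.info("%s", dict(status))
        time.sleep(interval)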

_CLUSTER = []
"""List of processes currently being monitored"""

def cpu_constants():
    """Returns static CPU information about the current system.


    Returns
    -------

    data : tuple
        A tuple of ``(name, value)`` pairs containing these entries:

        0. ``cpu_memory_total`` (:py:class:`float`): total memory available,
           in gigabytes
        1. ``cpu_count`` (:py:class:`int`): number of logical CPUs available

    """

    return (
        ("cpu_memory_total", psutil.virtual_memory().total / GB),
        ("cpu_count", psutil.cpu_count(logical=True)),
    )
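
# Illustrative sketch (hypothetical helper, not in the original API): merging
# the static CPU and GPU information into a single dictionary, e.g. for a
# run header.
def _example_static_snapshot():
    snapshot = dict(cpu_constants())
    gpu = gpu_constants()
    if gpu is not None:
        snapshot.update(dict(gpu))
    return snapshot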

def cpu_log():
    """Returns process (+children) information using ``psutil``.

    This call examines the current process plus any spawned children and
    returns the combined resource usage summary for the process group.


    Returns
    -------

    data : tuple
        A tuple of ``(name, value)`` pairs containing these entries:

        0. ``cpu_memory_used`` (:py:class:`float`): total memory used on the
           system, in gigabytes
        1. ``cpu_rss`` (:py:class:`float`): RAM currently used by the process
           and its children, in gigabytes
        2. ``cpu_vms`` (:py:class:`float`): total memory (RAM + swap) currently
           used by the process and its children, in gigabytes
        3. ``cpu_percent`` (:py:class:`float`): percentage of the total CPU
           used by this process and its children (recursively) since the last
           call (the first reading should be ignored).  This number depends on
           the number of CPUs in the system and can be greater than 100%
        4. ``cpu_processes`` (:py:class:`int`): total number of processes,
           including self and children (recursively)
        5. ``cpu_open_files`` (:py:class:`int`): total number of open files by
           self and children

    """

    global _CLUSTER
    if (not _CLUSTER) or (_CLUSTER[0] != psutil.Process()):  # initialization
        this = psutil.Process()
        _CLUSTER = [this] + this.children(recursive=True)
        # touch cpu_percent() at least once for all monitored processes
        [k.cpu_percent(interval=None) for k in _CLUSTER]
    else:
        # check all cluster components and update the process list, so the
        # cpu_percent() initialization of already-monitored processes is kept
        stored_children = set(_CLUSTER[1:])
        current_children = set(_CLUSTER[0].children(recursive=True))
        keep_children = stored_children & current_children  # still running
        new_children = current_children - stored_children  # newly spawned
        [k.cpu_percent(interval=None) for k in new_children]
        _CLUSTER = _CLUSTER[:1] + list(keep_children) + list(new_children)

    memory_info = [k.memory_info() for k in _CLUSTER]

    return (
        ("cpu_memory_used", psutil.virtual_memory().used / GB),
        ("cpu_rss", sum(k.rss for k in memory_info) / GB),
        ("cpu_vms", sum(k.vms for k in memory_info) / GB),
        ("cpu_percent", sum(k.cpu_percent(interval=None) for k in _CLUSTER)),
        ("cpu_processes", len(_CLUSTER)),
        ("cpu_open_files", sum(len(k.open_files()) for k in _CLUSTER)),
    )
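
# Illustrative sketch (hypothetical helper, not in the original API): polling
# cpu_log() from a simple monitoring loop.  As the docstring notes, the first
# cpu_percent reading is meaningless and is discarded here; the number of
# iterations and the interval are arbitrary example values.
def _example_monitor_cpu(iterations=5, interval=2.0):
    import time  # local import to keep this sketch self-contained

    cpu_log()  # prime the cpu_percent() counters; ignore this first reading
    readings = []
    for _ in range(iterations):
        time.sleep(interval)
        readings.append(dict(cpu_log()))
        logger.info("%s", readings[-1])
    return readings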