Coverage for src/bob/measure/utils.py: 79%

1""" utility functions for bob.measure """

3import logging

5import numpy

6import scipy.stats

8LOGGER = logging.getLogger(__name__)

def remove_nan(scores):
    """Remove NaN(s) from the given array.

    Parameters
    ----------
    scores : :py:class:`numpy.ndarray`
        Array of scores. Any array-like accepted by :py:func:`numpy.asarray`
        (e.g. a list of floats) is converted first.

    Returns
    -------
    :py:class:`numpy.ndarray`
        the scores without NaN(s)
    :py:class:`int`
        number of NaN(s) in the input array
    :py:class:`int`
        length of the input array
    """
    # No copy is made when ``scores`` already is an ndarray.
    scores = numpy.asarray(scores)
    nans = numpy.isnan(scores)
    # Count inside numpy rather than iterating element by element with the
    # builtin ``sum``; ``int`` makes the result a plain Python integer.
    sum_nans = int(numpy.count_nonzero(nans))
    total = len(scores)
    if sum_nans > 0:
        # Lazy %-style arguments: the message is only built if it is emitted.
        LOGGER.warning("Found %d NaNs in %d scores", sum_nans, total)
    return scores[~nans], sum_nans, total

def get_fta(scores):
    """Calculate the Failure To Acquire (FtA) rate.

    The FtA rate is the proportion of NaN(s) in the input scores.

    Parameters
    ----------
    scores : tuple
        Tuple of (``negative``, ``positive``) :py:class:`numpy.ndarray`.

    Returns
    -------
    (:py:class:`numpy.ndarray`, :py:class:`numpy.ndarray`)
        the (``negative``, ``positive``) scores without NaN(s)
    :py:class:`float`
        failure to acquire rate; ``0.0`` when both input arrays are empty
    """
    neg, neg_nans, neg_total = remove_nan(scores[0])
    pos, pos_nans, pos_total = remove_nan(scores[1])

    # Accumulate as floats so the ratio below is a true division.
    fta_sum = 0.0 + neg_nans + pos_nans
    fta_total = 0.0 + neg_total + pos_total

    if fta_total == 0:
        # No scores at all: report a zero rate instead of dividing by zero.
        return ((neg, pos), 0.0)
    return ((neg, pos), fta_sum / fta_total)

def get_fta_list(scores):
    """Compute the FtA rate of every entry in a list of scores.

    Parameters
    ----------
    scores : :any:`list`
        list of scores; each entry is passed to :py:func:`get_fta`

    Returns
    -------
    neg_list : :any:`list`
        list of negatives
    pos_list : :any:`list`
        list of positives
    fta_list : :any:`list`
        list of FTAs

    Raises
    ------
    ValueError
        if an entry is ``None`` (or no negatives could be obtained for it)
    """
    # Placeholder result standing in for an entry that is ``None``.
    missing = ((None, None), None)

    negatives, positives, rates = [], [], []
    for entry in scores:
        (neg, pos), fta = missing if entry is None else get_fta(entry)
        if neg is None:
            raise ValueError("While loading dev-score file")
        negatives.append(neg)
        positives.append(pos)
        rates.append(fta)
    return (negatives, positives, rates)

def get_thres(criter, neg, pos, far=None):
    """Compute the threshold for the given scores and criterion.

    Parameters
    ----------
    criter : str
        Criterion (``eer``, ``min-hter`` or ``far``)
    neg : :py:class:`numpy.ndarray`
        array of negative scores
    pos : :py:class:`numpy.ndarray`
        array of positive scores
    far : :obj:`float`, optional
        FAR value passed to ``far_threshold``; required when ``criter`` is
        ``far``

    Returns
    -------
    :py:obj:`float`
        threshold

    Raises
    ------
    ValueError
        if ``criter`` is not one of the supported values, or if ``criter``
        is ``far`` and no ``far`` value is given
    """
    # The threshold functions are imported only in the branch that uses them.
    if criter == "eer":
        from . import eer_threshold

        return eer_threshold(neg, pos)

    if criter == "min-hter":
        from . import min_hter_threshold

        return min_hter_threshold(neg, pos)

    if criter != "far":
        raise ValueError("Incorrect plotting criterion: ``%s``" % criter)

    if far is None:
        raise ValueError(
            "FAR value must be provided through "
            "``--far-value`` or ``--fpr-value`` option."
        )

    from . import far_threshold

    return far_threshold(neg, pos, far)

129

130

def get_colors(n):
    """Get a list of matplotlib colors.

    Parameters
    ----------
    n : :obj:`int`
        Number of colors needed

    Returns
    -------
    :any:`list`
        list of colors. For ``n <= 10`` this is always the ten color-cycle
        names ``"C0"`` ... ``"C9"``; for larger ``n`` it is ``n + 1`` RGBA
        tuples sampled evenly from the ``magma`` colormap.
    """
    if n > 10:
        # matplotlib is only imported when a sampled colormap is required.
        from matplotlib import pyplot

        # ``pyplot.cm.get_cmap`` was removed in matplotlib 3.9;
        # ``pyplot.get_cmap`` is available in both older and newer releases.
        cmap = pyplot.get_cmap("magma")
        return [cmap(i) for i in numpy.linspace(0, 1.0, n + 1)]

    return ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"]

152

153

def get_linestyles(n, on=True):
    """Get a list of matplotlib linestyles.

    Parameters
    ----------
    n : :obj:`int`
        Minimum number of linestyles to output
    on : :obj:`bool`, optional
        When false, a list of ``n`` ``None`` values is returned instead of
        dash patterns.

    Returns
    -------
    :any:`list`
        list of linestyles. The 13 base patterns are repeated by doubling
        until there are at least ``n`` entries, so the list may be longer
        than ``n``.
    """
    if not on:
        return [None] * n

    # (offset, on/off dash sequence) tuples
    styles = [
        (0, ()),  # solid
        (0, (1, 1)),  # densely dotted
        (0, (5, 5)),  # dashed
        (0, (5, 1)),  # densely dashed
        (0, (3, 1, 1, 1, 1, 1)),  # densely dashdotdotted
        (0, (3, 10, 1, 10, 1, 10)),  # loosely dashdotdotted
        (0, (3, 5, 1, 5, 1, 5)),  # dashdotdotted
        (0, (3, 1, 1, 1)),  # densely dashdotted
        (0, (1, 5)),  # dotted
        (0, (3, 5, 1, 5)),  # dashdotted
        (0, (5, 10)),  # loosely dashed
        (0, (3, 10, 1, 10)),  # loosely dashdotted
        (0, (1, 10)),  # loosely dotted
    ]

    # Keep doubling the cycle until it covers the requested count.
    while len(styles) < n:
        styles = styles + styles
    return styles

188

189

def confidence_for_indicator_variable(x, n, alpha=0.05):
    """Compute the confidence interval of a proportion estimate.

    The interval is obtained with the Clopper-Pearson method, using
    quantiles of the beta distribution.

    Parameters
    ----------
    x : int
        The number of successes.
    n : int
        The number of trials.
    alpha : :obj:`float`, optional
        The 1-confidence value that you want. For example, alpha should be
        0.05 to obtain 95% confidence intervals.

    Returns
    -------
    (:obj:`float`, :obj:`float`)
        a tuple of (lower_bound, upper_bound) which
        shows the limit of your success rate: lower_bound < x/n < upper_bound
    """
    tail = alpha / 2.0
    beta_quantile = scipy.stats.beta.ppf

    lower = beta_quantile(tail, x, n - x + 1)
    upper = beta_quantile(1 - tail, x + 1, n - x)

    # A NaN quantile is replaced by the corresponding end of [0, 1].
    return (
        0 if numpy.isnan(lower) else lower,
        1 if numpy.isnan(upper) else upper,
    )