Coverage for src/bob/fusion/base/tools/common.py: 94%

87 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2024-07-13 01:00 +0200

1import logging 

2 

3from collections import defaultdict 

4 

5import numpy as np 

6 

7logger = logging.getLogger(__name__) 

8 

9 

def get_2negatives_1positive(score_lines):
    """Split score lines into genuine, zero-effort impostor and attack parts.

    A line is *genuine* when its ``claimed_id`` equals its ``real_id``, an
    *attack* when its ``real_id`` contains ``"spoof"`` or ``"attack"``, and a
    *zero-effort impostor* when it is neither.  The genuine and attack tests
    are evaluated independently, so a single line can satisfy both.

    Parameters
    ----------
    score_lines : numpy.ndarray
        Structured array exposing at least the string fields ``claimed_id``
        and ``real_id``.

    Returns
    -------
    tuple
        ``(gen, zei, atk, gen_mask, zei_mask, atk_mask)``: the three selected
        sub-arrays followed by the boolean masks that produced them.
    """
    real_ids = score_lines["real_id"]

    gen_mask = score_lines["claimed_id"] == real_ids

    mentions_spoof = np.char.count(real_ids, "spoof") > 0
    mentions_attack = np.char.count(real_ids, "attack") > 0
    atk_mask = np.logical_or(mentions_spoof, mentions_attack)

    # "not genuine and not attack", written as "not (genuine or attack)".
    zei_mask = np.logical_not(np.logical_or(gen_mask, atk_mask))

    return (
        score_lines[gen_mask],
        score_lines[zei_mask],
        score_lines[atk_mask],
        gen_mask,
        zei_mask,
        atk_mask,
    )

23 

24 

def check_consistency(gen_l, zei_l, atk_l):
    """Check that the score files of all systems list the same identities.

    Within each of the three lists, every entry after the first is compared
    against the first entry: their ``claimed_id`` fields and their ``real_id``
    fields must be equal element by element *and* have the same shape.

    Parameters
    ----------
    gen_l, zei_l, atk_l : list
        One entry per system; each entry is indexable by ``"claimed_id"`` and
        ``"real_id"``.  An empty list is skipped.

    Raises
    ------
    ValueError
        If ``gen_l`` holds fewer than two entries, or if the claimed ids or
        real ids of any entry differ from those of the first entry of its
        list (including a differing number of lines).
    """
    if len(gen_l) < 2:
        raise ValueError(
            "Check failed since less than two system is available."
        )

    for score_lines_list in (gen_l, zei_l, atk_l):
        if not score_lines_list:
            continue
        reference = score_lines_list[0]
        for score_lines in score_lines_list[1:]:
            # np.array_equal compares shapes before values.  A plain
            # ``np.all(a == b)`` would broadcast a one-line file against a
            # longer one and could accept files of different lengths.
            if not np.array_equal(
                score_lines["claimed_id"], reference["claimed_id"]
            ):
                raise ValueError("claimed ids do not match between score files")

            if not np.array_equal(
                score_lines["real_id"], reference["real_id"]
            ):
                raise ValueError("real ids do not match between score files")

44 

45 

def get_scores(*args):
    """Assemble a score matrix with one column per position in the inputs.

    Each positional argument is a sequence of score-line containers.  The
    sequences are walked in lockstep with ``zip``; at every position the
    ``"score"`` fields of the grouped containers are concatenated (in argument
    order) into one vector.  The vectors are stacked as rows and the result
    is transposed.

    Parameters
    ----------
    *args : sequence
        Sequences whose items are indexable by ``"score"``.

    Returns
    -------
    numpy.ndarray
        The transposed stack: one column per zipped position, one row per
        concatenated score.
    """
    per_position = [
        np.concatenate([lines["score"] for lines in group], axis=0)
        for group in zip(*args)
    ]
    return np.vstack(per_position).T

51 

52 

def get_score_lines(*args):
    """Merge score-line arrays into one structured array.

    The field names come from the first array of the first argument; when
    there are not exactly four of them, every field whose name contains
    ``"model_label"`` is dropped.  All fields but the last become unicode
    fields wide enough for the widest input, and the last field is stored as
    ``float``.

    Parameters
    ----------
    *args : sequence of numpy.ndarray
        Sequences of structured arrays, walked in lockstep with ``zip``.

    Returns
    -------
    numpy.ndarray
        Structured array holding the rows of every input array, in zip order.
    """
    # Field names are read from the very first array only.
    names = list(args[0][0].dtype.names)
    if len(names) != 4:
        names = [name for name in names if "model_label" not in name]
    logger.debug(names)

    # Flatten the inputs in zip order: position by position, argument by
    # argument.
    arrays = [array for group in zip(*args) for array in group]

    # Width in characters of each field in each array.  Dividing the item
    # size by 4 assumes unicode ("U") fields, which use 4 bytes per character.
    widths = {
        name: [array.dtype[name].itemsize // 4 for array in arrays]
        for name in names
    }

    # Widest unicode type for every field except the last, which is float.
    merged_dtype = [(name, f"U{max(widths[name])}") for name in names[:-1]]
    merged_dtype.append((names[-1], float))

    rows = []
    for array in arrays:
        rows.extend(array[names].tolist())
    return np.array(rows, dtype=merged_dtype)

79 

80 

def remove_nan(samples, found_nan):
    """Drop every row of ``samples`` that contains at least one NaN.

    Parameters
    ----------
    samples : numpy.ndarray
        2-D numeric array; rows are filtered, columns are inspected.
    found_nan : bool
        Flag carried over from earlier calls; returned unchanged when this
        call finds no NaN.

    Returns
    -------
    tuple
        ``(found, nans, kept)`` where ``found`` is ``np.any(nans) or
        found_nan``, ``nans`` is the boolean per-row mask of rows holding a
        NaN, and ``kept`` is ``samples`` without those rows.
    """
    # One vectorized reduction over the columns replaces the per-column loop.
    # It also copes with an array that has zero columns, giving an all-False
    # mask; the loop had to index column 0 to get started.
    nans = np.isnan(samples).any(axis=1)
    return np.any(nans) or found_nan, nans, samples[~nans, :]

87 

88 

def get_gza_from_lines_list(score_lines_list):
    """Split each system's score lines and fill in missing zero-effort scores.

    Every entry of ``score_lines_list`` is split with
    ``get_2negatives_1positive`` into genuine, zero-effort impostor and attack
    lines.  When some systems have zero-effort impostor lines and others have
    none, the empty ones are filled in.  The lines of the first system that
    has them are copied and their scores reset to NaN.  Then, for every
    ``(real_id, test_label)`` pair in the copy, the scores are overwritten
    with the genuine scores of the empty system carrying the same pair.

    Parameters
    ----------
    score_lines_list : iterable of numpy.ndarray
        Structured arrays accepted by ``get_2negatives_1positive``; the
        fill-in step also reads their ``test_label`` and ``score`` fields.

    Returns
    -------
    tuple
        ``(idx1, gen_l, zei_l, atk_l)``.  ``idx1`` is the index of the system
        whose zero-effort lines served as the template, or ``0`` when no
        fill-in took place.  The three lists hold one array per system.

    Notes
    -----
    The fill-in assignment requires the number of matching genuine scores to
    fit the number of template lines for each pair; otherwise numpy raises,
    as announced by the log message.
    """
    gen_l, zei_l, atk_l = [], [], []
    for score_lines in score_lines_list:
        gen, zei, atk, _, _, _ = get_2negatives_1positive(score_lines)
        gen_l.append(gen)
        zei_l.append(zei)
        atk_l.append(atk)

    zei_lengths = np.array([zei.size for zei in zei_l])

    idx1 = 0  # returned as-is when no fill-in is needed
    # Fill in only when the systems disagree: some have zero-effort lines
    # and some have none.
    if not (np.all(zei_lengths == 0) or np.all(zei_lengths > 0)):
        logger.info(
            "Trying to fill-in the missing zero effort impostor scores"
            " for pad systems. If you see a numpy index error below, "
            "your biometric scores do not match your pad scores."
        )
        # The first system that has zero-effort lines is the template.
        idx1 = zei_lengths.nonzero()[0][0]
        zei_full = zei_l[idx1]
        for idx2 in np.where(zei_lengths == 0)[0]:
            gen = gen_l[idx2]
            # Work on a copy and blank every score so none of the template's
            # own scores survive.
            temp = np.array(zei_full)
            temp["score"] = np.nan
            for real_id in np.unique(temp["real_id"]):
                same_id = temp["real_id"] == real_id
                for test_label in np.unique(temp["test_label"][same_id]):
                    # Template lines of this (real_id, test_label) pair ...
                    idx3 = np.logical_and(
                        same_id,
                        temp["test_label"] == test_label,
                    )
                    # ... take the genuine scores of the same pair.
                    idx4 = np.logical_and(
                        gen["real_id"] == real_id,
                        gen["test_label"] == test_label,
                    )
                    temp["score"][idx3] = gen["score"][idx4]
            zei_l[idx2] = temp
    return idx1, gen_l, zei_l, atk_l