Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python 

2# coding=utf-8 

3 

4"""Standard configurations for dataset setup""" 

5 

6 

7from ...data.transforms import ColorJitter as _jitter 

8from ...data.transforms import RandomHorizontalFlip as _hflip 

9from ...data.transforms import RandomRotation as _rotation 

10from ...data.transforms import RandomVerticalFlip as _vflip 

11 

RANDOM_ROTATION = [_rotation()]
"""Shared data augmentation based on random rotation only"""


RANDOM_FLIP_JITTER = [_hflip(), _vflip(), _jitter()]
"""Shared data augmentation transforms without random rotation"""

18 

19 

def make_subset(l, transforms, prefixes=None, suffixes=None):
    """Creates a new data set, applying transforms

    .. note::

       This is a convenience function for our own dataset definitions inside
       this module, guaranteeing homogeneity between dataset definitions
       provided in this package.  It assumes certain strategies for data
       augmentation that may not be translatable to other applications.


    Parameters
    ----------

    l : list
        List of delayed samples

    transforms : list
        A list of transforms that needs to be applied to all samples in the set

    prefixes : :py:class:`list`, Optional
        A list of data augmentation operations that needs to be applied
        **before** the transforms above

    suffixes : :py:class:`list`, Optional
        A list of data augmentation operations that needs to be applied
        **after** the transforms above


    Returns
    -------

    subset : :py:class:`bob.ip.binseg.data.utils.SampleListDataset`
        A pre-formatted dataset that can be fed to one of our engines

    """

    from ...data.utils import SampleListDataset as wrapper

    # avoid mutable default arguments (shared list objects across calls):
    # resolve ``None`` to fresh empty lists at call time
    prefixes = [] if prefixes is None else prefixes
    suffixes = [] if suffixes is None else suffixes

    # data augmentation operations wrap the base transforms on either side
    return wrapper(l, prefixes + transforms + suffixes)

60 

61 

def augment_subset(s, rotation_before=False):
    """Creates a new subset set, **with data augmentation**

    Typically, the transforms are chained to a default set of data augmentation
    operations (random rotation, horizontal and vertical flips, and color
    jitter), but a flag allows prefixing the rotation specially (useful for
    some COVD training sets).

    .. note::

       This is a convenience function for our own dataset definitions inside
       this module, guaranteeing homogeneity between dataset definitions
       provided in this package.  It assumes certain strategies for data
       augmentation that may not be translatable to other applications.


    Parameters
    ----------

    s : bob.ip.binseg.data.utils.SampleListDataset
        A dataset that will be augmented

    rotation_before : :py:class:`bool`, Optional
        An optional flag allowing you to do a rotation augmentation transform
        **before** the sequence of transforms for this dataset, that will be
        augmented.


    Returns
    -------

    subset : :py:class:`bob.ip.binseg.data.utils.SampleListDataset`
        A pre-formatted dataset that can be fed to one of our engines

    """

    if rotation_before:
        # rotate first, then apply the dataset's own transforms, then the
        # remaining (flip/jitter) augmentations
        return s.copy(RANDOM_ROTATION + s.transforms + RANDOM_FLIP_JITTER)

    # default: dataset transforms first, then all augmentations
    return s.copy(s.transforms + RANDOM_ROTATION + RANDOM_FLIP_JITTER)

102 

103 

def make_dataset(subsets, transforms):
    """Creates a new configuration dataset from dictionary and transforms

    This function takes as input a dictionary as those that can be returned by
    :py:meth:`bob.ip.binseg.data.dataset.JSONDataset.subsets`, or
    :py:meth:`bob.ip.binseg.data.dataset.CSVDataset.subsets`, mapping protocol
    names (such as ``train``, ``dev`` and ``test``) to
    :py:class:`bob.ip.binseg.data.sample.DelayedSample` lists, and a set of
    transforms, and returns a dictionary applying
    :py:class:`bob.ip.binseg.data.utils.SampleListDataset` to these
    lists, and our standard data augmentation if a ``train`` set exists.

    For example, if ``subsets`` is composed of two sets named ``train`` and
    ``test``, this function will yield a dictionary with the following entries:

    * ``__train__``: Wraps the ``train`` subset, includes data augmentation
      (note: datasets with names starting with ``_`` (underscore) are excluded
      from prediction and evaluation by default, as they contain data
      augmentation transformations.)
    * ``train``: Wraps the ``train`` subset, **without** data augmentation
    * ``test``: Wraps the ``test`` subset, **without** data augmentation

    .. note::

       This is a convenience function for our own dataset definitions inside
       this module, guaranteeing homogeneity between dataset definitions
       provided in this package.  It assumes certain strategies for data
       augmentation that may not be translatable to other applications.


    Parameters
    ----------

    subsets : dict
        A dictionary that contains the delayed sample lists for a number of
        named lists.  If one of the keys is ``train``, our standard dataset
        augmentation transforms are appended to the definition of that subset.
        All other subsets remain un-augmented.  If one of the keys is
        ``validation``, then this dataset will be also copied to the
        ``__valid__`` hidden dataset and will be used for validation during
        training.  Otherwise, if no ``validation`` subset is available, we set
        ``__valid__`` to be the same as the unaugmented ``train`` subset, if
        one is available.

    transforms : list
        A list of transforms that needs to be applied to all samples in the set


    Returns
    -------

    dataset : dict
        A pre-formatted dataset that can be fed to one of our engines.  It maps
        string names to
        :py:class:`bob.ip.binseg.data.utils.SampleListDataset`'s.

    """

    retval = {}

    for key, samples in subsets.items():
        retval[key] = make_subset(samples, transforms=transforms)
        if key == "train":
            # hidden, augmented copy of the training set, used for training
            retval["__train__"] = make_subset(
                samples,
                transforms=transforms,
                suffixes=(RANDOM_ROTATION + RANDOM_FLIP_JITTER),
            )
        if key == "validation":
            # also use it for validation during training
            retval["__valid__"] = retval[key]

    if (
        ("__train__" in retval)
        and ("train" in retval)
        and ("__valid__" not in retval)
    ):
        # if the dataset does not have a validation set, we use the unaugmented
        # training set as validation set
        retval["__valid__"] = retval["train"]

    return retval