Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
2# coding=utf-8
4"""Standard configurations for dataset setup"""
7from ...data.transforms import ColorJitter as _jitter
8from ...data.transforms import RandomHorizontalFlip as _hflip
9from ...data.transforms import RandomRotation as _rotation
10from ...data.transforms import RandomVerticalFlip as _vflip
# Default data-augmentation pipelines shared by the dataset configuration
# helpers in this module.  Each is a list so they can be concatenated with
# other transform lists (see their use in ``augment_subset()`` and
# ``make_dataset()`` in this file).

RANDOM_ROTATION = [_rotation()]
"""Shared data augmentation based on random rotation only"""

RANDOM_FLIP_JITTER = [_hflip(), _vflip(), _jitter()]
"""Shared data augmentation transforms without random rotation"""
def make_subset(l, transforms, prefixes=None, suffixes=None):
    """Creates a new data set, applying transforms

    .. note::

       This is a convenience function for our own dataset definitions inside
       this module, guaranteeing homogeneity between dataset definitions
       provided in this package.  It assumes certain strategies for data
       augmentation that may not be translatable to other applications.


    Parameters
    ----------

    l : list
        List of delayed samples

    transforms : list
        A list of transforms that needs to be applied to all samples in the set

    prefixes : :py:class:`list`, Optional
        A list of data augmentation operations that needs to be applied
        **before** the transforms above

    suffixes : :py:class:`list`, Optional
        A list of data augmentation operations that needs to be applied
        **after** the transforms above


    Returns
    -------

    subset : :py:class:`bob.ip.binseg.data.utils.SampleListDataset`
        A pre-formatted dataset that can be fed to one of our engines

    """

    from ...data.utils import SampleListDataset as wrapper

    # ``None`` defaults avoid the shared-mutable-default-argument pitfall;
    # normalize to empty lists before concatenation
    prefixes = [] if prefixes is None else prefixes
    suffixes = [] if suffixes is None else suffixes

    return wrapper(l, prefixes + transforms + suffixes)
def augment_subset(s, rotation_before=False):
    """Creates a new subset set, **with data augmentation**

    Typically, the transforms are chained to a default set of data augmentation
    operations (random rotation, horizontal and vertical flips, and color
    jitter), but a flag allows prefixing the rotation specially (useful for
    some COVD training sets).

    .. note::

       This is a convenience function for our own dataset definitions inside
       this module, guaranteeing homogeneity between dataset definitions
       provided in this package.  It assumes certain strategies for data
       augmentation that may not be translatable to other applications.


    Parameters
    ----------

    s : bob.ip.binseg.data.utils.SampleListDataset
        A dataset that will be augmented

    rotation_before : :py:class:`bool`, Optional
        A optional flag allowing you to do a rotation augmentation transform
        **before** the sequence of transforms for this dataset, that will be
        augmented.


    Returns
    -------

    subset : :py:class:`bob.ip.binseg.data.utils.SampleListDataset`
        A pre-formatted dataset that can be fed to one of our engines

    """

    # the random rotation either precedes the dataset's own transforms or
    # follows them, depending on the flag; flips and color jitter always
    # come at the end of the chain
    if rotation_before:
        chain = RANDOM_ROTATION + s.transforms + RANDOM_FLIP_JITTER
    else:
        chain = s.transforms + RANDOM_ROTATION + RANDOM_FLIP_JITTER

    return s.copy(chain)
def make_dataset(subsets, transforms):
    """Creates a new configuration dataset from dictionary and transforms

    This function takes as input a dictionary as those that can be returned by
    :py:meth:`bob.ip.binseg.data.dataset.JSONDataset.subsets`, or
    :py:meth:`bob.ip.binseg.data.dataset.CSVDataset.subsets`, mapping protocol
    names (such as ``train``, ``dev`` and ``test``) to
    :py:class:`bob.ip.binseg.data.sample.DelayedSample` lists, and a set of
    transforms, and returns a dictionary applying
    :py:class:`bob.ip.binseg.data.utils.SampleListDataset` to these
    lists, and our standard data augmentation if a ``train`` set exists.

    For example, if ``subsets`` is composed of two sets named ``train`` and
    ``test``, this function will yield a dictionary with the following entries:

    * ``__train__``: Wraps the ``train`` subset, includes data augmentation
      (note: datasets with names starting with ``_`` (underscore) are excluded
      from prediction and evaluation by default, as they contain data
      augmentation transformations.)
    * ``train``: Wraps the ``train`` subset, **without** data augmentation
    * ``test``: Wraps the ``test`` subset, **without** data augmentation

    .. note::

       This is a convenience function for our own dataset definitions inside
       this module, guaranteeing homogeneity between dataset definitions
       provided in this package.  It assumes certain strategies for data
       augmentation that may not be translatable to other applications.


    Parameters
    ----------

    subsets : dict
        A dictionary that contains the delayed sample lists for a number of
        named lists.  If one of the keys is ``train``, our standard dataset
        augmentation transforms are appended to the definition of that subset.
        All other subsets remain un-augmented.  If one of the keys is
        ``validation``, then this dataset will be also copied to the
        ``__valid__`` hidden dataset and will be used for validation during
        training.  Otherwise, if no ``validation`` subset is available, we set
        ``__valid__`` to be the same as the unaugmented ``train`` subset, if
        one is available.

    transforms : list
        A list of transforms that needs to be applied to all samples in the set


    Returns
    -------

    dataset : dict
        A pre-formatted dataset that can be fed to one of our engines.  It maps
        string names to
        :py:class:`bob.ip.binseg.data.utils.SampleListDataset`'s.

    """

    retval = {}

    for key, samples in subsets.items():
        retval[key] = make_subset(samples, transforms=transforms)
        if key == "train":
            # hidden, augmented copy of the training set, used exclusively
            # during training (underscore-prefixed names are skipped by
            # prediction/evaluation)
            retval["__train__"] = make_subset(
                samples,
                transforms=transforms,
                suffixes=(RANDOM_ROTATION + RANDOM_FLIP_JITTER),
            )
        if key == "validation":
            # also use it for validation during training
            retval["__valid__"] = retval[key]

    if (
        ("__train__" in retval)
        and ("train" in retval)
        and ("__valid__" not in retval)
    ):
        # if the dataset does not have a validation set, we use the unaugmented
        # training set as validation set
        retval["__valid__"] = retval["train"]

    return retval