Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
2# coding=utf-8
4"""Standard configurations for dataset setup"""
7from ...data.transforms import ColorJitter as _jitter
8from ...data.transforms import RandomHorizontalFlip as _hflip
9from ...data.transforms import RandomRotation as _rotation
10from ...data.transforms import RandomVerticalFlip as _vflip
# Default data-augmentation pipelines shared by the dataset configuration
# helpers in this module.  Each is a list so they can be concatenated with
# other transform lists (see their use in ``augment_subset()`` and
# ``make_dataset()`` in this file).

RANDOM_ROTATION = [_rotation()]
"""Shared data augmentation based on random rotation only"""

RANDOM_FLIP_JITTER = [_hflip(), _vflip(), _jitter()]
"""Shared data augmentation transforms without random rotation"""
def make_subset(l, transforms, prefixes=None, suffixes=None):
    """Creates a new data set, applying transforms

    .. note::

       This is a convenience function for our own dataset definitions inside
       this module, guaranteeing homogeneity between dataset definitions
       provided in this package.  It assumes certain strategies for data
       augmentation that may not be translatable to other applications.


    Parameters
    ----------

    l : list
        List of delayed samples

    transforms : list
        A list of transforms that needs to be applied to all samples in the set

    prefixes : :py:class:`list`, Optional
        A list of data augmentation operations that needs to be applied
        **before** the transforms above

    suffixes : :py:class:`list`, Optional
        A list of data augmentation operations that needs to be applied
        **after** the transforms above


    Returns
    -------

    subset : :py:class:`bob.ip.binseg.data.utils.SampleListDataset`
        A pre-formatted dataset that can be fed to one of our engines

    """

    from ...data.utils import SampleListDataset as wrapper

    # ``None`` defaults avoid the shared-mutable-default-argument pitfall;
    # normalize to empty lists before concatenation
    prefixes = [] if prefixes is None else prefixes
    suffixes = [] if suffixes is None else suffixes

    return wrapper(l, prefixes + transforms + suffixes)
def augment_subset(s, rotation_before=False):
    """Creates a new subset set, **with data augmentation**

    Typically, the transforms are chained to a default set of data augmentation
    operations (random rotation, horizontal and vertical flips, and color
    jitter), but a flag allows prefixing the rotation specially (useful for
    some COVD training sets).

    .. note::

       This is a convenience function for our own dataset definitions inside
       this module, guaranteeing homogeneity between dataset definitions
       provided in this package.  It assumes certain strategies for data
       augmentation that may not be translatable to other applications.


    Parameters
    ----------

    s : bob.ip.binseg.data.utils.SampleListDataset
        A dataset that will be augmented

    rotation_before : :py:class:`bool`, Optional
        A optional flag allowing you to do a rotation augmentation transform
        **before** the sequence of transforms for this dataset, that will be
        augmented.


    Returns
    -------

    subset : :py:class:`bob.ip.binseg.data.utils.SampleListDataset`
        A pre-formatted dataset that can be fed to one of our engines

    """

    # the random rotation either precedes the dataset's own transforms or
    # follows them, depending on the flag; flips and color jitter always
    # come at the end of the chain
    if rotation_before:
        chain = RANDOM_ROTATION + s.transforms + RANDOM_FLIP_JITTER
    else:
        chain = s.transforms + RANDOM_ROTATION + RANDOM_FLIP_JITTER

    return s.copy(chain)
def make_dataset(subsets, transforms):
    """Creates a new configuration dataset from dictionary and transforms

    This function takes as input a dictionary as those that can be returned by
    :py:meth:`bob.ip.binseg.data.dataset.JSONDataset.subsets`, or
    :py:meth:`bob.ip.binseg.data.dataset.CSVDataset.subsets`, mapping protocol
    names (such as ``train``, ``dev`` and ``test``) to
    :py:class:`bob.ip.binseg.data.sample.DelayedSample` lists, and a set of
    transforms, and returns a dictionary applying
    :py:class:`bob.ip.binseg.data.utils.SampleListDataset` to these
    lists, and our standard data augmentation if a ``train`` set exists.

    For example, if ``subsets`` is composed of two sets named ``train`` and
    ``test``, this function will yield a dictionary with the following entries:

    * ``__train__``: Wraps the ``train`` subset, includes data augmentation
      (note: datasets with names starting with ``_`` (underscore) are excluded
      from prediction and evaluation by default, as they contain data
      augmentation transformations.)
    * ``train``: Wraps the ``train`` subset, **without** data augmentation
    * ``test``: Wraps the ``test`` subset, **without** data augmentation

    .. note::

       This is a convenience function for our own dataset definitions inside
       this module, guaranteeing homogeneity between dataset definitions
       provided in this package.  It assumes certain strategies for data
       augmentation that may not be translatable to other applications.


    Parameters
    ----------

    subsets : dict
        A dictionary that contains the delayed sample lists for a number of
        named lists.  If one of the keys is ``train``, our standard dataset
        augmentation transforms are appended to the definition of that subset.
        All other subsets remain un-augmented.  If one of the keys is
        ``validation``, then this dataset will be also copied to the
        ``__valid__`` hidden dataset and will be used for validation during
        training.  Otherwise, if no ``validation`` subset is available, we set
        ``__valid__`` to be the same as the unaugmented ``train`` subset, if
        one is available.

    transforms : list
        A list of transforms that needs to be applied to all samples in the set


    Returns
    -------

    dataset : dict
        A pre-formatted dataset that can be fed to one of our engines.  It maps
        string names to
        :py:class:`bob.ip.binseg.data.utils.SampleListDataset`'s.

    """

    retval = {}

    for key, samples in subsets.items():
        retval[key] = make_subset(samples, transforms=transforms)
        if key == "train":
            # hidden, augmented copy of the training set, used exclusively
            # during training (underscore-prefixed names are skipped by
            # prediction/evaluation)
            retval["__train__"] = make_subset(
                samples,
                transforms=transforms,
                suffixes=(RANDOM_ROTATION + RANDOM_FLIP_JITTER),
            )
        if key == "validation":
            # also use it for validation during training
            retval["__valid__"] = retval[key]

    if (
        ("__train__" in retval)
        and ("train" in retval)
        and ("__valid__" not in retval)
    ):
        # if the dataset does not have a validation set, we use the unaugmented
        # training set as validation set
        retval["__valid__"] = retval["train"]

    return retval