1#!/usr/bin/env python
2# coding=utf-8
3
4import os
5import re
6import shutil
7
8import click
9import numpy
10import torch
11import pandas
12
13import logging
14logger = logging.getLogger(__name__)
15
16from bob.extension.scripts.click_helper import (
17 verbosity_option,
18 AliasedGroup,
19)
20
21
22def _load(data):
23 """Load prediction.csv files
24
25 Parameters
26 ----------
27
28 data : dict
29 A dict in which keys are the names of the systems and the values are
30 paths to ``predictions.csv`` style files.
31
32
33 Returns
34 -------
35
36 data : dict
37 A dict in which keys are the names of the systems and the values are
38 dictionaries that contain two keys:
39
40 * ``df``: A :py:class:`pandas.DataFrame` with the predictions data
41 loaded to
42
43 """
44
45 def _to_double_tensor(col):
46 """Converts a column in a dataframe to a tensor array"""
47
48 pattern = re.compile(" +")
49 return col.apply(lambda cell: numpy.array(eval(pattern.sub(",", cell))))
50
51 # loads all data
52 retval = {}
53 for name, predictions_path in data.items():
54
55 # Load predictions
56 logger.info(f"Loading predictions from {predictions_path}...")
57 pred_data = pandas.read_csv(predictions_path)
58 pred_data['likelihood'] = _to_double_tensor(pred_data['likelihood'])
59 pred_data['ground_truth'] = _to_double_tensor(pred_data['ground_truth'])
60 retval[name] = dict(df=pred_data)
61
62 return retval
63
64
def _format_sample(filename, ground_truth, likelihood):
    """Serialises one prediction as ``["<filename>", <label>,[<p0>, <p1>, ...]]``"""

    # Function-scope import: keeps this block self-contained.
    import json

    # json.dumps escapes quotes, backslashes and control characters, which
    # plain string concatenation would write verbatim (yielding invalid JSON).
    name = json.dumps(str(filename), ensure_ascii=False)

    # Only the first ground-truth entry is exported.
    label = str(ground_truth[0].item())

    # Fixed 20-decimal notation for every likelihood value.
    values = ", ".join(
        format(x, ".20f") for x in numpy.asarray(likelihood).tolist()
    )
    return f"[{name}, {label},[{values}]]"


@click.command(
    epilog="""Examples:

\b
 1. Convert predictions of radiological signs to a JSON dataset file_
\b
 $ bob tb predtojson -vv train path/to/train/predictions.csv test path/to/test/predictions.csv
""",
)
@click.argument(
    "label_path",
    nargs=-1,
)
@click.option(
    "--output-folder",
    "-f",
    help="Path where to store the json file (created if does not exist)",
    required=False,
    default=None,
    type=click.Path(dir_okay=True, file_okay=False),
)
@verbosity_option()
def predtojson(label_path, output_folder, **kwargs):
    """Convert predictions to dataset"""

    # NOTE: the docstring above is what click prints as the command help, so
    # the detailed documentation lives in comments instead.
    #
    # label_path    -- flat tuple of alternating "name path" entries, each
    #                  path pointing to a ``predictions.csv`` style file
    # output_folder -- directory that receives ``dataset.json``; falls back
    #                  to the current directory when the option is omitted
    # kwargs        -- extra values injected by the option decorators (unused)
    #
    # Raises click.ClickException if the number of positional arguments is
    # odd.  An existing ``dataset.json`` is kept as ``dataset.json~``.

    # Function-scope import: keeps this block self-contained.
    import json

    # hack to get a dictionary from arguments passed to input
    if len(label_path) % 2 != 0:
        raise click.ClickException("Input label-paths should be doubles"
                " composed of name-path entries")
    data = dict(zip(label_path[::2], label_path[1::2]))

    # load all data measures
    data = _load(data)

    # The option is not required: without it ``os.makedirs(None)`` would
    # raise a TypeError, so default to the current working directory.
    if output_folder is None:
        output_folder = os.curdir

    logger.info(f"Output folder: {output_folder}")
    os.makedirs(output_folder, exist_ok=True)

    # Keep a single backup copy of any previous output
    output_file = os.path.join(output_folder, "dataset.json")
    if os.path.exists(output_file):
        backup = output_file + "~"
        if os.path.exists(backup):
            os.unlink(backup)
        shutil.move(output_file, backup)

    logger.info("Saving JSON file...")
    # "w" rather than "a+": the document must always be written from scratch
    with open(output_file, "w", newline="") as f:

        f.write("{")
        for i, (name, value) in enumerate(data.items()):
            if i > 0:
                f.write(",")

            df = value["df"]
            f.write(json.dumps(str(name), ensure_ascii=False) + ": [")
            # Joining the rows places separators by position, independently
            # of the labels used by the dataframe index.
            f.write(
                ",".join(
                    _format_sample(filename, ground_truth, likelihood)
                    for filename, ground_truth, likelihood in zip(
                        df["filename"], df["ground_truth"], df["likelihood"]
                    )
                )
            )
            f.write("]")
        f.write("}")
129