Coverage for /scratch/builds/bob/bob.med.tb/miniconda/conda-bld/bob.med.tb_1637571489937/_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeho/lib/python3.8/site-packages/bob/med/tb/scripts/predtojson.py: 91%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

57 statements  

1#!/usr/bin/env python 

2# coding=utf-8 

3 

4import os 

5import re 

6import shutil 

7 

8import click 

9import numpy 

10import torch 

11import pandas 

12 

13import logging 

14logger = logging.getLogger(__name__) 

15 

16from bob.extension.scripts.click_helper import ( 

17 verbosity_option, 

18 AliasedGroup, 

19) 

20 

21 

def _load(data):
    """Load ``predictions.csv`` style files into data frames

    Parameters
    ----------

    data : dict
        A dict in which keys are the names of the systems and the values are
        paths to ``predictions.csv`` style files.


    Returns
    -------

    data : dict
        A dict in which keys are the names of the systems and the values are
        dictionaries that contain one key:

        * ``df``: A :py:class:`pandas.DataFrame` with the predictions data
          loaded from the corresponding file.  Every cell of the
          ``likelihood`` and ``ground_truth`` columns is converted to a
          :py:class:`numpy.ndarray`.


    Raises
    ------

    ValueError, SyntaxError
        If a ``likelihood`` or ``ground_truth`` cell is not a plain Python
        literal once its runs of spaces have been replaced by commas.

    """
    # Function-scope import: ``ast`` is only needed by this loader.
    import ast

    # Compiled once and shared by every column conversion below.
    pattern = re.compile(" +")

    def _to_array(cell):
        """Converts the text of one cell into a numpy array"""

        # Runs of spaces become commas so that the text parses as a Python
        # literal (presumably cells hold space-separated numbers such as
        # ``[0.1 0.9]`` -- TODO confirm against the script writing the CSV).
        #
        # SECURITY: this used to call ``eval()``, which would execute any
        # expression stored in the CSV file.  ``ast.literal_eval`` accepts
        # literals only and raises on anything else.
        return numpy.array(ast.literal_eval(pattern.sub(",", cell)))

    def _to_double_tensor(col):
        """Converts a column in a dataframe to a column of numpy arrays"""

        return col.apply(_to_array)

    # loads all data
    retval = {}
    for name, predictions_path in data.items():

        # Load predictions
        logger.info(f"Loading predictions from {predictions_path}...")
        pred_data = pandas.read_csv(predictions_path)
        pred_data['likelihood'] = _to_double_tensor(pred_data['likelihood'])
        pred_data['ground_truth'] = _to_double_tensor(pred_data['ground_truth'])
        retval[name] = dict(df=pred_data)

    return retval

63 

64 

@click.command(
    epilog="""Examples:

\b
    1. Convert predictions of radiological signs to a JSON dataset file_

\b
       $ bob tb predtojson -vv train path/to/train/predictions.csv test path/to/test/predictions.csv
""",
)
@click.argument(
        'label_path',
        nargs=-1,
        )
@click.option(
    "--output-folder",
    "-f",
    help="Path where to store the json file (created if does not exist)",
    required=False,
    default=None,
    type=click.Path(dir_okay=True, file_okay=False),
)
@verbosity_option()
def predtojson(label_path, output_folder, **kwargs):
    """Convert predictions to dataset"""
    # NOTE: the docstring above is shown by ``--help``, so the developer
    # documentation lives in this comment instead.
    #
    # label_path    : flat tuple alternating split names and paths to
    #                 ``predictions.csv`` style files (name1, path1, ...).
    # output_folder : directory that receives ``dataset.json``; created if
    #                 missing.  An existing ``dataset.json`` is first moved
    #                 to ``dataset.json~`` (replacing any older backup).
    # kwargs        : extra options injected by ``verbosity_option``; unused.
    #
    # Raises ``click.ClickException`` when ``label_path`` has an odd number
    # of entries or when no output folder was given.

    # Function-scope import: only used to escape strings written below.
    import json

    # hack to get a dictionary from arguments passed to input
    if len(label_path) % 2 != 0:
        raise click.ClickException("Input label-paths should be doubles"
                " composed of name-path entries")

    # The option defaults to None, which used to reach ``os.makedirs(None)``
    # and fail with a TypeError after all predictions had been loaded.
    if output_folder is None:
        raise click.ClickException("An output folder must be provided"
                " via --output-folder/-f")

    data = dict(zip(label_path[::2], label_path[1::2]))

    # load all data measures
    data = _load(data)

    logger.info(f"Output folder: {output_folder}")
    os.makedirs(output_folder, exist_ok=True)

    output_file = os.path.join(output_folder, "dataset.json")
    if os.path.exists(output_file):
        backup = output_file + "~"
        if os.path.exists(backup):
            os.unlink(backup)
        shutil.move(output_file, backup)

    logger.info("Saving JSON file...")
    # Any previous file was moved away above, so plain "w" is enough; JSON
    # text is written as UTF-8 regardless of the locale.
    with open(output_file, "w", newline="", encoding="utf-8") as f:

        f.write('{')
        for i, (name, value) in enumerate(data.items()):
            if i > 0:
                f.write(',')

            df = value["df"]
            # json.dumps escapes quotes/backslashes in the split name
            f.write(json.dumps(str(name), ensure_ascii=False) + ': [')

            # Separators are placed by row position, not by index label, so
            # frames without a 0-based index still give valid output.
            for position, (_, row) in enumerate(df.iterrows()):
                if position > 0:
                    f.write(',')

                # Fixed-point notation with 20 decimals, unquoted.
                likelihoods = ", ".join(
                    format(x, '.20f')
                    for x in torch.tensor(row['likelihood']).tolist()
                )

                f.write(
                    '['
                    + json.dumps(str(row['filename']), ensure_ascii=False)
                    + ', '
                )
                # only the first ground-truth entry is exported
                f.write(str(row['ground_truth'][0].item()))
                f.write(',')
                f.write('[' + likelihoods + ']')
                f.write(']')
            f.write(']')
        f.write('}')

129