Coverage for src/bob/pad/base/script/finalize_scores.py: 0%

26 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-06 21:56 +0100

1"""Finalizes the scores that are produced by bob pad run-pipeline. 

2""" 

3import logging 

4 

5import click 

6 

7from clapper.click import log_parameters, verbosity_option 

8 

9logger = logging.getLogger(__name__) 

10 

11 

@click.command(
    name="finalize-scores",
    epilog="""\b
Examples:
  $ bob pad finalize-scores /path/to/scores-dev.csv
  $ bob pad finalize-scores /path/to/scores-{dev,eval}.csv
""",
)
@click.argument(
    "scores", type=click.Path(exists=True, dir_okay=False), nargs=-1
)
@click.option(
    "-m",
    "--method",
    default="mean",
    type=click.Choice(["mean", "median", "min", "max"]),
    show_default=True,
    help="The method to use when finalizing the scores.",
)
@click.option(
    "--backup/--no-backup", default=True, help="Whether to backup scores."
)
@verbosity_option(logger)
def finalize_scores(scores, method, backup, verbose):
    """Finalizes the scores given by bob pad run-pipeline.

    When using bob.pad.base, Algorithms can produce several score values for
    each unique sample. You can use this script to average (or take the
    median/min/max of) these scores to have one final score per sample.

    The conversion is done in-place. Unless ``--no-backup`` is given, each
    original file is first copied to ``<path>.bak``.
    The order of scores will change.

    Parameters
    ----------
    scores
        Paths of the CSV score files to rewrite. Each file must provide the
        ``video_key`` and ``score`` columns; a ``frame_id`` column is removed
        when present.
    method
        Name of the reduction applied to the scores of each ``video_key``
        group: one of ``mean``, ``median``, ``min`` or ``max``.
    backup
        When true, copy each file to ``<path>.bak`` before overwriting it.
    verbose
        Verbosity level injected by ``verbosity_option``; not read directly
        in this function.
    """
    import shutil

    import pandas as pd

    log_parameters(logger)

    for path in scores:
        logger.info("Finalizing scores in %s", path)

        if backup:
            logger.info("Backing up %s", path)
            shutil.copy(path, path + ".bak")

        df = pd.read_csv(path)

        # Reduce the scores of each video to a single value. The accepted
        # ``method`` names are also pandas group-by kernel names; those
        # kernels skip NaN values, which is what the NaN-aware numpy
        # reducers did, and passing the name avoids pandas' FutureWarning
        # about aliasing numpy callables.
        df["score"] = df.groupby("video_key")["score"].transform(method)

        # Frame indices are meaningless once scores are aggregated per video.
        if "frame_id" in df.columns:
            df = df.drop(columns=["frame_id"])

        # Keep a single row per video_key (the first one encountered).
        df = df.drop_duplicates(subset=["video_key"])

        df.to_csv(path, index=False)

75 df.drop_duplicates(subset=["video_key"], inplace=True) 

76 

77 df.to_csv(path, index=False)