Coverage for src/bob/pad/base/script/finalize_scores.py: 0%
26 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-06 21:56 +0100
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-06 21:56 +0100
1"""Finalizes the scores that are produced by bob pad run-pipeline.
2"""
3import logging
5import click
7from clapper.click import log_parameters, verbosity_option
9logger = logging.getLogger(__name__)
@click.command(
    name="finalize-scores",
    epilog="""\b
Examples:
  $ bob pad finalize-scores /path/to/scores-dev.csv
  $ bob pad finalize-scores /path/to/scores-{dev,eval}.csv
""",
)
@click.argument(
    "scores", type=click.Path(exists=True, dir_okay=False), nargs=-1
)
@click.option(
    "-m",
    "--method",
    default="mean",
    type=click.Choice(["mean", "median", "min", "max"]),
    show_default=True,
    help="The method to use when finalizing the scores.",
)
@click.option(
    "--backup/--no-backup", default=True, help="Whether to backup scores."
)
@verbosity_option(logger)
def finalize_scores(scores, method, backup, verbose):
    """Finalizes the scores given by bob pad run-pipeline

    When using bob.pad.base, Algorithms can produce several score values for
    each unique sample. You can use this script to reduce these scores
    (mean, median, min, or max) to have one final score per sample.

    The conversion is done in-place (original files will be backed up to
    <file>.bak, overwriting any existing backup of that name).
    The order of scores will change.
    """
    # Parameters (filled in by click):
    #   scores:  tuple of paths to existing CSV score files; may be empty.
    #   method:  one of "mean", "median", "min", "max".
    #   backup:  when true, copy each file to "<path>.bak" before rewriting it.
    #   verbose: consumed by ``verbosity_option``; not used directly here.
    #
    # Heavy dependencies are imported lazily so that merely loading the CLI
    # stays cheap.
    import shutil

    import numpy
    import pandas as pd

    log_parameters(logger)

    # NaN-aware reducers: NaN scores are ignored when combining the scores
    # of one video.
    reducer = {
        "mean": numpy.nanmean,
        "median": numpy.nanmedian,
        "max": numpy.nanmax,
        "min": numpy.nanmin,
    }[method]

    if not scores:
        logger.warning("No score files were given; nothing to finalize.")

    for path in scores:
        logger.info("Finalizing scores in %s", path)

        df = pd.read_csv(path)

        # Fail with a readable message (and before touching any file on
        # disk) when the CSV does not have the columns this command needs.
        missing = {"video_key", "score"} - set(df.columns)
        if missing:
            raise click.ClickException(
                f"{path} is missing required column(s): "
                f"{', '.join(sorted(missing))}"
            )

        # Replace every row's score with the reduced score of its video.
        df["score"] = df.groupby("video_key")["score"].transform(reducer)

        # The per-frame identifier is meaningless once scores are reduced.
        if "frame_id" in df.columns:
            df = df.drop(columns="frame_id")

        # Keep a single row (the first one encountered) per video_key.
        df = df.drop_duplicates(subset=["video_key"])

        # Back up only now, immediately before the original is overwritten.
        if backup:
            logger.info("Backing up %s", path)
            shutil.copy(path, path + ".bak")

        df.to_csv(path, index=False)