Source code for bob.ip.binseg.utils.plot

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import contextlib
import logging

from itertools import cycle

import matplotlib
import matplotlib.pyplot as plt
import numpy

# Selects matplotlib's non-interactive "agg" backend for every figure created
# through this module.
matplotlib.use("agg")
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def _concave_hull(x, y, lx, ux, ly, uy):
    """Calculates an approximate (concave) hull from arc centers and sizes

    Each ellipse is approximated as a number of discrete points distributed
    over the ellipse border following a homogeneous angle distribution.  The
    outline of every ellipse is preceded by a ``(nan, nan)`` row, so that
    consecutive outlines are kept apart from each other.


    Parameters
    ----------

    x : numpy.ndarray
        1D array with x coordinates of ellipse centers

    y : numpy.ndarray
        1D array with y coordinates of ellipse centers

    lx, ux, ly, uy : numpy.ndarray
        1D array(s) with lower and upper bounds, along x and y, for your
        deformed ellipse.  Bounds are expected to satisfy ``lx <= x <= ux``
        and ``ly <= y <= uy``.  A bound that coincides with the center (zero
        extent) is supported and yields a flat side.


    Returns
    -------

    points : numpy.ndarray
        2D array containing the ``(x, y)`` coordinates of the concave hull
        encompassing all defined arcs.  It has shape ``(0, 2)`` if the inputs
        are empty.

    """

    def _irregular_ellipse_points(_x, _y, _lx, _ux, _ly, _uy, steps=100):
        """Generates border points for an irregular ellipse

        This function distributes points according to a rotation angle rather
        than uniformly with respect to a particular axis.  The result is a
        more homogeneous border representation for the ellipse.  Points are
        returned clockwise (in a y-up reference), starting at the left-most
        point of the border.
        """
        up = _uy - _y
        down = _y - _ly
        left = _x - _lx
        right = _ux - _x

        # NOTE(review): semi-axes are twice the center-to-bound distances;
        # kept from the original implementation - confirm this is intended.
        angles = numpy.arange(0, numpy.pi / 2, step=2 * numpy.pi / steps)
        cos = numpy.cos(angles)
        sin = numpy.sin(angles)

        # The vertical offsets are written directly as ``2 * extent * sin``.
        # This equals ``(extent / width) * sqrt((2 * width)**2 - px**2)`` for
        # positive widths, but involves no division, so zero-width sides do
        # not generate NaN coordinates.

        # upper left part (90 -> 180 degrees), order: x and y increase
        upper_left = numpy.column_stack(
            (_x - 2 * left * cos, _y + 2 * up * sin)
        )

        # upper right part (0 -> 90 degrees), order (once flipped): x
        # increases and y decreases
        upper_right = numpy.flipud(
            numpy.column_stack((_x + 2 * right * cos, _y + 2 * up * sin))
        )

        # lower right part (270 -> 360 degrees), order: x and y decrease
        lower_right = numpy.column_stack(
            (_x + 2 * right * cos, _y - 2 * down * sin)
        )

        # lower left part (180 -> 270 degrees), order (once flipped): x
        # decreases and y increases
        lower_left = numpy.flipud(
            numpy.column_stack((_x - 2 * left * cos, _y - 2 * down * sin))
        )

        return numpy.vstack((upper_left, upper_right, lower_right, lower_left))

    # a NaN row is inserted before each outline to separate it from the
    # previous one
    separator = numpy.full((1, 2), numpy.nan)

    # collects all pieces first, then stacks them only once
    pieces = []
    for bounds in zip(x, y, lx, ux, ly, uy):
        pieces.append(separator)
        pieces.append(_irregular_ellipse_points(*bounds))

    if not pieces:
        return numpy.empty((0, 2))

    return numpy.vstack(pieces)


@contextlib.contextmanager
def _precision_recall_canvas(title=None):
    """Generates a canvas to draw precision-recall curves

    Works like a context manager, yielding a figure and an axes set to which
    the precision-recall curves should be added.  The figure already
    contains F1-ISO lines and is preset to a 0-1 square region.  Once the
    context is finished (without errors), ``fig.tight_layout()`` is called.


    Parameters
    ----------

    title : :py:class:`str`, Optional
        Optional title to add to this plot


    Yields
    ------

    figure : matplotlib.figure.Figure
        The figure that should be finally returned to the user

    axes : matplotlib.axes.Axes
        An axes set to which precision-recall plots should be added

    """

    fig, axes1 = plt.subplots(1)

    # Names and bounds
    axes1.set_xlabel("Recall")
    axes1.set_ylabel("Precision")
    axes1.set_xlim([0.0, 1.0])
    axes1.set_ylim([0.0, 1.0])

    if title is not None:
        axes1.set_title(title)

    axes1.grid(linestyle="--", linewidth=1, color="gray", alpha=0.2)
    axes2 = axes1.twinx()

    # Annotates plot with F1-score iso-lines.  Lines are drawn explicitly on
    # the twin axes, instead of relying on pyplot's "current axes" state.
    f_scores = numpy.linspace(0.1, 0.9, num=9)
    recall = numpy.linspace(0.01, 1)  # the same abscissa for all iso-lines
    tick_locs = []
    tick_labels = []
    for f_score in f_scores:
        precision = f_score * recall / (2 * recall - f_score)
        keep = precision >= 0  # drops the negative branch of the hyperbola
        axes2.plot(recall[keep], precision[keep], color="green", alpha=0.1)
        # the iso-line value at recall == 1 positions its label on the right
        tick_locs.append(precision[-1])
        tick_labels.append("%.1f" % f_score)
    axes2.tick_params(axis="y", which="both", pad=0, right=False, left=False)
    axes2.set_ylabel("iso-F", color="green", alpha=0.3)
    axes2.set_ylim([0.0, 1.0])
    axes2.yaxis.set_label_coords(1.015, 0.97)
    axes2.set_yticks(tick_locs)  # notice these are invisible
    for k in axes2.set_yticklabels(tick_labels):
        k.set_color("green")
        k.set_alpha(0.3)
        k.set_size(8)

    # we should see some of axes 1 axes
    axes1.spines["right"].set_visible(False)
    axes1.spines["top"].set_visible(False)
    axes1.spines["left"].set_position(("data", -0.015))
    axes1.spines["bottom"].set_position(("data", -0.015))

    # we shouldn't see any of axes 2 axes
    axes2.spines["right"].set_visible(False)
    axes2.spines["top"].set_visible(False)
    axes2.spines["left"].set_visible(False)
    axes2.spines["bottom"].set_visible(False)

    # yield execution, lets user draw precision-recall plots, and the legend
    # before tightening the layout
    yield fig, axes1

    # tightens *this* figure, even if the user created or selected another
    # one while the context was active
    fig.tight_layout()


def precision_recall_f1iso(data, credible=True):
    """Creates a precision-recall plot with credible intervals

    This function creates and returns a Matplotlib figure with a
    precision-recall plot containing shaded credible intervals (on the
    precision-recall measurements).  The plot will be annotated with F1-score
    iso-lines (in which the F1-score maintains the same value).

    This function specially supports "second-annotator" entries by plotting a
    single marker (instead of a line) showing the comparison between the
    default annotator being analyzed and a second "opinion".  Second
    annotator dataframes contain a single entry (threshold=0.5), given the
    nature of the binary map comparisons.


    Parameters
    ----------

    data : dict
        A dictionary in which keys are strings defining plot labels and values
        are dictionaries with two entries:

        * ``df``: :py:class:`pandas.DataFrame`

          A dataframe that is produced by our evaluator engine, indexed by
          integer "thresholds", containing the following columns:
          ``threshold``, ``tp``, ``fp``, ``tn``, ``fn``, ``mean_precision``,
          ``mode_precision``, ``lower_precision``, ``upper_precision``,
          ``mean_recall``, ``mode_recall``, ``lower_recall``,
          ``upper_recall``, ``mean_specificity``, ``mode_specificity``,
          ``lower_specificity``, ``upper_specificity``, ``mean_accuracy``,
          ``mode_accuracy``, ``lower_accuracy``, ``upper_accuracy``,
          ``mean_jaccard``, ``mode_jaccard``, ``lower_jaccard``,
          ``upper_jaccard``, ``mean_f1_score``, ``mode_f1_score``,
          ``lower_f1_score``, ``upper_f1_score``, ``frequentist_precision``,
          ``frequentist_recall``, ``frequentist_specificity``,
          ``frequentist_accuracy``, ``frequentist_jaccard``,
          ``frequentist_f1_score``.

        * ``threshold``: a number

          The threshold to graph with a dot for each set.  It is multiplied
          by the number of rows in ``df`` (and rounded) to find the entry to
          highlight - presumably a value in the range [0, 1].  Specific
          threshold values do not affect "second-annotator" dataframes.

    credible : :py:class:`bool`, Optional
        If set, draw credible intervals for each line, using ``upper_*`` and
        ``lower_*`` entries.


    Returns
    -------

    figure : matplotlib.figure.Figure
        A matplotlib figure you can save or display (uses an ``agg`` backend)

    """

    lines = ["-", "--", "-.", ":"]
    colors = [
        "#1f77b4",
        "#ff7f0e",
        "#2ca02c",
        "#d62728",
        "#9467bd",
        "#8c564b",
        "#e377c2",
        "#7f7f7f",
        "#bcbd22",
        "#17becf",
    ]
    colorcycler = cycle(colors)
    linecycler = cycle(lines)

    with _precision_recall_canvas(title=None) as (fig, axes):

        # list of ([artists], label) pairs, one per entry in ``data``
        legend = []

        for name, value in data.items():

            df = value["df"]
            threshold = value["threshold"]

            # plots only from the point where recall reaches its maximum,
            # otherwise, we don't see a curve...
            max_recall = df.mean_recall.idxmax()
            pi = df.mean_precision[max_recall:]
            ri = df.mean_recall[max_recall:]

            # optimal point along the curve
            bins = len(df)
            index = int(round(bins * threshold))
            index = min(index, bins - 1)  # avoids out of range indexing

            # plots Recall/Precision as threshold changes
            label = f"{name} (F1={df.mean_f1_score[index]:.4f})"
            color = next(colorcycler)

            if bins == 1:
                # single-entry dataframe: plots a star for the F1-score at
                # the selected threshold, using the color of this entry
                (marker,) = axes.plot(
                    df.mean_recall[index],
                    df.mean_precision[index],
                    marker="*",
                    markersize=6,
                    color=color,
                    alpha=0.8,
                    linestyle="None",
                )
                (line,) = axes.plot(
                    df.mean_recall[index],
                    df.mean_precision[index],
                    linestyle="None",
                    color=color,
                    alpha=0.2,
                )
                legend.append(([marker, line], label))
            else:
                # line first, so marker gets on top
                style = next(linecycler)
                (line,) = axes.plot(
                    ri[pi > 0], pi[pi > 0], color=color, linestyle=style
                )
                (marker,) = axes.plot(
                    df.mean_recall[index],
                    df.mean_precision[index],
                    marker="o",
                    linestyle=style,
                    markersize=4,
                    color=color,
                    alpha=0.8,
                )
                legend.append(([marker, line], label))

            if credible:
                # shaded region built from the lower/upper bounds of every
                # (recall, precision) measurement of this entry
                hull = _concave_hull(
                    df.mean_recall,
                    df.mean_precision,
                    df.lower_recall,
                    df.upper_recall,
                    df.lower_precision,
                    df.upper_precision,
                )
                p = plt.Polygon(
                    hull,
                    facecolor=color,
                    alpha=0.2,
                    edgecolor="none",
                    lw=0.2,
                    closed=True,
                )
                axes.add_patch(p)
                legend[-1][0].append(p)

        # only draws a legend if something was plotted: this also avoids
        # referencing loop variables that do not exist when ``data`` is empty
        if legend:
            axes.legend(
                [tuple(k[0]) for k in legend],
                [k[1] for k in legend],
                loc="lower left",
                fancybox=True,
                framealpha=0.7,
            )

    return fig
def loss_curve(df):
    """Creates a loss curve in a Matplotlib figure.

    The median loss is drawn on the left vertical axis and the learning rate
    on a secondary (right) vertical axis.  Both are plotted against the
    ``epoch`` column, on the same figure.


    Parameters
    ----------

    df : :py:class:`pandas.DataFrame`
        A dataframe containing, at least, "epoch", "median-loss" and
        "learning-rate" columns, that will be plotted.


    Returns
    -------

    figure : matplotlib.figure.Figure
        A figure, that may be saved or displayed

    """

    ax1 = df.plot(x="epoch", y="median-loss", grid=True)
    ax1.set_ylabel("Median Loss")
    ax1.grid(linestyle="--", linewidth=1, color="gray", alpha=0.2)

    # The learning rate is plotted explicitly on ``ax1`` (instead of relying
    # on pyplot's "current axes") and against the "epoch" column (instead of
    # the dataframe index), so both curves share the same abscissa.
    ax2 = df.plot(
        x="epoch",
        y="learning-rate",
        ax=ax1,
        secondary_y=True,
        legend=True,
        grid=True,
    )
    ax2.set_ylabel("Learning Rate")
    ax1.set_xlabel("Epoch")

    fig = ax1.get_figure()
    # tightens the figure that is returned, not pyplot's "current" figure
    fig.tight_layout()
    return fig