#!/usr/bin/env python
# -*- coding: utf-8 -*-
import contextlib
import logging
from itertools import cycle
import matplotlib
import matplotlib.pyplot as plt
import numpy
matplotlib.use("agg")
logger = logging.getLogger(__name__)
def _concave_hull(x, y, lx, ux, ly, uy):
"""Calculates a approximate (concave) hull from arc centers and sizes
Each ellipse is approximated as a number of discrete points distributed
over the ellipse border following an homogeneous angle distribution.
Parameters
----------
x : numpy.ndarray
1D array with x coordinates of ellipse centers
y : numpy.ndarray
1D array with y coordinates of ellipse centers
lx, ux, ly, uy : numpy.ndarray
1D array(s) with upper and lower widths and heights for your deformed
ellipse
Returns
-------
points : numpy.ndarray
2D array containing the ``(x, y)`` coordinates of the concave hull
encompassing all defined arcs.
"""
def _irregular_ellipse_points(_x, _y, _lx, _ux, _ly, _uy, steps=100):
"""Generates border points for an irregular ellipse
This functions distributes points according to a rotation angle rather
than uniformily with respect to a particular axis. The result is a
more homogeneous border representation for the ellipse.
"""
up = _uy - _y
down = _y - _ly
left = _x - _lx
right = _ux - _x
angles = numpy.arange(0, numpy.pi / 2, step=2 * numpy.pi / steps)
points = numpy.ndarray((0, 2))
# upper left part (90 -> 180 degrees)
px = 2 * left * numpy.cos(angles)
py = (up / left) * numpy.sqrt(numpy.square(2 * left) - numpy.square(px))
# order: x and y increase
points = numpy.vstack((points, numpy.array([_x - px, _y + py]).T))
# upper right part (0 -> 90 degrees)
px = 2 * right * numpy.cos(angles)
py = (up / right) * numpy.sqrt(
numpy.square(2 * right) - numpy.square(px)
)
# order: x increases and y decreases
points = numpy.vstack(
(points, numpy.flipud(numpy.array([_x + px, _y + py]).T))
)
# lower right part (180 -> 270 degrees)
px = 2 * right * numpy.cos(angles)
py = (down / right) * numpy.sqrt(
numpy.square(2 * right) - numpy.square(px)
)
# order: x increases and y decreases
points = numpy.vstack((points, numpy.array([_x + px, _y - py]).T))
# lower left part (180 -> 270 degrees)
px = 2 * left * numpy.cos(angles)
py = (down / left) * numpy.sqrt(
numpy.square(2 * left) - numpy.square(px)
)
# order: x decreases and y increases
points = numpy.vstack(
(points, numpy.flipud(numpy.array([_x - px, _y - py]).T))
)
return points
retval = numpy.ndarray((0, 2))
for (k, l, m, n, o, p) in zip(x, y, lx, ux, ly, uy):
retval = numpy.vstack(
(
retval,
[numpy.nan, numpy.nan],
_irregular_ellipse_points(k, l, m, n, o, p),
)
)
return retval
@contextlib.contextmanager
def _precision_recall_canvas(title=None):
"""Generates a canvas to draw precision-recall curves
Works like a context manager, yielding a figure and an axes set in which
the precision-recall curves should be added to. The figure already
contains F1-ISO lines and is preset to a 0-1 square region. Once the
context is finished, ``fig.tight_layout()`` is called.
Parameters
----------
title : :py:class:`str`, Optional
Optional title to add to this plot
Yields
------
figure : matplotlib.figure.Figure
The figure that should be finally returned to the user
axes : matplotlib.figure.Axes
An axis set where to precision-recall plots should be added to
"""
fig, axes1 = plt.subplots(1)
# Names and bounds
axes1.set_xlabel("Recall")
axes1.set_ylabel("Precision")
axes1.set_xlim([0.0, 1.0])
axes1.set_ylim([0.0, 1.0])
if title is not None:
axes1.set_title(title)
axes1.grid(linestyle="--", linewidth=1, color="gray", alpha=0.2)
axes2 = axes1.twinx()
# Annotates plot with F1-score iso-lines
f_scores = numpy.linspace(0.1, 0.9, num=9)
tick_locs = []
tick_labels = []
for f_score in f_scores:
x = numpy.linspace(0.01, 1)
y = f_score * x / (2 * x - f_score)
(l,) = plt.plot(x[y >= 0], y[y >= 0], color="green", alpha=0.1)
tick_locs.append(y[-1])
tick_labels.append("%.1f" % f_score)
axes2.tick_params(axis="y", which="both", pad=0, right=False, left=False)
axes2.set_ylabel("iso-F", color="green", alpha=0.3)
axes2.set_ylim([0.0, 1.0])
axes2.yaxis.set_label_coords(1.015, 0.97)
axes2.set_yticks(tick_locs) # notice these are invisible
for k in axes2.set_yticklabels(tick_labels):
k.set_color("green")
k.set_alpha(0.3)
k.set_size(8)
# we should see some of axes 1 axes
axes1.spines["right"].set_visible(False)
axes1.spines["top"].set_visible(False)
axes1.spines["left"].set_position(("data", -0.015))
axes1.spines["bottom"].set_position(("data", -0.015))
# we shouldn't see any of axes 2 axes
axes2.spines["right"].set_visible(False)
axes2.spines["top"].set_visible(False)
axes2.spines["left"].set_visible(False)
axes2.spines["bottom"].set_visible(False)
# yield execution, lets user draw precision-recall plots, and the legend
# before tighteneing the layout
yield fig, axes1
plt.tight_layout()
[docs]def precision_recall_f1iso(data, credible=True):
"""Creates a precision-recall plot with credible intervals
This function creates and returns a Matplotlib figure with a
precision-recall plot containing shaded credible intervals (on the
precision-recall measurements). The plot will be annotated with F1-score
iso-lines (in which the F1-score maintains the same value).
This function specially supports "second-annotator" entries by plotting a
line showing the comparison between the default annotator being analyzed
and a second "opinion". Second annotator dataframes contain a single entry
(threshold=0.5), given the nature of the binary map comparisons.
Parameters
----------
data : dict
A dictionary in which keys are strings defining plot labels and values
are dictionaries with two entries:
* ``df``: :py:class:`pandas.DataFrame`
A dataframe that is produced by our evaluator engine, indexed by
integer "thresholds", containing the following columns:
``threshold``, ``tp``, ``fp``, ``tn``, ``fn``, ``mean_precision``,
``mode_precision``, ``lower_precision``, ``upper_precision``,
``mean_recall``, ``mode_recall``, ``lower_recall``, ``upper_recall``,
``mean_specificity``, ``mode_specificity``, ``lower_specificity``,
``upper_specificity``, ``mean_accuracy``, ``mode_accuracy``,
``lower_accuracy``, ``upper_accuracy``, ``mean_jaccard``,
``mode_jaccard``, ``lower_jaccard``, ``upper_jaccard``,
``mean_f1_score``, ``mode_f1_score``, ``lower_f1_score``,
``upper_f1_score``, ``frequentist_precision``,
``frequentist_recall``, ``frequentist_specificity``,
``frequentist_accuracy``, ``frequentist_jaccard``,
``frequentist_f1_score``.
* ``threshold``: :py:class:`list`
A threshold to graph with a dot for each set. Specific
threshold values do not affect "second-annotator" dataframes.
credible : :py:class:`bool`, Optional
If set, draw credible intervals for each line, using ``upper_*`` and
``lower_*`` entries.
Returns
-------
figure : matplotlib.figure.Figure
A matplotlib figure you can save or display (uses an ``agg`` backend)
"""
lines = ["-", "--", "-.", ":"]
colors = [
"#1f77b4",
"#ff7f0e",
"#2ca02c",
"#d62728",
"#9467bd",
"#8c564b",
"#e377c2",
"#7f7f7f",
"#bcbd22",
"#17becf",
]
colorcycler = cycle(colors)
linecycler = cycle(lines)
with _precision_recall_canvas(title=None) as (fig, axes):
legend = []
for name, value in data.items():
df = value["df"]
threshold = value["threshold"]
# plots only from the point where recall reaches its maximum,
# otherwise, we don't see a curve...
max_recall = df.mean_recall.idxmax()
pi = df.mean_precision[max_recall:]
ri = df.mean_recall[max_recall:]
# valid = (pi + ri) > 0
# optimal point along the curve
bins = len(df)
index = int(round(bins * threshold))
index = min(index, len(df) - 1) # avoids out of range indexing
# plots Recall/Precision as threshold changes
label = f"{name} (F1={df.mean_f1_score[index]:.4f})"
color = next(colorcycler)
if len(df) == 1:
# plot black dot for F1-score at select threshold
(marker,) = axes.plot(
df.mean_recall[index],
df.mean_precision[index],
marker="*",
markersize=6,
color=color,
alpha=0.8,
linestyle="None",
)
(line,) = axes.plot(
df.mean_recall[index],
df.mean_precision[index],
linestyle="None",
color=color,
alpha=0.2,
)
legend.append(([marker, line], label))
else:
# line first, so marker gets on top
style = next(linecycler)
(line,) = axes.plot(
ri[pi > 0], pi[pi > 0], color=color, linestyle=style
)
(marker,) = axes.plot(
df.mean_recall[index],
df.mean_precision[index],
marker="o",
linestyle=style,
markersize=4,
color=color,
alpha=0.8,
)
legend.append(([marker, line], label))
if credible:
hull = _concave_hull(
df.mean_recall,
df.mean_precision,
df.lower_recall,
df.upper_recall,
df.lower_precision,
df.upper_precision,
)
p = plt.Polygon(
hull,
facecolor=color,
alpha=0.2,
edgecolor="none",
lw=0.2,
closed=True,
)
axes.add_patch(p)
legend[-1][0].append(p)
if len(label) > 1:
axes.legend(
[tuple(k[0]) for k in legend],
[k[1] for k in legend],
loc="lower left",
fancybox=True,
framealpha=0.7,
)
return fig
[docs]def loss_curve(df):
"""Creates a loss curve in a Matplotlib figure.
Parameters
----------
df : :py:class:`pandas.DataFrame`
A dataframe containing, at least, "epoch", "median-loss" and
"learning-rate" columns, that will be plotted.
Returns
-------
figure : matplotlib.figure.Figure
A figure, that may be saved or displayed
"""
ax1 = df.plot(x="epoch", y="median-loss", grid=True)
ax1.set_ylabel("Median Loss")
ax1.grid(linestyle="--", linewidth=1, color="gray", alpha=0.2)
ax2 = df["learning-rate"].plot(
secondary_y=True,
legend=True,
grid=True,
)
ax2.set_ylabel("Learning Rate")
ax1.set_xlabel("Epoch")
plt.tight_layout()
fig = ax1.get_figure()
return fig