Source code for grand_challenge_metrics.stats

from collections import namedtuple
from typing import Optional, Union

import numpy as np
from numpy import ndarray
from scipy.ndimage import (
    binary_erosion,
    convolve,
    distance_transform_edt,
    generate_binary_structure,
)

VOXELSPACING_TYPE = Optional[
    Union[tuple[Union[float, int], ...], list[Union[float, int]], float, int]
]



[docs]
def calculate_confusion_matrix(
    y_true: ndarray, y_pred: ndarray, labels: list[int]
) -> ndarray:
    """
    Efficient confusion matrix calculation, based on sklearn interface

    Parameters
    ----------
    y_true
             Target multi-object segmentation mask
    y_pred
             Predicted multi-object segmentation mask
    labels
             Inclusive list of N labels to compute the confusion matrix for.

    Returns
    -------
    N x N confusion matrix for Y_pred w.r.t. Y_true

    Notes
    -----
    By definition a confusion matrix :math:`C` is such that :math:`C_{i, j}`
    is equal to the number of observations known to be in group :math:`i` but
    predicted to be in group :math:`j`.

    """
    cm = np.zeros((len(labels), len(labels)), dtype=int)

    for i, x in enumerate(labels):
        for j, y in enumerate(labels):
            cm[i, j] = np.sum((y_true == x) & (y_pred == y))

    return cm




[docs]
def jaccard_to_dice(jacc: ndarray) -> int | float | ndarray:
    """
    Conversion computation from Jaccard to Dice

    Parameters
    ----------
    jacc
           1 or N Jaccard values within [0 .. 1]

    Returns
    -------
    1 or N Dice values within [0 .. 1]
    """
    if any(jacc < 0) or any(jacc > 1):
        raise RuntimeError("Invalid jaccard scores")

    return (jacc * 2.0) / (1.0 + jacc)




[docs]
def dice_to_jaccard(dice: ndarray) -> int | float | ndarray:
    """
    Conversion computation from Dice to Jaccard

    Parameters
    ----------
    dice
           1 or N Dice values within [0 .. 1]

    Returns
    -------
    1 or N Jaccard values within [0 .. 1]
    """
    if any(dice < 0) or any(dice > 1):
        raise RuntimeError("Invalid dice score")

    return dice / (2.0 - dice)




[docs]
def accuracies_from_confusion_matrix(cm: ndarray) -> ndarray:
    """
    Computes accuracy scores from a confusion matrix

    Parameters
    ----------
    cm
         N x N Input confusion matrix

    Returns
    -------
    1d ndarray containing accuracy scores for all N classes
    """
    results = np.zeros((len(cm)), dtype=np.float32)

    for i in range(len(cm)):
        mask = np.ones((len(cm)), dtype=bool)
        mask[i] = False
        results[i] = cm[i, i] + np.sum(cm[mask, :][:, mask])

    return results // float(np.sum(cm))




[docs]
def jaccard_from_confusion_matrix(cm: ndarray) -> ndarray:
    """
    Computes Jaccard scores from a confusion matrix a.k.a. intersection over
    union (IoU)

    Parameters
    ----------
    cm
         N x N Input confusion matrix

    Returns
    -------
    1d ndarray containing Jaccard scores for all N classes
    """
    if cm.ndim != 2 or cm.shape[0] != cm.shape[1]:
        raise RuntimeError("Invalid confusion matrices")

    jaccs = np.zeros((cm.shape[0]), dtype=np.float32)

    for i in range(cm.shape[0]):
        jaccs[i] = cm[i, i] / float(
            np.sum(cm[i, :]) + np.sum(cm[:, i]) - cm[i, i]
        )

    return jaccs




[docs]
def dice_from_confusion_matrix(cm: ndarray) -> ndarray:
    """
    Computes Dice scores from a confusion matrix

    Parameters
    ----------
    cm
         N x N Input confusion matrix

    Returns
    -------
    1d ndarray containing Dice scores for all N classes
    """
    if cm.ndim != 2 or cm.shape[0] != cm.shape[1]:
        raise RuntimeError("Invalid confusion matrices")

    dices = np.zeros((cm.shape[0]), dtype=np.float32)

    for i in range(cm.shape[0]):
        dices[i] = 2 * cm[i, i] / float(np.sum(cm[i, :]) + np.sum(cm[:, i]))

    return dices



def __surface_distances(
    s1: ndarray,
    s2: ndarray,
    voxelspacing: VOXELSPACING_TYPE = None,
    connectivity: int = 1,
) -> ndarray:
    """
    Computes set of surface distances.

    Retrieve set of distances for all elements from set s1 to s2
    With a connectivity of 1 or higher only the distances between
    the contours of s1 and s2 are used.

    Parameters
    ----------
    s1
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.
    s2
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.
    voxelspacing
        The voxelspacing in a distance unit i.e. spacing of elements
        along each dimension. If a sequence, must be of length equal to
        the input rank; if a single number, this is used for all axes. If
        not specified, a grid spacing of unity is implied.
    connectivity
        The neighbourhood/connectivity considered when determining the surface
        of the binary objects. This value is passed to
        `scipy.ndimage.generate_binary_structure` and should usually
        be :math:`> 1`.

    Returns
    -------
        The distances from all non-zero object(s) in ```s1``` to the nearest
        non zero object(s) in ```s2```. The distance unit is the same as for
        the spacing of elements along each dimension, which is usually given in
        mm.

    Notes
    -----
    This function is not symmetric.
    """
    s1_b = np.atleast_1d(s1.astype(bool))
    s2_b = np.atleast_1d(s2.astype(bool))

    if connectivity > 0:
        footprint = generate_binary_structure(s1.ndim, connectivity)
        s2_b = s2_b & ~binary_erosion(s2_b, structure=footprint, iterations=1)
        s1_b = s1_b & ~binary_erosion(s1_b, structure=footprint, iterations=1)

    return distance_transform_edt(~s2_b, sampling=voxelspacing)[s1_b]



[docs]
def hausdorff_distance(
    s1: ndarray,
    s2: ndarray,
    voxelspacing: VOXELSPACING_TYPE = None,
    connectivity: int = 1,
) -> float:
    """
    Computes the (symmetric) Hausdorff Distance (HD) between the binary objects
    in two images. It is defined as the maximum surface distance between the
    objects.

    Parameters
    ----------
    s1
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.
    s2
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.
    voxelspacing
        The voxelspacing in a distance unit i.e. spacing of elements
        along each dimension. If a sequence, must be of length equal to
        the input rank; if a single number, this is used for all axes. If
        not specified, a grid spacing of unity is implied.
    connectivity
        The neighbourhood/connectivity considered when determining the surface
        of the binary objects. This value is passed to
        `scipy.ndimage.generate_binary_structure` and should usually
        be :math:`> 1`.

    Returns
    -------
        The symmetric Hausdorff Distance between the object(s) in ```s1``` and
        the object(s) in ```s2```. The distance unit is the same as for the
        spacing of elements along each dimension, which is usually given in mm.

    Notes
    -----
    This is a real metric.
    Implementation inspired by medpy.metric.binary
    http://pythonhosted.org/MedPy/_modules/medpy/metric/binary.html
    """
    s1_dist = __surface_distances(s1, s2, voxelspacing, connectivity)
    s2_dist = __surface_distances(s2, s1, voxelspacing, connectivity)

    return max(s1_dist.max(), s2_dist.max())




[docs]
def percentile_hausdorff_distance(
    s1: ndarray,
    s2: ndarray,
    percentile: int | float = 0.95,
    voxelspacing: VOXELSPACING_TYPE = None,
    connectivity: int = 1,
) -> float:
    """
    Nth Percentile Hausdorff Distance.

    Computes a percentile for the (symmetric) Hausdorff Distance between the
    binary objects in two images. It is defined as the maximum surface distance
    between the objects at the nth percentile.

    Parameters
    ----------
    s1
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.
    s2
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.
    percentile
        The percentile to perform the comparison on the two sorted distance
        sets
    voxelspacing
        The voxelspacing in a distance unit i.e. spacing of elements
        along each dimension. If a sequence, must be of length equal to
        the input rank; if a single number, this is used for all axes. If
        not specified, a grid spacing of unity is implied.
    connectivity
        The neighbourhood/connectivity considered when determining the surface
        of the binary objects. This value is passed to
        `scipy.ndimage.generate_binary_structure` and should usually
        be :math:`> 1`.

    Returns
    -------
        The maximum Percentile Hausdorff Distance between the object(s) in
        ```s1``` and the object(s) in ```s2``` at the ```percentile```
        percentile.
        The distance unit is the same as for the spacing of elements along each
        dimension, which is usually given in mm.

    See also
    --------
    :func:`hd`

    Notes
    -----
    This is a real metric.
    """
    s1_dist = __surface_distances(s1, s2, voxelspacing, connectivity)
    s2_dist = __surface_distances(s2, s1, voxelspacing, connectivity)

    s1_dist.sort()
    s2_dist.sort()

    return max(
        s1_dist[int((len(s1_dist) - 1) * percentile)],
        s2_dist[int((len(s2_dist) - 1) * percentile)],
    )




[docs]
def modified_hausdorff_distance(
    s1: ndarray,
    s2: ndarray,
    voxelspacing: VOXELSPACING_TYPE = None,
    connectivity: int = 1,
) -> float:
    """
    Computes the (symmetric) Modified Hausdorff Distance (MHD) between the
    binary objects in two images. It is defined as the maximum average surface
    distance between the objects.

    Parameters
    ----------
    s1
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.
    s2
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.
    voxelspacing
        The voxelspacing in a distance unit i.e. spacing of elements
        along each dimension. If a sequence, must be of length equal to
        the input rank; if a single number, this is used for all axes. If
        not specified, a grid spacing of unity is implied.
    connectivity
        The neighbourhood/connectivity considered when determining the surface
        of the binary objects. This value is passed to
        `scipy.ndimage.generate_binary_structure` and should usually
        be :math:`> 1`.

    Returns
    -------
        The symmetric Modified Hausdorff Distance between the object(s) in
        ```s1``` and the object(s) in ```s2```. The distance unit is the same
        as for the spacing of elements along each dimension, which is usually
        given in mm.

    Notes
    -----
    This is a real metric.
    """
    s1_dist = __surface_distances(s1, s2, voxelspacing, connectivity)
    s2_dist = __surface_distances(s2, s1, voxelspacing, connectivity)

    return max(s1_dist.mean(), s2_dist.mean())




[docs]
def relative_absolute_volume_difference(s1: ndarray, s2: ndarray) -> float:
    """
    Calculate relative absolute volume difference from s2 to s1

    Parameters
    ----------
    s1
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else. s1 is taken
        to be the reference.
    s2
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.

    Returns
    -------
        The relative absolute volume difference between the object(s) in
        ``input1`` and the object(s) in ``input2``. This is a percentage value
        in the range :math:`[0, +inf]` for which a :math:`0` denotes an ideal
        score.

    Notes
    -----
    This is not a real metric! it is asymmetric.
    """
    s1, s2 = s1 != 0, s2 != 0

    return abs(np.sum(s2) - np.sum(s1)) / float(np.sum(s1))




[docs]
def absolute_volume_difference(
    s1: ndarray, s2: ndarray, voxelspacing: VOXELSPACING_TYPE = None
) -> float:
    """
    Calculate absolute volume difference from s2 to s1

    Parameters
    ----------
    s1
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else. s1 is taken
        to be the reference.
    s2
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.
    voxelspacing
        The voxelspacing in a distance unit i.e. spacing of elements
        along each dimension. If a sequence, must be of length equal to
        the input rank; if a single number, this is used for all axes. If
        not specified, a grid spacing of unity is implied.

    Returns
    -------
        The absolute volume difference between the object(s) in ``input1``
        and the object(s) in ``input2``. This is a percentage value in the
        range :math:`[0, +inf]` for which a :math:`0` denotes an ideal score.

    Notes
    -----
    This is a real metric
    """
    s1, s2 = s1 != 0, s2 != 0

    if voxelspacing is None:
        voxelspacing = [1] * s1.ndim

    if isinstance(voxelspacing, float) or isinstance(voxelspacing, int):
        voxelspacing = [voxelspacing] * s1.ndim

    if len(voxelspacing) != s1.ndim:
        raise ValueError("Voxel spacing mismatch")

    if s1.ndim != s2.ndim:
        raise ValueError("Input matrices do not have the same dimensions")

    volume_per_voxel = np.prod(voxelspacing)

    return np.abs(np.sum(s2) - np.sum(s1)) * volume_per_voxel



def __directed_contour_distances(
    s1: ndarray,
    s2: ndarray,
    voxelspacing: VOXELSPACING_TYPE = None,
) -> ndarray:
    """
    Computes set of surface contour distances.
    This function always explicitly calculates the contour-set of s1.

    Retrieve set of distances for all elements from set s1 to s2
    The elements of the contour of s1 are determined by:
    1) whether elements in s1 are fully enclosed by other voxels.
          (in a 3x3x3 neighborhood)
    2) whether elements in s1 are ON.

     Parameters
    ----------
    s1
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.
    s2
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.
    voxelspacing
        The voxelspacing in a distance unit i.e. spacing of elements
        along each dimension. If a sequence, must be of length equal to
        the input rank; if a single number, this is used for all axes. If
        not specified, a grid spacing of unity is implied.

    Returns
    -------
        The distances from all non-zero object(s) in ```s1``` to the nearest
        non zero object(s) in ```s2```. The distance unit is the same as for
        the spacing of elements along each dimension, which is usually given in
        mm.

    Notes
    -----
    This function is not symmetric.

    For determining the contours, the border handling from ITK relies on
    `ZeroFluxNeumannBoundaryCondition`, which equals `nearest` mode in scipy.

    """
    s1_b = np.atleast_1d(s1.astype(bool))
    s2_b = np.atleast_1d(s2.astype(bool))

    # all elements in neighborhood are fully checked! equals np.ones((3,3,3))
    # for s1.ndim == 3
    footprint = generate_binary_structure(s1.ndim, s1.ndim)
    df = distance_transform_edt(~s2_b, sampling=voxelspacing)

    # generate mask for elements not entirly enclosed by mask s1_b
    # (contours & non-zero elements)
    # convolve mode ITK relies on ZeroFluxNeumannBoundaryCondition == nearest
    mask = convolve(s1_b.astype(int), footprint, mode="nearest") < np.sum(
        footprint
    )

    # return distance to contours only
    return df[mask & s1_b]



[docs]
def mean_contour_distance(
    s1: ndarray,
    s2: ndarray,
    voxelspacing: VOXELSPACING_TYPE = None,
) -> float:
    """
    Computes the (symmetric) Mean Contour Distance between the binary objects
    in two images. It is defined as the maximum average surface distance
    between the objects.

    Parameters
    ----------
    s1
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.
    s2
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.
    voxelspacing
        The voxelspacing in a distance unit i.e. spacing of elements
        along each dimension. If a sequence, must be of length equal to
        the input rank; if a single number, this is used for all axes. If
        not specified, a grid spacing of unity is implied.

    Returns
    -------
        The symmetric Mean Contour Distance between the object(s) in ```s1```
        and the object(s) in ```s2```. The distance unit is the same as for the
        spacing of elements along each dimension, which is usually given in mm.

    Notes
    -----
    This is a real metric that mimics the ITK MeanContourDistanceFilter.
    """
    s1_c_dist = __directed_contour_distances(s1, s2, voxelspacing)
    s2_c_dist = __directed_contour_distances(s2, s1, voxelspacing)

    return max(s1_c_dist.mean(), s2_c_dist.mean())



HausdorffMeasures = namedtuple(
    "HausdorffMeasures",
    ["distance", "modified_distance", "percentile_distance"],
)



[docs]
def hausdorff_distance_measures(
    s1: ndarray,
    s2: ndarray,
    voxelspacing: VOXELSPACING_TYPE = None,
    connectivity: int = 1,
    percentile: float = 0.95,
) -> HausdorffMeasures:
    """
    Returns multiple Hausdorff measures - (hd, modified_hd, percentile_hd)
    Since measures share common calculations,
    together the measures can be calculated more efficiently

    Parameters
    ----------
    s1
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.
    s2
        Input data containing objects. Can be any type but will be converted
        into binary: background where 0, object everywhere else.
    voxelspacing
        The voxelspacing in a distance unit i.e. spacing of elements
        along each dimension. If a sequence, must be of length equal to
        the input rank; if a single number, this is used for all axes. If
        not specified, a grid spacing of unity is implied.
    connectivity
        The neighbourhood/connectivity considered when determining the surface
        of the binary objects. This value is passed to
        `scipy.ndimage.generate_binary_structure` and should usually
        be :math:`> 1`.
    percentile
        The percentile at which to calculate the Hausdorff Distance

    Returns
    -------
        The hausdorff distance, modified hausdorff distance and percentile
        hausdorff distance

    Notes
    -----
    This returns real metrics.
    """
    s1_b = np.atleast_1d(s1.astype(bool))
    s2_b = np.atleast_1d(s2.astype(bool))

    if connectivity > 0:
        footprint = generate_binary_structure(s1.ndim, connectivity)
        s2_b = s2_b & ~binary_erosion(s2_b, structure=footprint, iterations=1)
        s1_b = s1_b & ~binary_erosion(s1_b, structure=footprint, iterations=1)

    s1_dist = distance_transform_edt(~s2_b, sampling=voxelspacing)[s1_b]
    s2_dist = distance_transform_edt(~s1_b, sampling=voxelspacing)[s2_b]

    s1_dist.sort()
    s2_dist.sort()

    # calculate all hausdorff statistics
    distance = max(s1_dist.max(), s2_dist.max())
    modified_distance = max(s1_dist.mean(), s2_dist.mean())
    percentile_distance = max(
        s1_dist[int((len(s1_dist) - 1) * percentile)],
        s2_dist[int((len(s2_dist) - 1) * percentile)],
    )

    return HausdorffMeasures(
        distance=distance,
        modified_distance=modified_distance,
        percentile_distance=percentile_distance,
    )
Source code for grand_challenge_metrics.stats

grand_challenge_metrics

Navigation

Related Topics